Tree - rpms/dpdk - CentOS Git server

rpms / dpdk

Blame SOURCES/0001-vhost-improve-dirty-pages-logging-performance.patch

Blob History Raw

		c7ffa4	`From 4d8b1e6aa5d7ecfc1d2ee606b4bd838b4f1ac9d2 Mon Sep 17 00:00:00 2001`
		c7ffa4	`From: Maxime Coquelin <maxime.coquelin@redhat.com>`
		c7ffa4	`Date: Thu, 17 May 2018 13:44:47 +0200`
		c7ffa4	`Subject: [PATCH] vhost: improve dirty pages logging performance`
		c7ffa4
		c7ffa4	`[ upstream commit c16915b8710911a75f0fbdb1aa5243f4cdfaf26a ]`
		c7ffa4
		c7ffa4	`This patch caches all dirty pages logging until the used ring index`
		c7ffa4	`is updated.`
		c7ffa4
		c7ffa4	`The goal of this optimization is to fix a performance regression`
		c7ffa4	`introduced when the vhost library started to use atomic operations`
		c7ffa4	`to set bits in the shared dirty log map. While that fix was valid,`
		c7ffa4	`as the previous implementation wasn't safe against concurrent`
		c7ffa4	`accesses, it induced contention.`
		c7ffa4
		c7ffa4	`With this patch, during migration, we have:`
		c7ffa4	`1. Fewer atomic operations, as only a single atomic OR operation`
		c7ffa4	`is needed per 32 or 64 (depending on CPU) pages.`
		c7ffa4	`2. Fewer atomic operations, as during a burst the same page will`
		c7ffa4	`be marked dirty only once.`
		c7ffa4	`3. Fewer write memory barriers.`
		c7ffa4
		c7ffa4	`Fixes: 897f13a1f726 ("vhost: make page logging atomic")`
		c7ffa4	`Cc: stable@dpdk.org`
		c7ffa4
		c7ffa4	`Suggested-by: Michael S. Tsirkin <mst@redhat.com>`
		c7ffa4	`Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>`
		c7ffa4	`Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>`
		c7ffa4	`---`
		c7ffa4	`lib/librte_vhost/vhost.h | 119 +++++++++++++++++++++++++++++++++++++++++-`
		c7ffa4	`lib/librte_vhost/virtio_net.c | 29 ++++++----`
		c7ffa4	`2 files changed, 137 insertions(+), 11 deletions(-)`
		c7ffa4
		c7ffa4	`diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h`
		c7ffa4	`index 16d6b8913..42c6a3a75 100644`
		c7ffa4	`--- a/lib/librte_vhost/vhost.h`
		c7ffa4	`+++ b/lib/librte_vhost/vhost.h`
		c7ffa4	`@@ -59,6 +59,8 @@`
		c7ffa4
		c7ffa4	`#define BUF_VECTOR_MAX 256`
		c7ffa4
		c7ffa4	`+#define VHOST_LOG_CACHE_NR 32`
		c7ffa4	`+`
		c7ffa4	`/**`
		c7ffa4	`* Structure contains buffer address, length and descriptor index`
		c7ffa4	`* from vring to do scatter RX.`
		c7ffa4	`@@ -92,6 +94,14 @@ struct batch_copy_elem {`
		c7ffa4	`uint64_t log_addr;`
		c7ffa4	`};`
		c7ffa4
		c7ffa4	`+/*`
		c7ffa4	`+ * Structure that contains the info for batched dirty logging.`
		c7ffa4	`+ */`
		c7ffa4	`+struct log_cache_entry {`
		c7ffa4	`+ uint32_t offset;`
		c7ffa4	`+ unsigned long val;`
		c7ffa4	`+};`
		c7ffa4	`+`
		c7ffa4	`/**`
		c7ffa4	`* Structure contains variables relevant to RX/TX virtqueues.`
		c7ffa4	`*/`
		c7ffa4	`@@ -133,6 +143,9 @@ struct vhost_virtqueue {`
		c7ffa4	`struct batch_copy_elem *batch_copy_elems;`
		c7ffa4	`uint16_t batch_copy_nb_elems;`
		c7ffa4
		c7ffa4	`+ struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];`
		c7ffa4	`+ uint16_t log_cache_nb_elem;`
		c7ffa4	`+`
		c7ffa4	`rte_rwlock_t iotlb_lock;`
		c7ffa4	`rte_rwlock_t iotlb_pending_lock;`
		c7ffa4	`struct rte_mempool *iotlb_pool;`
		c7ffa4	`@@ -266,7 +279,15 @@ struct virtio_net {`
		c7ffa4	`static __rte_always_inline void`
		c7ffa4	`vhost_set_bit(unsigned int nr, volatile uint8_t *addr)`
		c7ffa4	`{`
		c7ffa4	`- __sync_fetch_and_or_8(addr, (1U << nr));`
		c7ffa4	`+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)`
		c7ffa4	`+ /*`
		c7ffa4	`+ * __sync_ built-ins are deprecated, but __atomic_ ones`
		c7ffa4	`+ * are sub-optimized in older GCC versions.`
		c7ffa4	`+ */`
		c7ffa4	`+ __sync_fetch_and_or_1(addr, (1U << nr));`
		c7ffa4	`+#else`
		c7ffa4	`+ __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);`
		c7ffa4	`+#endif`
		c7ffa4	`}`
		c7ffa4
		c7ffa4	`static __rte_always_inline void`
		c7ffa4	`@@ -297,6 +318,102 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)`
		c7ffa4	`}`
		c7ffa4	`}`
		c7ffa4
		c7ffa4	`+static __rte_always_inline void`
		c7ffa4	`+vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)`
		c7ffa4	`+{`
		c7ffa4	`+ unsigned long *log_base;`
		c7ffa4	`+ int i;`
		c7ffa4	`+`
		c7ffa4	`+ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||`
		c7ffa4	`+ !dev->log_base))`
		c7ffa4	`+ return;`
		c7ffa4	`+`
		c7ffa4	`+ log_base = (unsigned long *)(uintptr_t)dev->log_base;`
		c7ffa4	`+`
		c7ffa4	`+ /*`
		c7ffa4	`+ * It is expected a write memory barrier has been issued`
		c7ffa4	`+ * before this function is called.`
		c7ffa4	`+ */`
		c7ffa4	`+`
		c7ffa4	`+ for (i = 0; i < vq->log_cache_nb_elem; i++) {`
		c7ffa4	`+ struct log_cache_entry *elem = vq->log_cache + i;`
		c7ffa4	`+`
		c7ffa4	`+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)`
		c7ffa4	`+ /*`
		c7ffa4	`+ * '__sync' builtins are deprecated, but '__atomic' ones`
		c7ffa4	`+ * are sub-optimized in older GCC versions.`
		c7ffa4	`+ */`
		c7ffa4	`+ __sync_fetch_and_or(log_base + elem->offset, elem->val);`
		c7ffa4	`+#else`
		c7ffa4	`+ __atomic_fetch_or(log_base + elem->offset, elem->val,`
		c7ffa4	`+ __ATOMIC_RELAXED);`
		c7ffa4	`+#endif`
		c7ffa4	`+ }`
		c7ffa4	`+`
		c7ffa4	`+ rte_smp_wmb();`
		c7ffa4	`+`
		c7ffa4	`+ vq->log_cache_nb_elem = 0;`
		c7ffa4	`+}`
		c7ffa4	`+`
		c7ffa4	`+static __rte_always_inline void`
		c7ffa4	`+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,`
		c7ffa4	`+ uint64_t page)`
		c7ffa4	`+{`
		c7ffa4	`+ uint32_t bit_nr = page % (sizeof(unsigned long) << 3);`
		c7ffa4	`+ uint32_t offset = page / (sizeof(unsigned long) << 3);`
		c7ffa4	`+ int i;`
		c7ffa4	`+`
		c7ffa4	`+ for (i = 0; i < vq->log_cache_nb_elem; i++) {`
		c7ffa4	`+ struct log_cache_entry *elem = vq->log_cache + i;`
		c7ffa4	`+`
		c7ffa4	`+ if (elem->offset == offset) {`
		c7ffa4	`+ elem->val |= (1UL << bit_nr);`
		c7ffa4	`+ return;`
		c7ffa4	`+ }`
		c7ffa4	`+ }`
		c7ffa4	`+`
		c7ffa4	`+ if (unlikely(i >= VHOST_LOG_CACHE_NR)) {`
		c7ffa4	`+ /*`
		c7ffa4	`+ * No more room for a new log cache entry,`
		c7ffa4	`+ * so write the dirty log map directly.`
		c7ffa4	`+ */`
		c7ffa4	`+ rte_smp_wmb();`
		c7ffa4	`+ vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);`
		c7ffa4	`+`
		c7ffa4	`+ return;`
		c7ffa4	`+ }`
		c7ffa4	`+`
		c7ffa4	`+ vq->log_cache[i].offset = offset;`
		c7ffa4	`+ vq->log_cache[i].val = (1UL << bit_nr);`
		c7ffa4	`+}`
		c7ffa4	`+`
		c7ffa4	`+static __rte_always_inline void`
		c7ffa4	`+vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,`
		c7ffa4	`+ uint64_t addr, uint64_t len)`
		c7ffa4	`+{`
		c7ffa4	`+ uint64_t page;`
		c7ffa4	`+`
		c7ffa4	`+ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||`
		c7ffa4	`+ !dev->log_base || !len))`
		c7ffa4	`+ return;`
		c7ffa4	`+`
		c7ffa4	`+ if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))`
		c7ffa4	`+ return;`
		c7ffa4	`+`
		c7ffa4	`+ page = addr / VHOST_LOG_PAGE;`
		c7ffa4	`+ while (page * VHOST_LOG_PAGE < addr + len) {`
		c7ffa4	`+ vhost_log_cache_page(dev, vq, page);`
		c7ffa4	`+ page += 1;`
		c7ffa4	`+ }`
		c7ffa4	`+}`
		c7ffa4	`+`
		c7ffa4	`+static __rte_always_inline void`
		c7ffa4	`+vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,`
		c7ffa4	`+ uint64_t offset, uint64_t len)`
		c7ffa4	`+{`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len);`
		c7ffa4	`+}`
		c7ffa4	`+`
		c7ffa4	`static __rte_always_inline void`
		c7ffa4	`vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,`
		c7ffa4	`uint64_t offset, uint64_t len)`
		c7ffa4	`diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c`
		c7ffa4	`index a013c07b0..5f8763d3a 100644`
		c7ffa4	`--- a/lib/librte_vhost/virtio_net.c`
		c7ffa4	`+++ b/lib/librte_vhost/virtio_net.c`
		c7ffa4	`@@ -107,7 +107,7 @@ do_flush_shadow_used_ring(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`rte_memcpy(&vq->used->ring[to],`
		c7ffa4	`&vq->shadow_used_ring[from],`
		c7ffa4	`size * sizeof(struct vring_used_elem));`
		c7ffa4	`- vhost_log_used_vring(dev, vq,`
		c7ffa4	`+ vhost_log_cache_used_vring(dev, vq,`
		c7ffa4	`offsetof(struct vring_used, ring[to]),`
		c7ffa4	`size * sizeof(struct vring_used_elem));`
		c7ffa4	`}`
		c7ffa4	`@@ -135,6 +135,8 @@ flush_shadow_used_ring(struct virtio_net dev, struct vhost_virtqueue vq)`
		c7ffa4
		c7ffa4	`rte_smp_wmb();`
		c7ffa4
		c7ffa4	`+ vhost_log_cache_sync(dev, vq);`
		c7ffa4	`+`
		c7ffa4	`*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;`
		c7ffa4	`vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),`
		c7ffa4	`sizeof(vq->used->idx));`
		c7ffa4	`@@ -159,7 +161,7 @@ do_data_copy_enqueue(struct virtio_net dev, struct vhost_virtqueue vq)`
		c7ffa4
		c7ffa4	`for (i = 0; i < count; i++) {`
		c7ffa4	`rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);`
		c7ffa4	`- vhost_log_write(dev, elem[i].log_addr, elem[i].len);`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);`
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);`
		c7ffa4	`}`
		c7ffa4	`}`
		c7ffa4	`@@ -275,7 +277,7 @@ copy_mbuf_to_desc(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`virtio_enqueue_offload(m,`
		c7ffa4	`(struct virtio_net_hdr *)(uintptr_t)desc_addr);`
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);`
		c7ffa4	`- vhost_log_write(dev, desc_gaddr, dev->vhost_hlen);`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);`
		c7ffa4	`} else {`
		c7ffa4	`struct virtio_net_hdr vnet_hdr;`
		c7ffa4	`uint64_t remain = dev->vhost_hlen;`
		c7ffa4	`@@ -298,7 +300,7 @@ copy_mbuf_to_desc(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`(void *)(uintptr_t)src, len);`
		c7ffa4
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)dst, len, 0);`
		c7ffa4	`- vhost_log_write(dev, guest_addr, len);`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, guest_addr, len);`
		c7ffa4	`remain -= len;`
		c7ffa4	`guest_addr += len;`
		c7ffa4	`dst += len;`
		c7ffa4	`@@ -379,7 +381,8 @@ copy_mbuf_to_desc(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`desc_offset)),`
		c7ffa4	`rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),`
		c7ffa4	`cpy_len);`
		c7ffa4	`- vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,`
		c7ffa4	`+ cpy_len);`
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),`
		c7ffa4	`cpy_len, 0);`
		c7ffa4	`} else {`
		c7ffa4	`@@ -468,7 +471,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,`
		c7ffa4	`vq->used->ring[used_idx].id = desc_indexes[i];`
		c7ffa4	`vq->used->ring[used_idx].len = pkts[i]->pkt_len +`
		c7ffa4	`dev->vhost_hlen;`
		c7ffa4	`- vhost_log_used_vring(dev, vq,`
		c7ffa4	`+ vhost_log_cache_used_vring(dev, vq,`
		c7ffa4	`offsetof(struct vring_used, ring[used_idx]),`
		c7ffa4	`sizeof(vq->used->ring[used_idx]));`
		c7ffa4	`}`
		c7ffa4	`@@ -528,6 +531,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,`
		c7ffa4
		c7ffa4	`rte_smp_wmb();`
		c7ffa4
		c7ffa4	`+ vhost_log_cache_sync(dev, vq);`
		c7ffa4	`+`
		c7ffa4	`*(volatile uint16_t *)&vq->used->idx += count;`
		c7ffa4	`vq->last_used_idx += count;`
		c7ffa4	`vhost_log_used_vring(dev, vq,`
		c7ffa4	`@@ -797,7 +802,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)dst,`
		c7ffa4	`len, 0);`
		c7ffa4	`- vhost_log_write(dev, guest_addr, len);`
		c7ffa4	`+ vhost_log_cache_write(dev, vq,`
		c7ffa4	`+ guest_addr, len);`
		c7ffa4
		c7ffa4	`remain -= len;`
		c7ffa4	`guest_addr += len;`
		c7ffa4	`@@ -806,7 +812,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`} else {`
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)hdr_addr,`
		c7ffa4	`dev->vhost_hlen, 0);`
		c7ffa4	`- vhost_log_write(dev, hdr_phys_addr,`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, hdr_phys_addr,`
		c7ffa4	`dev->vhost_hlen);`
		c7ffa4	`}`
		c7ffa4
		c7ffa4	`@@ -820,7 +826,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`desc_offset)),`
		c7ffa4	`rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),`
		c7ffa4	`cpy_len);`
		c7ffa4	`- vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);`
		c7ffa4	`+ vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,`
		c7ffa4	`+ cpy_len);`
		c7ffa4	`PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),`
		c7ffa4	`cpy_len, 0);`
		c7ffa4	`} else {`
		c7ffa4	`@@ -1384,7 +1391,7 @@ update_used_ring(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`{`
		c7ffa4	`vq->used->ring[used_idx].id = desc_idx;`
		c7ffa4	`vq->used->ring[used_idx].len = 0;`
		c7ffa4	`- vhost_log_used_vring(dev, vq,`
		c7ffa4	`+ vhost_log_cache_used_vring(dev, vq,`
		c7ffa4	`offsetof(struct vring_used, ring[used_idx]),`
		c7ffa4	`sizeof(vq->used->ring[used_idx]));`
		c7ffa4	`}`
		c7ffa4	`@@ -1399,6 +1406,8 @@ update_used_idx(struct virtio_net dev, struct vhost_virtqueue vq,`
		c7ffa4	`rte_smp_wmb();`
		c7ffa4	`rte_smp_rmb();`
		c7ffa4
		c7ffa4	`+ vhost_log_cache_sync(dev, vq);`
		c7ffa4	`+`
		c7ffa4	`vq->used->idx += count;`
		c7ffa4	`vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),`
		c7ffa4	`sizeof(vq->used->idx));`
		c7ffa4	`--`
		c7ffa4	`2.14.3`
		c7ffa4

rpms / dpdk

Source Code

Blame SOURCES/0001-vhost-improve-dirty-pages-logging-performance.patch