Blame SOURCES/0018-vhost-batch-used-descs-chains-write-back-with-packed.patch

b91920
From f3bf9a1a9b1ad3419b436855306ad8b5d8efab2f Mon Sep 17 00:00:00 2001
b91920
From: Maxime Coquelin <maxime.coquelin@redhat.com>
b91920
Date: Thu, 20 Dec 2018 17:47:55 +0100
b91920
Subject: [PATCH 18/18] vhost: batch used descs chains write-back with packed
b91920
 ring
b91920
b91920
[ upstream commit b473ec1131ee44ee25e0536a04be65246b93f4f3 ]
b91920
b91920
Instead of writing back descriptor chains in order, let's
b91920
write the first chain flags last in order to improve batching.
b91920
b91920
Also, move the write barrier into the logging cache sync, so that it
b91920
is done only when logging is enabled. It means there is now
b91920
one more barrier for split ring when logging is enabled.
b91920
b91920
With the kernel's pktgen benchmark, a ~3% performance gain is measured.
b91920
b91920
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
b91920
Acked-by: Michael S. Tsirkin <mst@redhat.com>
b91920
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>
b91920
(cherry picked from commit b473ec1131ee44ee25e0536a04be65246b93f4f3)
b91920
Signed-off-by: Jens Freimann <jfreimann@redhat.com>
b91920
---
b91920
 lib/librte_vhost/vhost.h      |  7 ++-----
b91920
 lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++---
b91920
 2 files changed, 18 insertions(+), 8 deletions(-)
b91920
b91920
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
b91920
index 552b9298d..adc2fb78e 100644
b91920
--- a/lib/librte_vhost/vhost.h
b91920
+++ b/lib/librte_vhost/vhost.h
b91920
@@ -456,12 +456,9 @@ vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
b91920
 		   !dev->log_base))
b91920
 		return;
b91920
 
b91920
-	log_base = (unsigned long *)(uintptr_t)dev->log_base;
b91920
+	rte_smp_wmb();
b91920
 
b91920
-	/*
b91920
-	 * It is expected a write memory barrier has been issued
b91920
-	 * before this function is called.
b91920
-	 */
b91920
+	log_base = (unsigned long *)(uintptr_t)dev->log_base;
b91920
 
b91920
 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
b91920
 		struct log_cache_entry *elem = vq->log_cache + i;
b91920
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
b91920
index 15d682c3c..ec70ef947 100644
b91920
--- a/lib/librte_vhost/virtio_net.c
b91920
+++ b/lib/librte_vhost/virtio_net.c
b91920
@@ -136,6 +136,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
b91920
 {
b91920
 	int i;
b91920
 	uint16_t used_idx = vq->last_used_idx;
b91920
+	uint16_t head_idx = vq->last_used_idx;
b91920
+	uint16_t head_flags = 0;
b91920
 
b91920
 	/* Split loop in two to save memory barriers */
b91920
 	for (i = 0; i < vq->shadow_used_idx; i++) {
b91920
@@ -165,12 +167,17 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
b91920
 			flags &= ~VRING_DESC_F_AVAIL;
b91920
 		}
b91920
 
b91920
-		vq->desc_packed[vq->last_used_idx].flags = flags;
b91920
+		if (i > 0) {
b91920
+			vq->desc_packed[vq->last_used_idx].flags = flags;
b91920
 
b91920
-		vhost_log_cache_used_vring(dev, vq,
b91920
+			vhost_log_cache_used_vring(dev, vq,
b91920
 					vq->last_used_idx *
b91920
 					sizeof(struct vring_packed_desc),
b91920
 					sizeof(struct vring_packed_desc));
b91920
+		} else {
b91920
+			head_idx = vq->last_used_idx;
b91920
+			head_flags = flags;
b91920
+		}
b91920
 
b91920
 		vq->last_used_idx += vq->shadow_used_packed[i].count;
b91920
 		if (vq->last_used_idx >= vq->size) {
b91920
@@ -179,7 +186,13 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
b91920
 		}
b91920
 	}
b91920
 
b91920
-	rte_smp_wmb();
b91920
+	vq->desc_packed[head_idx].flags = head_flags;
b91920
+
b91920
+	vhost_log_cache_used_vring(dev, vq,
b91920
+				head_idx *
b91920
+				sizeof(struct vring_packed_desc),
b91920
+				sizeof(struct vring_packed_desc));
b91920
+
b91920
 	vq->shadow_used_idx = 0;
b91920
 	vhost_log_cache_sync(dev, vq);
b91920
 }
b91920
-- 
b91920
2.21.0
b91920