Blame SOURCES/0018-vhost-batch-used-descs-chains-write-back-with-packed.patch

2c1bf6
From f3bf9a1a9b1ad3419b436855306ad8b5d8efab2f Mon Sep 17 00:00:00 2001
2c1bf6
From: Maxime Coquelin <maxime.coquelin@redhat.com>
2c1bf6
Date: Thu, 20 Dec 2018 17:47:55 +0100
2c1bf6
Subject: [PATCH 18/18] vhost: batch used descs chains write-back with packed
2c1bf6
 ring
2c1bf6
2c1bf6
[ upstream commit b473ec1131ee44ee25e0536a04be65246b93f4f3 ]
2c1bf6
2c1bf6
Instead of writing back descriptor chains in order, let's
2c1bf6
write the first chain's flags last in order to improve batching.
2c1bf6
2c1bf6
Also, move the write barrier into the logging cache sync, so that
2c1bf6
it is issued only when logging is enabled. This means there is now
2c1bf6
one more barrier for the split ring when logging is enabled.
2c1bf6
2c1bf6
With the kernel's pktgen benchmark, a ~3% performance gain is measured.
2c1bf6
2c1bf6
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
2c1bf6
Acked-by: Michael S. Tsirkin <mst@redhat.com>
2c1bf6
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>
2c1bf6
(cherry picked from commit b473ec1131ee44ee25e0536a04be65246b93f4f3)
2c1bf6
Signed-off-by: Jens Freimann <jfreimann@redhat.com>
2c1bf6
---
2c1bf6
 lib/librte_vhost/vhost.h      |  7 ++-----
2c1bf6
 lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++---
2c1bf6
 2 files changed, 18 insertions(+), 8 deletions(-)
2c1bf6
2c1bf6
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
2c1bf6
index 552b9298d..adc2fb78e 100644
2c1bf6
--- a/lib/librte_vhost/vhost.h
2c1bf6
+++ b/lib/librte_vhost/vhost.h
2c1bf6
@@ -456,12 +456,9 @@ vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
2c1bf6
 		   !dev->log_base))
2c1bf6
 		return;
2c1bf6
 
2c1bf6
-	log_base = (unsigned long *)(uintptr_t)dev->log_base;
2c1bf6
+	rte_smp_wmb();
2c1bf6
 
2c1bf6
-	/*
2c1bf6
-	 * It is expected a write memory barrier has been issued
2c1bf6
-	 * before this function is called.
2c1bf6
-	 */
2c1bf6
+	log_base = (unsigned long *)(uintptr_t)dev->log_base;
2c1bf6
 
2c1bf6
 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
2c1bf6
 		struct log_cache_entry *elem = vq->log_cache + i;
2c1bf6
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
2c1bf6
index 15d682c3c..ec70ef947 100644
2c1bf6
--- a/lib/librte_vhost/virtio_net.c
2c1bf6
+++ b/lib/librte_vhost/virtio_net.c
2c1bf6
@@ -136,6 +136,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
2c1bf6
 {
2c1bf6
 	int i;
2c1bf6
 	uint16_t used_idx = vq->last_used_idx;
2c1bf6
+	uint16_t head_idx = vq->last_used_idx;
2c1bf6
+	uint16_t head_flags = 0;
2c1bf6
 
2c1bf6
 	/* Split loop in two to save memory barriers */
2c1bf6
 	for (i = 0; i < vq->shadow_used_idx; i++) {
2c1bf6
@@ -165,12 +167,17 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
2c1bf6
 			flags &= ~VRING_DESC_F_AVAIL;
2c1bf6
 		}
2c1bf6
 
2c1bf6
-		vq->desc_packed[vq->last_used_idx].flags = flags;
2c1bf6
+		if (i > 0) {
2c1bf6
+			vq->desc_packed[vq->last_used_idx].flags = flags;
2c1bf6
 
2c1bf6
-		vhost_log_cache_used_vring(dev, vq,
2c1bf6
+			vhost_log_cache_used_vring(dev, vq,
2c1bf6
 					vq->last_used_idx *
2c1bf6
 					sizeof(struct vring_packed_desc),
2c1bf6
 					sizeof(struct vring_packed_desc));
2c1bf6
+		} else {
2c1bf6
+			head_idx = vq->last_used_idx;
2c1bf6
+			head_flags = flags;
2c1bf6
+		}
2c1bf6
 
2c1bf6
 		vq->last_used_idx += vq->shadow_used_packed[i].count;
2c1bf6
 		if (vq->last_used_idx >= vq->size) {
2c1bf6
@@ -179,7 +186,13 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
2c1bf6
 		}
2c1bf6
 	}
2c1bf6
 
2c1bf6
-	rte_smp_wmb();
2c1bf6
+	vq->desc_packed[head_idx].flags = head_flags;
2c1bf6
+
2c1bf6
+	vhost_log_cache_used_vring(dev, vq,
2c1bf6
+				head_idx *
2c1bf6
+				sizeof(struct vring_packed_desc),
2c1bf6
+				sizeof(struct vring_packed_desc));
2c1bf6
+
2c1bf6
 	vq->shadow_used_idx = 0;
2c1bf6
 	vhost_log_cache_sync(dev, vq);
2c1bf6
 }
2c1bf6
-- 
2c1bf6
2.21.0
2c1bf6