Tree - rpms/qemu-kvm - CentOS Git server

yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago

Source
Stats

Blame SOURCES/kvm-async-use-explicit-memory-barriers.patch

Blob History Raw

		1072c8	`From 82a02aec3a8b3c2ac925d0b71ea4c35aa5d6463b Mon Sep 17 00:00:00 2001`
		1072c8	`From: Andrew Jones <drjones@redhat.com>`
		1072c8	`Date: Wed, 4 Aug 2021 03:27:24 -0400`
		1072c8	`Subject: [PATCH 2/2] async: use explicit memory barriers`
		1072c8
		1072c8	`RH-Author: Andrew Jones <drjones@redhat.com>`
		1072c8	`Message-id: <20210729134448.4995-3-drjones@redhat.com>`
		1072c8	`Patchwork-id: 101937`
		1072c8	`O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 2/2] async: use explicit memory barriers`
		1072c8	`Bugzilla: 1969848`
		1072c8	`RH-Acked-by: Gavin Shan <gshan@redhat.com>`
		1072c8	`RH-Acked-by: Auger Eric <eric.auger@redhat.com>`
		1072c8	`RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>`
		1072c8
		1072c8	`From: Paolo Bonzini <pbonzini@redhat.com>`
		1072c8
		1072c8	`When using C11 atomics, non-seqcst reads and writes do not participate`
		1072c8	`in the total order of seqcst operations. In util/async.c and util/aio-posix.c,`
		1072c8	`in particular, the pattern that we use`
		1072c8
		1072c8	`write ctx->notify_me write bh->scheduled`
		1072c8	`read bh->scheduled read ctx->notify_me`
		1072c8	`if !bh->scheduled, sleep if ctx->notify_me, notify`
		1072c8
		1072c8	`needs to use seqcst operations for both the write and the read. In`
		1072c8	`general this is something that we do not want, because there can be`
		1072c8	`many sources that are polled in addition to bottom halves. The`
		1072c8	`alternative is to place a seqcst memory barrier between the write`
		1072c8	`and the read. This also comes with a disadvantage, in that the`
		1072c8	`memory barrier is implicit on strongly-ordered architectures and`
		1072c8	`it wastes a few dozen clock cycles.`
		1072c8
		1072c8	`Fortunately, ctx->notify_me is never written concurrently by two`
		1072c8	`threads, so we can assert that and relax the writes to ctx->notify_me.`
		1072c8	`The resulting solution works and performs well on both aarch64 and x86.`
		1072c8
		1072c8	`Note that the atomic_set/atomic_read combination is not an atomic`
		1072c8	`read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;`
		1072c8	`on x86, ATOMIC_RELAXED compiles to a locked operation.`
		1072c8
		1072c8	`Analyzed-by: Ying Fang <fangying1@huawei.com>`
		1072c8	`Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>`
		1072c8	`Tested-by: Ying Fang <fangying1@huawei.com>`
		1072c8	`Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>`
		1072c8	`Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>`
		1072c8	`(cherry picked from commit 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c)`
		1072c8	`Signed-off-by: Andrew Jones <drjones@redhat.com>`
		1072c8	`Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>`
		1072c8	`---`
		1072c8	`util/aio-posix.c \| 16 ++++++++++++++--`
		1072c8	`util/aio-win32.c \| 17 ++++++++++++++---`
		1072c8	`util/async.c \| 16 ++++++++++++----`
		1072c8	`3 files changed, 40 insertions(+), 9 deletions(-)`
		1072c8
		1072c8	`diff --git a/util/aio-posix.c b/util/aio-posix.c`
		1072c8	`index abc396d030..8cfb25650d 100644`
		1072c8	`--- a/util/aio-posix.c`
		1072c8	`+++ b/util/aio-posix.c`
		1072c8	`@@ -624,6 +624,11 @@ bool aio_poll(AioContext *ctx, bool blocking)`
		1072c8	`int64_t timeout;`
		1072c8	`int64_t start = 0;`
		1072c8
		1072c8	`+ /*`
		1072c8	`+ * There cannot be two concurrent aio_poll calls for the same AioContext (or`
		1072c8	`+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).`
		1072c8	`+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.`
		1072c8	`+ */`
		1072c8	`assert(in_aio_context_home_thread(ctx));`
		1072c8
		1072c8	`/* aio_notify can avoid the expensive event_notifier_set if`
		1072c8	`@@ -634,7 +639,13 @@ bool aio_poll(AioContext *ctx, bool blocking)`
		1072c8	`* so disable the optimization now.`
		1072c8	`*/`
		1072c8	`if (blocking) {`
		1072c8	`- atomic_add(&ctx->notify_me, 2);`
		1072c8	`+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);`
		1072c8	`+ /*`
		1072c8	`+ * Write ctx->notify_me before computing the timeout`
		1072c8	`+ * (reading bottom half flags, etc.). Pairs with`
		1072c8	`+ * smp_mb in aio_notify().`
		1072c8	`+ */`
		1072c8	`+ smp_mb();`
		1072c8	`}`
		1072c8
		1072c8	`qemu_lockcnt_inc(&ctx->list_lock);`
		1072c8	`@@ -679,7 +690,8 @@ bool aio_poll(AioContext *ctx, bool blocking)`
		1072c8	`}`
		1072c8
		1072c8	`if (blocking) {`
		1072c8	`- atomic_sub(&ctx->notify_me, 2);`
		1072c8	`+ /* Finish the poll before clearing the flag. */`
		1072c8	`+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);`
		1072c8	`aio_notify_accept(ctx);`
		1072c8	`}`
		1072c8
		1072c8	`diff --git a/util/aio-win32.c b/util/aio-win32.c`
		1072c8	`index a23b9c364d..729d533faf 100644`
		1072c8	`--- a/util/aio-win32.c`
		1072c8	`+++ b/util/aio-win32.c`
		1072c8	`@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)`
		1072c8	`int count;`
		1072c8	`int timeout;`
		1072c8
		1072c8	`+ /*`
		1072c8	`+ * There cannot be two concurrent aio_poll calls for the same AioContext (or`
		1072c8	`+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).`
		1072c8	`+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.`
		1072c8	`+ */`
		1072c8	`+ assert(in_aio_context_home_thread(ctx));`
		1072c8	`progress = false;`
		1072c8
		1072c8	`/* aio_notify can avoid the expensive event_notifier_set if`
		1072c8	`@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)`
		1072c8	`* so disable the optimization now.`
		1072c8	`*/`
		1072c8	`if (blocking) {`
		1072c8	`- atomic_add(&ctx->notify_me, 2);`
		1072c8	`+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);`
		1072c8	`+ /*`
		1072c8	`+ * Write ctx->notify_me before computing the timeout`
		1072c8	`+ * (reading bottom half flags, etc.). Pairs with`
		1072c8	`+ * smp_mb in aio_notify().`
		1072c8	`+ */`
		1072c8	`+ smp_mb();`
		1072c8	`}`
		1072c8
		1072c8	`qemu_lockcnt_inc(&ctx->list_lock);`
		1072c8	`@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)`
		1072c8	`ret = WaitForMultipleObjects(count, events, FALSE, timeout);`
		1072c8	`if (blocking) {`
		1072c8	`assert(first);`
		1072c8	`- assert(in_aio_context_home_thread(ctx));`
		1072c8	`- atomic_sub(&ctx->notify_me, 2);`
		1072c8	`+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);`
		1072c8	`aio_notify_accept(ctx);`
		1072c8	`}`
		1072c8
		1072c8	`diff --git a/util/async.c b/util/async.c`
		1072c8	`index b1fa5319e5..c65c58bbc9 100644`
		1072c8	`--- a/util/async.c`
		1072c8	`+++ b/util/async.c`
		1072c8	`@@ -220,7 +220,14 @@ aio_ctx_prepare(GSource source, gint timeout)`
		1072c8	`{`
		1072c8	`AioContext ctx = (AioContext ) source;`
		1072c8
		1072c8	`- atomic_or(&ctx->notify_me, 1);`
		1072c8	`+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) \| 1);`
		1072c8	`+`
		1072c8	`+ /*`
		1072c8	`+ * Write ctx->notify_me before computing the timeout`
		1072c8	`+ * (reading bottom half flags, etc.). Pairs with`
		1072c8	`+ * smp_mb in aio_notify().`
		1072c8	`+ */`
		1072c8	`+ smp_mb();`
		1072c8
		1072c8	`/* We assume there is no timeout already supplied */`
		1072c8	`*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));`
		1072c8	`@@ -238,7 +245,8 @@ aio_ctx_check(GSource *source)`
		1072c8	`AioContext ctx = (AioContext ) source;`
		1072c8	`QEMUBH *bh;`
		1072c8
		1072c8	`- atomic_and(&ctx->notify_me, ~1);`
		1072c8	`+ /* Finish computing the timeout before clearing the flag. */`
		1072c8	`+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);`
		1072c8	`aio_notify_accept(ctx);`
		1072c8
		1072c8	`for (bh = ctx->first_bh; bh; bh = bh->next) {`
		1072c8	`@@ -343,10 +351,10 @@ LinuxAioState aio_get_linux_aio(AioContext ctx)`
		1072c8	`void aio_notify(AioContext *ctx)`
		1072c8	`{`
		1072c8	`/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs`
		1072c8	`- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.`
		1072c8	`+ * with smp_mb in aio_ctx_prepare or aio_poll.`
		1072c8	`*/`
		1072c8	`smp_mb();`
		1072c8	`- if (ctx->notify_me) {`
		1072c8	`+ if (atomic_read(&ctx->notify_me)) {`
		1072c8	`event_notifier_set(&ctx->notifier);`
		1072c8	`atomic_mb_set(&ctx->notified, true);`
		1072c8	`}`
		1072c8	`--`
		1072c8	`2.27.0`
		1072c8

yeahuh / rpms / qemu-kvm

Source Code

Blame SOURCES/kvm-async-use-explicit-memory-barriers.patch