|
|
6e7d01 |
From 82a02aec3a8b3c2ac925d0b71ea4c35aa5d6463b Mon Sep 17 00:00:00 2001
|
|
|
6e7d01 |
From: Andrew Jones <drjones@redhat.com>
|
|
|
6e7d01 |
Date: Wed, 4 Aug 2021 03:27:24 -0400
|
|
|
6e7d01 |
Subject: [PATCH 2/2] async: use explicit memory barriers
|
|
|
6e7d01 |
|
|
|
6e7d01 |
RH-Author: Andrew Jones <drjones@redhat.com>
|
|
|
6e7d01 |
Message-id: <20210729134448.4995-3-drjones@redhat.com>
|
|
|
6e7d01 |
Patchwork-id: 101937
|
|
|
6e7d01 |
O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 2/2] async: use explicit memory barriers
|
|
|
6e7d01 |
Bugzilla: 1969848
|
|
|
6e7d01 |
RH-Acked-by: Gavin Shan <gshan@redhat.com>
|
|
|
6e7d01 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
6e7d01 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
6e7d01 |
|
|
|
6e7d01 |
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
6e7d01 |
|
|
|
6e7d01 |
When using C11 atomics, non-seqcst reads and writes do not participate
|
|
|
6e7d01 |
in the total order of seqcst operations. In util/async.c and util/aio-posix.c,
|
|
|
6e7d01 |
in particular, the pattern that we use
|
|
|
6e7d01 |
|
|
|
6e7d01 |
write ctx->notify_me write bh->scheduled
|
|
|
6e7d01 |
read bh->scheduled read ctx->notify_me
|
|
|
6e7d01 |
if !bh->scheduled, sleep if ctx->notify_me, notify
|
|
|
6e7d01 |
|
|
|
6e7d01 |
needs to use seqcst operations for both the write and the read. In
|
|
|
6e7d01 |
general this is something that we do not want, because there can be
|
|
|
6e7d01 |
many sources that are polled in addition to bottom halves. The
|
|
|
6e7d01 |
alternative is to place a seqcst memory barrier between the write
|
|
|
6e7d01 |
and the read. This also comes with a disadvantage, in that the
|
|
|
6e7d01 |
memory barrier is implicit on strongly-ordered architectures and
|
|
|
6e7d01 |
it wastes a few dozen clock cycles.
|
|
|
6e7d01 |
|
|
|
6e7d01 |
Fortunately, ctx->notify_me is never written concurrently by two
|
|
|
6e7d01 |
threads, so we can assert that and relax the writes to ctx->notify_me.
|
|
|
6e7d01 |
The resulting solution works and performs well on both aarch64 and x86.
|
|
|
6e7d01 |
|
|
|
6e7d01 |
Note that the atomic_set/atomic_read combination is not an atomic
|
|
|
6e7d01 |
read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
|
|
|
6e7d01 |
on x86, ATOMIC_RELAXED compiles to a locked operation.
|
|
|
6e7d01 |
|
|
|
6e7d01 |
Analyzed-by: Ying Fang <fangying1@huawei.com>
|
|
|
6e7d01 |
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
6e7d01 |
Tested-by: Ying Fang <fangying1@huawei.com>
|
|
|
6e7d01 |
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
|
|
|
6e7d01 |
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
6e7d01 |
(cherry picked from commit 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c)
|
|
|
6e7d01 |
Signed-off-by: Andrew Jones <drjones@redhat.com>
|
|
|
6e7d01 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
6e7d01 |
---
|
|
|
6e7d01 |
util/aio-posix.c | 16 ++++++++++++++--
|
|
|
6e7d01 |
util/aio-win32.c | 17 ++++++++++++++---
|
|
|
6e7d01 |
util/async.c | 16 ++++++++++++----
|
|
|
6e7d01 |
3 files changed, 40 insertions(+), 9 deletions(-)
|
|
|
6e7d01 |
|
|
|
6e7d01 |
diff --git a/util/aio-posix.c b/util/aio-posix.c
|
|
|
6e7d01 |
index abc396d030..8cfb25650d 100644
|
|
|
6e7d01 |
--- a/util/aio-posix.c
|
|
|
6e7d01 |
+++ b/util/aio-posix.c
|
|
|
6e7d01 |
@@ -624,6 +624,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
6e7d01 |
int64_t timeout;
|
|
|
6e7d01 |
int64_t start = 0;
|
|
|
6e7d01 |
|
|
|
6e7d01 |
+ /*
|
|
|
6e7d01 |
+ * There cannot be two concurrent aio_poll calls for the same AioContext (or
|
|
|
6e7d01 |
+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
|
|
|
6e7d01 |
+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.
|
|
|
6e7d01 |
+ */
|
|
|
6e7d01 |
assert(in_aio_context_home_thread(ctx));
|
|
|
6e7d01 |
|
|
|
6e7d01 |
/* aio_notify can avoid the expensive event_notifier_set if
|
|
|
6e7d01 |
@@ -634,7 +639,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
6e7d01 |
* so disable the optimization now.
|
|
|
6e7d01 |
*/
|
|
|
6e7d01 |
if (blocking) {
|
|
|
6e7d01 |
- atomic_add(&ctx->notify_me, 2);
|
|
|
6e7d01 |
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
|
|
|
6e7d01 |
+ /*
|
|
|
6e7d01 |
+ * Write ctx->notify_me before computing the timeout
|
|
|
6e7d01 |
+ * (reading bottom half flags, etc.). Pairs with
|
|
|
6e7d01 |
+ * smp_mb in aio_notify().
|
|
|
6e7d01 |
+ */
|
|
|
6e7d01 |
+ smp_mb();
|
|
|
6e7d01 |
}
|
|
|
6e7d01 |
|
|
|
6e7d01 |
qemu_lockcnt_inc(&ctx->list_lock);
|
|
|
6e7d01 |
@@ -679,7 +690,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
6e7d01 |
}
|
|
|
6e7d01 |
|
|
|
6e7d01 |
if (blocking) {
|
|
|
6e7d01 |
- atomic_sub(&ctx->notify_me, 2);
|
|
|
6e7d01 |
+ /* Finish the poll before clearing the flag. */
|
|
|
6e7d01 |
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
|
|
|
6e7d01 |
aio_notify_accept(ctx);
|
|
|
6e7d01 |
}
|
|
|
6e7d01 |
|
|
|
6e7d01 |
diff --git a/util/aio-win32.c b/util/aio-win32.c
|
|
|
6e7d01 |
index a23b9c364d..729d533faf 100644
|
|
|
6e7d01 |
--- a/util/aio-win32.c
|
|
|
6e7d01 |
+++ b/util/aio-win32.c
|
|
|
6e7d01 |
@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
6e7d01 |
int count;
|
|
|
6e7d01 |
int timeout;
|
|
|
6e7d01 |
|
|
|
6e7d01 |
+ /*
|
|
|
6e7d01 |
+ * There cannot be two concurrent aio_poll calls for the same AioContext (or
|
|
|
6e7d01 |
+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
|
|
|
6e7d01 |
+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.
|
|
|
6e7d01 |
+ */
|
|
|
6e7d01 |
+ assert(in_aio_context_home_thread(ctx));
|
|
|
6e7d01 |
progress = false;
|
|
|
6e7d01 |
|
|
|
6e7d01 |
/* aio_notify can avoid the expensive event_notifier_set if
|
|
|
6e7d01 |
@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
6e7d01 |
* so disable the optimization now.
|
|
|
6e7d01 |
*/
|
|
|
6e7d01 |
if (blocking) {
|
|
|
6e7d01 |
- atomic_add(&ctx->notify_me, 2);
|
|
|
6e7d01 |
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
|
|
|
6e7d01 |
+ /*
|
|
|
6e7d01 |
+ * Write ctx->notify_me before computing the timeout
|
|
|
6e7d01 |
+ * (reading bottom half flags, etc.). Pairs with
|
|
|
6e7d01 |
+ * smp_mb in aio_notify().
|
|
|
6e7d01 |
+ */
|
|
|
6e7d01 |
+ smp_mb();
|
|
|
6e7d01 |
}
|
|
|
6e7d01 |
|
|
|
6e7d01 |
qemu_lockcnt_inc(&ctx->list_lock);
|
|
|
6e7d01 |
@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
6e7d01 |
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
|
|
|
6e7d01 |
if (blocking) {
|
|
|
6e7d01 |
assert(first);
|
|
|
6e7d01 |
- assert(in_aio_context_home_thread(ctx));
|
|
|
6e7d01 |
- atomic_sub(&ctx->notify_me, 2);
|
|
|
6e7d01 |
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
|
|
|
6e7d01 |
aio_notify_accept(ctx);
|
|
|
6e7d01 |
}
|
|
|
6e7d01 |
|
|
|
6e7d01 |
diff --git a/util/async.c b/util/async.c
|
|
|
6e7d01 |
index b1fa5319e5..c65c58bbc9 100644
|
|
|
6e7d01 |
--- a/util/async.c
|
|
|
6e7d01 |
+++ b/util/async.c
|
|
|
6e7d01 |
@@ -220,7 +220,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
|
|
|
6e7d01 |
{
|
|
|
6e7d01 |
AioContext *ctx = (AioContext *) source;
|
|
|
6e7d01 |
|
|
|
6e7d01 |
- atomic_or(&ctx->notify_me, 1);
|
|
|
6e7d01 |
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
|
|
|
6e7d01 |
+
|
|
|
6e7d01 |
+ /*
|
|
|
6e7d01 |
+ * Write ctx->notify_me before computing the timeout
|
|
|
6e7d01 |
+ * (reading bottom half flags, etc.). Pairs with
|
|
|
6e7d01 |
+ * smp_mb in aio_notify().
|
|
|
6e7d01 |
+ */
|
|
|
6e7d01 |
+ smp_mb();
|
|
|
6e7d01 |
|
|
|
6e7d01 |
/* We assume there is no timeout already supplied */
|
|
|
6e7d01 |
*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
|
|
|
6e7d01 |
@@ -238,7 +245,8 @@ aio_ctx_check(GSource *source)
|
|
|
6e7d01 |
AioContext *ctx = (AioContext *) source;
|
|
|
6e7d01 |
QEMUBH *bh;
|
|
|
6e7d01 |
|
|
|
6e7d01 |
- atomic_and(&ctx->notify_me, ~1);
|
|
|
6e7d01 |
+ /* Finish computing the timeout before clearing the flag. */
|
|
|
6e7d01 |
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
|
|
|
6e7d01 |
aio_notify_accept(ctx);
|
|
|
6e7d01 |
|
|
|
6e7d01 |
for (bh = ctx->first_bh; bh; bh = bh->next) {
|
|
|
6e7d01 |
@@ -343,10 +351,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
|
|
|
6e7d01 |
void aio_notify(AioContext *ctx)
|
|
|
6e7d01 |
{
|
|
|
6e7d01 |
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
|
|
|
6e7d01 |
- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
|
|
|
6e7d01 |
+ * with smp_mb in aio_ctx_prepare or aio_poll.
|
|
|
6e7d01 |
*/
|
|
|
6e7d01 |
smp_mb();
|
|
|
6e7d01 |
- if (ctx->notify_me) {
|
|
|
6e7d01 |
+ if (atomic_read(&ctx->notify_me)) {
|
|
|
6e7d01 |
event_notifier_set(&ctx->notifier);
|
|
|
6e7d01 |
atomic_mb_set(&ctx->notified, true);
|
|
|
6e7d01 |
}
|
|
|
6e7d01 |
--
|
|
|
6e7d01 |
2.27.0
|
|
|
6e7d01 |
|