|
|
26ba25 |
From f29b1e17713739baf416b64eeee9549f07717ea8 Mon Sep 17 00:00:00 2001
|
|
|
26ba25 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
26ba25 |
Date: Wed, 10 Oct 2018 20:21:53 +0100
|
|
|
26ba25 |
Subject: [PATCH 27/49] util/async: use qemu_aio_coroutine_enter in
|
|
|
26ba25 |
co_schedule_bh_cb
|
|
|
26ba25 |
|
|
|
26ba25 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
26ba25 |
Message-id: <20181010202213.7372-15-kwolf@redhat.com>
|
|
|
26ba25 |
Patchwork-id: 82604
|
|
|
26ba25 |
O-Subject: [RHEL-8 qemu-kvm PATCH 24/44] util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb
|
|
|
26ba25 |
Bugzilla: 1637976
|
|
|
26ba25 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
|
26ba25 |
|
|
|
26ba25 |
From: Sergio Lopez <slp@redhat.com>
|
|
|
26ba25 |
|
|
|
26ba25 |
AIO Coroutines shouldn't be managed by an AioContext different than the
|
|
|
26ba25 |
one assigned when they are created. aio_co_enter avoids entering a
|
|
|
26ba25 |
coroutine from a different AioContext, calling aio_co_schedule instead.
|
|
|
26ba25 |
|
|
|
26ba25 |
Scheduled coroutines are then entered by co_schedule_bh_cb using
|
|
|
26ba25 |
qemu_coroutine_enter, which just calls qemu_aio_coroutine_enter with the
|
|
|
26ba25 |
current AioContext obtained with qemu_get_current_aio_context.
|
|
|
26ba25 |
Eventually, co->ctx will be set to the AioContext passed as an argument
|
|
|
26ba25 |
to qemu_aio_coroutine_enter.
|
|
|
26ba25 |
|
|
|
26ba25 |
This means that, if an IO Thread's AioContext is being processed by the
|
|
|
26ba25 |
Main Thread (due to aio_poll being called with a BDS AioContext, as it
|
|
|
26ba25 |
happens in AIO_WAIT_WHILE among other places), the AioContext from some
|
|
|
26ba25 |
coroutines may be wrongly replaced with the one from the Main Thread.
|
|
|
26ba25 |
|
|
|
26ba25 |
This is the root cause behind some crashes, mainly triggered by the
|
|
|
26ba25 |
drain code at block/io.c. The most common are these abort and failed
|
|
|
26ba25 |
assertion:
|
|
|
26ba25 |
|
|
|
26ba25 |
util/async.c:aio_co_schedule
|
|
|
26ba25 |
456 if (scheduled) {
|
|
|
26ba25 |
457 fprintf(stderr,
|
|
|
26ba25 |
458 "%s: Co-routine was already scheduled in '%s'\n",
|
|
|
26ba25 |
459 __func__, scheduled);
|
|
|
26ba25 |
460 abort();
|
|
|
26ba25 |
461 }
|
|
|
26ba25 |
|
|
|
26ba25 |
util/qemu-coroutine-lock.c:
|
|
|
26ba25 |
286 assert(mutex->holder == self);
|
|
|
26ba25 |
|
|
|
26ba25 |
But it's also known to cause random errors at different locations, and
|
|
|
26ba25 |
even SIGSEGV with broken coroutine backtraces.
|
|
|
26ba25 |
|
|
|
26ba25 |
By using qemu_aio_coroutine_enter directly in co_schedule_bh_cb, we can
|
|
|
26ba25 |
pass the correct AioContext as an argument, making sure co->ctx is not
|
|
|
26ba25 |
wrongly altered.
|
|
|
26ba25 |
|
|
|
26ba25 |
Signed-off-by: Sergio Lopez <slp@redhat.com>
|
|
|
26ba25 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
26ba25 |
(cherry picked from commit 6808ae0417131f8dbe7b051256dff7a16634dc1d)
|
|
|
26ba25 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
26ba25 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
26ba25 |
---
|
|
|
26ba25 |
util/async.c | 2 +-
|
|
|
26ba25 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
26ba25 |
|
|
|
26ba25 |
diff --git a/util/async.c b/util/async.c
|
|
|
26ba25 |
index 4dd9d95..5693191 100644
|
|
|
26ba25 |
--- a/util/async.c
|
|
|
26ba25 |
+++ b/util/async.c
|
|
|
26ba25 |
@@ -391,7 +391,7 @@ static void co_schedule_bh_cb(void *opaque)
|
|
|
26ba25 |
|
|
|
26ba25 |
/* Protected by write barrier in qemu_aio_coroutine_enter */
|
|
|
26ba25 |
atomic_set(&co->scheduled, NULL);
|
|
|
26ba25 |
- qemu_coroutine_enter(co);
|
|
|
26ba25 |
+ qemu_aio_coroutine_enter(ctx, co);
|
|
|
26ba25 |
aio_context_release(ctx);
|
|
|
26ba25 |
}
|
|
|
26ba25 |
}
|
|
|
26ba25 |
--
|
|
|
26ba25 |
1.8.3.1
|
|
|
26ba25 |
|