|
|
ae23c9 |
From 24c1b53802b14ce45767d17b6dec88a917d24a70 Mon Sep 17 00:00:00 2001
|
|
|
ae23c9 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
Date: Wed, 10 Oct 2018 20:21:52 +0100
|
|
|
ae23c9 |
Subject: [PATCH 26/49] block/linux-aio: acquire AioContext before
|
|
|
ae23c9 |
qemu_laio_process_completions
|
|
|
ae23c9 |
|
|
|
ae23c9 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
Message-id: <20181010202213.7372-14-kwolf@redhat.com>
|
|
|
ae23c9 |
Patchwork-id: 82603
|
|
|
ae23c9 |
O-Subject: [RHEL-8 qemu-kvm PATCH 23/44] block/linux-aio: acquire AioContext before qemu_laio_process_completions
|
|
|
ae23c9 |
Bugzilla: 1637976
|
|
|
ae23c9 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
|
ae23c9 |
|
|
|
ae23c9 |
From: Sergio Lopez <slp@redhat.com>
|
|
|
ae23c9 |
|
|
|
ae23c9 |
In qemu_laio_process_completions_and_submit, the AioContext is acquired
|
|
|
ae23c9 |
before the ioq_submit iteration and after qemu_laio_process_completions,
|
|
|
ae23c9 |
but the latter is not thread-safe either.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
This change avoids a number of random crashes when the Main Thread and
|
|
|
ae23c9 |
an IO Thread collide processing completions for the same AioContext.
|
|
|
ae23c9 |
This is an example of such a crash:
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- The IO Thread is trying to acquire the AioContext at aio_co_enter,
|
|
|
ae23c9 |
which shows that it had not locked it beforehand:
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Thread 3 (Thread 0x7fdfd8bd8700 (LWP 36743)):
|
|
|
ae23c9 |
#0 0x00007fdfe0dd542d in __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
|
|
|
ae23c9 |
#1 0x00007fdfe0dd0de6 in _L_lock_870 () at /lib64/libpthread.so.0
|
|
|
ae23c9 |
#2 0x00007fdfe0dd0cdf in __GI___pthread_mutex_lock (mutex=mutex@entry=0x5631fde0e6c0)
|
|
|
ae23c9 |
at ../nptl/pthread_mutex_lock.c:114
|
|
|
ae23c9 |
#3 0x00005631fc0603a7 in qemu_mutex_lock_impl (mutex=0x5631fde0e6c0, file=0x5631fc23520f "util/async.c", line=511) at util/qemu-thread-posix.c:66
|
|
|
ae23c9 |
#4 0x00005631fc05b558 in aio_co_enter (ctx=0x5631fde0e660, co=0x7fdfcc0c2b40) at util/async.c:493
|
|
|
ae23c9 |
#5 0x00005631fc05b5ac in aio_co_wake (co=<optimized out>) at util/async.c:478
|
|
|
ae23c9 |
#6 0x00005631fbfc51ad in qemu_laio_process_completion (laiocb=<optimized out>) at block/linux-aio.c:104
|
|
|
ae23c9 |
#7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
|
|
|
ae23c9 |
at block/linux-aio.c:222
|
|
|
ae23c9 |
#8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670)
|
|
|
ae23c9 |
at block/linux-aio.c:237
|
|
|
ae23c9 |
#9 0x00005631fc05d978 in aio_dispatch_handlers (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:406
|
|
|
ae23c9 |
#10 0x00005631fc05e3ea in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=true)
|
|
|
ae23c9 |
at util/aio-posix.c:693
|
|
|
ae23c9 |
#11 0x00005631fbd7ad96 in iothread_run (opaque=0x5631fde0e1c0) at iothread.c:64
|
|
|
ae23c9 |
#12 0x00007fdfe0dcee25 in start_thread (arg=0x7fdfd8bd8700) at pthread_create.c:308
|
|
|
ae23c9 |
#13 0x00007fdfe0afc34d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- The Main Thread is also processing completions from the same
|
|
|
ae23c9 |
AioContext, and crashes due to a failed assertion at util/iov.c:78:
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Thread 1 (Thread 0x7fdfeb5eac80 (LWP 36740)):
|
|
|
ae23c9 |
#0 0x00007fdfe0a391f7 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
|
|
|
ae23c9 |
#1 0x00007fdfe0a3a8e8 in __GI_abort () at abort.c:90
|
|
|
ae23c9 |
#2 0x00007fdfe0a32266 in __assert_fail_base (fmt=0x7fdfe0b84e68 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset")
|
|
|
ae23c9 |
at assert.c:92
|
|
|
ae23c9 |
#3 0x00007fdfe0a32312 in __GI___assert_fail (assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") at assert.c:101
|
|
|
ae23c9 |
#4 0x00005631fc065287 in iov_memset (iov=<optimized out>, iov_cnt=<optimized out>, offset=<optimized out>, offset@entry=65536, fillc=fillc@entry=0, bytes=15515191315812405248) at util/iov.c:78
|
|
|
ae23c9 |
#5 0x00005631fc065a63 in qemu_iovec_memset (qiov=<optimized out>, offset=offset@entry=65536, fillc=fillc@entry=0, bytes=<optimized out>) at util/iov.c:410
|
|
|
ae23c9 |
#6 0x00005631fbfc5178 in qemu_laio_process_completion (laiocb=0x7fdd920df630) at block/linux-aio.c:88
|
|
|
ae23c9 |
#7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
|
|
|
ae23c9 |
at block/linux-aio.c:222
|
|
|
ae23c9 |
#8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670)
|
|
|
ae23c9 |
at block/linux-aio.c:237
|
|
|
ae23c9 |
#9 0x00005631fbfc54ed in qemu_laio_poll_cb (opaque=<optimized out>) at block/linux-aio.c:272
|
|
|
ae23c9 |
#10 0x00005631fc05d85e in run_poll_handlers_once (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:497
|
|
|
ae23c9 |
#11 0x00005631fc05e2ca in aio_poll (blocking=false, ctx=0x5631fde0e660) at util/aio-posix.c:574
|
|
|
ae23c9 |
#12 0x00005631fc05e2ca in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=false)
|
|
|
ae23c9 |
at util/aio-posix.c:604
|
|
|
ae23c9 |
#13 0x00005631fbfcb8a3 in bdrv_do_drained_begin (ignore_parent=<optimized out>, recursive=<optimized out>, bs=<optimized out>) at block/io.c:273
|
|
|
ae23c9 |
#14 0x00005631fbfcb8a3 in bdrv_do_drained_begin (bs=0x5631fe8b6200, recursive=<optimized out>, parent=0x0, ignore_bds_parents=<optimized out>, poll=<optimized out>) at block/io.c:390
|
|
|
ae23c9 |
#15 0x00005631fbfbcd2e in blk_drain (blk=0x5631fe83ac80) at block/block-backend.c:1590
|
|
|
ae23c9 |
#16 0x00005631fbfbe138 in blk_remove_bs (blk=blk@entry=0x5631fe83ac80) at block/block-backend.c:774
|
|
|
ae23c9 |
#17 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:401
|
|
|
ae23c9 |
#18 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:449
|
|
|
ae23c9 |
#19 0x00005631fbfc9a69 in commit_complete (job=0x5631fe8b94b0, opaque=0x7fdfcc1bb080)
|
|
|
ae23c9 |
at block/commit.c:92
|
|
|
ae23c9 |
#20 0x00005631fbf7d662 in job_defer_to_main_loop_bh (opaque=0x7fdfcc1b4560) at job.c:973
|
|
|
ae23c9 |
#21 0x00005631fc05ad41 in aio_bh_poll (bh=0x7fdfcc01ad90) at util/async.c:90
|
|
|
ae23c9 |
#22 0x00005631fc05ad41 in aio_bh_poll (ctx=ctx@entry=0x5631fddffdb0) at util/async.c:118
|
|
|
ae23c9 |
#23 0x00005631fc05e210 in aio_dispatch (ctx=0x5631fddffdb0) at util/aio-posix.c:436
|
|
|
ae23c9 |
#24 0x00005631fc05ac1e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:261
|
|
|
ae23c9 |
#25 0x00007fdfeaae44c9 in g_main_context_dispatch (context=0x5631fde00140) at gmain.c:3201
|
|
|
ae23c9 |
#26 0x00007fdfeaae44c9 in g_main_context_dispatch (context=context@entry=0x5631fde00140) at gmain.c:3854
|
|
|
ae23c9 |
#27 0x00005631fc05d503 in main_loop_wait () at util/main-loop.c:215
|
|
|
ae23c9 |
#28 0x00005631fc05d503 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:238
|
|
|
ae23c9 |
#29 0x00005631fc05d503 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
|
|
|
ae23c9 |
#30 0x00005631fbd81412 in main_loop () at vl.c:1866
|
|
|
ae23c9 |
#31 0x00005631fbc18ff3 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>)
|
|
|
ae23c9 |
at vl.c:4647
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- A closer examination shows that s->io_q.in_flight appears to have
|
|
|
ae23c9 |
gone backwards:
|
|
|
ae23c9 |
|
|
|
ae23c9 |
(gdb) frame 7
|
|
|
ae23c9 |
#7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
|
|
|
ae23c9 |
at block/linux-aio.c:222
|
|
|
ae23c9 |
222 qemu_laio_process_completion(laiocb);
|
|
|
ae23c9 |
(gdb) p s
|
|
|
ae23c9 |
$2 = (LinuxAioState *) 0x7fdfc0297670
|
|
|
ae23c9 |
(gdb) p *s
|
|
|
ae23c9 |
$3 = {aio_context = 0x5631fde0e660, ctx = 0x7fdfeb43b000, e = {rfd = 33, wfd = 33}, io_q = {plugged = 0,
|
|
|
ae23c9 |
in_queue = 0, in_flight = 4294967280, blocked = false, pending = {sqh_first = 0x0,
|
|
|
ae23c9 |
sqh_last = 0x7fdfc0297698}}, completion_bh = 0x7fdfc0280ef0, event_idx = 21, event_max = 241}
|
|
|
ae23c9 |
(gdb) p/x s->io_q.in_flight
|
|
|
ae23c9 |
$4 = 0xfffffff0
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Signed-off-by: Sergio Lopez <slp@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
(cherry picked from commit e091f0e905a4481f347913420f327d427f18d9d4)
|
|
|
ae23c9 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
ae23c9 |
---
|
|
|
ae23c9 |
block/linux-aio.c | 2 +-
|
|
|
ae23c9 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
diff --git a/block/linux-aio.c b/block/linux-aio.c
|
|
|
ae23c9 |
index 88b8d55..abd8886 100644
|
|
|
ae23c9 |
--- a/block/linux-aio.c
|
|
|
ae23c9 |
+++ b/block/linux-aio.c
|
|
|
ae23c9 |
@@ -233,9 +233,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
+ aio_context_acquire(s->aio_context);
|
|
|
ae23c9 |
qemu_laio_process_completions(s);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- aio_context_acquire(s->aio_context);
|
|
|
ae23c9 |
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
|
|
|
ae23c9 |
ioq_submit(s);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
--
|
|
|
ae23c9 |
1.8.3.1
|
|
|
ae23c9 |
|