495e37
From 3d2d7a46713d362d2ff5137841e689593da976a3 Mon Sep 17 00:00:00 2001
495e37
From: Hanna Reitz <hreitz@redhat.com>
495e37
Date: Fri, 4 Feb 2022 12:10:10 +0100
495e37
Subject: [PATCH 6/8] iotests/281: Test lingering timers
495e37
495e37
RH-Author: Hanna Reitz <hreitz@redhat.com>
495e37
RH-MergeRequest: 74: block/nbd: Handle AioContext changes
495e37
RH-Commit: [4/6] d228ba3fcdfaab2d54dd5b023688a1c055cce2c2 (hreitz/qemu-kvm-c-9-s)
495e37
RH-Bugzilla: 2033626
495e37
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
495e37
RH-Acked-by: Eric Blake <eblake@redhat.com>
495e37
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
495e37
495e37
Prior to "block/nbd: Delete reconnect delay timer when done" and
495e37
"block/nbd: Delete open timer when done", both of those timers would
495e37
remain scheduled even after successfully (re-)connecting to the server,
495e37
and they would not even be deleted when the BDS is deleted.
495e37
495e37
This test constructs exactly this situation:
495e37
(1) Configure an @open-timeout, so the open timer is armed, and
495e37
(2) Configure a @reconnect-delay and trigger a reconnect situation
495e37
    (which succeeds immediately), so the reconnect delay timer is armed.
495e37
Then we immediately delete the BDS, and sleep for longer than the
495e37
@open-timeout and @reconnect-delay.  Prior to said patches, this caused
495e37
one (or both) of the timer CBs to access already-freed data.
495e37
495e37
Accessing freed data may or may not crash, so this test can produce
495e37
false successes, but I do not know how to show the problem in a better
495e37
or more reliable way.  If you run this test on "block/nbd: Assert there
495e37
are no timers when closed" and without the fix patches mentioned above,
495e37
you should reliably see an assertion failure.
495e37
(But all other tests that use the reconnect delay timer (264 and 277)
495e37
will fail in that configuration, too; as will nbd-reconnect-on-open,
495e37
which uses the open timer.)
495e37
495e37
Remove this test from the quick group because of the two second sleep
495e37
this patch introduces.
495e37
495e37
(I decided to put this test case into 281, because the main bug this
495e37
series addresses is in the interaction of the NBD block driver and I/O
495e37
threads, which is precisely the scope of 281.  The test case for that
495e37
other bug will also be put into the test class added here.
495e37
495e37
Also, excuse the test class's name, I couldn't come up with anything
495e37
better.  The "yield" part will make sense two patches from now.)
495e37
495e37
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
495e37
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
495e37
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
495e37
(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0)
495e37
495e37
Conflict:
495e37
- @open-timeout was introduced after the 6.2 release, and has not been
495e37
  backported.  Consequently, there is no open_timer, and we can (and
495e37
  must) drop the respective parts of the test here.
495e37
495e37
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
495e37
---
495e37
 tests/qemu-iotests/281     | 73 ++++++++++++++++++++++++++++++++++++--
495e37
 tests/qemu-iotests/281.out |  4 +--
495e37
 2 files changed, 73 insertions(+), 4 deletions(-)
495e37
495e37
diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281
495e37
index 956698083f..13c588be75 100755
495e37
--- a/tests/qemu-iotests/281
495e37
+++ b/tests/qemu-iotests/281
495e37
@@ -1,5 +1,5 @@
495e37
 #!/usr/bin/env python3
495e37
-# group: rw quick
495e37
+# group: rw
495e37
 #
495e37
 # Test cases for blockdev + IOThread interactions
495e37
 #
495e37
@@ -20,8 +20,9 @@
495e37
 #
495e37
 
495e37
 import os
495e37
+import time
495e37
 import iotests
495e37
-from iotests import qemu_img
495e37
+from iotests import qemu_img, QemuStorageDaemon
495e37
 
495e37
 image_len = 64 * 1024 * 1024
495e37
 
495e37
@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase):
495e37
         # Hangs on failure, we expect this error.
495e37
         self.assert_qmp(result, 'error/class', 'GenericError')
495e37
 
495e37
+# Test for RHBZ#2033626
495e37
+class TestYieldingAndTimers(iotests.QMPTestCase):
495e37
+    sock = os.path.join(iotests.sock_dir, 'nbd.sock')
495e37
+    qsd = None
495e37
+
495e37
+    def setUp(self):
495e37
+        self.create_nbd_export()
495e37
+
495e37
+        # Simple VM with an NBD block device connected to the NBD export
495e37
+        # provided by the QSD
495e37
+        self.vm = iotests.VM()
495e37
+        self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' +
495e37
+                             f'server.path={self.sock},export=exp,' +
495e37
+                             'reconnect-delay=1')
495e37
+
495e37
+        self.vm.launch()
495e37
+
495e37
+    def tearDown(self):
495e37
+        self.stop_nbd_export()
495e37
+        self.vm.shutdown()
495e37
+
495e37
+    def test_timers_with_blockdev_del(self):
495e37
+        # Stop and restart the NBD server, and do some I/O on the client to
495e37
+        # trigger a reconnect and start the reconnect delay timer
495e37
+        self.stop_nbd_export()
495e37
+        self.create_nbd_export()
495e37
+
495e37
+        result = self.vm.qmp('human-monitor-command',
495e37
+                             command_line='qemu-io nbd "write 0 512"')
495e37
+        self.assert_qmp(result, 'return', '')
495e37
+
495e37
+        # Reconnect is done, so the reconnect delay timer should be gone.
495e37
+        # (But there used to be a bug where it remained active, for which this
495e37
+        # is a regression test.)
495e37
+
495e37
+        # Delete the BDS to see whether the timer is gone.  If it is not,
495e37
+        # it will remain active, fire later, and then access freed data.
495e37
+        # (Or, with "block/nbd: Assert there are no timers when closed"
495e37
+        # applied, the assertion added in that patch will fail.)
495e37
+        result = self.vm.qmp('blockdev-del', node_name='nbd')
495e37
+        self.assert_qmp(result, 'return', {})
495e37
+
495e37
+        # Give the timer some time to fire (it has a timeout of 1 s).
495e37
+        # (Sleeping in an iotest may ring some alarm bells, but note that if
495e37
+        # the timing is off here, the test will just always pass.  If we kill
495e37
+        # the VM too early, then we just kill the timer before it can fire,
495e37
+        # thus not see the error, and so the test will pass.)
495e37
+        time.sleep(2)
495e37
+
495e37
+    def create_nbd_export(self):
495e37
+        assert self.qsd is None
495e37
+
495e37
+        # Simple NBD export of a null-co BDS
495e37
+        self.qsd = QemuStorageDaemon(
495e37
+            '--blockdev',
495e37
+            'null-co,node-name=null,read-zeroes=true',
495e37
+
495e37
+            '--nbd-server',
495e37
+            f'addr.type=unix,addr.path={self.sock}',
495e37
+
495e37
+            '--export',
495e37
+            'nbd,id=exp,node-name=null,name=exp,writable=true'
495e37
+        )
495e37
+
495e37
+    def stop_nbd_export(self):
495e37
+        self.qsd.stop()
495e37
+        self.qsd = None
495e37
+
495e37
 if __name__ == '__main__':
495e37
     iotests.main(supported_fmts=['qcow2'],
495e37
                  supported_protocols=['file'])
495e37
diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out
495e37
index 89968f35d7..914e3737bd 100644
495e37
--- a/tests/qemu-iotests/281.out
495e37
+++ b/tests/qemu-iotests/281.out
495e37
@@ -1,5 +1,5 @@
495e37
-....
495e37
+.....
495e37
 ----------------------------------------------------------------------
495e37
-Ran 4 tests
495e37
+Ran 5 tests
495e37
 
495e37
 OK
495e37
-- 
495e37
2.27.0
495e37