Blame SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch

97168e
From 31e9e3691789469b93a75d0221387bab3e526094 Mon Sep 17 00:00:00 2001
97168e
From: Stefan Hajnoczi <stefanha@redhat.com>
97168e
Date: Tue, 21 Feb 2023 16:22:18 -0500
97168e
Subject: [PATCH 13/13] virtio-scsi: reset SCSI devices from main loop thread
97168e
97168e
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
97168e
RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock
97168e
RH-Bugzilla: 2090990
97168e
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
97168e
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
97168e
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
97168e
RH-Commit: [3/3] 30d7c2bd868efa6694992e75ace22fb48aef161b
97168e
97168e
When an IOThread is configured, the ctrl virtqueue is processed in the
97168e
IOThread. TMFs that reset SCSI devices are currently called directly
97168e
from the IOThread and trigger an assertion failure in blk_drain() from
97168e
the following call stack:
97168e
97168e
virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset
97168e
-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain
97168e
97168e
  ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed.
97168e
97168e
The blk_drain() function is not designed to be called from an IOThread
97168e
because it needs the Big QEMU Lock (BQL).
97168e
97168e
This patch defers TMFs that reset SCSI devices to a Bottom Half (BH)
97168e
that runs in the main loop thread under the BQL. This way it's safe to
97168e
call blk_drain() and the assertion failure is avoided.
97168e
97168e
Introduce s->tmf_bh_list for tracking TMF requests that have been
97168e
deferred to the BH. When the BH runs it will grab the entire list and
97168e
process all requests. Care must be taken to clear the list when the
97168e
virtio-scsi device is reset or unrealized. Otherwise deferred TMF
97168e
requests could execute later and lead to use-after-free or other
97168e
undefined behavior.
97168e
97168e
The s->resetting counter that's used by TMFs that reset SCSI devices is
97168e
accessed from multiple threads. This patch makes that explicit by using
97168e
atomic accessor functions. With this patch applied the counter is only
97168e
modified by the main loop thread under the BQL but can be read by any
97168e
thread.
97168e
97168e
Reported-by: Qing Wang <qinwang@redhat.com>
97168e
Cc: Paolo Bonzini <pbonzini@redhat.com>
97168e
Reviewed-by: Eric Blake <eblake@redhat.com>
97168e
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
97168e
Message-Id: <20230221212218.1378734-4-stefanha@redhat.com>
97168e
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
97168e
(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5)
97168e
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
97168e
97168e
Conflicts:
97168e
- hw/scsi/virtio-scsi.c
97168e
  - VirtIOSCSIReq is defined in include/hw/virtio/virtio-scsi.h
97168e
    downstream instead of hw/scsi/virtio-scsi.c because commit
97168e
    3dc584abeef0 ("virtio-scsi: move request-related items from .h to
97168e
    .c") is missing. Update the struct fields in virtio-scsi.h
97168e
    downstream.
97168e
97168e
  - Use qbus_reset_all() downstream instead of bus_cold_reset() because
97168e
    commit 4a5fc890b1d3 ("scsi: Use device_cold_reset() and
97168e
    bus_cold_reset()") is missing.
97168e
97168e
  - Drop GLOBAL_STATE_CODE() because these macros don't exist
97168e
    downstream. They are assertions/documentation and can be removed
97168e
    without affecting the code.
97168e
---
97168e
 hw/scsi/virtio-scsi.c           | 155 +++++++++++++++++++++++++-------
97168e
 include/hw/virtio/virtio-scsi.h |  21 +++--
97168e
 2 files changed, 139 insertions(+), 37 deletions(-)
97168e
97168e
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
97168e
index a35257c35a..ef19a9bcd0 100644
97168e
--- a/hw/scsi/virtio-scsi.c
97168e
+++ b/hw/scsi/virtio-scsi.c
97168e
@@ -256,6 +256,118 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
97168e
     }
97168e
 }
97168e
 
97168e
+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
97168e
+{
97168e
+    VirtIOSCSI *s = req->dev;
97168e
+    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
97168e
+    BusChild *kid;
97168e
+    int target;
97168e
+
97168e
+    switch (req->req.tmf.subtype) {
97168e
+    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
97168e
+        if (!d) {
97168e
+            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
97168e
+            goto out;
97168e
+        }
97168e
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
97168e
+            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
97168e
+            goto out;
97168e
+        }
97168e
+        qatomic_inc(&s->resetting);
97168e
+        qdev_reset_all(&d->qdev);
97168e
+        qatomic_dec(&s->resetting);
97168e
+        break;
97168e
+
97168e
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
97168e
+        target = req->req.tmf.lun[1];
97168e
+        qatomic_inc(&s->resetting);
97168e
+
97168e
+        rcu_read_lock();
97168e
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
97168e
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
97168e
+            if (d1->channel == 0 && d1->id == target) {
97168e
+                qdev_reset_all(&d1->qdev);
97168e
+            }
97168e
+        }
97168e
+        rcu_read_unlock();
97168e
+
97168e
+        qatomic_dec(&s->resetting);
97168e
+        break;
97168e
+
97168e
+    default:
97168e
+        g_assert_not_reached();
97168e
+        break;
97168e
+    }
97168e
+
97168e
+out:
97168e
+    object_unref(OBJECT(d));
97168e
+
97168e
+    virtio_scsi_acquire(s);
97168e
+    virtio_scsi_complete_req(req);
97168e
+    virtio_scsi_release(s);
97168e
+}
97168e
+
97168e
+/* Some TMFs must be processed from the main loop thread */
97168e
+static void virtio_scsi_do_tmf_bh(void *opaque)
97168e
+{
97168e
+    VirtIOSCSI *s = opaque;
97168e
+    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
97168e
+    VirtIOSCSIReq *req;
97168e
+    VirtIOSCSIReq *tmp;
97168e
+
97168e
+    virtio_scsi_acquire(s);
97168e
+
97168e
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
97168e
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
97168e
+        QTAILQ_INSERT_TAIL(&reqs, req, next);
97168e
+    }
97168e
+
97168e
+    qemu_bh_delete(s->tmf_bh);
97168e
+    s->tmf_bh = NULL;
97168e
+
97168e
+    virtio_scsi_release(s);
97168e
+
97168e
+    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
97168e
+        QTAILQ_REMOVE(&reqs, req, next);
97168e
+        virtio_scsi_do_one_tmf_bh(req);
97168e
+    }
97168e
+}
97168e
+
97168e
+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
97168e
+{
97168e
+    VirtIOSCSIReq *req;
97168e
+    VirtIOSCSIReq *tmp;
97168e
+
97168e
+    virtio_scsi_acquire(s);
97168e
+
97168e
+    if (s->tmf_bh) {
97168e
+        qemu_bh_delete(s->tmf_bh);
97168e
+        s->tmf_bh = NULL;
97168e
+    }
97168e
+
97168e
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
97168e
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
97168e
+
97168e
+        /* SAM-6 6.3.2 Hard reset */
97168e
+        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
97168e
+        virtio_scsi_complete_req(req);
97168e
+    }
97168e
+
97168e
+    virtio_scsi_release(s);
97168e
+}
97168e
+
97168e
+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
97168e
+{
97168e
+    VirtIOSCSI *s = req->dev;
97168e
+
97168e
+    QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
97168e
+
97168e
+    if (!s->tmf_bh) {
97168e
+        s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
97168e
+        qemu_bh_schedule(s->tmf_bh);
97168e
+    }
97168e
+}
97168e
+
97168e
 /* Return 0 if the request is ready to be completed and return to guest;
97168e
  * -EINPROGRESS if the request is submitted and will be completed later, in the
97168e
  *  case of async cancellation. */
97168e
@@ -263,8 +375,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
97168e
 {
97168e
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
97168e
     SCSIRequest *r, *next;
97168e
-    BusChild *kid;
97168e
-    int target;
97168e
     int ret = 0;
97168e
 
97168e
     virtio_scsi_ctx_check(s, d);
97168e
@@ -321,15 +431,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
97168e
         break;
97168e
 
97168e
     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
97168e
-        if (!d) {
97168e
-            goto fail;
97168e
-        }
97168e
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
97168e
-            goto incorrect_lun;
97168e
-        }
97168e
-        s->resetting++;
97168e
-        qdev_reset_all(&d->qdev);
97168e
-        s->resetting--;
97168e
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
97168e
+        virtio_scsi_defer_tmf_to_bh(req);
97168e
+        ret = -EINPROGRESS;
97168e
         break;
97168e
 
97168e
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
97168e
@@ -372,22 +476,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
97168e
         }
97168e
         break;
97168e
 
97168e
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
97168e
-        target = req->req.tmf.lun[1];
97168e
-        s->resetting++;
97168e
-
97168e
-        rcu_read_lock();
97168e
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
97168e
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
97168e
-            if (d1->channel == 0 && d1->id == target) {
97168e
-                qdev_reset_all(&d1->qdev);
97168e
-            }
97168e
-        }
97168e
-        rcu_read_unlock();
97168e
-
97168e
-        s->resetting--;
97168e
-        break;
97168e
-
97168e
     case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
97168e
     default:
97168e
         req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
97168e
@@ -603,7 +691,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r)
97168e
     if (!req) {
97168e
         return;
97168e
     }
97168e
-    if (req->dev->resetting) {
97168e
+    if (qatomic_read(&req->dev->resetting)) {
97168e
         req->resp.cmd.response = VIRTIO_SCSI_S_RESET;
97168e
     } else {
97168e
         req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED;
97168e
@@ -784,9 +872,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
97168e
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
97168e
 
97168e
     assert(!s->dataplane_started);
97168e
-    s->resetting++;
97168e
+
97168e
+    virtio_scsi_reset_tmf_bh(s);
97168e
+
97168e
+    qatomic_inc(&s->resetting);
97168e
     qbus_reset_all(BUS(&s->bus));
97168e
-    s->resetting--;
97168e
+    qatomic_dec(&s->resetting);
97168e
 
97168e
     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
97168e
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
97168e
@@ -1018,6 +1109,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
97168e
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
97168e
     Error *err = NULL;
97168e
 
97168e
+    QTAILQ_INIT(&s->tmf_bh_list);
97168e
+
97168e
     virtio_scsi_common_realize(dev,
97168e
                                virtio_scsi_handle_ctrl,
97168e
                                virtio_scsi_handle_event,
97168e
@@ -1055,6 +1148,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
97168e
 {
97168e
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
97168e
 
97168e
+    virtio_scsi_reset_tmf_bh(s);
97168e
+
97168e
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
97168e
     virtio_scsi_common_unrealize(dev);
97168e
 }
97168e
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
97168e
index 543681bc18..b0e36f25aa 100644
97168e
--- a/include/hw/virtio/virtio-scsi.h
97168e
+++ b/include/hw/virtio/virtio-scsi.h
97168e
@@ -77,13 +77,22 @@ struct VirtIOSCSICommon {
97168e
     VirtQueue **cmd_vqs;
97168e
 };
97168e
 
97168e
+struct VirtIOSCSIReq;
97168e
+
97168e
 struct VirtIOSCSI {
97168e
     VirtIOSCSICommon parent_obj;
97168e
 
97168e
     SCSIBus bus;
97168e
-    int resetting;
97168e
+    int resetting; /* written from main loop thread, read from any thread */
97168e
     bool events_dropped;
97168e
 
97168e
+    /*
97168e
+     * TMFs deferred to main loop BH. These fields are protected by
97168e
+     * virtio_scsi_acquire().
97168e
+     */
97168e
+    QEMUBH *tmf_bh;
97168e
+    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
97168e
+
97168e
     /* Fields for dataplane below */
97168e
     AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
97168e
 
97168e
@@ -106,13 +115,11 @@ typedef struct VirtIOSCSIReq {
97168e
     QEMUSGList qsgl;
97168e
     QEMUIOVector resp_iov;
97168e
 
97168e
-    union {
97168e
-        /* Used for two-stage request submission */
97168e
-        QTAILQ_ENTRY(VirtIOSCSIReq) next;
97168e
+    /* Used for two-stage request submission and TMFs deferred to BH */
97168e
+    QTAILQ_ENTRY(VirtIOSCSIReq) next;
97168e
 
97168e
-        /* Used for cancellation of request during TMFs */
97168e
-        int remaining;
97168e
-    };
97168e
+    /* Used for cancellation of request during TMFs */
97168e
+    int remaining;
97168e
 
97168e
     SCSIRequest *sreq;
97168e
     size_t resp_size;
97168e
-- 
97168e
2.37.3
97168e