bf143f
From a5e7bb1f7a88efb5574266a76e80fd7604d19921 Mon Sep 17 00:00:00 2001
bf143f
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
bf143f
Date: Mon, 16 Jan 2023 07:49:59 -0500
bf143f
Subject: [PATCH 04/11] accel: introduce accelerator blocker API
bf143f
MIME-Version: 1.0
bf143f
Content-Type: text/plain; charset=UTF-8
bf143f
Content-Transfer-Encoding: 8bit
bf143f
bf143f
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
bf143f
RH-MergeRequest: 247: accel: introduce accelerator blocker API
bf143f
RH-Bugzilla: 2161188
bf143f
RH-Acked-by: David Hildenbrand <david@redhat.com>
bf143f
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
bf143f
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
bf143f
RH-Commit: [1/3] 9d3d7f9554974a79042c915763288cce07aef135
bf143f
bf143f
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188
bf143f
bf143f
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
bf143f
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
bf143f
Date:   Fri Nov 11 10:47:56 2022 -0500
bf143f
bf143f
    accel: introduce accelerator blocker API
bf143f
bf143f
    This API allows the accelerators to prevent vcpus from issuing
bf143f
    new ioctls while executing a critical section marked with the
bf143f
    accel_ioctl_inhibit_begin/end functions.
bf143f
bf143f
    Note that all functions submitting ioctls must mark where the
bf143f
    ioctl is being called with accel_{cpu_}ioctl_begin/end().
bf143f
bf143f
    This API requires the caller to always hold the BQL.
bf143f
    API documentation is in sysemu/accel-blocker.h
bf143f
bf143f
    Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
bf143f
    (to minimize cache line bouncing) to ensure that no new ioctls
bf143f
    run when the critical section starts, and a QemuEvent to wait
bf143f
    until all running ioctls finish.
bf143f
bf143f
    Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
bf143f
    Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
bf143f
    Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
bf143f
    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
bf143f
bf143f
Conflicts:
bf143f
	util/meson.build: files are missing in rhel 8.8.0
bf143f
	namely int128.c, memalign.c and interval-tree.c
bf143f
bf143f
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
bf143f
---
bf143f
 accel/accel-blocker.c          | 154 +++++++++++++++++++++++++++++++++
bf143f
 accel/meson.build              |   2 +-
bf143f
 hw/core/cpu-common.c           |   2 +
bf143f
 include/hw/core/cpu.h          |   3 +
bf143f
 include/sysemu/accel-blocker.h |  56 ++++++++++++
bf143f
 util/meson.build               |   2 +-
bf143f
 6 files changed, 217 insertions(+), 2 deletions(-)
bf143f
 create mode 100644 accel/accel-blocker.c
bf143f
 create mode 100644 include/sysemu/accel-blocker.h
bf143f
bf143f
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
bf143f
new file mode 100644
bf143f
index 0000000000..1e7f423462
bf143f
--- /dev/null
bf143f
+++ b/accel/accel-blocker.c
bf143f
@@ -0,0 +1,154 @@
bf143f
+/*
bf143f
+ * Lock to inhibit accelerator ioctls
bf143f
+ *
bf143f
+ * Copyright (c) 2022 Red Hat Inc.
bf143f
+ *
bf143f
+ * Author: Emanuele Giuseppe Esposito       <eesposit@redhat.com>
bf143f
+ *
bf143f
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
bf143f
+ * of this software and associated documentation files (the "Software"), to deal
bf143f
+ * in the Software without restriction, including without limitation the rights
bf143f
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
bf143f
+ * copies of the Software, and to permit persons to whom the Software is
bf143f
+ * furnished to do so, subject to the following conditions:
bf143f
+ *
bf143f
+ * The above copyright notice and this permission notice shall be included in
bf143f
+ * all copies or substantial portions of the Software.
bf143f
+ *
bf143f
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
bf143f
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
bf143f
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
bf143f
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
bf143f
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
bf143f
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
bf143f
+ * THE SOFTWARE.
bf143f
+ */
bf143f
+
bf143f
+#include "qemu/osdep.h"
bf143f
+#include "qemu/thread.h"
bf143f
+#include "qemu/main-loop.h"
bf143f
+#include "hw/core/cpu.h"
bf143f
+#include "sysemu/accel-blocker.h"
bf143f
+
bf143f
+static QemuLockCnt accel_in_ioctl_lock;
bf143f
+static QemuEvent accel_in_ioctl_event;
bf143f
+
bf143f
+void accel_blocker_init(void)
bf143f
+{
bf143f
+    qemu_lockcnt_init(&accel_in_ioctl_lock);
bf143f
+    qemu_event_init(&accel_in_ioctl_event, false);
bf143f
+}
bf143f
+
bf143f
+void accel_ioctl_begin(void)
bf143f
+{
bf143f
+    if (likely(qemu_mutex_iothread_locked())) {
bf143f
+        return;
bf143f
+    }
bf143f
+
bf143f
+    /* block if lock is taken in accel_ioctl_inhibit_begin() */
bf143f
+    qemu_lockcnt_inc(&accel_in_ioctl_lock);
bf143f
+}
bf143f
+
bf143f
+void accel_ioctl_end(void)
bf143f
+{
bf143f
+    if (likely(qemu_mutex_iothread_locked())) {
bf143f
+        return;
bf143f
+    }
bf143f
+
bf143f
+    qemu_lockcnt_dec(&accel_in_ioctl_lock);
bf143f
+    /* change event to SET. If event was BUSY, wake up all waiters */
bf143f
+    qemu_event_set(&accel_in_ioctl_event);
bf143f
+}
bf143f
+
bf143f
+void accel_cpu_ioctl_begin(CPUState *cpu)
bf143f
+{
bf143f
+    if (unlikely(qemu_mutex_iothread_locked())) {
bf143f
+        return;
bf143f
+    }
bf143f
+
bf143f
+    /* block if lock is taken in accel_ioctl_inhibit_begin() */
bf143f
+    qemu_lockcnt_inc(&cpu->in_ioctl_lock);
bf143f
+}
bf143f
+
bf143f
+void accel_cpu_ioctl_end(CPUState *cpu)
bf143f
+{
bf143f
+    if (unlikely(qemu_mutex_iothread_locked())) {
bf143f
+        return;
bf143f
+    }
bf143f
+
bf143f
+    qemu_lockcnt_dec(&cpu->in_ioctl_lock);
bf143f
+    /* change event to SET. If event was BUSY, wake up all waiters */
bf143f
+    qemu_event_set(&accel_in_ioctl_event);
bf143f
+}
bf143f
+
bf143f
+static bool accel_has_to_wait(void)
bf143f
+{
bf143f
+    CPUState *cpu;
bf143f
+    bool needs_to_wait = false;
bf143f
+
bf143f
+    CPU_FOREACH(cpu) {
bf143f
+        if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
bf143f
+            /* exit the ioctl, if vcpu is running it */
bf143f
+            qemu_cpu_kick(cpu);
bf143f
+            needs_to_wait = true;
bf143f
+        }
bf143f
+    }
bf143f
+
bf143f
+    return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
bf143f
+}
bf143f
+
bf143f
+void accel_ioctl_inhibit_begin(void)
bf143f
+{
bf143f
+    CPUState *cpu;
bf143f
+
bf143f
+    /*
bf143f
+     * Inhibiting is allowed only when holding the BQL, so we can identify
bf143f
+     * when an inhibitor wants to issue an ioctl easily.
bf143f
+     */
bf143f
+    g_assert(qemu_mutex_iothread_locked());
bf143f
+
bf143f
+    /* Block further invocations of the ioctls outside the BQL.  */
bf143f
+    CPU_FOREACH(cpu) {
bf143f
+        qemu_lockcnt_lock(&cpu->in_ioctl_lock);
bf143f
+    }
bf143f
+    qemu_lockcnt_lock(&accel_in_ioctl_lock);
bf143f
+
bf143f
+    /* Keep waiting while there are running ioctls */
bf143f
+    while (true) {
bf143f
+
bf143f
+        /* Reset event to FREE. */
bf143f
+        qemu_event_reset(&accel_in_ioctl_event);
bf143f
+
bf143f
+        if (accel_has_to_wait()) {
bf143f
+            /*
bf143f
+             * If event is still FREE, and there are ioctls still in progress,
bf143f
+             * wait.
bf143f
+             *
bf143f
+             * If an ioctl finishes before qemu_event_wait(), it will change
bf143f
+             * the event state to SET. This will prevent qemu_event_wait() from
bf143f
+             * blocking, but it's not a problem because if other ioctls are
bf143f
+             * still running the loop will iterate once more and reset the event
bf143f
+             * status to FREE so that it can wait properly.
bf143f
+             *
bf143f
+             * If an ioctl finishes while qemu_event_wait() is blocking, then
bf143f
+             * it will be woken up, but also here the while loop makes sure
bf143f
+             * to re-enter the wait if there are other running ioctls.
bf143f
+             */
bf143f
+            qemu_event_wait(&accel_in_ioctl_event);
bf143f
+        } else {
bf143f
+            /* No ioctl is running */
bf143f
+            return;
bf143f
+        }
bf143f
+    }
bf143f
+}
bf143f
+
bf143f
+void accel_ioctl_inhibit_end(void)
bf143f
+{
bf143f
+    CPUState *cpu;
bf143f
+
bf143f
+    qemu_lockcnt_unlock(&accel_in_ioctl_lock);
bf143f
+    CPU_FOREACH(cpu) {
bf143f
+        qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
bf143f
+    }
bf143f
+}
bf143f
+
bf143f
diff --git a/accel/meson.build b/accel/meson.build
bf143f
index dfd808d2c8..801b4d44e8 100644
bf143f
--- a/accel/meson.build
bf143f
+++ b/accel/meson.build
bf143f
@@ -1,4 +1,4 @@
bf143f
-specific_ss.add(files('accel-common.c'))
bf143f
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
bf143f
 softmmu_ss.add(files('accel-softmmu.c'))
bf143f
 user_ss.add(files('accel-user.c'))
bf143f
 
bf143f
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
bf143f
index 9e3241b430..b6e83acf0a 100644
bf143f
--- a/hw/core/cpu-common.c
bf143f
+++ b/hw/core/cpu-common.c
bf143f
@@ -238,6 +238,7 @@ static void cpu_common_initfn(Object *obj)
bf143f
     cpu->nr_threads = 1;
bf143f
 
bf143f
     qemu_mutex_init(&cpu->work_mutex);
bf143f
+    qemu_lockcnt_init(&cpu->in_ioctl_lock);
bf143f
     QSIMPLEQ_INIT(&cpu->work_list);
bf143f
     QTAILQ_INIT(&cpu->breakpoints);
bf143f
     QTAILQ_INIT(&cpu->watchpoints);
bf143f
@@ -249,6 +250,7 @@ static void cpu_common_finalize(Object *obj)
bf143f
 {
bf143f
     CPUState *cpu = CPU(obj);
bf143f
 
bf143f
+    qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
bf143f
     qemu_mutex_destroy(&cpu->work_mutex);
bf143f
 }
bf143f
 
bf143f
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
bf143f
index e948e81f1a..49d9c73f97 100644
bf143f
--- a/include/hw/core/cpu.h
bf143f
+++ b/include/hw/core/cpu.h
bf143f
@@ -383,6 +383,9 @@ struct CPUState {
bf143f
     uint32_t kvm_fetch_index;
bf143f
     uint64_t dirty_pages;
bf143f
 
bf143f
+    /* Used by accel-blocker: CPU is executing an ioctl() */
bf143f
+    QemuLockCnt in_ioctl_lock;
bf143f
+
bf143f
     /* Used for events with 'vcpu' and *without* the 'disabled' properties */
bf143f
     DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
bf143f
     DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
bf143f
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
bf143f
new file mode 100644
bf143f
index 0000000000..72020529ef
bf143f
--- /dev/null
bf143f
+++ b/include/sysemu/accel-blocker.h
bf143f
@@ -0,0 +1,56 @@
bf143f
+/*
bf143f
+ * Accelerator blocking API, to prevent new ioctls from starting and wait for
bf143f
+ * the running ones to finish.
bf143f
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
bf143f
+ * release the BQL.
bf143f
+ *
bf143f
+ *  Copyright (c) 2022 Red Hat Inc.
bf143f
+ *
bf143f
+ * Author: Emanuele Giuseppe Esposito       <eesposit@redhat.com>
bf143f
+ *
bf143f
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
bf143f
+ * See the COPYING file in the top-level directory.
bf143f
+ */
bf143f
+#ifndef ACCEL_BLOCKER_H
bf143f
+#define ACCEL_BLOCKER_H
bf143f
+
bf143f
+#include "qemu/osdep.h"
bf143f
+#include "sysemu/cpus.h"
bf143f
+
bf143f
+extern void accel_blocker_init(void);
bf143f
+
bf143f
+/*
bf143f
+ * accel_{cpu_}ioctl_begin/end:
bf143f
+ * Mark when ioctl is about to run or just finished.
bf143f
+ *
bf143f
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
bf143f
+ * called, preventing new ioctls from running. They will continue only after
bf143f
+ * accel_ioctl_inhibit_end().
bf143f
+ */
bf143f
+extern void accel_ioctl_begin(void);
bf143f
+extern void accel_ioctl_end(void);
bf143f
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
bf143f
+extern void accel_cpu_ioctl_end(CPUState *cpu);
bf143f
+
bf143f
+/*
bf143f
+ * accel_ioctl_inhibit_begin: start critical section
bf143f
+ *
bf143f
+ * This function makes sure that:
bf143f
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
bf143f
+ * 2) all ioctls that were already running have reached
bf143f
+ *    accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
bf143f
+ *
bf143f
+ * This allows the caller to access shared data or perform operations without
bf143f
+ * worrying about concurrent vcpu accesses.
bf143f
+ */
bf143f
+extern void accel_ioctl_inhibit_begin(void);
bf143f
+
bf143f
+/*
bf143f
+ * accel_ioctl_inhibit_end: end critical section started by
bf143f
+ * accel_ioctl_inhibit_begin()
bf143f
+ *
bf143f
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
bf143f
+ */
bf143f
+extern void accel_ioctl_inhibit_end(void);
bf143f
+
bf143f
+#endif /* ACCEL_BLOCKER_H */
bf143f
diff --git a/util/meson.build b/util/meson.build
bf143f
index 05b593055a..b5f153b0e8 100644
bf143f
--- a/util/meson.build
bf143f
+++ b/util/meson.build
bf143f
@@ -48,6 +48,7 @@ util_ss.add(files('transactions.c'))
bf143f
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
bf143f
 util_ss.add(files('guest-random.c'))
bf143f
 util_ss.add(files('yank.c'))
bf143f
+util_ss.add(files('lockcnt.c'))
bf143f
 
bf143f
 if have_user
bf143f
   util_ss.add(files('selfmap.c'))
bf143f
@@ -69,7 +70,6 @@ if have_block
bf143f
   util_ss.add(files('hexdump.c'))
bf143f
   util_ss.add(files('iova-tree.c'))
bf143f
   util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c'))
bf143f
-  util_ss.add(files('lockcnt.c'))
bf143f
   util_ss.add(files('main-loop.c'))
bf143f
   util_ss.add(files('nvdimm-utils.c'))
bf143f
   util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
bf143f
-- 
bf143f
2.37.3
bf143f