From 97168e752985380a2fcc8dd2874ac34fb41f5377 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: May 16 2023 06:05:48 +0000 Subject: import qemu-kvm-6.2.0-32.module+el8.8.0+18361+9f407f6e --- diff --git a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch new file mode 100644 index 0000000..8fad887 --- /dev/null +++ b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch @@ -0,0 +1,82 @@ +From 9bacf8c4104ff3cff2e0e2c2179ec4fda633167f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:51:08 -0500 +Subject: [PATCH 05/11] KVM: keep track of running ioctls + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 247: accel: introduce accelerator blocker API +RH-Bugzilla: 2161188 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 357508389e2a0fd996206b406e9e235e50b5f0b6 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188 + +commit a27dd2de68f37ba96fe164a42121daa5f0750afc +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:57 2022 -0500 + + KVM: keep track of running ioctls + + Using the new accel-blocker API, mark where ioctls are being called + in KVM. Next, we will implement the critical section that will take + care of performing memslots modifications atomically, therefore + preventing any new ioctl from running and allowing the running ones + to finish. + + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 8f2a53438f..221aadfda7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2337,6 +2337,7 @@ static int kvm_init(MachineState *ms) + assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size); + + s->sigmask_len = 8; ++ accel_blocker_init(); + + #ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_INIT(&s->kvm_sw_breakpoints); +@@ -3018,7 +3019,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) + va_end(ap); + + trace_kvm_vm_ioctl(type, arg); ++ accel_ioctl_begin(); + ret = ioctl(s->vmfd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +@@ -3036,7 +3039,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) + va_end(ap); + + trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); ++ accel_cpu_ioctl_begin(cpu); + ret = ioctl(cpu->kvm_fd, type, arg); ++ accel_cpu_ioctl_end(cpu); + if (ret == -1) { + ret = -errno; + } +@@ -3054,7 +3059,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
+ va_end(ap); + + trace_kvm_device_ioctl(fd, type, arg); ++ accel_ioctl_begin(); + ret = ioctl(fd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch b/SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch new file mode 100644 index 0000000..39e152b --- /dev/null +++ b/SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch @@ -0,0 +1,171 @@ +From 10fc28b61a6fba1e6dc44fd544cf31c7f313c622 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 28 Oct 2022 17:48:00 +0100 +Subject: [PATCH 05/42] Update linux headers to v6.0-rc4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [5/41] ca55f497d1bf1e72179330f8f613781bf999d898 + +Based on upstream commit d525f73f9186a5bc641b8caf0b2c9bb94e5aa963 +("Update linux headers to v6.0-rc4"), but this is focusing only on the +ZPCI and protected dump changes. + +Signed-off-by: Cédric Le Goater +--- + linux-headers/linux/kvm.h | 87 +++++++++++++++++++++++++++++++++ + linux-headers/linux/vfio_zdev.h | 7 +++ + 2 files changed, 94 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0d05d02ee4..c65930288c 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1150,6 +1150,9 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_DISABLE_QUIRKS2 213 + /* #define KVM_CAP_VM_TSC_CONTROL 214 */ + #define KVM_CAP_SYSTEM_EVENT_DATA 215 ++#define KVM_CAP_S390_PROTECTED_DUMP 217 ++#define KVM_CAP_S390_ZPCI_OP 221 ++#define KVM_CAP_S390_CPU_TOPOLOGY 222 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1651,6 +1654,55 @@ struct kvm_s390_pv_unp { + __u64 tweak; + }; + ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ + enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, +@@ -1659,6 +1711,8 @@ enum pv_cmd_id { + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, + }; + + struct kvm_pv_cmd { +@@ -2066,4 +2120,37 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_XSAVE2 */ + #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + ++/* Available with KVM_CAP_S390_PROTECTED_DUMP */ ++#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) ++ ++/* Available with KVM_CAP_S390_ZPCI_OP */ ++#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) ++ ++struct kvm_s390_zpci_op 
{ ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) ++ + #endif /* __LINUX_KVM_H */ +diff --git a/linux-headers/linux/vfio_zdev.h b/linux-headers/linux/vfio_zdev.h +index b4309397b6..77f2aff1f2 100644 +--- a/linux-headers/linux/vfio_zdev.h ++++ b/linux-headers/linux/vfio_zdev.h +@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base { + __u16 fmb_length; /* Measurement Block Length (in bytes) */ + __u8 pft; /* PCI Function Type */ + __u8 gid; /* PCI function group ID */ ++ /* End of version 1 */ ++ __u32 fh; /* PCI function handle */ ++ /* End of version 2 */ + }; + + /** +@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group { + __u16 noi; /* Maximum number of MSIs */ + __u16 maxstbl; /* Maximum Store Block Length */ + __u8 version; /* Supported PCI Version */ ++ /* End of version 1 */ ++ __u8 reserved; ++ __u16 imaxstbl; /* Maximum Interpreted Store Block Length */ ++ /* End of version 2 */ + }; + + /** +-- +2.37.3 + diff --git a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch new file mode 100644 index 0000000..7db7fd2 --- /dev/null +++ b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch @@ -0,0 +1,349 @@ +From a5e7bb1f7a88efb5574266a76e80fd7604d19921 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:49:59 -0500 +Subject: [PATCH 04/11] accel: introduce accelerator blocker API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 247: accel: introduce accelerator blocker API +RH-Bugzilla: 2161188 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 9d3d7f9554974a79042c915763288cce07aef135 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188 + +commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:56 2022 -0500 + + accel: introduce accelerator blocker API + + This API allows the accelerators to prevent vcpus from issuing + new ioctls while execting a critical section marked with the + accel_ioctl_inhibit_begin/end functions. + + Note that all functions submitting ioctls must mark where the + ioctl is being called with accel_{cpu_}ioctl_begin/end(). + + This API requires the caller to always hold the BQL. + API documentation is in sysemu/accel-blocker.h + + Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt + (to minimize cache line bouncing) to keep avoid that new ioctls + run when the critical section starts, and a QemuEvent to wait + that all running ioctls finish. 
+ + Signed-off-by: Emanuele Giuseppe Esposito + Reviewed-by: Philippe Mathieu-Daudé + Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Conflicts: + util/meson.build: files are missing in rhel 8.8.0 + namely int128.c, memalign.c and interval-tree.c + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ + accel/meson.build | 2 +- + hw/core/cpu-common.c | 2 + + include/hw/core/cpu.h | 3 + + include/sysemu/accel-blocker.h | 56 ++++++++++++ + util/meson.build | 2 +- + 6 files changed, 217 insertions(+), 2 deletions(-) + create mode 100644 accel/accel-blocker.c + create mode 100644 include/sysemu/accel-blocker.h + +diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c +new file mode 100644 +index 0000000000..1e7f423462 +--- /dev/null ++++ b/accel/accel-blocker.c +@@ -0,0 +1,154 @@ ++/* ++ * Lock to inhibit accelerator ioctls ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/thread.h" ++#include "qemu/main-loop.h" ++#include "hw/core/cpu.h" ++#include "sysemu/accel-blocker.h" ++ ++static QemuLockCnt accel_in_ioctl_lock; ++static QemuEvent accel_in_ioctl_event; ++ ++void accel_blocker_init(void) ++{ ++ qemu_lockcnt_init(&accel_in_ioctl_lock); ++ qemu_event_init(&accel_in_ioctl_event, false); ++} ++ ++void accel_ioctl_begin(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_end(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&accel_in_ioctl_lock); ++ /* change event to SET. If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++void accel_cpu_ioctl_begin(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&cpu->in_ioctl_lock); ++} ++ ++void accel_cpu_ioctl_end(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&cpu->in_ioctl_lock); ++ /* change event to SET. 
If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++static bool accel_has_to_wait(void) ++{ ++ CPUState *cpu; ++ bool needs_to_wait = false; ++ ++ CPU_FOREACH(cpu) { ++ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { ++ /* exit the ioctl, if vcpu is running it */ ++ qemu_cpu_kick(cpu); ++ needs_to_wait = true; ++ } ++ } ++ ++ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_inhibit_begin(void) ++{ ++ CPUState *cpu; ++ ++ /* ++ * We allow to inhibit only when holding the BQL, so we can identify ++ * when an inhibitor wants to issue an ioctl easily. ++ */ ++ g_assert(qemu_mutex_iothread_locked()); ++ ++ /* Block further invocations of the ioctls outside the BQL. */ ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_lock(&cpu->in_ioctl_lock); ++ } ++ qemu_lockcnt_lock(&accel_in_ioctl_lock); ++ ++ /* Keep waiting until there are running ioctls */ ++ while (true) { ++ ++ /* Reset event to FREE. */ ++ qemu_event_reset(&accel_in_ioctl_event); ++ ++ if (accel_has_to_wait()) { ++ /* ++ * If event is still FREE, and there are ioctls still in progress, ++ * wait. ++ * ++ * If an ioctl finishes before qemu_event_wait(), it will change ++ * the event state to SET. This will prevent qemu_event_wait() from ++ * blocking, but it's not a problem because if other ioctls are ++ * still running the loop will iterate once more and reset the event ++ * status to FREE so that it can wait properly. ++ * ++ * If an ioctls finishes while qemu_event_wait() is blocking, then ++ * it will be waken up, but also here the while loop makes sure ++ * to re-enter the wait if there are other running ioctls. ++ */ ++ qemu_event_wait(&accel_in_ioctl_event); ++ } else { ++ /* No ioctl is running */ ++ return; ++ } ++ } ++} ++ ++void accel_ioctl_inhibit_end(void) ++{ ++ CPUState *cpu; ++ ++ qemu_lockcnt_unlock(&accel_in_ioctl_lock); ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); ++ } ++} ++ +diff --git a/accel/meson.build b/accel/meson.build +index dfd808d2c8..801b4d44e8 100644 +--- a/accel/meson.build ++++ b/accel/meson.build +@@ -1,4 +1,4 @@ +-specific_ss.add(files('accel-common.c')) ++specific_ss.add(files('accel-common.c', 'accel-blocker.c')) + softmmu_ss.add(files('accel-softmmu.c')) + user_ss.add(files('accel-user.c')) + +diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c +index 9e3241b430..b6e83acf0a 100644 +--- a/hw/core/cpu-common.c ++++ b/hw/core/cpu-common.c +@@ -238,6 +238,7 @@ static void cpu_common_initfn(Object *obj) + cpu->nr_threads = 1; + + qemu_mutex_init(&cpu->work_mutex); ++ qemu_lockcnt_init(&cpu->in_ioctl_lock); + QSIMPLEQ_INIT(&cpu->work_list); + QTAILQ_INIT(&cpu->breakpoints); + QTAILQ_INIT(&cpu->watchpoints); +@@ -249,6 +250,7 @@ static void cpu_common_finalize(Object *obj) + { + CPUState *cpu = CPU(obj); + ++ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); + qemu_mutex_destroy(&cpu->work_mutex); + } + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index e948e81f1a..49d9c73f97 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -383,6 +383,9 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + ++ /* Use by accel-block: CPU is executing an ioctl() */ ++ QemuLockCnt in_ioctl_lock; ++ + /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); + DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); +diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h 
+new file mode 100644 +index 0000000000..72020529ef +--- /dev/null ++++ b/include/sysemu/accel-blocker.h +@@ -0,0 +1,56 @@ ++/* ++ * Accelerator blocking API, to prevent new ioctls from starting and wait the ++ * running ones finish. ++ * This mechanism differs from pause/resume_all_vcpus() in that it does not ++ * release the BQL. ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef ACCEL_BLOCKER_H ++#define ACCEL_BLOCKER_H ++ ++#include "qemu/osdep.h" ++#include "sysemu/cpus.h" ++ ++extern void accel_blocker_init(void); ++ ++/* ++ * accel_{cpu_}ioctl_begin/end: ++ * Mark when ioctl is about to run or just finished. ++ * ++ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is ++ * called, preventing new ioctls to run. They will continue only after ++ * accel_ioctl_inibith_end(). ++ */ ++extern void accel_ioctl_begin(void); ++extern void accel_ioctl_end(void); ++extern void accel_cpu_ioctl_begin(CPUState *cpu); ++extern void accel_cpu_ioctl_end(CPUState *cpu); ++ ++/* ++ * accel_ioctl_inhibit_begin: start critical section ++ * ++ * This function makes sure that: ++ * 1) incoming accel_{cpu_}ioctl_begin() calls block ++ * 2) wait that all ioctls that were already running reach ++ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. ++ * ++ * This allows the caller to access shared data or perform operations without ++ * worrying of concurrent vcpus accesses. ++ */ ++extern void accel_ioctl_inhibit_begin(void); ++ ++/* ++ * accel_ioctl_inhibit_end: end critical section started by ++ * accel_ioctl_inhibit_begin() ++ * ++ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. 
++ */ ++extern void accel_ioctl_inhibit_end(void); ++ ++#endif /* ACCEL_BLOCKER_H */ +diff --git a/util/meson.build b/util/meson.build +index 05b593055a..b5f153b0e8 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -48,6 +48,7 @@ util_ss.add(files('transactions.c')) + util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c')) + util_ss.add(files('guest-random.c')) + util_ss.add(files('yank.c')) ++util_ss.add(files('lockcnt.c')) + + if have_user + util_ss.add(files('selfmap.c')) +@@ -69,7 +70,6 @@ if have_block + util_ss.add(files('hexdump.c')) + util_ss.add(files('iova-tree.c')) + util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c')) +- util_ss.add(files('lockcnt.c')) + util_ss.add(files('main-loop.c')) + util_ss.add(files('nvdimm-utils.c')) + util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) +-- +2.37.3 + diff --git a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch new file mode 100644 index 0000000..e3b79cf --- /dev/null +++ b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch @@ -0,0 +1,50 @@ +From 953c5c0982b61b0a3f8f03452844b5487eb22fc7 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:13:17 -0500 +Subject: [PATCH 06/13] aio-wait: switch to smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [6/10] 9f30f97754139ffd18d36b2350f9ed4e59ac496e + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit b532526a07ef3b903ead2e055fe6cc87b41057a3 +Author: Paolo Bonzini +Date: Fri Mar 3 11:03:52 2023 +0100 + + aio-wait: switch to smp_mb__after_rmw() + + The barrier comes after an atomic increment, so it is enough to use + smp_mb__after_rmw(); this avoids a double barrier on x86 systems. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + include/block/aio-wait.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index 54840f8622..03b6394c78 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -82,7 +82,7 @@ extern AioWait global_aio_wait; + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ + /* Paired with smp_mb in aio_wait_kick(). 
*/ \ +- smp_mb(); \ ++ smp_mb__after_rmw(); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +-- +2.37.3 + diff --git a/SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch b/SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch new file mode 100644 index 0000000..9a9ae00 --- /dev/null +++ b/SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch @@ -0,0 +1,86 @@ +From d7eae0ff4c7f7f7bf10f10272adf7c6971c0db9b Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 09:26:35 -0500 +Subject: [PATCH 01/13] aio_wait_kick: add missing memory barrier + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [1/10] eb774aee79864052e14e706d931e52e7bd1162c8 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 7455ff1aa01564cc175db5b2373e610503ad4411 +Author: Emanuele Giuseppe Esposito +Date: Tue May 24 13:30:54 2022 -0400 + + aio_wait_kick: add missing memory barrier + + It seems that aio_wait_kick always required a memory barrier + or atomic operation in the caller, but nobody actually + took care of doing it. + + Let's put the barrier in the function instead, and pair it + with another one in AIO_WAIT_WHILE. Read aio_wait_kick() + comment for further explanation. + + Suggested-by: Paolo Bonzini + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20220524173054.12651-1-eesposit@redhat.com> + Reviewed-by: Vladimir Sementsov-Ogievskiy + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Kevin Wolf + +Signed-off-by: Emanuele Giuseppe Esposito +--- + include/block/aio-wait.h | 2 ++ + util/aio-wait.c | 16 +++++++++++++++- + 2 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index b39eefb38d..54840f8622 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -81,6 +81,8 @@ extern AioWait global_aio_wait; + AioContext *ctx_ = (ctx); \ + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ ++ /* Paired with smp_mb in aio_wait_kick(). */ \ ++ smp_mb(); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +diff --git a/util/aio-wait.c b/util/aio-wait.c +index bdb3d3af22..98c5accd29 100644 +--- a/util/aio-wait.c ++++ b/util/aio-wait.c +@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque) + + void aio_wait_kick(void) + { +- /* The barrier (or an atomic op) is in the caller. */ ++ /* ++ * Paired with smp_mb in AIO_WAIT_WHILE. 
Here we have: ++ * write(condition); ++ * aio_wait_kick() { ++ * smp_mb(); ++ * read(num_waiters); ++ * } ++ * ++ * And in AIO_WAIT_WHILE: ++ * write(num_waiters); ++ * smp_mb(); ++ * read(condition); ++ */ ++ smp_mb(); ++ + if (qatomic_read(&global_aio_wait.num_waiters)) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); + } +-- +2.37.3 + diff --git a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch new file mode 100644 index 0000000..5338a8d --- /dev/null +++ b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch @@ -0,0 +1,66 @@ +From 187eb7a418af93375e42298d06e231e2bec3cf00 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:42 -0500 +Subject: [PATCH 10/13] async: clarify usage of barriers in the polling case + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [10/10] 3be07ccc6137a0336becfe63a818d9cbadb38e9c + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 6229438cca037d42f44a96d38feb15cb102a444f +Author: Paolo Bonzini +Date: Mon Mar 6 10:43:52 2023 +0100 + + async: clarify usage of barriers in the polling case + + Explain that aio_context_notifier_poll() relies on + aio_notify_accept() to catch all the memory writes that were + done before ctx->notified was set to true. + + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 795fe699b6..2a63bf90f2 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -463,8 +463,9 @@ void aio_notify_accept(AioContext *ctx) + qatomic_set(&ctx->notified, false); + + /* +- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb +- * in aio_notify. ++ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the ++ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs ++ * with smp_wmb() in aio_notify. + */ + smp_mb(); + } +@@ -487,6 +488,11 @@ static bool aio_context_notifier_poll(void *opaque) + EventNotifier *e = opaque; + AioContext *ctx = container_of(e, AioContext, notifier); + ++ /* ++ * No need for load-acquire because we just want to kick the ++ * event loop. aio_notify_accept() takes care of synchronizing ++ * the event loop with the producers. 
++ */ + return qatomic_read(&ctx->notified); + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch new file mode 100644 index 0000000..aea20ea --- /dev/null +++ b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch @@ -0,0 +1,111 @@ +From ea3856bb545d19499602830cdc3076d83a981e7a Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:36 -0500 +Subject: [PATCH 09/13] async: update documentation of the memory barriers + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [9/10] d471da2acf7a107cf75f3327c5e8d7456307160e + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 8dd48650b43dfde4ebea34191ac267e474bcc29e +Author: Paolo Bonzini +Date: Mon Mar 6 10:15:06 2023 +0100 + + async: update documentation of the memory barriers + + Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", + 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() + is happening when the bottom half is enqueued in the bh_list; not + when the flags are set. Update the documentation to match. + + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 6f6717a34b..795fe699b6 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -71,14 +71,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) + unsigned old_flags; + + /* +- * The memory barrier implicit in qatomic_fetch_or makes sure that: +- * 1. idle & any writes needed by the callback are done before the +- * locations are read in the aio_bh_poll. +- * 2. ctx is loaded before the callback has a chance to execute and bh +- * could be freed. ++ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that ++ * insertion starts after BH_PENDING is set. + */ + old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); ++ + if (!(old_flags & BH_PENDING)) { ++ /* ++ * At this point the bottom half becomes visible to aio_bh_poll(). ++ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in ++ * aio_bh_poll(), ensuring that: ++ * 1. any writes needed by the callback are visible from the callback ++ * after aio_bh_dequeue() returns bh. ++ * 2. ctx is loaded before the callback has a chance to execute and bh ++ * could be freed. ++ */ + QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); + } + +@@ -97,11 +104,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) + QSLIST_REMOVE_HEAD(head, next); + + /* +- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory +- * barrier ensures that the callback sees all writes done by the scheduling +- * thread. It also ensures that the scheduling thread sees the cleared +- * flag before bh->cb has run, and thus will call aio_notify again if +- * necessary. ++ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that ++ * the removal finishes before BH_PENDING is reset. 
+ */ + *flags = qatomic_fetch_and(&bh->flags, + ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); +@@ -148,6 +152,7 @@ int aio_bh_poll(AioContext *ctx) + BHListSlice *s; + int ret = 0; + ++ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ + QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); + QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); + +@@ -437,15 +442,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) + void aio_notify(AioContext *ctx) + { + /* +- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in +- * aio_notify_accept. ++ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with ++ * smp_mb() in aio_notify_accept(). + */ + smp_wmb(); + qatomic_set(&ctx->notified, true); + + /* +- * Write ctx->notified before reading ctx->notify_me. Pairs +- * with smp_mb in aio_ctx_prepare or aio_poll. ++ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. ++ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. + */ + smp_mb(); + if (qatomic_read(&ctx->notify_me)) { +-- +2.37.3 + diff --git a/SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch b/SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch new file mode 100644 index 0000000..b29289b --- /dev/null +++ b/SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch @@ -0,0 +1,71 @@ +From 60da56e3685969493ae483c3cc2c66af13d00baf Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 10 Aug 2022 14:57:18 +0200 +Subject: [PATCH 1/3] backends/hostmem: Fix support of memory-backend-memfd in + qemu_maxrampagesize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 221: backends/hostmem: Fix support of memory-backend-memfd in qemu_maxrampagesize() +RH-Bugzilla: 2117149 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] b5a1047750af32c0a261b8385ea0e819eb16681a + +It is currently not possible yet to use "memory-backend-memfd" on s390x +with hugepages enabled. This problem is caused by qemu_maxrampagesize() +not taking memory-backend-memfd objects into account yet, so the code +in s390_memory_init() fails to enable the huge page support there via +s390_set_max_pagesize(). Fix it by generalizing the code, so that it +looks at qemu_ram_pagesize(memdev->mr.ram_block) instead of re-trying +to get the information from the filesystem. 
+ +Suggested-by: David Hildenbrand +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2116496 +Message-Id: <20220810125720.3849835-2-thuth@redhat.com> +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit 8be934b70e923104da883b990dee18f02552d40e) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2117149 +[clg: Resolved conflict on qemu_real_host_page_size() ] +Signed-off-by: Cédric Le Goater +--- + backends/hostmem.c | 14 ++------------ + 1 file changed, 2 insertions(+), 12 deletions(-) + +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 4c05862ed5..0c4654ea85 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -305,22 +305,12 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend) + return backend->is_mapped; + } + +-#ifdef __linux__ + size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) + { +- Object *obj = OBJECT(memdev); +- char *path = object_property_get_str(obj, "mem-path", NULL); +- size_t pagesize = qemu_mempath_getpagesize(path); +- +- g_free(path); ++ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block); ++ g_assert(pagesize >= qemu_real_host_page_size); + return pagesize; + } +-#else +-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) +-{ +- return qemu_real_host_page_size; +-} +-#endif + + static void + host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) +-- +2.35.3 + diff --git a/SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch b/SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch new file mode 100644 index 0000000..1756d88 --- /dev/null +++ b/SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch @@ -0,0 +1,153 @@ +From 192f956f2b0761f270070555f8feb1f0544e5558 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 9 Nov 2022 17:54:48 +0100 +Subject: [PATCH 01/11] block/mirror: Do not wait for active writes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load +RH-Bugzilla: 2125119 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [1/3] 652d1e55b954f13eaec2c86f58735d4942837e16 + +Waiting for all active writes to settle before daring to create a +background copying operation means that we will never do background +operations while the guest does anything (in write-blocking mode), and +therefore cannot converge. Yes, we also will not diverge, but actually +converging would be even nicer. + +It is unclear why we did decide to wait for all active writes to settle +before creating a background operation, but it just does not seem +necessary. Active writes will put themselves into the in_flight bitmap +and thus properly block actually conflicting background requests. + +It is important for active requests to wait on overlapping background +requests, which we do in active_write_prepare(). However, so far it was +not documented why it is important. Add such documentation now, and +also to the other call of mirror_wait_on_conflicts(), so that it becomes +more clear why and when requests need to actively wait for other +requests to settle. + +Another thing to note is that of course we need to ensure that there are +no active requests when the job completes, but that is done by virtue of +the BDS being drained anyway, so there cannot be any active requests at +that point. 
+ +With this change, we will need to explicitly keep track of how many +bytes are in flight in active requests so that +job_progress_set_remaining() in mirror_run() can set the correct number +of remaining bytes. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2123297 +Signed-off-by: Hanna Reitz +Message-Id: <20221109165452.67927-2-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit d69a879bdf1aed586478eaa161ee064fe1b92f1a) +Signed-off-by: Hanna Czenczek +--- + block/mirror.c | 37 ++++++++++++++++++++++++++++++------- + 1 file changed, 30 insertions(+), 7 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index efec2c7674..282f428cb7 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -81,6 +81,7 @@ typedef struct MirrorBlockJob { + int max_iov; + bool initial_zeroing_ongoing; + int in_active_write_counter; ++ int64_t active_write_bytes_in_flight; + bool prepared; + bool in_drain; + } MirrorBlockJob; +@@ -493,6 +494,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + } + bdrv_dirty_bitmap_unlock(s->dirty_bitmap); + ++ /* ++ * Wait for concurrent requests to @offset. The next loop will limit the ++ * copied area based on in_flight_bitmap so we only copy an area that does ++ * not overlap with concurrent in-flight requests. Still, we would like to ++ * copy something, so wait until there are at least no more requests to the ++ * very beginning of the area. ++ */ + mirror_wait_on_conflicts(NULL, s, offset, 1); + + job_pause_point(&s->common.job); +@@ -993,12 +1001,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + int64_t cnt, delta; + bool should_complete; + +- /* Do not start passive operations while there are active +- * writes in progress */ +- while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, true); +- } +- + if (s->ret < 0) { + ret = s->ret; + goto immediate_exit; +@@ -1015,7 +1017,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is + * the number of bytes currently being processed; together those are + * the current remaining operation length */ +- job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt); ++ job_progress_set_remaining(&s->common.job, ++ s->bytes_in_flight + cnt + ++ s->active_write_bytes_in_flight); + + /* Note that even when no rate limit is applied we need to yield + * periodically with no pending I/O so that bdrv_drain_all() returns. +@@ -1073,6 +1077,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + + s->in_drain = true; + bdrv_drained_begin(bs); ++ ++ /* Must be zero because we are drained */ ++ assert(s->in_active_write_counter == 0); ++ + cnt = bdrv_get_dirty_count(s->dirty_bitmap); + if (cnt > 0 || mirror_flush(s) < 0) { + bdrv_drained_end(bs); +@@ -1306,6 +1314,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + } + + job_progress_increase_remaining(&job->common.job, bytes); ++ job->active_write_bytes_in_flight += bytes; + + switch (method) { + case MIRROR_METHOD_COPY: +@@ -1327,6 +1336,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + abort(); + } + ++ job->active_write_bytes_in_flight -= bytes; + if (ret >= 0) { + job_progress_update(&job->common.job, bytes); + } else { +@@ -1375,6 +1385,19 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + + s->in_active_write_counter++; + ++ /* ++ * Wait for concurrent requests affecting the area. 
If there are already ++ * running requests that are copying off now-to-be stale data in the area, ++ * we must wait for them to finish before we begin writing fresh data to the ++ * target so that the write operations appear in the correct order. ++ * Note that background requests (see mirror_iteration()) in contrast only ++ * wait for conflicting requests at the start of the dirty area, and then ++ * (based on the in_flight_bitmap) truncate the area to copy so it will not ++ * conflict with any requests beyond that. For active writes, however, we ++ * cannot truncate that area. The request from our parent must be blocked ++ * until the area is copied in full. Therefore, we must wait for the whole ++ * area to become free of concurrent requests. ++ */ + mirror_wait_on_conflicts(op, s, offset, bytes); + + bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); +-- +2.37.3 + diff --git a/SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch b/SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch new file mode 100644 index 0000000..457788e --- /dev/null +++ b/SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch @@ -0,0 +1,76 @@ +From 57c79ed20cb73aa9aa4dd7487379b85ea3f936f6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 9 Nov 2022 17:54:49 +0100 +Subject: [PATCH 02/11] block/mirror: Drop mirror_wait_for_any_operation() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load +RH-Bugzilla: 2125119 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [2/3] dec37883bcc491441ae08d9592d1ec26a47765c0 + +mirror_wait_for_free_in_flight_slot() is the only remaining user of +mirror_wait_for_any_operation(), so inline the latter into the former. + +Signed-off-by: Hanna Reitz +Message-Id: <20221109165452.67927-3-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit eb994912993077f178ccb43b20e422ecf9ae4ac7) +Signed-off-by: Hanna Czenczek +--- + block/mirror.c | 21 ++++++++------------- + 1 file changed, 8 insertions(+), 13 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 282f428cb7..6b02555ad7 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -304,19 +304,21 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { +- /* Do not wait on pseudo ops, because it may in turn wait on ++ /* ++ * Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation +- * to wait on. */ +- if (!op->is_pseudo_op && op->is_in_flight && +- op->is_active_write == active) +- { ++ * to wait on. ++ * Also, do not wait on active operations, because they do not ++ * use up in-flight slots. 
++ */ ++ if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } +@@ -324,13 +326,6 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + abort(); + } + +-static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +-{ +- /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, false); +-} +- + /* Perform a mirror copy operation. + * + * *op->bytes_handled is set to the number of bytes copied after and +-- +2.37.3 + diff --git a/SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch b/SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch new file mode 100644 index 0000000..b353bd1 --- /dev/null +++ b/SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch @@ -0,0 +1,75 @@ +From b1f5aa5a342a25dc558ee9d435fed0643fe5155f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 9 Nov 2022 17:54:50 +0100 +Subject: [PATCH 03/11] block/mirror: Fix NULL s->job in active writes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load +RH-Bugzilla: 2125119 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [3/3] 49d7ebd15667151a6e14228a8260cfdd0aa27a78 + +There is a small gap in mirror_start_job() before putting the mirror +filter node into the block graph (bdrv_append() call) and the actual job +being created. Before the job is created, MirrorBDSOpaque.job is NULL. + +It is possible that requests come in when bdrv_drained_end() is called, +and those requests would see MirrorBDSOpaque.job == NULL. Have our +filter node handle that case gracefully. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20221109165452.67927-4-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit da93d5c84e56e6b4e84aa8e98b6b984c9b6bb528) +Signed-off-by: Hanna Czenczek +--- + block/mirror.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 6b02555ad7..50289fca49 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1438,11 +1438,13 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, + MirrorOp *op = NULL; + MirrorBDSOpaque *s = bs->opaque; + int ret = 0; +- bool copy_to_target; ++ bool copy_to_target = false; + +- copy_to_target = s->job->ret >= 0 && +- !job_is_cancelled(&s->job->common.job) && +- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ if (s->job) { ++ copy_to_target = s->job->ret >= 0 && ++ !job_is_cancelled(&s->job->common.job) && ++ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ } + + if (copy_to_target) { + op = active_write_prepare(s->job, offset, bytes); +@@ -1487,11 +1489,13 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + QEMUIOVector bounce_qiov; + void *bounce_buf; + int ret = 0; +- bool copy_to_target; ++ bool copy_to_target = false; + +- copy_to_target = s->job->ret >= 0 && +- !job_is_cancelled(&s->job->common.job) && +- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ if (s->job) { ++ copy_to_target = s->job->ret >= 0 && ++ !job_is_cancelled(&s->job->common.job) && ++ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ } + + if (copy_to_target) { + /* The guest might concurrently modify the data to write; but +-- +2.37.3 + diff --git a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch new file mode 100644 index 0000000..4be5d14 --- /dev/null +++ b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch @@ -0,0 +1,127 @@ +From 103608465b8bd2edf7f9aaef5c3c93309ccf9ec2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:17 -0500 +Subject: [PATCH 12/13] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() + race + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock +RH-Bugzilla: 2090990 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [2/3] 14f5835093ba8c5111f3ada2fe87730371aca733 + +dma_blk_cb() only takes the AioContext lock around ->io_func(). That +means the rest of dma_blk_cb() is not protected. In particular, the +DMAAIOCB field accesses happen outside the lock. + +There is a race when the main loop thread holds the AioContext lock and +invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> +dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb +field determines how cancellation proceeds. If dma_aio_cancel() sees +dbs->acb == NULL while dma_blk_cb() is still running, the request can be +completed twice (-ECANCELED and the actual return value). + +The following assertion can occur with virtio-scsi when an IOThread is +used: + + ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. + +Fix the race by holding the AioContext across dma_blk_cb(). Now +dma_aio_cancel() under the AioContext lock will not see +inconsistent/intermediate states. 
+ +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 4 +--- + softmmu/dma-helpers.c | 12 +++++++----- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 179ce22c4a..c8109a673e 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -351,13 +351,12 @@ done: + scsi_req_unref(&r->req); + } + ++/* Called with AioContext lock held */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -367,7 +366,6 @@ static void scsi_dma_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) +diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c +index 7d766a5e89..42af18719a 100644 +--- a/softmmu/dma-helpers.c ++++ b/softmmu/dma-helpers.c +@@ -127,17 +127,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) + static void dma_blk_cb(void *opaque, int ret) + { + DMAAIOCB *dbs = (DMAAIOCB *)opaque; ++ AioContext *ctx = dbs->ctx; + dma_addr_t cur_addr, cur_len; + void *mem; + + trace_dma_blk_cb(dbs, ret); + ++ aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); +- return; ++ goto out; + } + dma_blk_unmap(dbs); + +@@ -177,9 +179,9 @@ static void dma_blk_cb(void *opaque, int ret) + + if (dbs->iov.size == 0) { + trace_dma_map_wait(dbs); +- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); ++ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); +- return; ++ goto out; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { +@@ -187,11 +189,11 @@ static void dma_blk_cb(void *opaque, int ret) + QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); + } + +- aio_context_acquire(dbs->ctx); + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); +- aio_context_release(dbs->ctx); + assert(dbs->acb); ++out: ++ aio_context_release(ctx); + } + + static void dma_aio_cancel(BlockAIOCB *acb) +-- +2.37.3 + diff --git a/SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch b/SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch new file mode 100644 index 0000000..4b6c3fe --- /dev/null +++ b/SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch @@ -0,0 +1,70 @@ +From 407e23d7f0c9020404247afe7d4df98505222bbb Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 14 Nov 2022 14:25:02 +0100 +Subject: [PATCH 1/3] docs/system/s390x: Document the "loadparm" machine + property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 233: s390x: Document the "loadparm" machine property +RH-Bugzilla: 2128225 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Jon Maloy +RH-Commit: [1/2] e9589ea32d2a8f82971476b644e1063fa14cf822 + +The "loadparm" machine property is 
useful for selecting alternative +kernels on the disk of the guest, but so far we do not tell the users +yet how to use it. Add some documentation to fill this gap. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2128235 +Message-Id: <20221114132502.110213-1-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit be5df2edb5d69ff3107c5616aa035a9ba8d0422e) +--- + docs/system/s390x/bootdevices.rst | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/docs/system/s390x/bootdevices.rst b/docs/system/s390x/bootdevices.rst +index 9e591cb9dc..d4bf3b9f0b 100644 +--- a/docs/system/s390x/bootdevices.rst ++++ b/docs/system/s390x/bootdevices.rst +@@ -53,6 +53,32 @@ recommended to specify a CD-ROM device via ``-device scsi-cd`` (as mentioned + above) instead. + + ++Selecting kernels with the ``loadparm`` property ++------------------------------------------------ ++ ++The ``s390-ccw-virtio`` machine supports the so-called ``loadparm`` parameter ++which can be used to select the kernel on the disk of the guest that the ++s390-ccw bios should boot. When starting QEMU, it can be specified like this:: ++ ++ qemu-system-s390x -machine s390-ccw-virtio,loadparm= ++ ++The first way to use this parameter is to use the word ``PROMPT`` as the ++```` here. In that case the s390-ccw bios will show a list of ++installed kernels on the disk of the guest and ask the user to enter a number ++to chose which kernel should be booted -- similar to what can be achieved by ++specifying the ``-boot menu=on`` option when starting QEMU. Note that the menu ++list will only show the names of the installed kernels when using a DASD-like ++disk image with 4k byte sectors. On normal SCSI-style disks with 512-byte ++sectors, there is not enough space for the zipl loader on the disk to store ++the kernel names, so you only get a list without names here. ++ ++The second way to use this parameter is to use a number in the range from 0 ++to 31. The numbers that can be used here correspond to the numbers that are ++shown when using the ``PROMPT`` option, and the s390-ccw bios will then try ++to automatically boot the kernel that is associated with the given number. ++Note that ``0`` can be used to boot the default entry. ++ ++ + Booting from a network device + ----------------------------- + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch b/SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch new file mode 100644 index 0000000..bc06fa8 --- /dev/null +++ b/SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch @@ -0,0 +1,356 @@ +From f2f3efff83dddd38a97699cd2701f46f61a732e3 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 11:32:10 +0000 +Subject: [PATCH 36/42] dump: Add architecture section and section string table + support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [36/41] 83b98ff185e93e62703f686b65546d60c783d783 + +Add hooks which architectures can use to add arbitrary data to custom +sections. 
+ +Also add a section name string table in order to identify section +contents + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017113210.41674-1-frankja@linux.ibm.com> +(cherry picked from commit 9b72224f44612ddd5b434a1bccf79346946d11da) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 186 +++++++++++++++++++++++++++++++------ + include/sysemu/dump-arch.h | 3 + + include/sysemu/dump.h | 3 + + 3 files changed, 166 insertions(+), 26 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 7a42401790..4aa8fb64d2 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -104,6 +104,7 @@ static int dump_cleanup(DumpState *s) + memory_mapping_list_free(&s->list); + close(s->fd); + g_free(s->guest_note); ++ g_array_unref(s->string_table_buf); + s->guest_note = NULL; + if (s->resume) { + if (s->detached) { +@@ -153,11 +154,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) + elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); +- if (s->shdr_num) { +- elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); +- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); +- } ++ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); + } + + static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) +@@ -181,11 +181,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) + elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); +- if (s->shdr_num) { +- elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); +- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); +- } ++ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); + } + + static void write_elf_header(DumpState *s, Error **errp) +@@ -196,6 +195,8 @@ static void write_elf_header(DumpState *s, Error **errp) + void *header_ptr; + int ret; + ++ /* The NULL header and the shstrtab are always defined */ ++ assert(s->shdr_num >= 2); + if (dump_is_64bit(s)) { + prepare_elf64_header(s, &elf64_header); + header_size = sizeof(elf64_header); +@@ -394,17 +395,49 @@ static void prepare_elf_section_hdr_zero(DumpState *s) + } + } + +-static void prepare_elf_section_hdrs(DumpState *s) ++static void prepare_elf_section_hdr_string(DumpState *s, void *buff) ++{ ++ uint64_t index = s->string_table_buf->len; ++ const char strtab[] = ".shstrtab"; ++ Elf32_Shdr shdr32 = {}; ++ Elf64_Shdr shdr64 = {}; ++ int shdr_size; ++ void *shdr; ++ ++ g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab)); ++ if (dump_is_64bit(s)) { ++ shdr_size = sizeof(Elf64_Shdr); ++ shdr64.sh_type = SHT_STRTAB; ++ shdr64.sh_offset = s->section_offset + s->elf_section_data_size; ++ shdr64.sh_name = index; ++ shdr64.sh_size = s->string_table_buf->len; ++ shdr = &shdr64; ++ } else { ++ shdr_size = sizeof(Elf32_Shdr); ++ shdr32.sh_type = SHT_STRTAB; ++ shdr32.sh_offset = 
s->section_offset + s->elf_section_data_size; ++ shdr32.sh_name = index; ++ shdr32.sh_size = s->string_table_buf->len; ++ shdr = &shdr32; ++ } ++ memcpy(buff, shdr, shdr_size); ++} ++ ++static bool prepare_elf_section_hdrs(DumpState *s, Error **errp) + { + size_t len, sizeof_shdr; ++ void *buff_hdr; + + /* + * Section ordering: + * - HDR zero ++ * - Arch section hdrs ++ * - String table hdr + */ + sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + len = sizeof_shdr * s->shdr_num; + s->elf_section_hdrs = g_malloc0(len); ++ buff_hdr = s->elf_section_hdrs; + + /* + * The first section header is ALWAYS a special initial section +@@ -420,6 +453,26 @@ static void prepare_elf_section_hdrs(DumpState *s) + if (s->phdr_num >= PN_XNUM) { + prepare_elf_section_hdr_zero(s); + } ++ buff_hdr += sizeof_shdr; ++ ++ /* Add architecture defined section headers */ ++ if (s->dump_info.arch_sections_write_hdr_fn ++ && s->shdr_num > 2) { ++ buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr); ++ ++ if (s->shdr_num >= SHN_LORESERVE) { ++ error_setg_errno(errp, EINVAL, ++ "dump: too many architecture defined sections"); ++ return false; ++ } ++ } ++ ++ /* ++ * String table is the last section since strings are added via ++ * arch_sections_write_hdr(). ++ */ ++ prepare_elf_section_hdr_string(s, buff_hdr); ++ return true; + } + + static void write_elf_section_headers(DumpState *s, Error **errp) +@@ -427,7 +480,9 @@ static void write_elf_section_headers(DumpState *s, Error **errp) + size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + int ret; + +- prepare_elf_section_hdrs(s); ++ if (!prepare_elf_section_hdrs(s, errp)) { ++ return; ++ } + + ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); + if (ret < 0) { +@@ -437,6 +492,29 @@ static void write_elf_section_headers(DumpState *s, Error **errp) + g_free(s->elf_section_hdrs); + } + ++static void write_elf_sections(DumpState *s, Error **errp) ++{ ++ int ret; ++ ++ if (s->elf_section_data_size) { ++ /* Write architecture section data */ ++ ret = fd_write_vmcore(s->elf_section_data, ++ s->elf_section_data_size, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "dump: failed to write architecture section data"); ++ return; ++ } ++ } ++ ++ /* Write string table */ ++ ret = fd_write_vmcore(s->string_table_buf->data, ++ s->string_table_buf->len, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "dump: failed to write string table data"); ++ } ++} ++ + static void write_data(DumpState *s, void *buf, int length, Error **errp) + { + int ret; +@@ -693,6 +771,31 @@ static void dump_iterate(DumpState *s, Error **errp) + } + } + ++static void dump_end(DumpState *s, Error **errp) ++{ ++ int rc; ++ ERRP_GUARD(); ++ ++ if (s->elf_section_data_size) { ++ s->elf_section_data = g_malloc0(s->elf_section_data_size); ++ } ++ ++ /* Adds the architecture defined section data to s->elf_section_data */ ++ if (s->dump_info.arch_sections_write_fn && ++ s->elf_section_data_size) { ++ rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data); ++ if (rc) { ++ error_setg_errno(errp, rc, ++ "dump: failed to get arch section data"); ++ g_free(s->elf_section_data); ++ return; ++ } ++ } ++ ++ /* write sections to vmcore */ ++ write_elf_sections(s, errp); ++} ++ + static void create_vmcore(DumpState *s, Error **errp) + { + ERRP_GUARD(); +@@ -702,7 +805,14 @@ static void create_vmcore(DumpState *s, Error **errp) + return; + } + ++ /* Iterate over memory and dump it to file */ + dump_iterate(s, errp); 
++ if (*errp) { ++ return; ++ } ++ ++ /* Write the section data */ ++ dump_end(s, errp); + } + + static int write_start_flat_header(int fd) +@@ -1720,6 +1830,14 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->filter_area_begin = begin; + s->filter_area_length = length; + ++ /* First index is 0, it's the special null name */ ++ s->string_table_buf = g_array_new(FALSE, TRUE, 1); ++ /* ++ * Allocate the null name, due to the clearing option set to true ++ * it will be 0. ++ */ ++ g_array_set_size(s->string_table_buf, 1); ++ + memory_mapping_list_init(&s->list); + + guest_phys_blocks_init(&s->guest_phys_blocks); +@@ -1856,26 +1974,42 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + /* +- * calculate phdr_num ++ * The first section header is always a special one in which most ++ * fields are 0. The section header string table is also always ++ * set. ++ */ ++ s->shdr_num = 2; ++ ++ /* ++ * Adds the number of architecture sections to shdr_num and sets ++ * elf_section_data_size so we know the offsets and sizes of all ++ * parts. ++ */ ++ if (s->dump_info.arch_sections_add_fn) { ++ s->dump_info.arch_sections_add_fn(s); ++ } ++ ++ /* ++ * calculate shdr_num so we know the offsets and sizes of all ++ * parts. ++ * Calculate phdr_num + * +- * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow ++ * The absolute maximum amount of phdrs is UINT32_MAX - 1 as ++ * sh_info is 32 bit. There's special handling once we go over ++ * UINT16_MAX - 1 but that is handled in the ehdr and section ++ * code. + */ +- s->phdr_num = 1; /* PT_NOTE */ +- if (s->list.num < UINT16_MAX - 2) { +- s->shdr_num = 0; ++ s->phdr_num = 1; /* Reserve PT_NOTE */ ++ if (s->list.num <= UINT32_MAX - 1) { + s->phdr_num += s->list.num; + } else { +- /* sh_info of section 0 holds the real number of phdrs */ +- s->shdr_num = 1; +- +- /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ +- if (s->list.num <= UINT32_MAX - 1) { +- s->phdr_num += s->list.num; +- } else { +- s->phdr_num = UINT32_MAX; +- } ++ s->phdr_num = UINT32_MAX; + } + ++ /* ++ * Now that the number of section and program headers is known we ++ * can calculate the offsets of the headers and data. ++ */ + if (dump_is_64bit(s)) { + s->shdr_offset = sizeof(Elf64_Ehdr); + s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h +index e25b02e990..59bbc9be38 100644 +--- a/include/sysemu/dump-arch.h ++++ b/include/sysemu/dump-arch.h +@@ -21,6 +21,9 @@ typedef struct ArchDumpInfo { + uint32_t page_size; /* The target's page size. If it's variable and + * unknown, then this should be the maximum. */ + uint64_t phys_base; /* The target's physmem base. 
*/ ++ void (*arch_sections_add_fn)(DumpState *s); ++ uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); ++ int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); + } ArchDumpInfo; + + struct GuestPhysBlockList; /* memory_mapping.h */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 9ed811b313..38ccac7190 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -180,6 +180,9 @@ typedef struct DumpState { + hwaddr note_offset; + + void *elf_section_hdrs; /* Pointer to section header buffer */ ++ void *elf_section_data; /* Pointer to section data buffer */ ++ uint64_t elf_section_data_size; /* Size of section data */ ++ GArray *string_table_buf; /* String table data buffer */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Add-more-offset-variables.patch b/SOURCES/kvm-dump-Add-more-offset-variables.patch new file mode 100644 index 0000000..373f814 --- /dev/null +++ b/SOURCES/kvm-dump-Add-more-offset-variables.patch @@ -0,0 +1,138 @@ +From bee31226b87d0b05faae84e88cce3af1b8dabbfd Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:59 +0000 +Subject: [PATCH 17/42] dump: Add more offset variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [17/41] fbe629e1476e8a0e039f989af6e1f4707075ba01 + +Offset calculations are easy enough to get wrong. Let's add a few +variables to make moving around elf headers and data sections easier. 
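To make the layout these new offset variables describe concrete, here is a minimal standalone sketch (not QEMU code; the struct, the example counts and the printf are illustrative stand-ins) of how the ELF64 offsets build on one another in the order this patch computes them:

#include <elf.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the DumpState offset members introduced here. */
struct layout {
    uint64_t phdr_offset;   /* program header table, right after the ELF header */
    uint64_t shdr_offset;   /* section header table, after the program headers  */
    uint64_t note_offset;   /* ELF notes, after the section headers             */
    uint64_t memory_offset; /* guest memory contents, after the notes           */
};

int main(void)
{
    uint32_t phdr_num = 3, shdr_num = 0;   /* example counts      */
    uint64_t note_size = 0x200;            /* example note size   */
    struct layout l;

    l.phdr_offset   = sizeof(Elf64_Ehdr);
    l.shdr_offset   = l.phdr_offset + sizeof(Elf64_Phdr) * phdr_num;
    l.note_offset   = l.shdr_offset + sizeof(Elf64_Shdr) * shdr_num;
    l.memory_offset = l.note_offset + note_size;

    printf("phdr 0x%" PRIx64 "  shdr 0x%" PRIx64 "  note 0x%" PRIx64
           "  memory 0x%" PRIx64 "\n",
           l.phdr_offset, l.shdr_offset, l.note_offset, l.memory_offset);
    return 0;
}

Each region starts where the previous one ends, so getting one size wrong shifts everything behind it; naming the offsets makes that chain explicit instead of hiding it in repeated sizeof arithmetic.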
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Richard Henderson +Message-Id: <20220330123603.107120-6-frankja@linux.ibm.com> +(cherry picked from commit e71d353360bb09a8e784e35d78370c691f6ea185) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 35 +++++++++++++++-------------------- + include/sysemu/dump.h | 4 ++++ + 2 files changed, 19 insertions(+), 20 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 5cc2322325..85a402b38c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -142,13 +142,11 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); ++ elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; +- +- elf_header.e_shoff = cpu_to_dump64(s, shoff); ++ elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); + elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } +@@ -179,13 +177,11 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); ++ elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; +- +- elf_header.e_shoff = cpu_to_dump32(s, shoff); ++ elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); + elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } +@@ -248,12 +244,11 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + static void write_elf64_note(DumpState *s, Error **errp) + { + Elf64_Phdr phdr; +- hwaddr begin = s->memory_offset - s->note_size; + int ret; + + memset(&phdr, 0, sizeof(Elf64_Phdr)); + phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump64(s, begin); ++ phdr.p_offset = cpu_to_dump64(s, s->note_offset); + phdr.p_paddr = 0; + phdr.p_filesz = cpu_to_dump64(s, s->note_size); + phdr.p_memsz = cpu_to_dump64(s, s->note_size); +@@ -313,13 +308,12 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + + static void write_elf32_note(DumpState *s, Error **errp) + { +- hwaddr begin = s->memory_offset - s->note_size; + Elf32_Phdr phdr; + int ret; + + memset(&phdr, 0, sizeof(Elf32_Phdr)); + phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump32(s, begin); ++ phdr.p_offset = cpu_to_dump32(s, s->note_offset); + phdr.p_paddr = 0; + phdr.p_filesz = cpu_to_dump32(s, s->note_size); + phdr.p_memsz = cpu_to_dump32(s, s->note_size); +@@ -1826,15 +1820,16 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) * s->shdr_num + +- s->note_size; ++ s->phdr_offset = 
sizeof(Elf64_Ehdr); ++ s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; ++ s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; ++ s->memory_offset = s->note_offset + s->note_size; + } else { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) * s->shdr_num + +- s->note_size; ++ ++ s->phdr_offset = sizeof(Elf32_Ehdr); ++ s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; ++ s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; ++ s->memory_offset = s->note_offset + s->note_size; + } + + return; +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 19458bffbd..ffc2ea1072 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -159,6 +159,10 @@ typedef struct DumpState { + bool resume; + bool detached; + ssize_t note_size; ++ hwaddr shdr_offset; ++ hwaddr phdr_offset; ++ hwaddr section_offset; ++ hwaddr note_offset; + hwaddr memory_offset; + int fd; + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch b/SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch new file mode 100644 index 0000000..449aab4 --- /dev/null +++ b/SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch @@ -0,0 +1,94 @@ +From cbb653d73e32513ccd46b293a52384eed6a5f84f Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:02 +0000 +Subject: [PATCH 20/42] dump: Cleanup dump_begin write functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [20/41] 18ea1457a3e54fd368e556d96c3be50c6ad0a6bd + +There's no need to have a gigantic if in there let's move the elf +32/64 bit logic into the section, segment or note code. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-9-frankja@linux.ibm.com> +(cherry picked from commit 5ff2e5a3e1e67930e523486e39549a33fcf97227) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 42 +++++++++++------------------------------- + 1 file changed, 11 insertions(+), 31 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 823ca32883..88abde355a 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -565,46 +565,26 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (dump_is_64bit(s)) { +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); ++ /* write all PT_LOAD to vmcore */ ++ write_elf_loads(s, errp); ++ if (*errp) { ++ return; ++ } ++ ++ /* write section to vmcore */ ++ if (s->shdr_num) { ++ write_elf_section(s, 1, errp); + if (*errp) { + return; + } ++ } + +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 1, errp); +- if (*errp) { +- return; +- } +- } +- ++ if (dump_is_64bit(s)) { + /* write notes to vmcore */ + write_elf64_notes(fd_write_vmcore, s, errp); +- if (*errp) { +- return; +- } + } else { +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); +- if (*errp) { +- return; +- } +- +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 0, errp); +- if (*errp) { +- return; +- } +- } +- + /* write notes to vmcore */ + write_elf32_notes(fd_write_vmcore, s, errp); +- if (*errp) { +- return; +- } + } + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Consolidate-elf-note-function.patch b/SOURCES/kvm-dump-Consolidate-elf-note-function.patch new file mode 100644 index 0000000..3353e4a --- /dev/null +++ b/SOURCES/kvm-dump-Consolidate-elf-note-function.patch @@ -0,0 +1,67 @@ +From 0547599cf507930f91943f22d5f917ebacf69484 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:03 +0000 +Subject: [PATCH 21/42] dump: Consolidate elf note function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [21/41] 52298c098c116aea75ad15894731ff412c2c4e73 + +Just like with the other write functions let's move the 32/64 bit elf +handling to a function to improve readability. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-10-frankja@linux.ibm.com> +(cherry picked from commit c68124738bc29017e4254c898bc40be7be477af7) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 88abde355a..a451abc590 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -520,6 +520,15 @@ static void write_elf_loads(DumpState *s, Error **errp) + } + } + ++static void write_elf_notes(DumpState *s, Error **errp) ++{ ++ if (dump_is_64bit(s)) { ++ write_elf64_notes(fd_write_vmcore, s, errp); ++ } else { ++ write_elf32_notes(fd_write_vmcore, s, errp); ++ } ++} ++ + /* write elf header, PT_NOTE and elf note to vmcore. 
*/ + static void dump_begin(DumpState *s, Error **errp) + { +@@ -579,13 +588,8 @@ static void dump_begin(DumpState *s, Error **errp) + } + } + +- if (dump_is_64bit(s)) { +- /* write notes to vmcore */ +- write_elf64_notes(fd_write_vmcore, s, errp); +- } else { +- /* write notes to vmcore */ +- write_elf32_notes(fd_write_vmcore, s, errp); +- } ++ /* write notes to vmcore */ ++ write_elf_notes(s, errp); + } + + static int get_next_block(DumpState *s, GuestPhysBlock *block) +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch b/SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch new file mode 100644 index 0000000..700927a --- /dev/null +++ b/SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch @@ -0,0 +1,169 @@ +From f87abe1ef14e80731249ebe9fe1bea569a68e9b4 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:01 +0000 +Subject: [PATCH 19/42] dump: Consolidate phdr note writes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [19/41] 180c4c0ab4941a0bf366dc7f32ee035e03daa6c0 + +There's no need to have two write functions. Let's rather have two +functions that set the data for elf 32/64 and then write it in a +common function. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-8-frankja@linux.ibm.com> +(cherry picked from commit bc7d558017e6700f9a05c61b0b638a8994945f0d) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 94 +++++++++++++++++++++++++++-------------------------- + 1 file changed, 48 insertions(+), 46 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 6394e94023..823ca32883 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -246,24 +246,15 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + } + } + +-static void write_elf64_note(DumpState *s, Error **errp) ++static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) + { +- Elf64_Phdr phdr; +- int ret; +- +- memset(&phdr, 0, sizeof(Elf64_Phdr)); +- phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump64(s, s->note_offset); +- phdr.p_paddr = 0; +- phdr.p_filesz = cpu_to_dump64(s, s->note_size); +- phdr.p_memsz = cpu_to_dump64(s, s->note_size); +- phdr.p_vaddr = 0; +- +- ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write program header table"); +- } ++ memset(phdr, 0, sizeof(*phdr)); ++ phdr->p_type = cpu_to_dump32(s, PT_NOTE); ++ phdr->p_offset = cpu_to_dump64(s, s->note_offset); ++ phdr->p_paddr = 0; ++ phdr->p_filesz = cpu_to_dump64(s, s->note_size); ++ phdr->p_memsz = cpu_to_dump64(s, s->note_size); ++ phdr->p_vaddr = 0; + } + + static inline int cpu_index(CPUState *cpu) +@@ -311,24 +302,15 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + +-static void write_elf32_note(DumpState *s, Error **errp) ++static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) + { +- Elf32_Phdr phdr; +- int ret; +- +- memset(&phdr, 0, sizeof(Elf32_Phdr)); +- phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump32(s, s->note_offset); +- phdr.p_paddr = 0; +- phdr.p_filesz = cpu_to_dump32(s, s->note_size); +- 
phdr.p_memsz = cpu_to_dump32(s, s->note_size); +- phdr.p_vaddr = 0; +- +- ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write program header table"); +- } ++ memset(phdr, 0, sizeof(*phdr)); ++ phdr->p_type = cpu_to_dump32(s, PT_NOTE); ++ phdr->p_offset = cpu_to_dump32(s, s->note_offset); ++ phdr->p_paddr = 0; ++ phdr->p_filesz = cpu_to_dump32(s, s->note_size); ++ phdr->p_memsz = cpu_to_dump32(s, s->note_size); ++ phdr->p_vaddr = 0; + } + + static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, +@@ -358,6 +340,32 @@ static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + ++static void write_elf_phdr_note(DumpState *s, Error **errp) ++{ ++ ERRP_GUARD(); ++ Elf32_Phdr phdr32; ++ Elf64_Phdr phdr64; ++ void *phdr; ++ size_t size; ++ int ret; ++ ++ if (dump_is_64bit(s)) { ++ write_elf64_phdr_note(s, &phdr64); ++ size = sizeof(phdr64); ++ phdr = &phdr64; ++ } else { ++ write_elf32_phdr_note(s, &phdr32); ++ size = sizeof(phdr32); ++ phdr = &phdr32; ++ } ++ ++ ret = fd_write_vmcore(phdr, size, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "dump: failed to write program header table"); ++ } ++} ++ + static void write_elf_section(DumpState *s, int type, Error **errp) + { + Elf32_Shdr shdr32; +@@ -551,13 +559,13 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (dump_is_64bit(s)) { +- /* write PT_NOTE to vmcore */ +- write_elf64_note(s, errp); +- if (*errp) { +- return; +- } ++ /* write PT_NOTE to vmcore */ ++ write_elf_phdr_note(s, errp); ++ if (*errp) { ++ return; ++ } + ++ if (dump_is_64bit(s)) { + /* write all PT_LOAD to vmcore */ + write_elf_loads(s, errp); + if (*errp) { +@@ -578,12 +586,6 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + } else { +- /* write PT_NOTE to vmcore */ +- write_elf32_note(s, errp); +- if (*errp) { +- return; +- } +- + /* write all PT_LOAD to vmcore */ + write_elf_loads(s, errp); + if (*errp) { +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch b/SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch new file mode 100644 index 0000000..ac5d269 --- /dev/null +++ b/SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch @@ -0,0 +1,118 @@ +From c851676d202b5b76962529f3b6d433936becbd8a Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:00 +0000 +Subject: [PATCH 18/42] dump: Introduce dump_is_64bit() helper function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [18/41] a0fd2d1985c61b8e50d4a7ca26bc0ee6fcaa6196 + +Checking d_class in dump_info leads to lengthy conditionals so let's +shorten things a bit by introducing a helper function. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-7-frankja@linux.ibm.com> +(cherry picked from commit 05bbaa5040ccb3419e8b93af8040485430e2db42) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 85a402b38c..6394e94023 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -55,6 +55,11 @@ static Error *dump_migration_blocker; + DIV_ROUND_UP((name_size), 4) + \ + DIV_ROUND_UP((desc_size), 4)) * 4) + ++static inline bool dump_is_64bit(DumpState *s) ++{ ++ return s->dump_info.d_class == ELFCLASS64; ++} ++ + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) + { + if (s->dump_info.d_endian == ELFDATA2LSB) { +@@ -489,7 +494,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + get_offset_range(memory_mapping->phys_addr, + memory_mapping->length, + s, &offset, &filesz); +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + write_elf64_load(s, memory_mapping, phdr_index++, offset, + filesz, errp); + } else { +@@ -537,7 +542,7 @@ static void dump_begin(DumpState *s, Error **errp) + */ + + /* write elf header to vmcore */ +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + write_elf64_header(s, errp); + } else { + write_elf32_header(s, errp); +@@ -546,7 +551,7 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + /* write PT_NOTE to vmcore */ + write_elf64_note(s, errp); + if (*errp) { +@@ -757,7 +762,7 @@ static void get_note_sizes(DumpState *s, const void *note, + uint64_t name_sz; + uint64_t desc_sz; + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + const Elf64_Nhdr *hdr = note; + note_head_sz = sizeof(Elf64_Nhdr); + name_sz = tswap64(hdr->n_namesz); +@@ -1017,10 +1022,10 @@ out: + + static void write_dump_header(DumpState *s, Error **errp) + { +- if (s->dump_info.d_class == ELFCLASS32) { +- create_header32(s, errp); +- } else { ++ if (dump_is_64bit(s)) { + create_header64(s, errp); ++ } else { ++ create_header32(s, errp); + } + } + +@@ -1715,8 +1720,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + uint32_t size; + uint16_t format; + +- note_head_size = s->dump_info.d_class == ELFCLASS32 ? +- sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr); ++ note_head_size = dump_is_64bit(s) ? 
++ sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); + + format = le16_to_cpu(vmci->vmcoreinfo.guest_format); + size = le32_to_cpu(vmci->vmcoreinfo.size); +@@ -1819,7 +1824,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + } + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + s->phdr_offset = sizeof(Elf64_Ehdr); + s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; + s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch b/SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch new file mode 100644 index 0000000..71414ed --- /dev/null +++ b/SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch @@ -0,0 +1,136 @@ +From 255722667a4fa4d522bb0b7e0825cbbe635abb8d Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:57 +0000 +Subject: [PATCH 15/42] dump: Introduce shdr_num to decrease complexity +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [15/41] b0215ea5d381ef7f6abfe3f3bafea51ce933da56 + +Let's move from a boolean to a int variable which will later enable us +to store the number of sections that are in the dump file. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-4-frankja@linux.ibm.com> +(cherry picked from commit 862a395858e5a302ed5921487777acdc95a3a31b) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 24 ++++++++++++------------ + include/sysemu/dump.h | 2 +- + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 7236b167cc..972e28b089 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -145,12 +145,12 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); +- if (s->have_section) { ++ if (s->shdr_num) { + uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump64(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, 1); ++ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } + + ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +@@ -182,12 +182,12 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); +- if (s->have_section) { ++ if (s->shdr_num) { + uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump32(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, 1); ++ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } + + ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +@@ -566,7 +566,7 @@ static void dump_begin(DumpState *s, Error **errp) + } + + /* write section to vmcore */ +- if (s->have_section) { ++ if (s->shdr_num) { + write_elf_section(s, 1, errp); + 
if (*errp) { + return; +@@ -592,7 +592,7 @@ static void dump_begin(DumpState *s, Error **errp) + } + + /* write section to vmcore */ +- if (s->have_section) { ++ if (s->shdr_num) { + write_elf_section(s, 0, errp); + if (*errp) { + return; +@@ -1811,11 +1811,11 @@ static void dump_init(DumpState *s, int fd, bool has_format, + */ + s->phdr_num = 1; /* PT_NOTE */ + if (s->list.num < UINT16_MAX - 2) { ++ s->shdr_num = 0; + s->phdr_num += s->list.num; +- s->have_section = false; + } else { + /* sh_info of section 0 holds the real number of phdrs */ +- s->have_section = true; ++ s->shdr_num = 1; + + /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ + if (s->list.num <= UINT32_MAX - 1) { +@@ -1826,19 +1826,19 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- if (s->have_section) { ++ if (s->shdr_num) { + s->memory_offset = sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) + s->note_size; ++ sizeof(Elf64_Shdr) * s->shdr_num + s->note_size; + } else { + s->memory_offset = sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; + } + } else { +- if (s->have_section) { ++ if (s->shdr_num) { + s->memory_offset = sizeof(Elf32_Ehdr) + + sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) + s->note_size; ++ sizeof(Elf32_Shdr) * s->shdr_num + s->note_size; + } else { + s->memory_offset = sizeof(Elf32_Ehdr) + + sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index b463fc9c02..19458bffbd 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -155,7 +155,7 @@ typedef struct DumpState { + ArchDumpInfo dump_info; + MemoryMappingList list; + uint32_t phdr_num; +- bool have_section; ++ uint32_t shdr_num; + bool resume; + bool detached; + ssize_t note_size; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch b/SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch new file mode 100644 index 0000000..5d66c34 --- /dev/null +++ b/SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch @@ -0,0 +1,142 @@ +From a18ba2fbaf132724e81be92da42b36d8f365e66c Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:56 +0000 +Subject: [PATCH 24/42] dump: Refactor dump_iterate and introduce + dump_filter_memblock_*() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [24/41] 74ef470f24d9d98093c4d63730a99474587033fd + +The iteration over the memblocks in dump_iterate() is hard to +understand so it's about time to clean it up. Instead of manually +grabbing the next memblock we can use QTAILQ_FOREACH to iterate over +all memblocks. + +Additionally we move the calculation of the offset and length out by +introducing and using the dump_filter_memblock_*() functions. These +functions will later be used to cleanup other parts of dump.c. 
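For readers who want to see the new filter helpers in isolation, the following standalone sketch condenses the same clamping logic outside of QEMU; the struct name, helper names and the sample addresses are invented for illustration only:

#include <stdint.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Stand-in for GuestPhysBlock: only the fields the filter logic needs. */
struct block {
    int64_t target_start;
    int64_t target_end;
};

/* Bytes of 'b' that fall inside the filter window (0 if none). */
static int64_t filtered_size(const struct block *b, int64_t start, int64_t len)
{
    if (!len) {                        /* no filter: whole block */
        return b->target_end - b->target_start;
    }
    int64_t left = MAX(start, b->target_start);
    int64_t right = MIN(start + len, b->target_end);
    int64_t size = right - left;
    return size > 0 ? size : 0;
}

/* Offset into 'b' where the filtered region begins, or -1 if disjoint. */
static int64_t filtered_start(const struct block *b, int64_t start, int64_t len)
{
    if (len) {
        if (b->target_start >= start + len || b->target_end <= start) {
            return -1;
        }
        if (start > b->target_start) {
            return start - b->target_start;
        }
    }
    return 0;
}

int main(void)
{
    struct block b = { .target_start = 0x1000, .target_end = 0x9000 };
    /* Example filter window: 0x4000..0x6000 */
    printf("start=%lld size=%lld\n",
           (long long)filtered_start(&b, 0x4000, 0x2000),
           (long long)filtered_size(&b, 0x4000, 0x2000));
    /* prints start=12288 (0x3000 into the block) size=8192 */
    return 0;
}

With a filter window of 0x4000..0x6000 against a block spanning 0x1000..0x9000, the helpers report an offset of 0x3000 into the block and a length of 0x2000, which is exactly the overlap the rewritten loop in dump_iterate() then writes out.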
+ +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-4-frankja@linux.ibm.com> +(cherry picked from commit 1e8113032f5b1efc5da66382470ce4809c76f8f2) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 74 ++++++++++++++++++++++++++++++----------------------- + 1 file changed, 42 insertions(+), 32 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index fa787f379f..d981e843dd 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -592,31 +592,43 @@ static void dump_begin(DumpState *s, Error **errp) + write_elf_notes(s, errp); + } + +-static int get_next_block(DumpState *s, GuestPhysBlock *block) ++static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { +- while (1) { +- block = QTAILQ_NEXT(block, next); +- if (!block) { +- /* no more block */ +- return 1; +- } ++ int64_t size, left, right; + +- s->start = 0; +- s->next_block = block; +- if (s->has_filter) { +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { +- /* This block is out of the range */ +- continue; +- } ++ /* No filter, return full size */ ++ if (!filter_area_length) { ++ return block->target_end - block->target_start; ++ } + +- if (s->begin > block->target_start) { +- s->start = s->begin - block->target_start; +- } ++ /* calculate the overlapped region. */ ++ left = MAX(filter_area_start, block->target_start); ++ right = MIN(filter_area_start + filter_area_length, block->target_end); ++ size = right - left; ++ size = size > 0 ? size : 0; ++ ++ return size; ++} ++ ++static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) ++{ ++ if (filter_area_length) { ++ /* return -1 if the block is not within filter area */ ++ if (block->target_start >= filter_area_start + filter_area_length || ++ block->target_end <= filter_area_start) { ++ return -1; + } + +- return 0; ++ if (filter_area_start > block->target_start) { ++ return filter_area_start - block->target_start; ++ } + } ++ ++ return 0; + } + + /* write all memory to vmcore */ +@@ -624,24 +636,22 @@ static void dump_iterate(DumpState *s, Error **errp) + { + ERRP_GUARD(); + GuestPhysBlock *block; +- int64_t size; +- +- do { +- block = s->next_block; ++ int64_t memblock_size, memblock_start; + +- size = block->target_end - block->target_start; +- if (s->has_filter) { +- size -= s->start; +- if (s->begin + s->length < block->target_end) { +- size -= block->target_end - (s->begin + s->length); +- } ++ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ memblock_start = dump_filtered_memblock_start(block, s->begin, s->length); ++ if (memblock_start == -1) { ++ continue; + } +- write_memory(s, block, s->start, size, errp); ++ ++ memblock_size = dump_filtered_memblock_size(block, s->begin, s->length); ++ ++ /* Write the memory to file */ ++ write_memory(s, block, memblock_start, memblock_size, errp); + if (*errp) { + return; + } +- +- } while (!get_next_block(s, block)); ++ } + } + + static void create_vmcore(DumpState *s, Error **errp) +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch b/SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch new file mode 100644 index 0000000..0bb95f9 --- /dev/null +++ b/SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch @@ -0,0 +1,45 @@ +From 6932fe3afbec443bbf6acff5b707536254e1bc37 Mon Sep 17 00:00:00 2001 
+From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:16 +0000 +Subject: [PATCH 35/42] dump: Reintroduce memory_offset and section_offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [35/41] e60c0d066aeeedb42e724712bc3aa7b7591c6c79 + +section_offset will later be used to store the offset to the section +data which will be stored last. For now memory_offset is only needed +to make section_offset look nicer. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-5-frankja@linux.ibm.com> +(cherry picked from commit 13fd417ddc81a1685c6a8f4e1c80bbfe7150f164) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/dump/dump.c b/dump/dump.c +index d17537d4e9..7a42401790 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1885,6 +1885,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; + s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; + } ++ s->memory_offset = s->note_offset + s->note_size; ++ s->section_offset = s->memory_offset + s->total_size; + + return; + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch b/SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch new file mode 100644 index 0000000..b4a1f10 --- /dev/null +++ b/SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch @@ -0,0 +1,70 @@ +From a8eeab6936a2bd27b33b63aed7e2ef96034f7772 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:58 +0000 +Subject: [PATCH 16/42] dump: Remove the section if when calculating the memory + offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [16/41] ff214d2c23b9cb16fd49d22d976829267df43133 + +When s->shdr_num is 0 we'll add 0 bytes of section headers which is +equivalent to not adding section headers but with the multiplication +we can remove a if/else. 
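As a concrete illustration of the equivalence claimed above (the numbers are examples only, using the usual ELF64 sizes of a 64-byte Ehdr, 56-byte Phdr and 64-byte Shdr): with phdr_num = 2 and note_size = 256, a dump without sections computes 64 + 56 * 2 + 64 * 0 + 256 = 432, identical to what the removed no-section branch produced, while shdr_num = 1 simply adds one more 64-byte section header, so the single multiplication covers both cases without a conditional.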
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-5-frankja@linux.ibm.com> +(cherry picked from commit 344107e07bd81546474a54ab83800158ca953059) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 972e28b089..5cc2322325 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1826,23 +1826,15 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- if (s->shdr_num) { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) * s->shdr_num + s->note_size; +- } else { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; +- } ++ s->memory_offset = sizeof(Elf64_Ehdr) + ++ sizeof(Elf64_Phdr) * s->phdr_num + ++ sizeof(Elf64_Shdr) * s->shdr_num + ++ s->note_size; + } else { +- if (s->shdr_num) { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) * s->shdr_num + s->note_size; +- } else { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; +- } ++ s->memory_offset = sizeof(Elf32_Ehdr) + ++ sizeof(Elf32_Phdr) * s->phdr_num + ++ sizeof(Elf32_Shdr) * s->shdr_num + ++ s->note_size; + } + + return; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Remove-the-sh_info-variable.patch b/SOURCES/kvm-dump-Remove-the-sh_info-variable.patch new file mode 100644 index 0000000..3c9fe51 --- /dev/null +++ b/SOURCES/kvm-dump-Remove-the-sh_info-variable.patch @@ -0,0 +1,176 @@ +From eb763bec53d6b9aea7a6b60b0cf8c5d8b5f1b35c Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 7 Apr 2022 09:48:24 +0000 +Subject: [PATCH 14/42] dump: Remove the sh_info variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [14/41] 24af12b78c8f5a02cf85df2f6b1d64249f9499c9 + +There's no need to have phdr_num and sh_info at the same time. We can +make phdr_num 32 bit and set PN_XNUM when we write the header if +phdr_num >= PN_XNUM. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220407094824.5074-1-frankja@linux.ibm.com> +(cherry picked from commit 046bc4160bc780eaacc2d702a2589f1a7a01188d) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 44 +++++++++++++++++++++++-------------------- + include/sysemu/dump.h | 3 +-- + 2 files changed, 25 insertions(+), 22 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 9876123f2e..7236b167cc 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -124,6 +124,12 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) + + static void write_elf64_header(DumpState *s, Error **errp) + { ++ /* ++ * phnum in the elf header is 16 bit, if we have more segments we ++ * set phnum to PN_XNUM and write the real number of segments to a ++ * special section. 
++ */ ++ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); + Elf64_Ehdr elf_header; + int ret; + +@@ -138,9 +144,9 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); + elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num); ++ elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->have_section) { +- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info; ++ uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump64(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +@@ -155,6 +161,12 @@ static void write_elf64_header(DumpState *s, Error **errp) + + static void write_elf32_header(DumpState *s, Error **errp) + { ++ /* ++ * phnum in the elf header is 16 bit, if we have more segments we ++ * set phnum to PN_XNUM and write the real number of segments to a ++ * special section. ++ */ ++ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); + Elf32_Ehdr elf_header; + int ret; + +@@ -169,9 +181,9 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); + elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num); ++ elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->have_section) { +- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info; ++ uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump32(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +@@ -358,12 +370,12 @@ static void write_elf_section(DumpState *s, int type, Error **errp) + if (type == 0) { + shdr_size = sizeof(Elf32_Shdr); + memset(&shdr32, 0, shdr_size); +- shdr32.sh_info = cpu_to_dump32(s, s->sh_info); ++ shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr = &shdr32; + } else { + shdr_size = sizeof(Elf64_Shdr); + memset(&shdr64, 0, shdr_size); +- shdr64.sh_info = cpu_to_dump32(s, s->sh_info); ++ shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr = &shdr64; + } + +@@ -478,13 +490,6 @@ static void write_elf_loads(DumpState *s, Error **errp) + hwaddr offset, filesz; + MemoryMapping *memory_mapping; + uint32_t phdr_index = 1; +- uint32_t max_index; +- +- if (s->have_section) { +- max_index = s->sh_info; +- } else { +- max_index = s->phdr_num; +- } + + QTAILQ_FOREACH(memory_mapping, &s->list.head, next) { + get_offset_range(memory_mapping->phys_addr, +@@ -502,7 +507,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + return; + } + +- if (phdr_index >= max_index) { ++ if (phdr_index >= s->phdr_num) { + break; + } + } +@@ -1809,22 +1814,21 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->phdr_num += s->list.num; + s->have_section = false; + } else { ++ /* sh_info of section 0 holds the real number of phdrs */ + s->have_section = true; +- s->phdr_num = PN_XNUM; +- s->sh_info = 1; /* PT_NOTE */ + + /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ + if (s->list.num <= UINT32_MAX - 1) { +- s->sh_info += s->list.num; ++ s->phdr_num += s->list.num; + } else { +- s->sh_info = UINT32_MAX; ++ s->phdr_num = UINT32_MAX; + } + } + + if (s->dump_info.d_class == ELFCLASS64) { + if (s->have_section) { + s->memory_offset = sizeof(Elf64_Ehdr) + 
+- sizeof(Elf64_Phdr) * s->sh_info + ++ sizeof(Elf64_Phdr) * s->phdr_num + + sizeof(Elf64_Shdr) + s->note_size; + } else { + s->memory_offset = sizeof(Elf64_Ehdr) + +@@ -1833,7 +1837,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } else { + if (s->have_section) { + s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->sh_info + ++ sizeof(Elf32_Phdr) * s->phdr_num + + sizeof(Elf32_Shdr) + s->note_size; + } else { + s->memory_offset = sizeof(Elf32_Ehdr) + +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 250143cb5a..b463fc9c02 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -154,8 +154,7 @@ typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; +- uint16_t phdr_num; +- uint32_t sh_info; ++ uint32_t phdr_num; + bool have_section; + bool resume; + bool detached; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch b/SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch new file mode 100644 index 0000000..bdcaccd --- /dev/null +++ b/SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch @@ -0,0 +1,69 @@ +From 18fef7f02801d51207d67b8f8ec5f0d828889c78 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:11:01 +0000 +Subject: [PATCH 29/42] dump: Rename write_elf*_phdr_note to + prepare_elf*_phdr_note +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [29/41] 876cea6f6e51be8df2763f56d0daef99d11fdd49 + +The functions in question do not actually write to the file descriptor +they set up a buffer which is later written to the fd. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-9-frankja@linux.ibm.com> +(cherry picked from commit 2341a94d3a0a8a93a5a977e642da1807b8edaab8) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 8d5226f861..c2c1341ad7 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -261,7 +261,7 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + } + } + +-static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) ++static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) + { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = cpu_to_dump32(s, PT_NOTE); +@@ -317,7 +317,7 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + +-static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) ++static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) + { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = cpu_to_dump32(s, PT_NOTE); +@@ -365,11 +365,11 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) + int ret; + + if (dump_is_64bit(s)) { +- write_elf64_phdr_note(s, &phdr64); ++ prepare_elf64_phdr_note(s, &phdr64); + size = sizeof(phdr64); + phdr = &phdr64; + } else { +- write_elf32_phdr_note(s, &phdr32); ++ prepare_elf32_phdr_note(s, &phdr32); + size = sizeof(phdr32); + phdr = &phdr32; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch b/SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch new file mode 100644 index 0000000..b5758cf --- /dev/null +++ b/SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch @@ -0,0 +1,57 @@ +From 04d4947a22fe3192384ff486d0a979d799ded98e Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:55 +0000 +Subject: [PATCH 23/42] dump: Rename write_elf_loads to write_elf_phdr_loads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [23/41] 18e3ef70b97c525b7c43cf12143204bdb1060e4f + +Let's make it a bit clearer that we write the program headers of the +PT_LOAD type. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Steffen Eiden +Message-Id: <20220811121111.9878-3-frankja@linux.ibm.com> +(cherry picked from commit afae6056ea79e2d89fd90867de3a01732eae724f) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index a451abc590..fa787f379f 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -491,7 +491,7 @@ static void get_offset_range(hwaddr phys_addr, + } + } + +-static void write_elf_loads(DumpState *s, Error **errp) ++static void write_elf_phdr_loads(DumpState *s, Error **errp) + { + ERRP_GUARD(); + hwaddr offset, filesz; +@@ -574,8 +574,8 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); ++ /* write all PT_LOADs to vmcore */ ++ write_elf_phdr_loads(s, errp); + if (*errp) { + return; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Reorder-struct-DumpState.patch b/SOURCES/kvm-dump-Reorder-struct-DumpState.patch new file mode 100644 index 0000000..2ca3f2a --- /dev/null +++ b/SOURCES/kvm-dump-Reorder-struct-DumpState.patch @@ -0,0 +1,68 @@ +From 7e8d6290099b33f88621b45e62652a97704c9573 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:15 +0000 +Subject: [PATCH 34/42] dump: Reorder struct DumpState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [34/41] 8d44e5e8c86ea5b33644eba141046cd657d0071e + +Let's move ELF related members into one block and guest memory related +ones into another to improve readability. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-4-frankja@linux.ibm.com> +(cherry picked from commit 8384b73c46fd474847d7e74d121318e344edc3c4) +Signed-off-by: Cédric Le Goater +--- + include/sysemu/dump.h | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 9995f65dc8..9ed811b313 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -154,15 +154,8 @@ typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; +- uint32_t phdr_num; +- uint32_t shdr_num; + bool resume; + bool detached; +- ssize_t note_size; +- hwaddr shdr_offset; +- hwaddr phdr_offset; +- hwaddr section_offset; +- hwaddr note_offset; + hwaddr memory_offset; + int fd; + +@@ -177,6 +170,15 @@ typedef struct DumpState { + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + ++ /* Elf dump related data */ ++ uint32_t phdr_num; ++ uint32_t shdr_num; ++ ssize_t note_size; ++ hwaddr shdr_offset; ++ hwaddr phdr_offset; ++ hwaddr section_offset; ++ hwaddr note_offset; ++ + void *elf_section_hdrs; /* Pointer to section header buffer */ + + uint8_t *note_buf; /* buffer for notes */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch b/SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch new file mode 100644 index 0000000..421a98e --- /dev/null +++ b/SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch @@ -0,0 +1,467 @@ +From 8f674e0e12e4b88fc035948612a0b0949e0ad892 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:54 +0000 +Subject: [PATCH 22/42] dump: Replace opaque DumpState pointer with a typed one +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [22/41] 5f071d7ef441ae6f5da70eb56018c4657deee3d7 + +It's always better to convey the type of a pointer if at all +possible. So let's add the DumpState typedef to typedefs.h and move +the dump note functions from the opaque pointers to DumpState +pointers. 
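As a side note on why the typed parameter helps, here is a tiny sketch, entirely separate from the QEMU hooks (the struct contents, function names and printf bodies are invented for illustration): with void * the callee has to recover the type itself and the compiler cannot catch a caller passing the wrong object, while the typed signature turns that mistake into a compile error.

#include <stdio.h>

typedef struct DumpState { int fd; } DumpState;   /* illustrative stand-in */

/* Opaque style: the conversion is unchecked, a wrong pointer compiles silently. */
static int write_note_opaque(int cpuid, void *opaque)
{
    DumpState *s = opaque;
    return printf("cpu %d -> fd %d\n", cpuid, s->fd);
}

/* Typed style: passing anything but a DumpState * is a compile-time error. */
static int write_note_typed(int cpuid, DumpState *s)
{
    return printf("cpu %d -> fd %d\n", cpuid, s->fd);
}

int main(void)
{
    DumpState s = { .fd = 3 };
    write_note_opaque(0, &s);
    write_note_typed(1, &s);
    return 0;
}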
+ +Signed-off-by: Janosch Frank +CC: Peter Maydell +CC: Cédric Le Goater +CC: Daniel Henrique Barboza +CC: David Gibson +CC: Greg Kurz +CC: Palmer Dabbelt +CC: Alistair Francis +CC: Bin Meng +CC: Cornelia Huck +CC: Thomas Huth +CC: Richard Henderson +CC: David Hildenbrand +Acked-by: Daniel Henrique Barboza +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-2-frankja@linux.ibm.com> +(cherry picked from commit 1af0006ab959864dfa2f59e9136c5fb93000b61f) +Signed-off-by: Cédric Le Goater +--- + include/hw/core/sysemu-cpu-ops.h | 8 ++++---- + include/qemu/typedefs.h | 1 + + target/arm/arch_dump.c | 6 ++---- + target/arm/cpu.h | 4 ++-- + target/i386/arch_dump.c | 30 +++++++++++++++--------------- + target/i386/cpu.h | 8 ++++---- + target/ppc/arch_dump.c | 18 +++++++++--------- + target/ppc/cpu.h | 4 ++-- + target/riscv/arch_dump.c | 6 ++---- + target/riscv/cpu.h | 4 ++-- + target/s390x/arch_dump.c | 10 +++++----- + target/s390x/s390x-internal.h | 2 +- + 12 files changed, 49 insertions(+), 52 deletions(-) + +diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h +index a9ba39e5f2..ee169b872c 100644 +--- a/include/hw/core/sysemu-cpu-ops.h ++++ b/include/hw/core/sysemu-cpu-ops.h +@@ -53,25 +53,25 @@ typedef struct SysemuCPUOps { + * 32-bit VM coredump. + */ + int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + /** + * @write_elf64_note: Callback for writing a CPU-specific ELF note to a + * 64-bit VM coredump. + */ + int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + /** + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. + */ + int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + /** + * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 64-bit VM coredump. + */ + int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + /** + * @virtio_is_big_endian: Callback to return %true if a CPU which supports + * runtime configurable endianness is currently big-endian. 
+diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h +index ee60eb3de4..ac9d031be6 100644 +--- a/include/qemu/typedefs.h ++++ b/include/qemu/typedefs.h +@@ -125,6 +125,7 @@ typedef struct VirtIODevice VirtIODevice; + typedef struct Visitor Visitor; + typedef struct VMChangeStateEntry VMChangeStateEntry; + typedef struct VMStateDescription VMStateDescription; ++typedef struct DumpState DumpState; + + /* + * Pointer types +diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c +index 0184845310..3a824e0aa6 100644 +--- a/target/arm/arch_dump.c ++++ b/target/arm/arch_dump.c +@@ -232,12 +232,11 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, + #endif + + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct aarch64_note note; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +- DumpState *s = opaque; + uint64_t pstate, sp; + int ret, i; + +@@ -360,12 +359,11 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env, + } + + int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct arm_note note; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i; + bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu); + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index e33f37b70a..8d2f496ef9 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1065,9 +1065,9 @@ int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg); + const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname); + + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + + #ifdef TARGET_AARCH64 + int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); +diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c +index 004141fc04..c290910a04 100644 +--- a/target/i386/arch_dump.c ++++ b/target/i386/arch_dump.c +@@ -42,7 +42,7 @@ typedef struct { + + static int x86_64_write_elf64_note(WriteCoreDumpFunction f, + CPUX86State *env, int id, +- void *opaque) ++ DumpState *s) + { + x86_64_user_regs_struct regs; + Elf64_Nhdr *note; +@@ -94,7 +94,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f, + buf += descsz - sizeof(x86_64_user_regs_struct)-sizeof(target_ulong); + memcpy(buf, ®s, sizeof(x86_64_user_regs_struct)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -148,7 +148,7 @@ static void x86_fill_elf_prstatus(x86_elf_prstatus *prstatus, CPUX86State *env, + } + + static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, +- int id, void *opaque) ++ int id, DumpState *s) + { + x86_elf_prstatus prstatus; + Elf64_Nhdr *note; +@@ -170,7 +170,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &prstatus, sizeof(prstatus)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -180,7 +180,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, + } + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + 
X86CPU *cpu = X86_CPU(cs); + int ret; +@@ -189,10 +189,10 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + bool lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK); + + if (lma) { +- ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, opaque); ++ ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, s); + } else { + #endif +- ret = x86_write_elf64_note(f, &cpu->env, cpuid, opaque); ++ ret = x86_write_elf64_note(f, &cpu->env, cpuid, s); + #ifdef TARGET_X86_64 + } + #endif +@@ -201,7 +201,7 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + } + + int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + x86_elf_prstatus prstatus; +@@ -224,7 +224,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &prstatus, sizeof(prstatus)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -329,7 +329,7 @@ static void qemu_get_cpustate(QEMUCPUState *s, CPUX86State *env) + + static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + CPUX86State *env, +- void *opaque, ++ DumpState *s, + int type) + { + QEMUCPUState state; +@@ -369,7 +369,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &state, sizeof(state)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -379,19 +379,19 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + } + + int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cs, +- void *opaque) ++ DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + +- return cpu_write_qemu_note(f, &cpu->env, opaque, 1); ++ return cpu_write_qemu_note(f, &cpu->env, s, 1); + } + + int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs, +- void *opaque) ++ DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + +- return cpu_write_qemu_note(f, &cpu->env, opaque, 0); ++ return cpu_write_qemu_note(f, &cpu->env, s, 0); + } + + int cpu_get_dump_info(ArchDumpInfo *info, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 006b735fe4..5d2ddd81b9 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1887,13 +1887,13 @@ extern const VMStateDescription vmstate_x86_cpu; + int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + + void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, + Error **errp); +diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c +index bb392f6d88..e9f512bcd4 100644 +--- a/target/ppc/arch_dump.c ++++ b/target/ppc/arch_dump.c +@@ -270,23 +270,23 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + static int ppc_write_all_elf_notes(const char *note_name, + WriteCoreDumpFunction f, + PowerPCCPU *cpu, int id, +- void *opaque) ++ DumpState *s) + { +- NoteFuncArg arg = { .state = opaque }; ++ NoteFuncArg arg = { .state = 
s }; + int ret = -1; + int note_size; + const NoteFuncDesc *nf; + + for (nf = note_func; nf->note_contents_func; nf++) { +- arg.note.hdr.n_namesz = cpu_to_dump32(opaque, sizeof(arg.note.name)); +- arg.note.hdr.n_descsz = cpu_to_dump32(opaque, nf->contents_size); ++ arg.note.hdr.n_namesz = cpu_to_dump32(s, sizeof(arg.note.name)); ++ arg.note.hdr.n_descsz = cpu_to_dump32(s, nf->contents_size); + strncpy(arg.note.name, note_name, sizeof(arg.note.name)); + + (*nf->note_contents_func)(&arg, cpu); + + note_size = + sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size; +- ret = f(&arg.note, note_size, opaque); ++ ret = f(&arg.note, note_size, s); + if (ret < 0) { + return -1; + } +@@ -295,15 +295,15 @@ static int ppc_write_all_elf_notes(const char *note_name, + } + + int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + PowerPCCPU *cpu = POWERPC_CPU(cs); +- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); ++ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); + } + + int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + PowerPCCPU *cpu = POWERPC_CPU(cs); +- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); ++ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); + } +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 23e8b76c85..f5fb284706 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1289,9 +1289,9 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu); + const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name); + #endif + int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + #ifndef CONFIG_USER_ONLY + void ppc_cpu_do_interrupt(CPUState *cpu); + bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); +diff --git a/target/riscv/arch_dump.c b/target/riscv/arch_dump.c +index 709f621d82..736a232956 100644 +--- a/target/riscv/arch_dump.c ++++ b/target/riscv/arch_dump.c +@@ -64,12 +64,11 @@ static void riscv64_note_init(struct riscv64_note *note, DumpState *s, + } + + int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct riscv64_note note; + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i = 0; + const char name[] = "CORE"; + +@@ -134,12 +133,11 @@ static void riscv32_note_init(struct riscv32_note *note, DumpState *s, + } + + int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct riscv32_note note; + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i; + const char name[] = "CORE"; + +diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h +index 0760c0af93..4cce524b2c 100644 +--- a/target/riscv/cpu.h ++++ b/target/riscv/cpu.h +@@ -344,9 +344,9 @@ extern const char * const riscv_fpr_regnames[]; + const char *riscv_cpu_get_trap_name(target_ulong cause, bool async); + void riscv_cpu_do_interrupt(CPUState *cpu); + int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState 
*cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); + int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); + bool riscv_cpu_fp_enabled(CPURISCVState *env); +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index 08daf93ae1..f60a14920d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -204,7 +204,7 @@ static const NoteFuncDesc note_linux[] = { + static int s390x_write_elf64_notes(const char *note_name, + WriteCoreDumpFunction f, + S390CPU *cpu, int id, +- void *opaque, ++ DumpState *s, + const NoteFuncDesc *funcs) + { + Note note; +@@ -222,7 +222,7 @@ static int s390x_write_elf64_notes(const char *note_name, + (*nf->note_contents_func)(¬e, cpu, id); + + note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; +- ret = f(¬e, note_size, opaque); ++ ret = f(¬e, note_size, s); + + if (ret < 0) { + return -1; +@@ -235,16 +235,16 @@ static int s390x_write_elf64_notes(const char *note_name, + + + int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + S390CPU *cpu = S390_CPU(cs); + int r; + +- r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, opaque, note_core); ++ r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, s, note_core); + if (r) { + return r; + } +- return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux); ++ return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); + } + + int cpu_get_dump_info(ArchDumpInfo *info, +diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h +index 1a178aed41..02cf6c3f43 100644 +--- a/target/s390x/s390x-internal.h ++++ b/target/s390x/s390x-internal.h +@@ -228,7 +228,7 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb, + + /* arch_dump.c */ + int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + + + /* cc_helper.c */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch b/SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch new file mode 100644 index 0000000..e077bea --- /dev/null +++ b/SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch @@ -0,0 +1,73 @@ +From 1f7cb73592a1922b3a981eb3232098281e07679f Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:59 +0000 +Subject: [PATCH 27/42] dump: Rework dump_calculate_size function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [27/41] eaa05c39109b57a119752ad3df66f4c2ace2cbe4 + +dump_calculate_size() sums up all the sizes of the guest memory +blocks. Since we already have a function that calculates the size of a +single memory block (dump_get_memblock_size()) we can simply iterate +over the blocks and use the function instead of calculating the size +ourselves. 
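+
+The helper actually called in the hunk below, dump_filtered_memblock_size(),
+is not itself part of this patch; a minimal sketch of the behaviour implied
+by its use (clamping a block to the optional filter window, simplified and
+not the actual implementation) looks like this:
+
+    static int64_t filtered_memblock_size(GuestPhysBlock *block,
+                                          int64_t filter_area_start,
+                                          int64_t filter_area_length)
+    {
+        int64_t left, right, size;
+
+        if (!filter_area_length) {
+            /* no filter requested: the whole block is dumped */
+            return block->target_end - block->target_start;
+        }
+
+        /* size of the overlap between the block and the filter window */
+        left = MAX(filter_area_start, block->target_start);
+        right = MIN(filter_area_start + filter_area_length, block->target_end);
+        size = right - left;
+        return size > 0 ? size : 0;
+    }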
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20220811121111.9878-7-frankja@linux.ibm.com> +(cherry picked from commit c370d5300f9ac1f90f8158082d22262b904fe30e) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 22 ++++++++-------------- + 1 file changed, 8 insertions(+), 14 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index f6fe13e258..902a85ef8e 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1557,25 +1557,19 @@ bool dump_in_progress(void) + return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE); + } + +-/* calculate total size of memory to be dumped (taking filter into +- * acoount.) */ ++/* ++ * calculate total size of memory to be dumped (taking filter into ++ * account.) ++ */ + static int64_t dump_calculate_size(DumpState *s) + { + GuestPhysBlock *block; +- int64_t size = 0, total = 0, left = 0, right = 0; ++ int64_t total = 0; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (dump_has_filter(s)) { +- /* calculate the overlapped region. */ +- left = MAX(s->filter_area_begin, block->target_start); +- right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end); +- size = right - left; +- size = size > 0 ? size : 0; +- } else { +- /* count the whole region in */ +- size = (block->target_end - block->target_start); +- } +- total += size; ++ total += dump_filtered_memblock_size(block, ++ s->filter_area_begin, ++ s->filter_area_length); + } + + return total; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rework-filter-area-variables.patch b/SOURCES/kvm-dump-Rework-filter-area-variables.patch new file mode 100644 index 0000000..4e22f41 --- /dev/null +++ b/SOURCES/kvm-dump-Rework-filter-area-variables.patch @@ -0,0 +1,187 @@ +From 411f5354b809f6b783946e58d7655135814fb809 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:58 +0000 +Subject: [PATCH 26/42] dump: Rework filter area variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [26/41] f10a5523dfd2724f7a8637fca3ed68ba6df659a5 + +While the DumpState begin and length variables directly mirror the API +variable names they are not very descriptive. So let's add a +"filter_area_" prefix and make has_filter a function checking length > 0. 
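+
+As a concrete illustration (values chosen arbitrarily), restricting the dump
+to the 2 MiB of guest-physical memory starting at 1 MiB via the dump API's
+begin/length parameters ends up as
+
+    s->filter_area_begin  = 0x100000;   /* begin  */
+    s->filter_area_length = 0x200000;   /* length */
+
+so dump_has_filter(s) returns true, while a full dump leaves
+filter_area_length at 0 and no filtering is applied.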
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-6-frankja@linux.ibm.com> +(cherry picked from commit dddf725f70bfe7f5adb41fa31dbd06e767271bda) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 53 +++++++++++++++++++++++++------------------ + include/sysemu/dump.h | 13 ++++++++--- + 2 files changed, 41 insertions(+), 25 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index e6aa037f59..f6fe13e258 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -60,6 +60,11 @@ static inline bool dump_is_64bit(DumpState *s) + return s->dump_info.d_class == ELFCLASS64; + } + ++static inline bool dump_has_filter(DumpState *s) ++{ ++ return s->filter_area_length > 0; ++} ++ + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) + { + if (s->dump_info.d_endian == ELFDATA2LSB) { +@@ -444,29 +449,30 @@ static void get_offset_range(hwaddr phys_addr, + *p_offset = -1; + *p_filesz = 0; + +- if (s->has_filter) { +- if (phys_addr < s->begin || phys_addr >= s->begin + s->length) { ++ if (dump_has_filter(s)) { ++ if (phys_addr < s->filter_area_begin || ++ phys_addr >= s->filter_area_begin + s->filter_area_length) { + return; + } + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (s->has_filter) { +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { ++ if (dump_has_filter(s)) { ++ if (block->target_start >= s->filter_area_begin + s->filter_area_length || ++ block->target_end <= s->filter_area_begin) { + /* This block is out of the range */ + continue; + } + +- if (s->begin <= block->target_start) { ++ if (s->filter_area_begin <= block->target_start) { + start = block->target_start; + } else { +- start = s->begin; ++ start = s->filter_area_begin; + } + + size_in_block = block->target_end - start; +- if (s->begin + s->length < block->target_end) { +- size_in_block -= block->target_end - (s->begin + s->length); ++ if (s->filter_area_begin + s->filter_area_length < block->target_end) { ++ size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length); + } + } else { + start = block->target_start; +@@ -639,12 +645,12 @@ static void dump_iterate(DumpState *s, Error **errp) + int64_t memblock_size, memblock_start; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- memblock_start = dump_filtered_memblock_start(block, s->begin, s->length); ++ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length); + if (memblock_start == -1) { + continue; + } + +- memblock_size = dump_filtered_memblock_size(block, s->begin, s->length); ++ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length); + + /* Write the memory to file */ + write_memory(s, block, memblock_start, memblock_size, errp); +@@ -1513,14 +1519,14 @@ static int validate_start_block(DumpState *s) + { + GuestPhysBlock *block; + +- if (!s->has_filter) { ++ if (!dump_has_filter(s)) { + return 0; + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { + /* This block is out of the range */ +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { ++ if (block->target_start >= s->filter_area_begin + s->filter_area_length || ++ block->target_end <= s->filter_area_begin) { + continue; + } + return 0; +@@ -1559,10 +1565,10 @@ static int64_t dump_calculate_size(DumpState *s) + int64_t size = 0, total = 0, left = 0, right = 0; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (s->has_filter) { ++ if 
(dump_has_filter(s)) { + /* calculate the overlapped region. */ +- left = MAX(s->begin, block->target_start); +- right = MIN(s->begin + s->length, block->target_end); ++ left = MAX(s->filter_area_begin, block->target_start); ++ right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end); + size = right - left; + size = size > 0 ? size : 0; + } else { +@@ -1652,9 +1658,12 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + s->fd = fd; +- s->has_filter = has_filter; +- s->begin = begin; +- s->length = length; ++ if (has_filter && !length) { ++ error_setg(errp, QERR_INVALID_PARAMETER, "length"); ++ goto cleanup; ++ } ++ s->filter_area_begin = begin; ++ s->filter_area_length = length; + + memory_mapping_list_init(&s->list); + +@@ -1787,8 +1796,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + return; + } + +- if (s->has_filter) { +- memory_mapping_filter(&s->list, s->begin, s->length); ++ if (dump_has_filter(s)) { ++ memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length); + } + + /* +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 7fce1d4af6..b62513d87d 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -166,9 +166,16 @@ typedef struct DumpState { + hwaddr memory_offset; + int fd; + +- bool has_filter; +- int64_t begin; +- int64_t length; ++ /* ++ * Dump filter area variables ++ * ++ * A filtered dump only contains the guest memory designated by ++ * the start address and length variables defined below. ++ * ++ * If length is 0, no filtering is applied. ++ */ ++ int64_t filter_area_begin; /* Start address of partial guest memory area */ ++ int64_t filter_area_length; /* Length of partial guest memory area */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rework-get_start_block.patch b/SOURCES/kvm-dump-Rework-get_start_block.patch new file mode 100644 index 0000000..f6bdde2 --- /dev/null +++ b/SOURCES/kvm-dump-Rework-get_start_block.patch @@ -0,0 +1,102 @@ +From b56c362132baef40cc25d910c1e0d217d83cfe44 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:57 +0000 +Subject: [PATCH 25/42] dump: Rework get_start_block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [25/41] c93842a1aaeadcc11e91c194452fcd05d163b3ca + +get_start_block() returns the start address of the first memory block +or -1. + +With the GuestPhysBlock iterator conversion we don't need to set the +start address and can therefore remove that code and the "start" +DumpState struct member. 
The only functionality left is the validation +of the start block so it only makes sense to re-name the function to +validate_start_block() + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20220811121111.9878-5-frankja@linux.ibm.com> +(cherry picked from commit 0c2994ac9009577b967529ce18e269da5b280351) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 20 ++++++-------------- + include/sysemu/dump.h | 2 -- + 2 files changed, 6 insertions(+), 16 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index d981e843dd..e6aa037f59 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1509,30 +1509,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) + } + } + +-static ram_addr_t get_start_block(DumpState *s) ++static int validate_start_block(DumpState *s) + { + GuestPhysBlock *block; + + if (!s->has_filter) { +- s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + return 0; + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ /* This block is out of the range */ + if (block->target_start >= s->begin + s->length || + block->target_end <= s->begin) { +- /* This block is out of the range */ + continue; + } +- +- s->next_block = block; +- if (s->begin > block->target_start) { +- s->start = s->begin - block->target_start; +- } else { +- s->start = 0; +- } +- return s->start; +- } ++ return 0; ++ } + + return -1; + } +@@ -1679,8 +1671,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + goto cleanup; + } + +- s->start = get_start_block(s); +- if (s->start == -1) { ++ /* Is the filter filtering everything? */ ++ if (validate_start_block(s) == -1) { + error_setg(errp, QERR_INVALID_PARAMETER, "begin"); + goto cleanup; + } +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index ffc2ea1072..7fce1d4af6 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -166,8 +166,6 @@ typedef struct DumpState { + hwaddr memory_offset; + int fd; + +- GuestPhysBlock *next_block; +- ram_addr_t start; + bool has_filter; + int64_t begin; + int64_t length; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch b/SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch new file mode 100644 index 0000000..1f53426 --- /dev/null +++ b/SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch @@ -0,0 +1,173 @@ +From d1e147a3133d4d31d4b0c02c05916366fadd9c30 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:11:00 +0000 +Subject: [PATCH 28/42] dump: Split elf header functions into prepare and write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [28/41] f70a13ad443835e7f46b7c5e176e372d370ac797 + +Let's split the write from the modification of the elf header so we +can consolidate the write of the data in one function. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-8-frankja@linux.ibm.com> +(cherry picked from commit 670e76998a61ca171200fcded3865b294a2d1243) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 100 ++++++++++++++++++++++++++++------------------------ + 1 file changed, 53 insertions(+), 47 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 902a85ef8e..8d5226f861 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -132,7 +132,7 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) + return 0; + } + +-static void write_elf64_header(DumpState *s, Error **errp) ++static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) + { + /* + * phnum in the elf header is 16 bit, if we have more segments we +@@ -140,34 +140,27 @@ static void write_elf64_header(DumpState *s, Error **errp) + * special section. + */ + uint16_t phnum = MIN(s->phdr_num, PN_XNUM); +- Elf64_Ehdr elf_header; +- int ret; + +- memset(&elf_header, 0, sizeof(Elf64_Ehdr)); +- memcpy(&elf_header, ELFMAG, SELFMAG); +- elf_header.e_ident[EI_CLASS] = ELFCLASS64; +- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; +- elf_header.e_ident[EI_VERSION] = EV_CURRENT; +- elf_header.e_type = cpu_to_dump16(s, ET_CORE); +- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); +- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); +- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); +- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, phnum); ++ memset(elf_header, 0, sizeof(Elf64_Ehdr)); ++ memcpy(elf_header, ELFMAG, SELFMAG); ++ elf_header->e_ident[EI_CLASS] = ELFCLASS64; ++ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; ++ elf_header->e_ident[EI_VERSION] = EV_CURRENT; ++ elf_header->e_type = cpu_to_dump16(s, ET_CORE); ++ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); ++ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); ++ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); ++ elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); ++ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); ++ elf_header->e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); +- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); +- } +- +- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "dump: failed to write elf header"); ++ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + } + } + +-static void write_elf32_header(DumpState *s, Error **errp) ++static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) + { + /* + * phnum in the elf header is 16 bit, if we have more segments we +@@ -175,28 +168,45 @@ static void write_elf32_header(DumpState *s, Error **errp) + * special section. 
+ */ + uint16_t phnum = MIN(s->phdr_num, PN_XNUM); +- Elf32_Ehdr elf_header; +- int ret; + +- memset(&elf_header, 0, sizeof(Elf32_Ehdr)); +- memcpy(&elf_header, ELFMAG, SELFMAG); +- elf_header.e_ident[EI_CLASS] = ELFCLASS32; +- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; +- elf_header.e_ident[EI_VERSION] = EV_CURRENT; +- elf_header.e_type = cpu_to_dump16(s, ET_CORE); +- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); +- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); +- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); +- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, phnum); ++ memset(elf_header, 0, sizeof(Elf32_Ehdr)); ++ memcpy(elf_header, ELFMAG, SELFMAG); ++ elf_header->e_ident[EI_CLASS] = ELFCLASS32; ++ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; ++ elf_header->e_ident[EI_VERSION] = EV_CURRENT; ++ elf_header->e_type = cpu_to_dump16(s, ET_CORE); ++ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); ++ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); ++ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); ++ elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); ++ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); ++ elf_header->e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); +- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + } ++} + +- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); ++static void write_elf_header(DumpState *s, Error **errp) ++{ ++ Elf32_Ehdr elf32_header; ++ Elf64_Ehdr elf64_header; ++ size_t header_size; ++ void *header_ptr; ++ int ret; ++ ++ if (dump_is_64bit(s)) { ++ prepare_elf64_header(s, &elf64_header); ++ header_size = sizeof(elf64_header); ++ header_ptr = &elf64_header; ++ } else { ++ prepare_elf32_header(s, &elf32_header); ++ header_size = sizeof(elf32_header); ++ header_ptr = &elf32_header; ++ } ++ ++ ret = fd_write_vmcore(header_ptr, header_size, s); + if (ret < 0) { + error_setg_errno(errp, -ret, "dump: failed to write elf header"); + } +@@ -565,11 +575,7 @@ static void dump_begin(DumpState *s, Error **errp) + */ + + /* write elf header to vmcore */ +- if (dump_is_64bit(s)) { +- write_elf64_header(s, errp); +- } else { +- write_elf32_header(s, errp); +- } ++ write_elf_header(s, errp); + if (*errp) { + return; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Use-ERRP_GUARD.patch b/SOURCES/kvm-dump-Use-ERRP_GUARD.patch new file mode 100644 index 0000000..1ef42ee --- /dev/null +++ b/SOURCES/kvm-dump-Use-ERRP_GUARD.patch @@ -0,0 +1,420 @@ +From 4ca61efe246d62d420eb332655c0c8ead4cc762b Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:55 +0000 +Subject: [PATCH 13/42] dump: Use ERRP_GUARD() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [13/41] f735cd1dab0230000cfadd878765fdf4647b239c + +Let's move to the new way 
of handling errors before changing the dump +code. This patch has mostly been generated by the coccinelle script +scripts/coccinelle/errp-guard.cocci. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-2-frankja@linux.ibm.com> +(cherry picked from commit 86a518bba4f4d7c9016fc5b104fe1e58b00ad756) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 144 ++++++++++++++++++++++------------------------------ + 1 file changed, 61 insertions(+), 83 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 662d0a62cd..9876123f2e 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -390,23 +390,21 @@ static void write_data(DumpState *s, void *buf, int length, Error **errp) + static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start, + int64_t size, Error **errp) + { ++ ERRP_GUARD(); + int64_t i; +- Error *local_err = NULL; + + for (i = 0; i < size / s->dump_info.page_size; i++) { + write_data(s, block->host_addr + start + i * s->dump_info.page_size, +- s->dump_info.page_size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ s->dump_info.page_size, errp); ++ if (*errp) { + return; + } + } + + if ((size % s->dump_info.page_size) != 0) { + write_data(s, block->host_addr + start + i * s->dump_info.page_size, +- size % s->dump_info.page_size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ size % s->dump_info.page_size, errp); ++ if (*errp) { + return; + } + } +@@ -476,11 +474,11 @@ static void get_offset_range(hwaddr phys_addr, + + static void write_elf_loads(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + hwaddr offset, filesz; + MemoryMapping *memory_mapping; + uint32_t phdr_index = 1; + uint32_t max_index; +- Error *local_err = NULL; + + if (s->have_section) { + max_index = s->sh_info; +@@ -494,14 +492,13 @@ static void write_elf_loads(DumpState *s, Error **errp) + s, &offset, &filesz); + if (s->dump_info.d_class == ELFCLASS64) { + write_elf64_load(s, memory_mapping, phdr_index++, offset, +- filesz, &local_err); ++ filesz, errp); + } else { + write_elf32_load(s, memory_mapping, phdr_index++, offset, +- filesz, &local_err); ++ filesz, errp); + } + +- if (local_err) { +- error_propagate(errp, local_err); ++ if (*errp) { + return; + } + +@@ -514,7 +511,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + /* write elf header, PT_NOTE and elf note to vmcore. 
*/ + static void dump_begin(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + + /* + * the vmcore's format is: +@@ -542,73 +539,64 @@ static void dump_begin(DumpState *s, Error **errp) + + /* write elf header to vmcore */ + if (s->dump_info.d_class == ELFCLASS64) { +- write_elf64_header(s, &local_err); ++ write_elf64_header(s, errp); + } else { +- write_elf32_header(s, &local_err); ++ write_elf32_header(s, errp); + } +- if (local_err) { +- error_propagate(errp, local_err); ++ if (*errp) { + return; + } + + if (s->dump_info.d_class == ELFCLASS64) { + /* write PT_NOTE to vmcore */ +- write_elf64_note(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_note(s, errp); ++ if (*errp) { + return; + } + + /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_loads(s, errp); ++ if (*errp) { + return; + } + + /* write section to vmcore */ + if (s->have_section) { +- write_elf_section(s, 1, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_section(s, 1, errp); ++ if (*errp) { + return; + } + } + + /* write notes to vmcore */ +- write_elf64_notes(fd_write_vmcore, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_notes(fd_write_vmcore, s, errp); ++ if (*errp) { + return; + } + } else { + /* write PT_NOTE to vmcore */ +- write_elf32_note(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_note(s, errp); ++ if (*errp) { + return; + } + + /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_loads(s, errp); ++ if (*errp) { + return; + } + + /* write section to vmcore */ + if (s->have_section) { +- write_elf_section(s, 0, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_section(s, 0, errp); ++ if (*errp) { + return; + } + } + + /* write notes to vmcore */ +- write_elf32_notes(fd_write_vmcore, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_notes(fd_write_vmcore, s, errp); ++ if (*errp) { + return; + } + } +@@ -644,9 +632,9 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block) + /* write all memory to vmcore */ + static void dump_iterate(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + GuestPhysBlock *block; + int64_t size; +- Error *local_err = NULL; + + do { + block = s->next_block; +@@ -658,9 +646,8 @@ static void dump_iterate(DumpState *s, Error **errp) + size -= block->target_end - (s->begin + s->length); + } + } +- write_memory(s, block, s->start, size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_memory(s, block, s->start, size, errp); ++ if (*errp) { + return; + } + +@@ -669,11 +656,10 @@ static void dump_iterate(DumpState *s, Error **errp) + + static void create_vmcore(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + +- dump_begin(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ dump_begin(s, errp); ++ if (*errp) { + return; + } + +@@ -810,6 +796,7 @@ static bool note_name_equal(DumpState *s, + /* write common header, sub header and elf note to vmcore */ + static void create_header32(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + DiskDumpHeader32 *dh = NULL; + KdumpSubHeader32 *kh = NULL; + size_t size; +@@ -818,7 +805,6 @@ static void create_header32(DumpState *s, Error **errp) + 
uint32_t bitmap_blocks; + uint32_t status = 0; + uint64_t offset_note; +- Error *local_err = NULL; + + /* write common header, the version of kdump-compressed format is 6th */ + size = sizeof(DiskDumpHeader32); +@@ -894,9 +880,8 @@ static void create_header32(DumpState *s, Error **errp) + s->note_buf_offset = 0; + + /* use s->note_buf to store notes temporarily */ +- write_elf32_notes(buf_write_note, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_notes(buf_write_note, s, errp); ++ if (*errp) { + goto out; + } + if (write_buffer(s->fd, offset_note, s->note_buf, +@@ -922,6 +907,7 @@ out: + /* write common header, sub header and elf note to vmcore */ + static void create_header64(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + DiskDumpHeader64 *dh = NULL; + KdumpSubHeader64 *kh = NULL; + size_t size; +@@ -930,7 +916,6 @@ static void create_header64(DumpState *s, Error **errp) + uint32_t bitmap_blocks; + uint32_t status = 0; + uint64_t offset_note; +- Error *local_err = NULL; + + /* write common header, the version of kdump-compressed format is 6th */ + size = sizeof(DiskDumpHeader64); +@@ -1006,9 +991,8 @@ static void create_header64(DumpState *s, Error **errp) + s->note_buf_offset = 0; + + /* use s->note_buf to store notes temporarily */ +- write_elf64_notes(buf_write_note, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_notes(buf_write_note, s, errp); ++ if (*errp) { + goto out; + } + +@@ -1472,8 +1456,8 @@ out: + + static void create_kdump_vmcore(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + int ret; +- Error *local_err = NULL; + + /* + * the kdump-compressed format is: +@@ -1503,21 +1487,18 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) + return; + } + +- write_dump_header(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_header(s, errp); ++ if (*errp) { + return; + } + +- write_dump_bitmap(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_bitmap(s, errp); ++ if (*errp) { + return; + } + +- write_dump_pages(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_pages(s, errp); ++ if (*errp) { + return; + } + +@@ -1647,10 +1628,10 @@ static void dump_init(DumpState *s, int fd, bool has_format, + DumpGuestMemoryFormat format, bool paging, bool has_filter, + int64_t begin, int64_t length, Error **errp) + { ++ ERRP_GUARD(); + VMCoreInfoState *vmci = vmcoreinfo_find(); + CPUState *cpu; + int nr_cpus; +- Error *err = NULL; + int ret; + + s->has_format = has_format; +@@ -1769,9 +1750,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + + /* get memory mapping */ + if (paging) { +- qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err); +- if (err != NULL) { +- error_propagate(errp, err); ++ qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, errp); ++ if (*errp) { + goto cleanup; + } + } else { +@@ -1870,33 +1850,32 @@ cleanup: + /* this operation might be time consuming. 
*/ + static void dump_process(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + DumpQueryResult *result = NULL; + + if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) { + #ifdef TARGET_X86_64 +- create_win_dump(s, &local_err); ++ create_win_dump(s, errp); + #endif + } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { +- create_kdump_vmcore(s, &local_err); ++ create_kdump_vmcore(s, errp); + } else { +- create_vmcore(s, &local_err); ++ create_vmcore(s, errp); + } + + /* make sure status is written after written_size updates */ + smp_wmb(); + qatomic_set(&s->status, +- (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); ++ (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); + + /* send DUMP_COMPLETED message (unconditionally) */ + result = qmp_query_dump(NULL); + /* should never fail */ + assert(result); +- qapi_event_send_dump_completed(result, !!local_err, (local_err ? +- error_get_pretty(local_err) : NULL)); ++ qapi_event_send_dump_completed(result, !!*errp, (*errp ? ++ error_get_pretty(*errp) : NULL)); + qapi_free_DumpQueryResult(result); + +- error_propagate(errp, local_err); + dump_cleanup(s); + } + +@@ -1925,10 +1904,10 @@ void qmp_dump_guest_memory(bool paging, const char *file, + int64_t length, bool has_format, + DumpGuestMemoryFormat format, Error **errp) + { ++ ERRP_GUARD(); + const char *p; + int fd = -1; + DumpState *s; +- Error *local_err = NULL; + bool detach_p = false; + + if (runstate_check(RUN_STATE_INMIGRATE)) { +@@ -2028,9 +2007,8 @@ void qmp_dump_guest_memory(bool paging, const char *file, + dump_state_prepare(s); + + dump_init(s, fd, has_format, format, paging, has_begin, +- begin, length, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ begin, length, errp); ++ if (*errp) { + qatomic_set(&s->status, DUMP_STATUS_FAILED); + return; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch b/SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch new file mode 100644 index 0000000..8ea0a7e --- /dev/null +++ b/SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch @@ -0,0 +1,150 @@ +From a918c7305ec7c68e8bc37b449f71e75d84124cd0 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:13 +0000 +Subject: [PATCH 32/42] dump: Use a buffer for ELF section data and headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [32/41] e1a03e202e67764581e486f37e13e479200e5846 + +Currently we're writing the NULL section header if we overflow the +physical header number in the ELF header. But in the future we'll add +custom section headers AND section data. + +To facilitate this we need to rearange section handling a bit. As with +the other ELF headers we split the code into a prepare and a write +step. 
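+
+A short worked example of the overflow handling prepared below: e_phnum is a
+16-bit field, so for a (hypothetical) guest needing 70000 PT_LOAD segments
+the ELF header cannot hold the count directly and the zero section header
+carries it instead (endianness conversion omitted for brevity):
+
+    /* 70000 >= PN_XNUM (0xffff), so: */
+    elf_header->e_phnum = PN_XNUM;   /* marker value in the ELF header       */
+    shdr[0].sh_info     = 70000;     /* real segment count, section header 0 */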
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-2-frankja@linux.ibm.com> +(cherry picked from commit e41ed29bcee5cb16715317bcf290f6b5c196eb0a) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 75 +++++++++++++++++++++++++++++-------------- + include/sysemu/dump.h | 2 ++ + 2 files changed, 53 insertions(+), 24 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 88177fa886..4142b4cc0c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -381,31 +381,60 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) + } + } + +-static void write_elf_section(DumpState *s, int type, Error **errp) ++static void prepare_elf_section_hdr_zero(DumpState *s) + { +- Elf32_Shdr shdr32; +- Elf64_Shdr shdr64; +- int shdr_size; +- void *shdr; +- int ret; ++ if (dump_is_64bit(s)) { ++ Elf64_Shdr *shdr64 = s->elf_section_hdrs; + +- if (type == 0) { +- shdr_size = sizeof(Elf32_Shdr); +- memset(&shdr32, 0, shdr_size); +- shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); +- shdr = &shdr32; ++ shdr64->sh_info = cpu_to_dump32(s, s->phdr_num); + } else { +- shdr_size = sizeof(Elf64_Shdr); +- memset(&shdr64, 0, shdr_size); +- shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); +- shdr = &shdr64; ++ Elf32_Shdr *shdr32 = s->elf_section_hdrs; ++ ++ shdr32->sh_info = cpu_to_dump32(s, s->phdr_num); ++ } ++} ++ ++static void prepare_elf_section_hdrs(DumpState *s) ++{ ++ size_t len, sizeof_shdr; ++ ++ /* ++ * Section ordering: ++ * - HDR zero ++ */ ++ sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); ++ len = sizeof_shdr * s->shdr_num; ++ s->elf_section_hdrs = g_malloc0(len); ++ ++ /* ++ * The first section header is ALWAYS a special initial section ++ * header. ++ * ++ * The header should be 0 with one exception being that if ++ * phdr_num is PN_XNUM then the sh_info field contains the real ++ * number of segment entries. ++ * ++ * As we zero allocate the buffer we will only need to modify ++ * sh_info for the PN_XNUM case. ++ */ ++ if (s->phdr_num >= PN_XNUM) { ++ prepare_elf_section_hdr_zero(s); + } ++} + +- ret = fd_write_vmcore(shdr, shdr_size, s); ++static void write_elf_section_headers(DumpState *s, Error **errp) ++{ ++ size_t sizeof_shdr = dump_is_64bit(s) ? 
sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); ++ int ret; ++ ++ prepare_elf_section_hdrs(s); ++ ++ ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); + if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write section header table"); ++ error_setg_errno(errp, -ret, "dump: failed to write section headers"); + } ++ ++ g_free(s->elf_section_hdrs); + } + + static void write_data(DumpState *s, void *buf, int length, Error **errp) +@@ -592,12 +621,10 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 1, errp); +- if (*errp) { +- return; +- } ++ /* write section headers to vmcore */ ++ write_elf_section_headers(s, errp); ++ if (*errp) { ++ return; + } + + /* write notes to vmcore */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index b62513d87d..9995f65dc8 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -177,6 +177,8 @@ typedef struct DumpState { + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + ++ void *elf_section_hdrs; /* Pointer to section header buffer */ ++ + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ + uint32_t nr_cpus; /* number of guest's cpu */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch b/SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch new file mode 100644 index 0000000..2efd686 --- /dev/null +++ b/SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch @@ -0,0 +1,104 @@ +From 987ede93fa4e3d058acddc19874e467faa116ede Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:14 +0000 +Subject: [PATCH 33/42] dump: Write ELF section headers right after ELF header +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [33/41] e956040753533ac376e9763145192de1e216027d + +Let's start bundling the writes of the headers and of the data so we +have a clear ordering between them. Since the ELF header uses offsets +to the headers we can freely order them. 
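+
+With the new ordering, the offsets computed below line up as in this small
+worked example (64-bit dump, one section header, two program headers, using
+the standard ELF64 sizes of 64/64/56 bytes for Ehdr/Shdr/Phdr):
+
+    shdr_offset = sizeof(Elf64_Ehdr)            =  64
+    phdr_offset =  64 + 1 * sizeof(Elf64_Shdr)  = 128
+    note_offset = 128 + 2 * sizeof(Elf64_Phdr)  = 240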
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-3-frankja@linux.ibm.com> +(cherry picked from commit cb415fd61e48d52f81dcf38956e3f913651cff1c) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 31 ++++++++++++++----------------- + 1 file changed, 14 insertions(+), 17 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 4142b4cc0c..d17537d4e9 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -584,6 +584,8 @@ static void dump_begin(DumpState *s, Error **errp) + * -------------- + * | elf header | + * -------------- ++ * | sctn_hdr | ++ * -------------- + * | PT_NOTE | + * -------------- + * | PT_LOAD | +@@ -592,8 +594,6 @@ static void dump_begin(DumpState *s, Error **errp) + * -------------- + * | PT_LOAD | + * -------------- +- * | sec_hdr | +- * -------------- + * | elf note | + * -------------- + * | memory | +@@ -609,20 +609,20 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write PT_NOTE to vmcore */ +- write_elf_phdr_note(s, errp); ++ /* write section headers to vmcore */ ++ write_elf_section_headers(s, errp); + if (*errp) { + return; + } + +- /* write all PT_LOADs to vmcore */ +- write_elf_phdr_loads(s, errp); ++ /* write PT_NOTE to vmcore */ ++ write_elf_phdr_note(s, errp); + if (*errp) { + return; + } + +- /* write section headers to vmcore */ +- write_elf_section_headers(s, errp); ++ /* write all PT_LOADs to vmcore */ ++ write_elf_phdr_loads(s, errp); + if (*errp) { + return; + } +@@ -1877,16 +1877,13 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (dump_is_64bit(s)) { +- s->phdr_offset = sizeof(Elf64_Ehdr); +- s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; +- s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +- s->memory_offset = s->note_offset + s->note_size; ++ s->shdr_offset = sizeof(Elf64_Ehdr); ++ s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; ++ s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; + } else { +- +- s->phdr_offset = sizeof(Elf32_Ehdr); +- s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; +- s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; +- s->memory_offset = s->note_offset + s->note_size; ++ s->shdr_offset = sizeof(Elf32_Ehdr); ++ s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; ++ s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; + } + + return; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch b/SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch new file mode 100644 index 0000000..16e6e87 --- /dev/null +++ b/SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch @@ -0,0 +1,173 @@ +From deaf4e0f5e90d227b7b9f3e5d1dff7fd0bc0206a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 5 Sep 2022 16:06:21 +0400 +Subject: [PATCH 31/42] dump: fix kdump to work over non-aligned blocks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [31/41] b307bdce4a4791fc30160fa2a1678bd238f2432e + +Rewrite get_next_page() to work over non-aligned blocks. 
When it +encounters non aligned addresses, it will try to fill a page provided by +the caller. + +This solves a kdump crash with "tpm-crb-cmd" RAM memory region, +qemu-kvm: ../dump/dump.c:1162: _Bool get_next_page(GuestPhysBlock **, +uint64_t *, uint8_t **, DumpState *): Assertion `(block->target_start & +~target_page_mask) == 0' failed. + +because: +guest_phys_block_add_section: target_start=00000000fed40080 target_end=00000000fed41000: added (count: 4) + +Fixes: +https://bugzilla.redhat.com/show_bug.cgi?id=2120480 + +Signed-off-by: Marc-André Lureau +Acked-by: David Hildenbrand +(cherry picked from commit 94d788408d2d5a6474c99b2c9cf06913b9db7c58) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 79 +++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 56 insertions(+), 23 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 1c49232390..88177fa886 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1117,50 +1117,81 @@ static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn) + } + + /* +- * exam every page and return the page frame number and the address of the page. +- * bufptr can be NULL. note: the blocks here is supposed to reflect guest-phys +- * blocks, so block->target_start and block->target_end should be interal +- * multiples of the target page size. ++ * Return the page frame number and the page content in *bufptr. bufptr can be ++ * NULL. If not NULL, *bufptr must contains a target page size of pre-allocated ++ * memory. This is not necessarily the memory returned. + */ + static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + uint8_t **bufptr, DumpState *s) + { + GuestPhysBlock *block = *blockptr; +- hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1); +- uint8_t *buf; ++ uint32_t page_size = s->dump_info.page_size; ++ uint8_t *buf = NULL, *hbuf; ++ hwaddr addr; + + /* block == NULL means the start of the iteration */ + if (!block) { + block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + *blockptr = block; + addr = block->target_start; ++ *pfnptr = dump_paddr_to_pfn(s, addr); + } else { +- addr = dump_pfn_to_paddr(s, *pfnptr + 1); ++ *pfnptr += 1; ++ addr = dump_pfn_to_paddr(s, *pfnptr); + } + assert(block != NULL); + +- if ((addr >= block->target_start) && +- (addr + s->dump_info.page_size <= block->target_end)) { +- buf = block->host_addr + (addr - block->target_start); +- } else { +- /* the next page is in the next block */ +- block = QTAILQ_NEXT(block, next); +- *blockptr = block; +- if (!block) { +- return false; ++ while (1) { ++ if (addr >= block->target_start && addr < block->target_end) { ++ size_t n = MIN(block->target_end - addr, page_size - addr % page_size); ++ hbuf = block->host_addr + (addr - block->target_start); ++ if (!buf) { ++ if (n == page_size) { ++ /* this is a whole target page, go for it */ ++ assert(addr % page_size == 0); ++ buf = hbuf; ++ break; ++ } else if (bufptr) { ++ assert(*bufptr); ++ buf = *bufptr; ++ memset(buf, 0, page_size); ++ } else { ++ return true; ++ } ++ } ++ ++ memcpy(buf + addr % page_size, hbuf, n); ++ addr += n; ++ if (addr % page_size == 0) { ++ /* we filled up the page */ ++ break; ++ } ++ } else { ++ /* the next page is in the next block */ ++ *blockptr = block = QTAILQ_NEXT(block, next); ++ if (!block) { ++ break; ++ } ++ ++ addr = block->target_start; ++ /* are we still in the same page? 
*/ ++ if (dump_paddr_to_pfn(s, addr) != *pfnptr) { ++ if (buf) { ++ /* no, but we already filled something earlier, return it */ ++ break; ++ } else { ++ /* else continue from there */ ++ *pfnptr = dump_paddr_to_pfn(s, addr); ++ } ++ } + } +- addr = block->target_start; +- buf = block->host_addr; + } + +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, addr); + if (bufptr) { + *bufptr = buf; + } + +- return true; ++ return buf != NULL; + } + + static void write_dump_bitmap(DumpState *s, Error **errp) +@@ -1306,6 +1337,7 @@ static void write_dump_pages(DumpState *s, Error **errp) + uint8_t *buf; + GuestPhysBlock *block_iter = NULL; + uint64_t pfn_iter; ++ g_autofree uint8_t *page = NULL; + + /* get offset of page_desc and page_data in dump file */ + offset_desc = s->offset_page; +@@ -1341,12 +1373,13 @@ static void write_dump_pages(DumpState *s, Error **errp) + } + + offset_data += s->dump_info.page_size; ++ page = g_malloc(s->dump_info.page_size); + + /* + * dump memory to vmcore page by page. zero page will all be resided in the + * first page of page section + */ +- while (get_next_page(&block_iter, &pfn_iter, &buf, s)) { ++ for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) { + /* check zero page */ + if (is_zero_page(buf, s->dump_info.page_size)) { + ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor), +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch b/SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch new file mode 100644 index 0000000..9780d90 --- /dev/null +++ b/SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch @@ -0,0 +1,75 @@ +From bb55fde4d8ca587e2ef52ce58a0c22e4d66a08dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 25 Aug 2022 12:40:12 +0400 +Subject: [PATCH 30/42] dump: simplify a bit kdump get_next_page() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [30/41] 417ac19fa96036e0242f40121ac6e87a9f3f70ba + +This should be functionally equivalent, but slightly easier to read, +with simplified paths and checks at the end of the function. + +The following patch is a major rewrite to get rid of the assert(). 
+ +Signed-off-by: Marc-André Lureau +Reviewed-by: David Hildenbrand +(cherry picked from commit 08df343874fcddd260021a04ce3c5a34f2c48164) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 21 ++++++++------------- + 1 file changed, 8 insertions(+), 13 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index c2c1341ad7..1c49232390 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1133,17 +1133,11 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + if (!block) { + block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + *blockptr = block; +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, block->target_start); +- if (bufptr) { +- *bufptr = block->host_addr; +- } +- return true; ++ addr = block->target_start; ++ } else { ++ addr = dump_pfn_to_paddr(s, *pfnptr + 1); + } +- +- *pfnptr = *pfnptr + 1; +- addr = dump_pfn_to_paddr(s, *pfnptr); ++ assert(block != NULL); + + if ((addr >= block->target_start) && + (addr + s->dump_info.page_size <= block->target_end)) { +@@ -1155,12 +1149,13 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + if (!block) { + return false; + } +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, block->target_start); ++ addr = block->target_start; + buf = block->host_addr; + } + ++ assert((block->target_start & ~target_page_mask) == 0); ++ assert((block->target_end & ~target_page_mask) == 0); ++ *pfnptr = dump_paddr_to_pfn(s, addr); + if (bufptr) { + *bufptr = buf; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..eea0eea --- /dev/null +++ b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch @@ -0,0 +1,61 @@ +From 7693449b235bbab6d32a1b87fa1d0e101c786f3b Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:11:14 -0500 +Subject: [PATCH 05/13] edu: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [5/10] 300901290e08b253b1278eedc39cd07c1e202b96 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a +Author: Paolo Bonzini +Date: Thu Mar 2 11:16:13 2023 +0100 + + edu: add smp_mb__after_rmw() + + Ensure ordering between clearing the COMPUTING flag and checking + IRQFACT, and between setting the IRQFACT flag and checking + COMPUTING. This ensures that no wakeups are lost. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/misc/edu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/edu.c b/hw/misc/edu.c +index e935c418d4..a1f8bc77e7 100644 +--- a/hw/misc/edu.c ++++ b/hw/misc/edu.c +@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, + case 0x20: + if (val & EDU_STATUS_IRQFACT) { + qatomic_or(&edu->status, EDU_STATUS_IRQFACT); ++ /* Order check of the COMPUTING flag after setting IRQFACT. 
*/ ++ smp_mb__after_rmw(); + } else { + qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); + } +@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) + qemu_mutex_unlock(&edu->thr_mutex); + qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); + ++ /* Clear COMPUTING flag before checking IRQFACT. */ ++ smp_mb__after_rmw(); ++ + if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { + qemu_mutex_lock_iothread(); + edu_raise_irq(edu, FACT_IRQ); +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch b/SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch new file mode 100644 index 0000000..5070722 --- /dev/null +++ b/SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch @@ -0,0 +1,81 @@ +From edead46187b1e55ad5e238332780aef19f1bc214 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 9 Nov 2022 18:41:18 -0500 +Subject: [PATCH 1/2] hw/acpi: Add ospm_status hook implementation for acpi-ged + +RH-Author: Jon Maloy +RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes +RH-Bugzilla: 2132609 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Commit: [1/2] 99730b1a27666ca745dc28d90751c938d43f1682 (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609 +Upstream: Merged + +commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9 +Author: Keqian Zhu +Date: Tue Aug 16 17:49:57 2022 +0800 + + hw/acpi: Add ospm_status hook implementation for acpi-ged + + Setup an ARM virtual machine of machine virt and execute qmp "query-acpi-ospm-status" + causes segmentation fault with following dumpstack: + #1 0x0000aaaaab64235c in qmp_query_acpi_ospm_status (errp=errp@entry=0xfffffffff030) at ../monitor/qmp-cmds.c:312 + #2 0x0000aaaaabfc4e20 in qmp_marshal_query_acpi_ospm_status (args=, ret=0xffffea4ffe90, errp=0xffffea4ffe88) at qapi/qapi-commands-acpi.c:63 + #3 0x0000aaaaabff8ba0 in do_qmp_dispatch_bh (opaque=0xffffea4ffe98) at ../qapi/qmp-dispatch.c:128 + #4 0x0000aaaaac02e594 in aio_bh_call (bh=0xffffe0004d80) at ../util/async.c:150 + #5 aio_bh_poll (ctx=ctx@entry=0xaaaaad0f6040) at ../util/async.c:178 + #6 0x0000aaaaac00bd40 in aio_dispatch (ctx=ctx@entry=0xaaaaad0f6040) at ../util/aio-posix.c:421 + #7 0x0000aaaaac02e010 in aio_ctx_dispatch (source=0xaaaaad0f6040, callback=, user_data=) at ../util/async.c:320 + #8 0x0000fffff76f6884 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0 + #9 0x0000aaaaac0452d4 in glib_pollfds_poll () at ../util/main-loop.c:297 + #10 os_host_main_loop_wait (timeout=0) at ../util/main-loop.c:320 + #11 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:596 + #12 0x0000aaaaab5c9e50 in qemu_main_loop () at ../softmmu/runstate.c:734 + #13 0x0000aaaaab185370 in qemu_main (argc=argc@entry=47, argv=argv@entry=0xfffffffff518, envp=envp@entry=0x0) at ../softmmu/main.c:38 + #14 0x0000aaaaab16f99c in main (argc=47, argv=0xfffffffff518) at ../softmmu/main.c:47 + + Fixes: ebb62075021a ("hw/acpi: Add ACPI Generic Event Device Support") + Signed-off-by: Keqian Zhu + Reviewed-by: Igor Mammedov + Message-id: 20220816094957.31700-1-zhukeqian1@huawei.com + Signed-off-by: Peter Maydell + +(cherry picked from commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9) +Signed-off-by: Jon Maloy +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index e28457a7d1..a3d31631fe 100644 +--- a/hw/acpi/generic_event_device.c 
++++ b/hw/acpi/generic_event_device.c +@@ -267,6 +267,13 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, + } + } + ++static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) ++{ ++ AcpiGedState *s = ACPI_GED(adev); ++ ++ acpi_memory_ospm_status(&s->memhp_state, list); ++} ++ + static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + { + AcpiGedState *s = ACPI_GED(adev); +@@ -409,6 +416,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) + hc->unplug_request = acpi_ged_unplug_request_cb; + hc->unplug = acpi_ged_unplug_cb; + ++ adevc->ospm_status = acpi_ged_ospm_status; + adevc->send_event = acpi_ged_send_event; + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch b/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch index 13b2187..d4ca84f 100644 --- a/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch +++ b/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch @@ -1,21 +1,21 @@ -From a77c0c98570dbfcd0376d115733393b3658ffff9 Mon Sep 17 00:00:00 2001 +From 100f33ff8a1d55986e43b99ba8726abc29ee8d26 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 5 Dec 2022 15:32:55 -0500 -Subject: [PATCH 6/6] hw/display/qxl: Assert memory slot fits in preallocated +Subject: [PATCH 5/5] hw/display/qxl: Assert memory slot fits in preallocated MemoryRegion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 242: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler -RH-Bugzilla: 2152085 +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Marc-André Lureau RH-Acked-by: Stefan Hajnoczi -RH-Commit: [5/5] 90eb0289592bedb0c9c087190083c85b042f8908 (jmaloy/jons-qemu-kvm) +RH-Acked-by: Marc-André Lureau +RH-Commit: [5/5] f809ce48e7989dd6547b7c8bf1a5efc3fdcacbac (jmaloy/jons-qemu-kvm) -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2152085 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 CVE: CVE-2022-4144 Upstream: Merged diff --git a/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch b/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch index ad5a1cf..9163570 100644 --- a/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch +++ b/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch @@ -1,21 +1,21 @@ -From e3fcb7370760b801f52907ae5bb385cebe2e2e1c Mon Sep 17 00:00:00 2001 +From 4e1bfbe3a0a113fe3cf39336a9d7da4e8c2a21ea Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 5 Dec 2022 15:32:55 -0500 -Subject: [PATCH 5/6] hw/display/qxl: Avoid buffer overrun in qxl_phys2virt +Subject: [PATCH 4/5] hw/display/qxl: Avoid buffer overrun in qxl_phys2virt (CVE-2022-4144) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 242: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler -RH-Bugzilla: 2152085 +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Marc-André Lureau RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/5] 50e06fe6db196d327a68c7d7f35239404fbb39ee (jmaloy/jons-qemu-kvm) +RH-Acked-by: Marc-André Lureau +RH-Commit: [4/5] afe53f8d9b31c6fd8211fe172173151f3255e67c 
(jmaloy/jons-qemu-kvm) -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2152085 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 CVE: CVE-2022-4144 Upstream: Merged diff --git a/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch b/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch index 02569f5..9bf2fe2 100644 --- a/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch +++ b/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch @@ -1,20 +1,20 @@ -From ef0563a3c3f32680e0584821eccd9a205e371b34 Mon Sep 17 00:00:00 2001 +From 068c531fb968ec04509b85f524d0745e6acf5449 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 5 Dec 2022 15:32:55 -0500 -Subject: [PATCH 3/6] hw/display/qxl: Document qxl_phys2virt() +Subject: [PATCH 2/5] hw/display/qxl: Document qxl_phys2virt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 242: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler -RH-Bugzilla: 2152085 +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Marc-André Lureau RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/5] 8d1045449270a3aef4bc4b39c61da1592b83259a (jmaloy/jons-qemu-kvm) +RH-Acked-by: Marc-André Lureau +RH-Commit: [2/5] f84c0b379022c527fc2508a242443d86454944c0 (jmaloy/jons-qemu-kvm) -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2152085 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 CVE: CVE-2022-4144 Upstream: Merged diff --git a/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch b/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch index 49a179c..c644ab2 100644 --- a/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch +++ b/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch @@ -1,21 +1,21 @@ -From d5a0396f7b836603a409ab265798de010694815b Mon Sep 17 00:00:00 2001 +From 5ec8d909d40fa04ef2c3572e01509a1866786070 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 5 Dec 2022 15:32:55 -0500 -Subject: [PATCH 2/6] hw/display/qxl: Have qxl_log_command Return early if no +Subject: [PATCH 1/5] hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 242: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler -RH-Bugzilla: 2152085 +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Marc-André Lureau RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/5] 1016976223d919605be9b0ae8a5a71613d9fc19b (jmaloy/jons-qemu-kvm) +RH-Acked-by: Marc-André Lureau +RH-Commit: [1/5] 33d94f40c46cccbc32d108d1035365917bf90356 (jmaloy/jons-qemu-kvm) -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2152085 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 CVE: CVE-2022-4144 Upstream: Merged diff --git a/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch b/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch index b980034..dd902f7 100644 --- a/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch +++ b/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch @@ -1,21 +1,21 @@ -From ac90823eccd05943a2ece5f7a8a9892d0c44e2a7 Mon Sep 17 00:00:00 2001 +From 
0e6bd3911c4971f575aac7e9cd726467b52fe544 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 5 Dec 2022 15:32:55 -0500 -Subject: [PATCH 4/6] hw/display/qxl: Pass requested buffer size to +Subject: [PATCH 3/5] hw/display/qxl: Pass requested buffer size to qxl_phys2virt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 242: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler -RH-Bugzilla: 2152085 +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Marc-André Lureau RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/5] 89de533fc1f5cc142cfa0ffd213d8683d009ac54 (jmaloy/jons-qemu-kvm) +RH-Acked-by: Marc-André Lureau +RH-Commit: [3/5] 8e362d67fe7fef9eb457cfb15d75b298fed725c3 (jmaloy/jons-qemu-kvm) -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2152085 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 CVE: CVE-2022-4144 Upstream: Merged diff --git a/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch b/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch index 1c2094e..514dd55 100644 --- a/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +++ b/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch @@ -1,16 +1,15 @@ -From 1bd939d374ec2e994ff47c84e16fa3bc1323a0fd Mon Sep 17 00:00:00 2001 +From f96220d64a31a4a52b2d132a503048579946f982 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 18 Aug 2022 17:01:13 +0200 -Subject: [PATCH 2/2] i386: do kvm_put_msr_feature_control() first thing when +Subject: [PATCH 3/3] i386: do kvm_put_msr_feature_control() first thing when vCPU is reset -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 216: i386: fix 'system_reset' when the VM is in VMX root operation -RH-Bugzilla: 2116743 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Commit: [2/2] f838a57f74487eb394794de00006d5d2b9e84344 +RH-Author: Miroslav Rezanina +RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8 +RH-Bugzilla: 2125271 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Jon Maloy +RH-Commit: [2/2] 08e1e67db96801e4a35aa6b60a93b2c2f1641220 kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when it is in VMX root operation. 
Do kvm_put_msr_feature_control() before @@ -64,5 +63,5 @@ index 81d729dc40..a06221d3e5 100644 if (level == KVM_PUT_FULL_STATE) { -- -2.31.1 +2.35.3 diff --git a/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch b/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch index ac5b579..411bed4 100644 --- a/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +++ b/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch @@ -1,15 +1,14 @@ -From 4ad00e318f8afbee0e455cfbb6bc693c808d87f3 Mon Sep 17 00:00:00 2001 +From 46e54544c3480658111d6f111d6c265dcea2e19b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 18 Aug 2022 17:01:12 +0200 -Subject: [PATCH 1/2] i386: reset KVM nested state upon CPU reset +Subject: [PATCH 2/3] i386: reset KVM nested state upon CPU reset -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 216: i386: fix 'system_reset' when the VM is in VMX root operation -RH-Bugzilla: 2116743 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Commit: [1/2] 20d2dabeda74b8cd5135228980a2414e66dc64f3 +RH-Author: Miroslav Rezanina +RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8 +RH-Bugzilla: 2125271 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Jon Maloy +RH-Commit: [1/2] de4db7bceb6baaf69aec8b0ae9aa8887aa869e15 Make sure env->nested_state is cleaned up when a vCPU is reset, it may be stale after an incoming migration, kvm_arch_put_registers() may @@ -91,5 +90,5 @@ index bd439e56ad..81d729dc40 100644 } -- -2.31.1 +2.35.3 diff --git a/SOURCES/kvm-include-elf.h-add-s390x-note-types.patch b/SOURCES/kvm-include-elf.h-add-s390x-note-types.patch new file mode 100644 index 0000000..9e17d2c --- /dev/null +++ b/SOURCES/kvm-include-elf.h-add-s390x-note-types.patch @@ -0,0 +1,43 @@ +From 3fceb3b60a60c5008eecf99e45e269b757042b5a Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:20 +0000 +Subject: [PATCH 39/42] include/elf.h: add s390x note types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [39/41] ebf0873744905abbe9cfc423a56c6d1b4f2ae936 + +Adding two s390x note types + +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Message-Id: <20221017083822.43118-9-frankja@linux.ibm.com> +(cherry picked from commit 5433669c7a1884cc0394c360148965edf7519884) +Signed-off-by: Cédric Le Goater +--- + include/elf.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/elf.h b/include/elf.h +index 811bf4a1cb..4edab8e5a2 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -1647,6 +1647,8 @@ typedef struct elf64_shdr { + #define NT_TASKSTRUCT 4 + #define NT_AUXV 6 + #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ ++#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ ++#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ + #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ + #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ + #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */ +-- +2.37.3 + diff --git a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch new file mode 100644 index 
0000000..6f2cc3f --- /dev/null +++ b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch @@ -0,0 +1,367 @@ +From 88b5e059462a72ca758d84c0d4d0895a03baac50 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:17 +0000 +Subject: [PATCH 1/3] io: Add support for MSG_PEEK for socket channel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 258: migration: Fix multifd crash due to channel disorder +RH-Bugzilla: 2137740 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/2] 04fc6fae358599b8509f5355469d2e8720f01903 + +Conflicts: + io/channel-null.c + migration/channel-block.c + + Because these two files do not exist in rhel8.8 tree, dropping the + changes. + +MSG_PEEK peeks at the channel, The data is treated as unread and +the next read shall still return this data. This support is +currently added only for socket class. Extra parameter 'flags' +is added to io_readv calls to pass extra read flags like MSG_PEEK. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) +Signed-off-by: Peter Xu +--- + chardev/char-socket.c | 4 ++-- + include/io/channel.h | 6 ++++++ + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 19 ++++++++++++++++++- + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 16 ++++++++++++---- + migration/rdma.c | 1 + + scsi/qemu-pr-helper.c | 2 +- + tests/qtest/tpm-emu.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + util/vhost-user-server.c | 2 +- + 14 files changed, 48 insertions(+), 10 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 836cfa0bc2..4cdf79e0c2 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -339,11 +339,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, +- NULL); ++ 0, NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, +- NULL); ++ 0, NULL); + } + + if (ret == QIO_CHANNEL_ERR_BLOCK) { +diff --git a/include/io/channel.h b/include/io/channel.h +index c680ee7480..716235d496 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 + ++#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { +@@ -41,6 +43,7 @@ enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, + }; + + +@@ -114,6 +117,7 @@ struct QIOChannelClass { + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + int (*io_close)(QIOChannel *ioc, + Error **errp); +@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: pointer to an array that will received file handles + * @nfds: pointer filled with number of elements in @fds on return ++ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Read data from 
the IO channel, storing it in the +@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + + +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index bf52011be2..8096180f85 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 5ff1691bad..2834413b3a 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -230,6 +230,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index 348a48545e..490f0e5d84 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 6010ad7017..ca8b180b69 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -174,6 +174,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + } + #endif + ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + return 0; + } + +@@ -407,6 +410,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + } + #endif /* WIN32 */ + ++ qio_channel_set_feature(QIO_CHANNEL(cioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); + return cioc; + +@@ -497,6 +503,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -518,6 +525,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + + } + ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } ++ + retry: + ret = recvmsg(sioc->fd, &msg, sflags); + if (ret < 0) { +@@ -625,11 +636,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; ++ int sflags = 0; ++ ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } + + for (i = 0; i < niov; i++) { + ssize_t ret; +@@ -637,7 +654,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + ret = recv(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, +- 0); ++ sflags); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 4ce890a538..c730cb8ec5 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 035dd6075b..13c94f2afe 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + 
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index 0640941ac5..a8c7f11649 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); +@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + return -1; + } + +- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); ++ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support peek read"); ++ return -1; ++ } ++ ++ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, + while ((nlocal_iov > 0) || local_fds) { + ssize_t len; + len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, +- local_nfds, errp); ++ local_nfds, 0, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_IN); +@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); + } + + +@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; +- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); + } + + +diff --git a/migration/rdma.c b/migration/rdma.c +index 54acd2000e..dcf98bd7f8 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2917,6 +2917,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index f281daeced..12ec8e9368 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -612,7 +612,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, + iov.iov_base = buf; + iov.iov_len = sz; + n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, +- &fds, &nfds, errp); ++ &fds, &nfds, 0, errp); + + if (n_read == QIO_CHANNEL_ERR_BLOCK) { + qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); +diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c +index 2994d1cf42..3cf1acaf7d 100644 +--- a/tests/qtest/tpm-emu.c ++++ b/tests/qtest/tpm-emu.c +@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data) + int *pfd = NULL; + size_t nfd = 0; + +- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); ++ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); + cmd = be32_to_cpu(cmd); + g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); + g_assert_cmpint(nfd, ==, 1); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index 6713886d02..de2930f203 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -452,6 +452,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iorecv), + &fdrecv, + &nfdrecv, ++ 0, + &error_abort); + + g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index 783d847a6d..e6a9ef72b7 100644 +--- 
a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -102,7 +102,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) + * qio_channel_readv_full may have short reads, keeping calling it + * until getting VHOST_USER_HDR_SIZE or 0 bytes in total + */ +- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); ++ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); + if (rc < 0) { + if (rc == QIO_CHANNEL_ERR_BLOCK) { + assert(local_err == NULL); +-- +2.37.3 + diff --git a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch new file mode 100644 index 0000000..d97a2c9 --- /dev/null +++ b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch @@ -0,0 +1,290 @@ +From 93ec857c46911b95ed8e3abc6a9d432ae847c084 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:51:56 -0500 +Subject: [PATCH 06/11] kvm: Atomic memslot updates + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 247: accel: introduce accelerator blocker API +RH-Bugzilla: 2161188 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 520e41c0f58066a7381a5f6b32b81bc01cce51c0 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188 + +commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 +Author: David Hildenbrand +Date: Fri Nov 11 10:47:58 2022 -0500 + + kvm: Atomic memslot updates + + If we update an existing memslot (e.g., resize, split), we temporarily + remove the memslot to re-add it immediately afterwards. These updates + are not atomic, especially not for KVM VCPU threads, such that we can + get spurious faults. + + Let's inhibit most KVM ioctls while performing relevant updates, such + that we can perform the update just as if it would happen atomically + without additional kernel support. + + We capture the add/del changes and apply them in the notifier commit + stage instead. There, we can check for overlaps and perform the ioctl + inhibiting only if really required (-> overlap). + + To keep things simple we don't perform additional checks that wouldn't + actually result in an overlap -- such as !RAM memory regions in some + cases (see kvm_set_phys_mem()). + + To minimize cache-line bouncing, use a separate indicator + (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock + while performing both actions (removing+re-adding). + + We have to wait until all IOCTLs were exited and block new ones from + getting executed. + + This approach cannot result in a deadlock as long as the inhibitor does + not hold any locks that might hinder an IOCTL from getting finished and + exited - something fairly unusual. The inhibitor will always hold the BQL. + + AFAIKs, one possible candidate would be userfaultfd. If a page cannot be + placed (e.g., during postcopy), because we're waiting for a lock, or if the + userfaultfd thread cannot process a fault, because it is waiting for a + lock, there could be a deadlock. However, the BQL is not applicable here, + because any other guest memory access while holding the BQL would already + result in a deadlock. + + Nothing else in the kernel should block forever and wait for userspace + intervention. + + Note: pause_all_vcpus()/resume_all_vcpus() or + start_exclusive()/end_exclusive() cannot be used, as they either drop + the BQL or require to be called without the BQL - something inhibitors + cannot handle. We need a low-level locking mechanism that is + deadlock-free even when not releasing the BQL. 
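
As a hedged sketch, the commit stage this patch introduces has roughly the following shape; the overlap scan, error handling and bookkeeping are elided, and the real logic is in kvm_region_commit() in the hunk below.

    /* Sketch only; see kvm_region_commit() below for the actual code. */
    kvm_slots_lock();
    if (need_inhibit) {                  /* only when a deleted range overlaps an added one */
        accel_ioctl_inhibit_begin();     /* drain running ioctls, hold off new ones */
    }
    /* ... apply every queued deletion, then every queued addition ... */
    if (need_inhibit) {
        accel_ioctl_inhibit_end();
    }
    kvm_slots_unlock();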
+ + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Tested-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Conflicts: + accel/kvm/kvm-all.c: include "sysemu/dirtylimit.h" is missing in + rhel 8.8.0 + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- + include/sysemu/kvm_int.h | 8 ++++ + 2 files changed, 98 insertions(+), 11 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 221aadfda7..3b7bc39823 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "sysemu/cpus.h" ++#include "sysemu/accel-blocker.h" + #include "qemu/bswap.h" + #include "exec/memory.h" + #include "exec/ram_addr.h" +@@ -45,6 +46,7 @@ + #include "qemu/guest-random.h" + #include "sysemu/hw_accel.h" + #include "kvm-cpus.h" ++#include "qemu/range.h" + + #include "hw/boards.h" + +@@ -1334,6 +1336,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++/* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -1368,14 +1371,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; + +- kvm_slots_lock(); +- + if (!add) { + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { +- goto out; ++ return; + } + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* +@@ -1413,7 +1414,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + start_addr += slot_size; + size -= slot_size; + } while (size); +- goto out; ++ return; + } + + /* register the new slot */ +@@ -1438,9 +1439,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram += slot_size; + size -= slot_size; + } while (size); +- +-out: +- kvm_slots_unlock(); + } + + static void *kvm_dirty_ring_reaper_thread(void *data) +@@ -1492,18 +1490,95 @@ static void kvm_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; + +- memory_region_ref(section->mr); +- kvm_set_phys_mem(kml, section, true); ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); + } + + static void kvm_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; ++ ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); ++} ++ ++static void kvm_region_commit(MemoryListener *listener) ++{ ++ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, ++ listener); ++ KVMMemoryUpdate *u1, *u2; ++ bool need_inhibit = false; ++ ++ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && ++ QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ return; ++ } ++ ++ /* ++ * We have to be careful when regions to add overlap with ranges to remove. ++ * We have to simulate atomic KVM memslot updates by making sure no ioctl() ++ * is currently active. 
++ * ++ * The lists are order by addresses, so it's easy to find overlaps. ++ */ ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ while (u1 && u2) { ++ Range r1, r2; ++ ++ range_init_nofail(&r1, u1->section.offset_within_address_space, ++ int128_get64(u1->section.size)); ++ range_init_nofail(&r2, u2->section.offset_within_address_space, ++ int128_get64(u2->section.size)); ++ ++ if (range_overlaps_range(&r1, &r2)) { ++ need_inhibit = true; ++ break; ++ } ++ if (range_lob(&r1) < range_lob(&r2)) { ++ u1 = QSIMPLEQ_NEXT(u1, next); ++ } else { ++ u2 = QSIMPLEQ_NEXT(u2, next); ++ } ++ } ++ ++ kvm_slots_lock(); ++ if (need_inhibit) { ++ accel_ioctl_inhibit_begin(); ++ } ++ ++ /* Remove all memslots before adding the new ones. */ ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); + +- kvm_set_phys_mem(kml, section, false); +- memory_region_unref(section->mr); ++ kvm_set_phys_mem(kml, &u1->section, false); ++ memory_region_unref(u1->section.mr); ++ ++ g_free(u1); ++ } ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); ++ ++ memory_region_ref(u1->section.mr); ++ kvm_set_phys_mem(kml, &u1->section, true); ++ ++ g_free(u1); ++ } ++ ++ if (need_inhibit) { ++ accel_ioctl_inhibit_end(); ++ } ++ kvm_slots_unlock(); + } + + static void kvm_log_sync(MemoryListener *listener, +@@ -1647,8 +1722,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + kml->slots[i].slot = i; + } + ++ QSIMPLEQ_INIT(&kml->transaction_add); ++ QSIMPLEQ_INIT(&kml->transaction_del); ++ + kml->listener.region_add = kvm_region_add; + kml->listener.region_del = kvm_region_del; ++ kml->listener.commit = kvm_region_commit; + kml->listener.log_start = kvm_log_start; + kml->listener.log_stop = kvm_log_stop; + kml->listener.priority = 10; +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 1f5487d9b7..7e18c0a3c0 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -11,6 +11,7 @@ + + #include "exec/memory.h" + #include "qemu/accel.h" ++#include "qemu/queue.h" + #include "sysemu/kvm.h" + + typedef struct KVMSlot +@@ -30,10 +31,17 @@ typedef struct KVMSlot + ram_addr_t ram_start_offset; + } KVMSlot; + ++typedef struct KVMMemoryUpdate { ++ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; ++ MemoryRegionSection section; ++} KVMMemoryUpdate; ++ + typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + int as_id; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; + } KVMMemoryListener; + + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, +-- +2.37.3 + diff --git a/SOURCES/kvm-migration-Read-state-once.patch b/SOURCES/kvm-migration-Read-state-once.patch index c478b36..63fd047 100644 --- a/SOURCES/kvm-migration-Read-state-once.patch +++ b/SOURCES/kvm-migration-Read-state-once.patch @@ -1,15 +1,16 @@ -From 70d9e1e5940c9680f574124fe8cc02e4ce97f790 Mon Sep 17 00:00:00 2001 +From 34eae2d7ef928a7e0e10cc30fe76839c005998eb Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 13 Apr 2022 12:33:29 +0100 -Subject: [PATCH] migration: Read state once +Subject: [PATCH 07/11] migration: Read state once RH-Author: Dr. 
David Alan Gilbert -RH-MergeRequest: 252: migration: Read state once -RH-Bugzilla: 2168217 -RH-Acked-by: Stefano Garzarella +RH-MergeRequest: 249: migration: Read state once +RH-Bugzilla: 2074205 RH-Acked-by: Peter Xu +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Jon Maloy RH-Acked-by: quintela1 -RH-Commit: [1/1] 3f707ccf62e5ac2973f7e8816c50cc48e0bd646c +RH-Commit: [1/1] 9aa47b492a646fce4e66ebd9b7d7a85286d16051 The 'status' field for the migration is updated normally using an atomic operation from the migration thread. diff --git a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch new file mode 100644 index 0000000..7838333 --- /dev/null +++ b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch @@ -0,0 +1,296 @@ +From f21a343af4b4d0c6e5181ae0abd0f6280dc8296c Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:18 +0000 +Subject: [PATCH 2/3] migration: check magic value for deciding the mapping of + channels +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 258: migration: Fix multifd crash due to channel disorder +RH-Bugzilla: 2137740 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [2/2] f97bebef3d3e372cfd660e5ddb6cffba791840d2 + +Conflicts: + migration/migration.c + migration/multifd.c + migration/postcopy-ram.c + migration/postcopy-ram.h + + There're a bunch of conflicts due to missing upstream patches on + e.g. on qemufile reworks, postcopy preempt. We don't plan to have + preempt in rhel8 at all, probably the same as the rest. + +Current logic assumes that channel connections on the destination side are +always established in the same order as the source and the first one will +always be the main channel followed by the multifid or post-copy +preemption channel. This may not be always true, as even if a channel has a +connection established on the source side it can be in the pending state on +the destination side and a newer connection can be established first. +Basically causing out of order mapping of channels on the destination side. +Currently, all channels except post-copy preempt send a magic number, this +patch uses that magic number to decide the type of channel. This logic is +applicable only for precopy(multifd) live migration, as mentioned, the +post-copy preempt channel does not send any magic number. Also, tls live +migrations already does tls handshake before creating other channels, so +this issue is not possible with tls, hence this logic is avoided for tls +live migrations. This patch uses read peek to check the magic number of +channels so that current data/control stream management remains +un-effected. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. 
Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) +Signed-off-by: Peter Xu +--- + migration/channel.c | 45 ++++++++++++++++++++++++++++++++++++++ + migration/channel.h | 5 +++++ + migration/migration.c | 51 +++++++++++++++++++++++++++++++------------ + migration/multifd.c | 19 ++++++++-------- + migration/multifd.h | 2 +- + 5 files changed, 98 insertions(+), 24 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index 086b5c0d8b..ee308fef23 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -98,3 +98,48 @@ void migration_channel_connect(MigrationState *s, + g_free(s->hostname); + error_free(error); + } ++ ++ ++/** ++ * @migration_channel_read_peek - Peek at migration channel, without ++ * actually removing it from channel buffer. ++ * ++ * @ioc: the channel object ++ * @buf: the memory region to read data into ++ * @buflen: the number of bytes to read in @buf ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Returns 0 if successful, returns -1 and sets @errp if fails. ++ */ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp) ++{ ++ ssize_t len = 0; ++ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; ++ ++ while (true) { ++ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, ++ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); ++ ++ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ error_setg(errp, ++ "Failed to peek at channel"); ++ return -1; ++ } ++ ++ if (len == buflen) { ++ break; ++ } ++ ++ /* 1ms sleep. */ ++ if (qemu_in_coroutine()) { ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ } else { ++ g_usleep(1000); ++ } ++ } ++ ++ return 0; ++} +diff --git a/migration/channel.h b/migration/channel.h +index 67a461c28a..5bdb8208a7 100644 +--- a/migration/channel.h ++++ b/migration/channel.h +@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error *error_in); ++ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp); + #endif +diff --git a/migration/migration.c b/migration/migration.c +index d8b24a2c91..0885549de0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -32,6 +32,7 @@ + #include "savevm.h" + #include "qemu-file-channel.h" + #include "qemu-file.h" ++#include "channel.h" + #include "migration/vmstate.h" + #include "block/block.h" + #include "qapi/error.h" +@@ -637,10 +638,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + +- if (multifd_load_setup(errp) != 0) { +- return false; +- } +- + if (!mis->from_src_file) { + mis->from_src_file = f; + } +@@ -701,10 +698,42 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) + void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); ++ bool default_channel = true; ++ uint32_t channel_magic = 0; + Error *local_err = NULL; +- bool start_migration; ++ int ret = 0; + +- if (!mis->from_src_file) { ++ if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ /* ++ * With multiple channels, it is possible that we receive channels ++ * out of order on destination side, causing incorrect mapping of ++ * source channels on destination side. 
Check channel MAGIC to ++ * decide type of channel. Please note this is best effort, postcopy ++ * preempt channel does not send any magic number so avoid it for ++ * postcopy live migration. Also tls live migration already does ++ * tls handshake while initializing main channel so with tls this ++ * issue is not possible. ++ */ ++ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, ++ sizeof(channel_magic), &local_err); ++ ++ if (ret != 0) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); ++ } else { ++ default_channel = !mis->from_src_file; ++ } ++ ++ if (multifd_load_setup(errp) != 0) { ++ error_setg(errp, "Failed to setup multifd channels"); ++ return; ++ } ++ ++ if (default_channel) { + /* The first connection (multifd may have multiple) */ + QEMUFile *f = qemu_fopen_channel_input(ioc); + +@@ -716,23 +745,17 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + if (!migration_incoming_setup(f, errp)) { + return; + } +- +- /* +- * Common migration only needs one channel, so we can start +- * right now. Multifd needs more than one channel, we wait. +- */ +- start_migration = !migrate_use_multifd(); + } else { + /* Multiple connections */ + assert(migrate_use_multifd()); +- start_migration = multifd_recv_new_channel(ioc, &local_err); ++ multifd_recv_new_channel(ioc, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + +- if (start_migration) { ++ if (migration_has_all_channels()) { + migration_incoming_process(); + } + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 7c16523e6b..75ac052d2f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1183,9 +1183,14 @@ int multifd_load_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ /* ++ * Return successfully if multiFD recv state is already initialised ++ * or multiFD is not enabled. ++ */ ++ if (multifd_recv_state || !migrate_use_multifd()) { + return 0; + } ++ + if (!migrate_multifd_is_allowed()) { + error_setg(errp, "multifd is not supported by current protocol"); + return -1; +@@ -1244,11 +1249,9 @@ bool multifd_recv_all_channels_created(void) + + /* + * Try to receive all multifd channels to get ready for the migration. +- * - Return true and do not set @errp when correctly receiving all channels; +- * - Return false and do not set @errp when correctly receiving the current one; +- * - Return false and set @errp when failing to receive the current channel. ++ * Sets @errp when failing to receive the current channel. 
+ */ +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; +@@ -1261,7 +1264,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + "failed to receive packet" + " via multifd channel %d: ", + qatomic_read(&multifd_recv_state->count)); +- return false; ++ return; + } + trace_multifd_recv_new_channel(id); + +@@ -1271,7 +1274,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + id); + multifd_recv_terminate_threads(local_err); + error_propagate(errp, local_err); +- return false; ++ return; + } + p->c = ioc; + object_ref(OBJECT(ioc)); +@@ -1282,6 +1285,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +- return qatomic_read(&multifd_recv_state->count) == +- migrate_multifd_channels(); + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 11d5e273e6..9c0a2a0701 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -20,7 +20,7 @@ void multifd_save_cleanup(void); + int multifd_load_setup(Error **errp); + int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +-- +2.37.3 + diff --git a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch new file mode 100644 index 0000000..f6a2137 --- /dev/null +++ b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch @@ -0,0 +1,55 @@ +From 01c09f31978154f0d2fd699621ae958a8c3ea2a5 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:24 -0500 +Subject: [PATCH 08/13] physmem: add missing memory barrier + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [8/10] f6a9659f7cf40b78de6e85e4a7c06842273aa770 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 33828ca11da08436e1b32f3e79dabce3061a0427 +Author: Paolo Bonzini +Date: Fri Mar 3 14:36:32 2023 +0100 + + physmem: add missing memory barrier + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + softmmu/physmem.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 4d0ef5f92f..2b96fad302 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -3087,6 +3087,8 @@ void cpu_register_map_client(QEMUBH *bh) + qemu_mutex_lock(&map_client_list_lock); + client->bh = bh; + QLIST_INSERT_HEAD(&map_client_list, client, link); ++ /* Write map_client_list before reading in_use. */ ++ smp_mb(); + if (!qatomic_read(&bounce.in_use)) { + cpu_notify_map_clients_locked(); + } +@@ -3279,6 +3281,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + qemu_vfree(bounce.buffer); + bounce.buffer = NULL; + memory_region_unref(bounce.mr); ++ /* Clear in_use before reading map_client_list. 
*/ + qatomic_mb_set(&bounce.in_use, false); + cpu_notify_map_clients(); + } +-- +2.37.3 + diff --git a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch new file mode 100644 index 0000000..3992f4e --- /dev/null +++ b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch @@ -0,0 +1,177 @@ +From e7d0e29d1962092af58d0445439671a6e1d91f71 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:10:33 -0500 +Subject: [PATCH 02/13] qatomic: add smp_mb__before/after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [2/10] 1f87eb3157abcf23f020881cedce42f76497f348 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 +Author: Paolo Bonzini +Date: Thu Mar 2 11:10:56 2023 +0100 + + qatomic: add smp_mb__before/after_rmw() + + On ARM, seqcst loads and stores (which QEMU does not use) are compiled + respectively as LDAR and STLR instructions. Even though LDAR is + also used for load-acquire operations, it also waits for all STLRs to + leave the store buffer. Thus, LDAR and STLR alone are load-acquire + and store-release operations, but LDAR also provides store-against-load + ordering as long as the previous store is a STLR. + + Compare this to ARMv7, where store-release is DMB+STR and load-acquire + is LDR+DMB, but an additional DMB is needed between store-seqcst and + load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides + load-acquire and store-release semantics and the two can be reordered. + + Likewise, on ARM sequentially consistent read-modify-write operations only + need to use LDAXR and STLXR respectively for the load and the store, while + on x86 they need to use the stronger LOCK prefix. + + In a strange twist of events, however, the _stronger_ semantics + of the ARM instructions can end up causing bugs on ARM, not on x86. + The problems occur when seqcst atomics are mixed with relaxed atomics. + + QEMU's atomics try to bridge the Linux API (that most of the developers + are familiar with) and the C11 API, and the two have a substantial + difference: + + - in Linux, strongly-ordered atomics such as atomic_add_return() affect + the global ordering of _all_ memory operations, including for example + READ_ONCE()/WRITE_ONCE() + + - in C11, sequentially consistent atomics (except for seq-cst fences) + only affect the ordering of sequentially consistent operations. + In particular, since relaxed loads are done with LDR on ARM, they are + not ordered against seqcst stores (which are done with STLR). + + QEMU implements high-level synchronization primitives with the idea that + the primitives contain the necessary memory barriers, and the callers can + use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. + This is very much incompatible with the C11 view that seqcst accesses + are only ordered against other seqcst accesses, and requires using seqcst + fences as in the following example: + + qatomic_set(&y, 1); qatomic_set(&x, 1); + smp_mb(); smp_mb(); + ... qatomic_read(&x) ... ... qatomic_read(&y) ... 
+ + When a qatomic_*() read-modify write operation is used instead of one + or both stores, developers that are more familiar with the Linux API may + be tempted to omit the smp_mb(), which will work on x86 but not on ARM. + + This nasty difference between Linux and C11 read-modify-write operations + has already caused issues in util/async.c and more are being found. + Provide something similar to Linux smp_mb__before/after_atomic(); this + has the double function of documenting clearly why there is a memory + barrier, and avoiding a double barrier on x86 and s390x systems. + + The new macro can already be put to use in qatomic_mb_set(). + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + docs/devel/atomics.rst | 26 +++++++++++++++++++++----- + include/qemu/atomic.h | 17 ++++++++++++++++- + 2 files changed, 37 insertions(+), 6 deletions(-) + +diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst +index 52baa0736d..10fbfc58bb 100644 +--- a/docs/devel/atomics.rst ++++ b/docs/devel/atomics.rst +@@ -25,7 +25,8 @@ provides macros that fall in three camps: + + - weak atomic access and manual memory barriers: ``qatomic_read()``, + ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, +- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; ++ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, ++ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; + + - sequentially consistent atomic access: everything else. + +@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: + sequential consistency. + + - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in +- the total ordering enforced by sequentially-consistent operations. ++ the ordering enforced by read-modify-write operations. + This is because QEMU uses the C11 memory model. The following example + is correct in Linux but not in QEMU: + +@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: + because the read of ``y`` can be moved (by either the processor or the + compiler) before the write of ``x``. + +- Fixing this requires an ``smp_mb()`` memory barrier between the write +- of ``x`` and the read of ``y``. In the common case where only one thread +- writes ``x``, it is also possible to write it like this: ++ Fixing this requires a full memory barrier between the write of ``x`` and ++ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and ++ ``smp_mb__after_rmw()``; they act both as an optimization, ++ avoiding the memory barrier on processors where it is unnecessary, ++ and as a clarification of this corner case of the C11 memory model: ++ ++ +--------------------------------+ ++ | QEMU (correct) | ++ +================================+ ++ | :: | ++ | | ++ | a = qatomic_fetch_add(&x, 2);| ++ | smp_mb__after_rmw(); | ++ | b = qatomic_read(&y); | ++ +--------------------------------+ ++ ++ In the common case where only one thread writes ``x``, it is also possible ++ to write it like this: + + +--------------------------------+ + | QEMU (correct) | +diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h +index 112a29910b..7855443cab 100644 +--- a/include/qemu/atomic.h ++++ b/include/qemu/atomic.h +@@ -243,6 +243,20 @@ + #define smp_wmb() smp_mb_release() + #define smp_rmb() smp_mb_acquire() + ++/* ++ * SEQ_CST is weaker than the older __sync_* builtins and Linux ++ * kernel read-modify-write atomics. 
Provide a macro to obtain ++ * the same semantics. ++ */ ++#if !defined(QEMU_SANITIZE_THREAD) && \ ++ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) ++# define smp_mb__before_rmw() signal_barrier() ++# define smp_mb__after_rmw() signal_barrier() ++#else ++# define smp_mb__before_rmw() smp_mb() ++# define smp_mb__after_rmw() smp_mb() ++#endif ++ + /* qatomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. +@@ -257,7 +271,8 @@ + #if !defined(__SANITIZE_THREAD__) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) + /* This is more efficient than a store plus a fence. */ +-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) ++# define qatomic_mb_set(ptr, i) \ ++ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) + #else + # define qatomic_mb_set(ptr, i) \ + ({ qatomic_store_release(ptr, i); smp_mb(); }) +-- +2.37.3 + diff --git a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch new file mode 100644 index 0000000..a57bf63 --- /dev/null +++ b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch @@ -0,0 +1,67 @@ +From 06c73c4b57dd1f47f819d719a63eb39fbe799304 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:51 +0100 +Subject: [PATCH 1/4] qcow2: Fix theoretical corruption in store_bitmap() error + path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/4] d0a26bed7b16db41e7baee1f8f2b3ae54e52dd52 + +In order to write the bitmap table to the image file, it is converted to +big endian. If the write fails, it is passed to clear_bitmap_table() to +free all of the clusters it had allocated before. However, if we don't +convert it back to native endianness first, we'll free things at a wrong +offset. + +In practical terms, the offsets will be so high that we won't actually +free any allocated clusters, but just run into an error, but in theory +this can cause image corruption. 
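    (Editorial aside, assuming a little-endian host and a made-up offset: a rough
    numeric illustration of why the stale big-endian table misses real clusters.

        uint64_t off = 0x50000;             /* plausible cluster offset */
        uint64_t be  = cpu_to_be64(off);    /* 0x0000050000000000 on this host */
        /* clear_bitmap_table() handed the swapped value would try to free a
         * cluster roughly 5 TiB into the image -- far past any allocation,
         * so it fails with an error instead of freeing live data. Nothing
         * guarantees that for every offset, hence the fix below. */)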
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-2-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) +Signed-off-by: Kevin Wolf +--- + block/qcow2-bitmap.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c +index 8fb4731551..869069415c 100644 +--- a/block/qcow2-bitmap.c ++++ b/block/qcow2-bitmap.c +@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) + return bdrv_flush(bs->file->bs); + } + +-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) ++static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) + { + size_t i; + +@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) + goto fail; + } + +- bitmap_table_to_be(tb, tb_size); ++ bitmap_table_bswap_be(tb, tb_size); + ret = bdrv_pwrite(bs->file, tb_offset, tb, tb_size * sizeof(tb[0])); + if (ret < 0) { ++ bitmap_table_bswap_be(tb, tb_size); + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..95933af --- /dev/null +++ b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch @@ -0,0 +1,75 @@ +From 2f03293910f3ac559f37d45c95325ae29638003a Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:14 -0500 +Subject: [PATCH 07/13] qemu-coroutine-lock: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [7/10] 9cf1b6d3b0dd154489e75ad54a3000ea58983960 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit e3a3b6ec8169eab2feb241b4982585001512cd55 +Author: Paolo Bonzini +Date: Fri Mar 3 10:52:59 2023 +0100 + + qemu-coroutine-lock: add smp_mb__after_rmw() + + mutex->from_push and mutex->handoff in qemu-coroutine-lock implement + the familiar pattern: + + write a write b + smp_mb() smp_mb() + read b read a + + The memory barrier is required by the C memory model even after a + SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. + Add it and avoid the unclear qatomic_mb_read() operation. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-coroutine-lock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c +index 2669403839..a03ed0e664 100644 +--- a/util/qemu-coroutine-lock.c ++++ b/util/qemu-coroutine-lock.c +@@ -206,10 +206,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, + trace_qemu_co_mutex_lock_entry(mutex, self); + push_waiter(mutex, &w); + ++ /* ++ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set ++ * in qemu_co_mutex_unlock. ++ */ ++ smp_mb__after_rmw(); ++ + /* This is the "Responsibility Hand-Off" protocol; a lock() picks from + * a concurrent unlock() the responsibility of waking somebody up. 
+ */ +- old_handoff = qatomic_mb_read(&mutex->handoff); ++ old_handoff = qatomic_read(&mutex->handoff); + if (old_handoff && + has_waiters(mutex) && + qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { +@@ -308,6 +314,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) + } + + our_handoff = mutex->sequence; ++ /* Set handoff before checking for waiters. */ + qatomic_mb_set(&mutex->handoff, our_handoff); + if (!has_waiters(mutex)) { + /* The concurrent lock has not added itself yet, so it +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..ae9850e --- /dev/null +++ b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,70 @@ +From 648193b48d8aeaded90fd657e3610d8040f505fc Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:53 +0100 +Subject: [PATCH 3/4] qemu-img bitmap: Report errors while closing the image +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [3/4] 8e13e09564718a0badd03af84f036246a46a0eba + +blk_unref() can't report any errors that happen while closing the image. +For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img bitmap' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-4-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index 18833f7d69..7d035c0c7f 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -4622,6 +4622,7 @@ static int img_bitmap(int argc, char **argv) + QSIMPLEQ_HEAD(, ImgBitmapAction) actions; + ImgBitmapAction *act, *act_next; + const char *op; ++ int inactivate_ret; + + QSIMPLEQ_INIT(&actions); + +@@ -4806,6 +4807,16 @@ static int img_bitmap(int argc, char **argv) + ret = 0; + + out: ++ /* ++ * Manually inactivate the images first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. 
++ */ ++ inactivate_ret = bdrv_inactivate_all(); ++ if (inactivate_ret < 0) { ++ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); ++ ret = 1; ++ } ++ + blk_unref(src); + blk_unref(blk); + qemu_opts_del(opts); +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..32d3d9d --- /dev/null +++ b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,67 @@ +From 2396df7fe527567e8e78761ef24ea1057ef6fa48 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:52 +0100 +Subject: [PATCH 2/4] qemu-img commit: Report errors while closing the image + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/4] 28f95bf76d1d63e2b0bed0c2ba5206bd3e5ea4f8 + +blk_unref() can't report any errors that happen while closing the image. +For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img commit' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. + +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-3-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index f036a1d428..18833f7d69 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -443,6 +443,11 @@ static BlockBackend *img_open(bool image_opts, + blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, + force_share); + } ++ ++ if (blk) { ++ blk_set_force_allow_inactivate(blk); ++ } ++ + return blk; + } + +@@ -1110,6 +1115,14 @@ unref_backing: + done: + qemu_progress_end(); + ++ /* ++ * Manually inactivate the image first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. 
++ */ ++ ret = bdrv_inactivate_all(); ++ if (ret < 0 && !local_err) { ++ error_setg_errno(&local_err, -ret, "Error while closing the image"); ++ } + blk_unref(blk); + + if (local_err) { +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch new file mode 100644 index 0000000..31e60a3 --- /dev/null +++ b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch @@ -0,0 +1,166 @@ +From 7c6faae20638f58681df223e0ca44e0a6cb60d2d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:54 +0100 +Subject: [PATCH 4/4] qemu-iotests: Test qemu-img bitmap/commit exit code on + error + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [4/4] fb2f9de98ddd2ee1d745119e4f15272ef44e0aae + +This tests that when an error happens while writing back bitmaps to the +image file in qcow2_inactivate(), 'qemu-img bitmap/commit' actually +return an error value in their exit code instead of making the operation +look successful to scripts. + +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-5-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) +Signed-off-by: Kevin Wolf +--- + .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ + .../tests/qemu-img-close-errors.out | 23 +++++ + 2 files changed, 119 insertions(+) + create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors + create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out + +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors +new file mode 100755 +index 0000000000..50bfb6cfa2 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors +@@ -0,0 +1,96 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Check that errors while closing the image, in particular writing back dirty ++# bitmaps, is correctly reported with a failing qemu-img exit code. ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt qcow2 ++_supported_proto file ++_supported_os Linux ++ ++size=1G ++ ++# The error we are going to use is ENOSPC. Depending on how many bitmaps we ++# create in the backing file (and therefore increase the used up space), we get ++# failures in different places. 
With a low number, only merging the bitmap ++# fails, whereas with a higher number, already 'qemu-img commit' fails. ++for max_bitmap in 6 7; do ++ echo ++ echo "=== Test with $max_bitmap bitmaps ===" ++ ++ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size ++ for i in $(seq 1 $max_bitmap); do ++ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" ++ done ++ ++ # Simulate a block device of 128 MB by resizing the image file accordingly ++ # and then enforcing the size with the raw driver ++ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" ++ BASE_JSON='json:{ ++ "driver": "qcow2", ++ "file": { ++ "driver": "raw", ++ "size": 134217728, ++ "file": { ++ "driver": "file", ++ "filename":"'"$TEST_IMG.base"'" ++ } ++ } ++ }' ++ ++ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT ++ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" ++ ++ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io ++ ++ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" ++ ++ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" ++ echo "qemu-img bitmap --add exit code: $?" ++ ++ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ ++ "good-bitmap" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 ++ +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out +new file mode 100644 +index 0000000000..1bfe88f176 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out +@@ -0,0 +1,23 @@ ++QA output created by qemu-img-close-errors ++ ++=== Test with 6 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++Image committed. 
++qemu-img commit exit code: 0 ++qemu-img bitmap --add exit code: 0 ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img bitmap --merge exit code: 1 ++ ++=== Test with 7 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img commit exit code: 1 ++qemu-img bitmap --add exit code: 0 ++qemu-img bitmap --merge exit code: 0 ++*** done +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..0051baf --- /dev/null +++ b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,146 @@ +From d46ca52c3f42add549bd3790a41d06594821334e Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:10:57 -0500 +Subject: [PATCH 03/13] qemu-thread-posix: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [3/10] 746070c4d78c7f0a9ac4456d9aee69475acb8964 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 +Author: Paolo Bonzini +Date: Thu Mar 2 11:19:52 2023 +0100 + + qemu-thread-posix: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers too. Document more clearly what + is going on. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ + 1 file changed, 49 insertions(+), 20 deletions(-) + +diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c +index e1225b63bd..dd3b6d4670 100644 +--- a/util/qemu-thread-posix.c ++++ b/util/qemu-thread-posix.c +@@ -430,13 +430,21 @@ void qemu_event_destroy(QemuEvent *ev) + + void qemu_event_set(QemuEvent *ev) + { +- /* qemu_event_set has release semantics, but because it *loads* ++ assert(ev->initialized); ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ +- assert(ev->initialized); + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier in kernel futex_wait system call. 
*/ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + qemu_futex_wake(ev, INT_MAX); + } +@@ -445,18 +453,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* +- * If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). ++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -464,20 +473,40 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* +- * Leave the event reset and tell qemu_event_set that there +- * are waiters. No need to retry, because there cannot be +- * a concurrent busy->free transition. After the CAS, the +- * event will be either set or busy. ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. ++ * ++ * This cmpxchg doesn't have particular ordering requirements if it ++ * succeeds (moving the store earlier can only cause qemu_event_set() ++ * to issue _more_ wakeups), the failing case needs acquire semantics ++ * like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + return; + } + } ++ ++ /* ++ * This is the final check for a concurrent set, so it does need ++ * a smp_mb() pairing with the second barrier of qemu_event_set(). ++ * The barrier is inside the FUTEX_WAIT system call. 
++ */ + qemu_futex_wait(ev, EV_BUSY); + } + } +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..3b63378 --- /dev/null +++ b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,162 @@ +From fa730378c42567e77eaf3e70983108f31f9001b9 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:11:05 -0500 +Subject: [PATCH 04/13] qemu-thread-win32: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [4/10] 43d5bd903b460d4c3c5793a456820e8c5c8521d9 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 +Author: Paolo Bonzini +Date: Thu Mar 2 11:22:50 2023 +0100 + + qemu-thread-win32: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers that are not really needed and + complicated the functions unnecessarily. Also, it is relying on + a memory barrier in ResetEvent(); the barrier _ought_ to be there + but there is really no documentation about it, so make it explicit. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- + 1 file changed, 56 insertions(+), 26 deletions(-) + +diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c +index 52eb19f351..c10249bc2e 100644 +--- a/util/qemu-thread-win32.c ++++ b/util/qemu-thread-win32.c +@@ -246,12 +246,20 @@ void qemu_event_destroy(QemuEvent *ev) + void qemu_event_set(QemuEvent *ev) + { + assert(ev->initialized); +- /* qemu_event_set has release semantics, but because it *loads* ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier after ResetEvent. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + SetEvent(ev->event); + } +@@ -260,17 +268,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). 
++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -278,29 +288,49 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { +- /* qemu_event_set is not yet going to call SetEvent, but we are +- * going to do another check for EV_SET below when setting EV_BUSY. +- * At that point it is safe to call WaitForSingleObject. ++ /* ++ * Here the underlying kernel event is reset, but qemu_event_set is ++ * not yet going to call SetEvent. However, there will be another ++ * check for EV_SET below when setting EV_BUSY. At that point it ++ * is safe to call WaitForSingleObject. + */ + ResetEvent(ev->event); + +- /* Tell qemu_event_set that there are waiters. No need to retry +- * because there cannot be a concurrent busy->free transition. +- * After the CAS, the event will be either set or busy. ++ /* ++ * It is not clear whether ResetEvent provides this barrier; kernel ++ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! ++ */ ++ smp_mb(); ++ ++ /* ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { +- value = EV_SET; +- } else { +- value = EV_BUSY; ++ return; + } + } +- if (value == EV_BUSY) { +- WaitForSingleObject(ev->event, INFINITE); +- } ++ ++ /* ++ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, ++ * qemu_event_set() must observe EV_BUSY and call SetEvent(). ++ */ ++ WaitForSingleObject(ev->event, INFINITE); + } + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch new file mode 100644 index 0000000..2ceb4e4 --- /dev/null +++ b/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch @@ -0,0 +1,111 @@ +From 21b19213328826327eba18199b790425659af7d8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 Jul 2022 16:55:34 +0200 +Subject: [PATCH 1/3] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 220: s390x: Fix skey test in kvm_unit_test +RH-Bugzilla: 2124757 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [1/2] e514a00305cb0caab9d3acc0efb325853daa6d51 + +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124757 + +Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f +("linux-headers: Update to v5.18-rc6"), but this is focusing on +the file linux-headers/linux/kvm.h only (since the other changes +related to the VFIO renaming might break some stuff). 
+ +Signed-off-by: Thomas Huth +(cherry picked from commit 71516db15469a02600932a5c1f0d4a9626a91193) +Signed-off-by: Cédric Le Goater +--- + linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index d232feaae9..0d05d02ee4 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -445,7 +445,11 @@ struct kvm_run { + #define KVM_SYSTEM_EVENT_RESET 2 + #define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; +- __u64 flags; ++ __u32 ndata; ++ union { ++ __u64 flags; ++ __u64 data[16]; ++ }; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { +@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { +- __u8 ar; /* the access register number */ ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ }; + __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* should be set to 0 */ ++ __u8 reserved[32]; /* ignored */ + }; + }; + /* types for kvm_s390_mem_op->op */ +@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 + #define KVM_S390_MEMOP_SIDA_READ 2 + #define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + + /* for KVM_INTERRUPT */ + struct kvm_interrupt { +@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 + #define KVM_CAP_SYS_ATTRIBUTES 209 ++#define KVM_CAP_PPC_AIL_MODE_3 210 ++#define KVM_CAP_S390_MEM_OP_EXTENSION 211 ++#define KVM_CAP_PMU_CAPABILITY 212 ++#define KVM_CAP_DISABLE_QUIRKS2 213 ++/* #define KVM_CAP_VM_TSC_CONTROL 214 */ ++#define KVM_CAP_SYSTEM_EVENT_DATA 215 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1624,9 +1640,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-/* Available with KVM_CAP_XSAVE2 */ +-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +- + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + ++#define KVM_PMU_CAP_DISABLE (1 << 0) ++ + /** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. 
+-- +2.35.3 + diff --git a/SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch b/SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch new file mode 100644 index 0000000..f42410d --- /dev/null +++ b/SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch @@ -0,0 +1,124 @@ +From 95c229506a6e7261fce184488e880a94f9ba0789 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:21 +0000 +Subject: [PATCH 40/42] s390x: Add KVM PV dump interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [40/41] 5df512a63b2ed17991489565b70f89f4efc0b639 + +Let's add a few bits of code which hide the new KVM PV dump API from +us via new functions. + +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Steffen Eiden +[ Marc-André: fix up for compilation issue ] +Signed-off-by: Marc-André Lureau +Message-Id: <20221017083822.43118-10-frankja@linux.ibm.com> +(cherry picked from commit 753ca06f4706cd6e57750a606afb08c5c5299643) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 51 +++++++++++++++++++++++++++++++++++++++++++ + include/hw/s390x/pv.h | 9 ++++++++ + 2 files changed, 60 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 4c012f2eeb..728ba24547 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -175,6 +175,57 @@ bool kvm_s390_pv_info_basic_valid(void) + return info_valid; + } + ++static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr, ++ uint64_t len) ++{ ++ struct kvm_s390_pv_dmp dmp = { ++ .subcmd = subcmd, ++ .buff_addr = uaddr, ++ .buff_len = len, ++ .gaddr = gaddr, ++ }; ++ int ret; ++ ++ ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp); ++ if (ret) { ++ error_report("KVM DUMP command %ld failed", subcmd); ++ } ++ return ret; ++} ++ ++int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) ++{ ++ struct kvm_s390_pv_dmp dmp = { ++ .subcmd = KVM_PV_DUMP_CPU, ++ .buff_addr = (uint64_t)buff, ++ .gaddr = 0, ++ .buff_len = info_dump.dump_cpu_buffer_len, ++ }; ++ struct kvm_pv_cmd pv = { ++ .cmd = KVM_PV_DUMP, ++ .data = (uint64_t)&dmp, ++ }; ++ ++ return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv); ++} ++ ++int kvm_s390_dump_init(void) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0); ++} ++ ++int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest, ++ gaddr, len); ++} ++ ++int kvm_s390_dump_completion_data(void *buff) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0, ++ info_dump.dump_config_finalize_len); ++} ++ + #define TYPE_S390_PV_GUEST "s390-pv-guest" + OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) + +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index e5ea0eca16..9360aa1091 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -51,6 +51,10 @@ uint64_t kvm_s390_pv_dmp_get_size_cpu(void); + uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); + uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); + bool kvm_s390_pv_info_basic_valid(void); ++int kvm_s390_dump_init(void); ++int kvm_s390_dump_cpu(S390CPU *cpu, void *buff); ++int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest); ++int kvm_s390_dump_completion_data(void *buff); + #else /* CONFIG_KVM */ + static 
inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } +@@ -66,6 +70,11 @@ static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } + static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } + static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } + static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } ++static inline int kvm_s390_dump_init(void) { return 0; } ++static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; } ++static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, ++ void *dest) { return 0; } ++static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-Add-protected-dump-cap.patch b/SOURCES/kvm-s390x-Add-protected-dump-cap.patch new file mode 100644 index 0000000..94da295 --- /dev/null +++ b/SOURCES/kvm-s390x-Add-protected-dump-cap.patch @@ -0,0 +1,113 @@ +From 7634eed5aea61dc94f9a828c62ef3da9aeaa62ae Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:18 +0000 +Subject: [PATCH 37/42] s390x: Add protected dump cap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [37/41] 52e1e7bf1a00ce3a220d3db2f733a65548bfec6d + +Add a protected dump capability for later feature checking. + +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Reviewed-by: Thomas Huth +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20221017083822.43118-7-frankja@linux.ibm.com> +[ Marc-André - Add missing stubs when !kvm ] +Signed-off-by: Marc-André Lureau +(cherry picked from commit ad3b2e693daac6ed92db7361236028851d37c77c) +Signed-off-by: Cédric Le Goater +--- + target/s390x/kvm/kvm.c | 7 +++++++ + target/s390x/kvm/kvm_s390x.h | 1 + + target/s390x/kvm/meson.build | 2 ++ + target/s390x/kvm/stubs.c | 12 ++++++++++++ + 4 files changed, 22 insertions(+) + create mode 100644 target/s390x/kvm/stubs.c + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 30712487d4..d36b44f32a 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -159,6 +159,7 @@ static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + static int cap_zpci_op; ++static int cap_protected_dump; + + static bool mem_op_storage_key_support; + +@@ -365,6 +366,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); + cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); ++ cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP); + + kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); +@@ -2042,6 +2044,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, + return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); + } + ++int kvm_s390_get_protected_dump(void) ++{ ++ return cap_protected_dump; ++} ++ + int kvm_s390_get_ri(void) + { + return cap_ri; +diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h +index 
aaae8570de..f9785564d0 100644 +--- a/target/s390x/kvm/kvm_s390x.h ++++ b/target/s390x/kvm/kvm_s390x.h +@@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); + void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); + int kvm_s390_get_hpage_1m(void); ++int kvm_s390_get_protected_dump(void); + int kvm_s390_get_ri(void); + int kvm_s390_get_zpci_op(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); +diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build +index d1356356b1..aef52b6686 100644 +--- a/target/s390x/kvm/meson.build ++++ b/target/s390x/kvm/meson.build +@@ -1,6 +1,8 @@ + + s390x_ss.add(when: 'CONFIG_KVM', if_true: files( + 'kvm.c' ++), if_false: files( ++ 'stubs.c' + )) + + # Newer kernels on s390 check for an S390_PGSTE program header and +diff --git a/target/s390x/kvm/stubs.c b/target/s390x/kvm/stubs.c +new file mode 100644 +index 0000000000..5fd63b9a7e +--- /dev/null ++++ b/target/s390x/kvm/stubs.c +@@ -0,0 +1,12 @@ ++/* ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "kvm_s390x.h" ++ ++int kvm_s390_get_protected_dump(void) ++{ ++ return false; ++} +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-Introduce-PV-query-interface.patch b/SOURCES/kvm-s390x-Introduce-PV-query-interface.patch new file mode 100644 index 0000000..dfb0169 --- /dev/null +++ b/SOURCES/kvm-s390x-Introduce-PV-query-interface.patch @@ -0,0 +1,174 @@ +From 760236b3633a8f532631256a899cab969e772196 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:19 +0000 +Subject: [PATCH 38/42] s390x: Introduce PV query interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [38/41] 3090615d81ec6b9e4c306f7fc3709e1935ff5a79 + +Introduce an interface over which we can get information about UV data. 
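(Editorial sketch of a consumer, not part of this patch: once the query below has
run, a dump writer can size its per-CPU buffer from the cached info and hand it to
the KVM_PV_DUMP helpers added in the dump-interface patch above. The S390CPU *cpu
variable is assumed to come from the caller.

    if (kvm_s390_pv_info_basic_valid()) {
        size_t len = kvm_s390_pv_dmp_get_size_cpu();
        g_autofree void *buff = g_malloc0(len);

        if (kvm_s390_dump_cpu(cpu, buff) == 0) {
            /* buff now holds the encrypted per-CPU dump data */
        }
    })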
+ +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Reviewed-by: Janis Schoetterl-Glausch +Acked-by: Thomas Huth +Message-Id: <20221017083822.43118-8-frankja@linux.ibm.com> +(cherry picked from commit 03d83ecfae46bf5e0074cb5808043b30df34064b) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 61 ++++++++++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 6 ++++ + include/hw/s390x/pv.h | 10 +++++++ + 3 files changed, 77 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 401b63d6cb..4c012f2eeb 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -20,6 +20,11 @@ + #include "exec/confidential-guest-support.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/pv.h" ++#include "target/s390x/kvm/kvm_s390x.h" ++ ++static bool info_valid; ++static struct kvm_s390_pv_info_vm info_vm; ++static struct kvm_s390_pv_info_dump info_dump; + + static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + { +@@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + } \ + } + ++int s390_pv_query_info(void) ++{ ++ struct kvm_s390_pv_info info = { ++ .header.id = KVM_PV_INFO_VM, ++ .header.len_max = sizeof(info.header) + sizeof(info.vm), ++ }; ++ int rc; ++ ++ /* Info API's first user is dump so they are bundled */ ++ if (!kvm_s390_get_protected_dump()) { ++ return 0; ++ } ++ ++ rc = s390_pv_cmd(KVM_PV_INFO, &info); ++ if (rc) { ++ error_report("KVM PV INFO cmd %x failed: %s", ++ info.header.id, strerror(-rc)); ++ return rc; ++ } ++ memcpy(&info_vm, &info.vm, sizeof(info.vm)); ++ ++ info.header.id = KVM_PV_INFO_DUMP; ++ info.header.len_max = sizeof(info.header) + sizeof(info.dump); ++ rc = s390_pv_cmd(KVM_PV_INFO, &info); ++ if (rc) { ++ error_report("KVM PV INFO cmd %x failed: %s", ++ info.header.id, strerror(-rc)); ++ return rc; ++ } ++ ++ memcpy(&info_dump, &info.dump, sizeof(info.dump)); ++ info_valid = true; ++ ++ return rc; ++} ++ + int s390_pv_vm_enable(void) + { + return s390_pv_cmd(KVM_PV_ENABLE, NULL); +@@ -114,6 +155,26 @@ void s390_pv_inject_reset_error(CPUState *cs) + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; + } + ++uint64_t kvm_s390_pv_dmp_get_size_cpu(void) ++{ ++ return info_dump.dump_cpu_buffer_len; ++} ++ ++uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) ++{ ++ return info_dump.dump_config_finalize_len; ++} ++ ++uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) ++{ ++ return info_dump.dump_config_mem_buffer_per_1m; ++} ++ ++bool kvm_s390_pv_info_basic_valid(void) ++{ ++ return info_valid; ++} ++ + #define TYPE_S390_PV_GUEST "s390-pv-guest" + OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index bd80e72cf8..a9617ab79f 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -365,6 +365,12 @@ static int s390_machine_protect(S390CcwMachineState *ms) + + ms->pv = true; + ++ /* Will return 0 if API is not available since it's not vital */ ++ rc = s390_pv_query_info(); ++ if (rc) { ++ goto out_err; ++ } ++ + /* Set SE header and unpack */ + rc = s390_ipl_prepare_pv_header(); + if (rc) { +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 1f1f545bfc..e5ea0eca16 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -38,6 +38,7 @@ static inline bool s390_is_pv(void) + return ccw->pv; + } + ++int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); +@@ -46,8 +47,13 @@ void 
s390_pv_prep_reset(void); + int s390_pv_verify(void); + void s390_pv_unshare(void); + void s390_pv_inject_reset_error(CPUState *cs); ++uint64_t kvm_s390_pv_dmp_get_size_cpu(void); ++uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); ++uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); ++bool kvm_s390_pv_info_basic_valid(void); + #else /* CONFIG_KVM */ + static inline bool s390_is_pv(void) { return false; } ++static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } +@@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } + static inline void s390_pv_unshare(void) {} + static inline void s390_pv_inject_reset_error(CPUState *cs) {}; ++static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } ++static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } ++static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } ++static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } + #endif /* CONFIG_KVM */ + + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch b/SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch new file mode 100644 index 0000000..f150979 --- /dev/null +++ b/SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch @@ -0,0 +1,209 @@ +From 429c4cc750affe82b89867668ff2515a8a66732e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 18 Nov 2022 15:23:19 +0100 +Subject: [PATCH 2/3] s390x: Register TYPE_S390_CCW_MACHINE properties as class + properties +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 233: s390x: Document the "loadparm" machine property +RH-Bugzilla: 2128225 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Jon Maloy +RH-Commit: [2/2] 28a0086cb0e8be2535deafdd9115cadd7ff033f3 + +Currently, when running 'qemu-system-s390x -M s390-ccw-virtio,help' +the s390x-specific properties are not listed anymore. This happens +because since commit d8fb7d0969 ("vl: switch -M parsing to keyval") +the properties have to be defined at the class level and not at the +instance level anymore. 
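(Editorial sketch with hypothetical names, showing the general pattern the fix
applies: properties registered on the class in class_init are what
"-M <machine>,help" enumerates after the keyval switch, whereas instance_init
registrations no longer show up there.

    static void my_machine_class_init(ObjectClass *oc, void *data)
    {
        object_class_property_add_bool(oc, "my-flag",
                                       my_machine_get_flag,
                                       my_machine_set_flag);
        object_class_property_set_description(oc, "my-flag",
                                               "hypothetical example flag");
    })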
Fix it on s390x now, too, by moving the +registration of the properties to the class level" + +Fixes: d8fb7d0969 ("vl: switch -M parsing to keyval") +Signed-off-by: Pierre Morel +Message-Id: <20221103170150.20789-2-pmorel@linux.ibm.com> +[thuth: Add patch description] +Signed-off-by: Thomas Huth +(cherry picked from commit 1fd396e32288bbf536483c74b68cb3ee86005a9f) + +Conflicts: + hw/s390x/s390-virtio-ccw.c + (dropped the "zpcii-disable" property code - it's not used in downstream) +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 117 +++++++++++++++++++++---------------- + 1 file changed, 67 insertions(+), 50 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index a9617ab79f..4a7cd21cac 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -42,6 +42,7 @@ + #include "sysemu/sysemu.h" + #include "hw/s390x/pv.h" + #include "migration/blocker.h" ++#include "qapi/visitor.h" + + static Error *pv_mig_blocker; + +@@ -588,38 +589,6 @@ static ram_addr_t s390_fixup_ram_size(ram_addr_t sz) + return newsz; + } + +-static void ccw_machine_class_init(ObjectClass *oc, void *data) +-{ +- MachineClass *mc = MACHINE_CLASS(oc); +- NMIClass *nc = NMI_CLASS(oc); +- HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); +- S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); +- +- s390mc->ri_allowed = true; +- s390mc->cpu_model_allowed = true; +- s390mc->css_migration_enabled = true; +- s390mc->hpage_1m_allowed = true; +- mc->init = ccw_init; +- mc->reset = s390_machine_reset; +- mc->block_default_type = IF_VIRTIO; +- mc->no_cdrom = 1; +- mc->no_floppy = 1; +- mc->no_parallel = 1; +- mc->no_sdcard = 1; +- mc->max_cpus = S390_MAX_CPUS; +- mc->has_hotpluggable_cpus = true; +- assert(!mc->get_hotplug_handler); +- mc->get_hotplug_handler = s390_get_hotplug_handler; +- mc->cpu_index_to_instance_props = s390_cpu_index_to_props; +- mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids; +- /* it is overridden with 'host' cpu *in kvm_arch_init* */ +- mc->default_cpu_type = S390_CPU_TYPE_NAME("qemu"); +- hc->plug = s390_machine_device_plug; +- hc->unplug_request = s390_machine_device_unplug_request; +- nc->nmi_monitor_handler = s390_nmi; +- mc->default_ram_id = "s390.ram"; +-} +- + static inline bool machine_get_aes_key_wrap(Object *obj, Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); +@@ -694,19 +663,29 @@ bool hpage_1m_allowed(void) + return get_machine_class()->hpage_1m_allowed; + } + +-static char *machine_get_loadparm(Object *obj, Error **errp) ++static void machine_get_loadparm(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ char *str = g_strndup((char *) ms->loadparm, sizeof(ms->loadparm)); + +- /* make a NUL-terminated string */ +- return g_strndup((char *) ms->loadparm, sizeof(ms->loadparm)); ++ visit_type_str(v, name, &str, errp); ++ g_free(str); + } + +-static void machine_set_loadparm(Object *obj, const char *val, Error **errp) ++static void machine_set_loadparm(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ char *val; + int i; + ++ if (!visit_type_str(v, name, &val, errp)) { ++ return; ++ } ++ + for (i = 0; i < sizeof(ms->loadparm) && val[i]; i++) { + uint8_t c = qemu_toupper(val[i]); /* mimic HMC */ + +@@ -724,29 +703,67 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp) + ms->loadparm[i] = ' '; /* pad right 
with spaces */ + } + } +-static inline void s390_machine_initfn(Object *obj) ++ ++static void ccw_machine_class_init(ObjectClass *oc, void *data) + { +- object_property_add_bool(obj, "aes-key-wrap", +- machine_get_aes_key_wrap, +- machine_set_aes_key_wrap); +- object_property_set_description(obj, "aes-key-wrap", ++ MachineClass *mc = MACHINE_CLASS(oc); ++ NMIClass *nc = NMI_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ ++ s390mc->ri_allowed = true; ++ s390mc->cpu_model_allowed = true; ++ s390mc->css_migration_enabled = true; ++ s390mc->hpage_1m_allowed = true; ++ mc->init = ccw_init; ++ mc->reset = s390_machine_reset; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->no_floppy = 1; ++ mc->no_parallel = 1; ++ mc->no_sdcard = 1; ++ mc->max_cpus = S390_MAX_CPUS; ++ mc->has_hotpluggable_cpus = true; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = s390_get_hotplug_handler; ++ mc->cpu_index_to_instance_props = s390_cpu_index_to_props; ++ mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids; ++ /* it is overridden with 'host' cpu *in kvm_arch_init* */ ++ mc->default_cpu_type = S390_CPU_TYPE_NAME("qemu"); ++ hc->plug = s390_machine_device_plug; ++ hc->unplug_request = s390_machine_device_unplug_request; ++ nc->nmi_monitor_handler = s390_nmi; ++ mc->default_ram_id = "s390.ram"; ++ ++ object_class_property_add_bool(oc, "aes-key-wrap", ++ machine_get_aes_key_wrap, ++ machine_set_aes_key_wrap); ++ object_class_property_set_description(oc, "aes-key-wrap", + "enable/disable AES key wrapping using the CPACF wrapping key"); +- object_property_set_bool(obj, "aes-key-wrap", true, NULL); + +- object_property_add_bool(obj, "dea-key-wrap", +- machine_get_dea_key_wrap, +- machine_set_dea_key_wrap); +- object_property_set_description(obj, "dea-key-wrap", ++ object_class_property_add_bool(oc, "dea-key-wrap", ++ machine_get_dea_key_wrap, ++ machine_set_dea_key_wrap); ++ object_class_property_set_description(oc, "dea-key-wrap", + "enable/disable DEA key wrapping using the CPACF wrapping key"); +- object_property_set_bool(obj, "dea-key-wrap", true, NULL); +- object_property_add_str(obj, "loadparm", +- machine_get_loadparm, machine_set_loadparm); +- object_property_set_description(obj, "loadparm", ++ ++ object_class_property_add(oc, "loadparm", "loadparm", ++ machine_get_loadparm, machine_set_loadparm, ++ NULL, NULL); ++ object_class_property_set_description(oc, "loadparm", + "Up to 8 chars in set of [A-Za-z0-9. 
] (lower case chars converted" + " to upper case) to pass to machine loader, boot manager," + " and guest kernel"); + } + ++static inline void s390_machine_initfn(Object *obj) ++{ ++ S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ ++ ms->aes_key_wrap = true; ++ ms->dea_key_wrap = true; ++} ++ + static const TypeInfo ccw_machine_info = { + .name = TYPE_S390_CCW_MACHINE, + .parent = TYPE_MACHINE, +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch b/SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch new file mode 100644 index 0000000..2e90869 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch @@ -0,0 +1,114 @@ +From 2f0febd6813c4ad7f52e43afb3ecce7aef3557e6 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:56 -0400 +Subject: [PATCH 08/11] s390x/pci: RPCIT second pass when mappings exhausted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 0b4500b9247725b1ef0b290bb85392300a618cac + +If we encounter a new mapping while the number of available DMA entries +in vfio is 0, we are currently skipping that mapping which is a problem +if we manage to free up DMA space after that within the same RPCIT -- +we will return to the guest with CC0 and have not mapped everything +within the specified range. This issue was uncovered while testing +changes to the s390 linux kernel iommu/dma code, where a different +usage pattern was employed (new mappings start at the end of the +aperture and work back towards the front, making us far more likely +to encounter new mappings before invalidated mappings during a +global refresh). + +Fix this by tracking whether any mappings were skipped due to vfio +DMA limit hitting 0; when this occurs, we still continue the range +and unmap/map anything we can - then we must re-run the range again +to pickup anything that was missed. This must occur in a loop until +all requests are satisfied (success) or we detect that we are still +unable to complete all mappings (return ZPCI_RPCIT_ST_INSUFF_RES). 
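To make the retry flow described above easier to follow, here is a minimal standalone sketch of the two-pass refresh. It is illustrative only: the request array, the done flag and the helper name are invented for this sketch and do not appear in the hunks below.

    #include <stdbool.h>
    #include <stddef.h>

    enum { OP_UNMAP, OP_MAP };
    struct req { int op; bool done; };

    /* Walk one RPCIT range; run it again whenever a new mapping had to be
     * skipped but later unmaps freed DMA slots within the same range. */
    static int refresh_range(struct req *reqs, size_t n, unsigned int *dma_avail)
    {
        bool again;

    retry:
        again = false;
        for (size_t i = 0; i < n; i++) {
            if (reqs[i].done) {
                continue;               /* already applied on an earlier pass */
            }
            if (reqs[i].op == OP_UNMAP) {
                (*dma_avail)++;         /* invalidations always proceed and free a slot */
                reqs[i].done = true;
            } else if (*dma_avail > 0) {
                (*dma_avail)--;         /* a new mapping consumes a slot */
                reqs[i].done = true;
            } else {
                again = true;           /* out of slots: skip, keep processing unmaps */
            }
        }
        if (again && *dma_avail > 0) {
            goto retry;                 /* space was freed: pick up the skipped mappings */
        }
        return again ? -1 : 0;          /* -1 stands in for ZPCI_RPCIT_ST_INSUFF_RES */
    }

Roughly speaking, the actual patch gets the effect of the done flag for free: on the second pass, s390_pci_update_iotlb finds already-applied entries in the IOTLB cache and leaves both them and the DMA count untouched.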
+ +Link: https://lore.kernel.org/linux-s390/20221019144435.369902-1-schnelle@linux.ibm.com/ +Fixes: 37fa32de70 ("s390x/pci: Honor DMA limits set by vfio") +Reported-by: Niklas Schnelle +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-2-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit 4a8d21ba50fc8625c3bd51dab903872952f95718) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 20a9bcc7af..7cc4bcf850 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -677,8 +677,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + S390PCIBusDevice *pbdev; + S390PCIIOMMU *iommu; + S390IOTLBEntry entry; +- hwaddr start, end; ++ hwaddr start, end, sstart; + uint32_t dma_avail; ++ bool again; + + if (env->psw.mask & PSW_MASK_PSTATE) { + s390_program_interrupt(env, PGM_PRIVILEGED, ra); +@@ -691,7 +692,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + + fh = env->regs[r1] >> 32; +- start = env->regs[r2]; ++ sstart = start = env->regs[r2]; + end = start + env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(s390_get_phb(), fh); +@@ -732,6 +733,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + goto err; + } + ++ retry: ++ start = sstart; ++ again = false; + while (start < end) { + error = s390_guest_io_table_walk(iommu->g_iota, start, &entry); + if (error) { +@@ -739,13 +743,24 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + + start += entry.len; +- while (entry.iova < start && entry.iova < end && +- (dma_avail > 0 || entry.perm == IOMMU_NONE)) { +- dma_avail = s390_pci_update_iotlb(iommu, &entry); +- entry.iova += TARGET_PAGE_SIZE; +- entry.translated_addr += TARGET_PAGE_SIZE; ++ while (entry.iova < start && entry.iova < end) { ++ if (dma_avail > 0 || entry.perm == IOMMU_NONE) { ++ dma_avail = s390_pci_update_iotlb(iommu, &entry); ++ entry.iova += TARGET_PAGE_SIZE; ++ entry.translated_addr += TARGET_PAGE_SIZE; ++ } else { ++ /* ++ * We are unable to make a new mapping at this time, continue ++ * on and hopefully free up more space. Then attempt another ++ * pass. 
++ */ ++ again = true; ++ break; ++ } + } + } ++ if (again && dma_avail > 0) ++ goto retry; + err: + if (error) { + pbdev->state = ZPCI_FS_ERROR; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch b/SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch new file mode 100644 index 0000000..88716f5 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch @@ -0,0 +1,178 @@ +From 8020177f1c40da2a9ca09fa20dc90eda65739671 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:31 -0400 +Subject: [PATCH 06/42] s390x/pci: add routine to get host function handle from + CLP info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [6/41] 8ab652cf4095e61f5f55726d41111de227d452e7 + +In order to interface with the underlying host zPCI device, we need +to know its function handle. Add a routine to grab this from the +vfio CLP capabilities chain. + +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-3-mjrosato@linux.ibm.com> +[thuth: Replace free(info) with g_free(info)] +Signed-off-by: Thomas Huth +(cherry picked from commit 21fa15298d88db2050a713cdf79c10cb0e09146f) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 83 ++++++++++++++++++++++++++------ + include/hw/s390x/s390-pci-vfio.h | 5 ++ + 2 files changed, 72 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 6f80a47e29..08bcc55e85 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,27 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + pbdev->zpci_fn.pft = 0; + } + ++static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, ++ uint32_t *fh) ++{ ++ struct vfio_info_cap_header *hdr; ++ struct vfio_device_info_cap_zpci_base *cap; ++ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ ++ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); ++ ++ /* Can only get the host fh with version 2 or greater */ ++ if (hdr == NULL || hdr->version < 2) { ++ trace_s390_pci_clp_cap(vpci->vbasedev.name, ++ VFIO_DEVICE_INFO_CAP_ZPCI_BASE); ++ return false; ++ } ++ cap = (void *) hdr; ++ ++ *fh = cap->fh; ++ return true; ++} ++ + static void s390_pci_read_group(S390PCIBusDevice *pbdev, + struct vfio_device_info *info) + { +@@ -217,25 +238,13 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); + } + +-/* +- * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for +- * capabilities that contain information about CLP features provided by the +- * underlying host. +- * On entry, defaults have already been placed into the guest CLP response +- * buffers. On exit, defaults will have been overwritten for any CLP features +- * found in the capability chain; defaults will remain for any CLP features not +- * found in the chain. 
+- */ +-void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) ++static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, ++ uint32_t argsz) + { +- g_autofree struct vfio_device_info *info = NULL; ++ struct vfio_device_info *info = g_malloc0(argsz); + VFIOPCIDevice *vfio_pci; +- uint32_t argsz; + int fd; + +- argsz = sizeof(*info); +- info = g_malloc0(argsz); +- + vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + fd = vfio_pci->vbasedev.fd; + +@@ -250,7 +259,8 @@ retry: + + if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { + trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); +- return; ++ g_free(info); ++ return NULL; + } + + if (info->argsz > argsz) { +@@ -259,6 +269,47 @@ retry: + goto retry; + } + ++ return info; ++} ++ ++/* ++ * Get the host function handle from the vfio CLP capabilities chain. Returns ++ * true if a fh value was placed into the provided buffer. Returns false ++ * if a fh could not be obtained (ioctl failed or capabilitiy version does ++ * not include the fh) ++ */ ++bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) ++{ ++ g_autofree struct vfio_device_info *info = NULL; ++ ++ assert(fh); ++ ++ info = get_device_info(pbdev, sizeof(*info)); ++ if (!info) { ++ return false; ++ } ++ ++ return get_host_fh(pbdev, info, fh); ++} ++ ++/* ++ * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for ++ * capabilities that contain information about CLP features provided by the ++ * underlying host. ++ * On entry, defaults have already been placed into the guest CLP response ++ * buffers. On exit, defaults will have been overwritten for any CLP features ++ * found in the capability chain; defaults will remain for any CLP features not ++ * found in the chain. ++ */ ++void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) ++{ ++ g_autofree struct vfio_device_info *info = NULL; ++ ++ info = get_device_info(pbdev, sizeof(*info)); ++ if (!info) { ++ return; ++ } ++ + /* + * Find the CLP features provided and fill in the guest CLP responses. 
+ * Always call s390_pci_read_base first as information from this could +diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h +index ff708aef50..ae1b126ff7 100644 +--- a/include/hw/s390x/s390-pci-vfio.h ++++ b/include/hw/s390x/s390-pci-vfio.h +@@ -20,6 +20,7 @@ bool s390_pci_update_dma_avail(int fd, unsigned int *avail); + S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + S390PCIBusDevice *pbdev); + void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt); ++bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh); + void s390_pci_get_clp_info(S390PCIBusDevice *pbdev); + #else + static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) +@@ -33,6 +34,10 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + } + static inline void s390_pci_end_dma_count(S390pciState *s, + S390PCIDMACount *cnt) { } ++static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) ++{ ++ return false; ++} + static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { } + #endif + +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch b/SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch new file mode 100644 index 0000000..563f782 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch @@ -0,0 +1,99 @@ +From de6319fe0ce09297beae5ff4636c03217abe6f26 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:06 -0500 +Subject: [PATCH 04/42] s390x/pci: add supported DT information to clp response +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [4/41] 275668f6d38fbc1dfa2f1aa8f58b2c319de2657d + +The DTSM is a mask that specifies which I/O Address Translation designation +types are supported. Today QEMU only supports DT=1. 
+ +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-5-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit ac6aa30ac47b2abaf142f76de46374da2a98f6e7) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 1 + + hw/s390x/s390-pci-inst.c | 1 + + hw/s390x/s390-pci-vfio.c | 1 + + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-clp.h | 3 ++- + 5 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 1b51a72838..01b58ebc70 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -782,6 +782,7 @@ static void s390_pci_init_default_group(void) + resgrp->i = 128; + resgrp->maxstbl = 128; + resgrp->version = 0; ++ resgrp->dtsm = ZPCI_DTSM; + } + + static void set_pbdev_info(S390PCIBusDevice *pbdev) +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 07bab85ce5..6d400d4147 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -329,6 +329,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra) + stw_p(&resgrp->i, group->zpci_group.i); + stw_p(&resgrp->maxstbl, group->zpci_group.maxstbl); + resgrp->version = group->zpci_group.version; ++ resgrp->dtsm = group->zpci_group.dtsm; + stw_p(&resgrp->hdr.rsp, CLP_RC_OK); + break; + } +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 2a153fa8c9..6f80a47e29 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -160,6 +160,7 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + resgrp->i = cap->noi; + resgrp->maxstbl = cap->maxstbl; + resgrp->version = cap->version; ++ resgrp->dtsm = ZPCI_DTSM; + } + } + +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 2727e7bdef..da3cde2bb4 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -37,6 +37,7 @@ + #define ZPCI_MAX_UID 0xffff + #define UID_UNDEFINED 0 + #define UID_CHECKING_ENABLED 0x01 ++#define ZPCI_DTSM 0x40 + + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) +diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h +index 96b8e3f133..cc8c8662b8 100644 +--- a/include/hw/s390x/s390-pci-clp.h ++++ b/include/hw/s390x/s390-pci-clp.h +@@ -163,7 +163,8 @@ typedef struct ClpRspQueryPciGrp { + uint8_t fr; + uint16_t maxstbl; + uint16_t mui; +- uint64_t reserved3; ++ uint8_t dtsm; ++ uint8_t reserved3[7]; + uint64_t dasm; /* dma address space mask */ + uint64_t msia; /* MSI address */ + uint64_t reserved4; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch new file mode 100644 index 0000000..baa0a9c --- /dev/null +++ b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch @@ -0,0 +1,125 @@ +From b972c5a2763a91024725c147cf1691ed8e180c7c Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:57 -0400 +Subject: [PATCH 09/11] s390x/pci: coalesce unmap operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 7b5ee38eca565f5a7cbede4b9883ba3a508fb46c + +Currently, each 
unmapped page is handled as an individual iommu +region notification. Attempt to group contiguous unmap operations +into fewer notifications to reduce overhead. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 51 insertions(+) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 7cc4bcf850..66e764f901 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + } + g_hash_table_remove(iommu->iotlb, &entry->iova); + inc_dma_avail(iommu); ++ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ ++ goto out; + } else { + if (cache) { + if (cache->perm == entry->perm && +@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + dec_dma_avail(iommu); + } + ++ /* ++ * All associated iotlb entries have already been cleared, trigger the ++ * unmaps. ++ */ + memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + + out: + return iommu->dma_limit ? iommu->dma_limit->avail : 1; + } + ++static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, ++ uint64_t len) ++{ ++ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; ++ IOMMUTLBEvent event = { ++ .type = IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .translated_addr = 0, ++ .perm = IOMMU_NONE, ++ }, ++ }; ++ ++ while (remain >= TARGET_PAGE_SIZE) { ++ mask = dma_aligned_pow2_mask(start, end, 64); ++ size = mask + 1; ++ event.entry.iova = start; ++ event.entry.addr_mask = mask; ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ start += size; ++ remain -= size; ++ } ++} ++ + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + { + CPUS390XState *env = &cpu->env; ++ uint64_t iova, coalesce = 0; + uint32_t fh; + uint16_t error = 0; + S390PCIBusDevice *pbdev; +@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + break; + } + ++ /* ++ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps ++ * into as few notifier events as possible. 
++ */ ++ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { ++ if (coalesce == 0) { ++ iova = entry.iova; ++ } ++ coalesce += entry.len; ++ } else if (coalesce > 0) { ++ /* Unleash the coalesced unmap before processing a new map */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } ++ + start += entry.len; + while (entry.iova < start && entry.iova < end) { + if (dma_avail > 0 || entry.perm == IOMMU_NONE) { +@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + } + } ++ if (coalesce) { ++ /* Unleash the coalesced unmap before finishing rpcit */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } + if (again && dma_avail > 0) + goto retry; + err: +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch b/SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch new file mode 100644 index 0000000..4403658 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch @@ -0,0 +1,60 @@ +From 5bd57d8ac3a4e75337eae81a3623b4dc2b417e2f Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:33 -0400 +Subject: [PATCH 08/42] s390x/pci: don't fence interpreted devices without + MSI-X +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [8/41] 52bad4368e9494c43133338b386dc0cc159aeedc + +Lack of MSI-X support is not an issue for interpreted passthrough +devices, so let's let these in. This will allow, for example, ISM +devices to be passed through -- but only when interpretation is +available and being used. 
+ +Signed-off-by: Matthew Rosato +Reviewed-by: Thomas Huth +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-5-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 15d0e7942d3b31ff71d8e0e8cec3a8203214f19b) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 18bfae0465..07c7c155e3 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -881,6 +881,10 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev) + + static void s390_pci_msix_free(S390PCIBusDevice *pbdev) + { ++ if (pbdev->msix.entries == 0) { ++ return; ++ } ++ + memory_region_del_subregion(&pbdev->iommu->mr, &pbdev->msix_notify_mr); + object_unparent(OBJECT(&pbdev->msix_notify_mr)); + } +@@ -1093,7 +1097,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->interp = false; + } + +- if (s390_pci_msix_init(pbdev)) { ++ if (s390_pci_msix_init(pbdev) && !pbdev->interp) { + error_setg(errp, "MSI-X support is mandatory " + "in the S390 architecture"); + return; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch b/SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch new file mode 100644 index 0000000..c97b587 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch @@ -0,0 +1,77 @@ +From 67ebb71d56e95adf185ab4971939e31c4c899863 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:04 -0500 +Subject: [PATCH 02/42] s390x/pci: don't use hard-coded dma range in reg_ioat +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [2/41] c7897321f9848ef8f115130832774bbcd6724f03 + +Instead use the values from clp info, they will either be the hard-coded +values or what came from the host driver via vfio. 
+ +Fixes: 9670ee752727 ("s390x/pci: use a PCI Function structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-3-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit df7ce0a94d9283f0656b4bc0f21566973ff649a3) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 1c8ad91175..11b7f6bfa1 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -916,9 +916,10 @@ int pci_dereg_irqs(S390PCIBusDevice *pbdev) + return 0; + } + +-static int reg_ioat(CPUS390XState *env, S390PCIIOMMU *iommu, ZpciFib fib, ++static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib, + uintptr_t ra) + { ++ S390PCIIOMMU *iommu = pbdev->iommu; + uint64_t pba = ldq_p(&fib.pba); + uint64_t pal = ldq_p(&fib.pal); + uint64_t g_iota = ldq_p(&fib.iota); +@@ -927,7 +928,7 @@ static int reg_ioat(CPUS390XState *env, S390PCIIOMMU *iommu, ZpciFib fib, + + pba &= ~0xfff; + pal |= 0xfff; +- if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) { ++ if (pba > pal || pba < pbdev->zpci_fn.sdma || pal > pbdev->zpci_fn.edma) { + s390_program_interrupt(env, PGM_OPERAND, ra); + return -EINVAL; + } +@@ -1125,7 +1126,7 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } else if (pbdev->iommu->enabled) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); +- } else if (reg_ioat(env, pbdev->iommu, fib, ra)) { ++ } else if (reg_ioat(env, pbdev, fib, ra)) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_INSUF_RES); + } +@@ -1150,7 +1151,7 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else { + pci_dereg_ioat(pbdev->iommu); +- if (reg_ioat(env, pbdev->iommu, fib, ra)) { ++ if (reg_ioat(env, pbdev, fib, ra)) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_INSUF_RES); + } +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch b/SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch new file mode 100644 index 0000000..c36c575 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch @@ -0,0 +1,265 @@ +From 362fae654bbae03741003e565fb95d73d8c0025f Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:34 -0400 +Subject: [PATCH 09/42] s390x/pci: enable adapter event notification for + interpreted devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [9/41] 771975c436c7cb608e0e9e40edd732ac310beb69 + +Use the associated kvm ioctl operation to enable adapter event notification +and forwarding for devices when requested. This feature will be set up +with or without firmware assist based upon the 'forwarding_assist' setting. 
+ +Signed-off-by: Matthew Rosato +Message-Id: <20220902172737.170349-6-mjrosato@linux.ibm.com> +[thuth: Rename "forwarding_assist" property to "forwarding-assist"] +Signed-off-by: Thomas Huth +(cherry picked from commit d0bc7091c2013ad2fa164100cf7b17962370e8ab) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 20 ++++++++++++++--- + hw/s390x/s390-pci-inst.c | 40 +++++++++++++++++++++++++++++++-- + hw/s390x/s390-pci-kvm.c | 30 +++++++++++++++++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-kvm.h | 14 ++++++++++++ + 5 files changed, 100 insertions(+), 5 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 07c7c155e3..cd152ce711 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -190,7 +190,10 @@ void s390_pci_sclp_deconfigure(SCCB *sccb) + rc = SCLP_RC_NO_ACTION_REQUIRED; + break; + default: +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1082,6 +1085,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } else { + DPRINTF("zPCI interpretation facilities missing.\n"); + pbdev->interp = false; ++ pbdev->forwarding_assist = false; + } + } + pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); +@@ -1090,11 +1094,13 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (!pbdev->interp) { + /* Do vfio passthrough but intercept for I/O */ + pbdev->fh |= FH_SHM_VFIO; ++ pbdev->forwarding_assist = false; + } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ + pbdev->interp = false; ++ pbdev->forwarding_assist = false; + } + + if (s390_pci_msix_init(pbdev) && !pbdev->interp) { +@@ -1244,7 +1250,10 @@ static void s390_pcihost_reset(DeviceState *dev) + /* Process all pending unplug requests */ + QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { + if (pbdev->unplug_requested) { +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1382,7 +1391,10 @@ static void s390_pci_device_reset(DeviceState *dev) + break; + } + +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1428,6 +1440,8 @@ static Property s390_pci_device_properties[] = { + DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid), + DEFINE_PROP_STRING("target", S390PCIBusDevice, target), + DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true), ++ DEFINE_PROP_BOOL("forwarding-assist", S390PCIBusDevice, forwarding_assist, ++ true), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 651ec38635..20a9bcc7af 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -1066,6 +1066,32 @@ static void fmb_update(void *opaque) + timer_mod(pbdev->fmb_timer, t + pbdev->pci_group->zpci_group.mui); + } + ++static int mpcifc_reg_int_interp(S390PCIBusDevice *pbdev, ZpciFib *fib) ++{ ++ int rc; ++ 
++ rc = s390_pci_kvm_aif_enable(pbdev, fib, pbdev->forwarding_assist); ++ if (rc) { ++ DPRINTF("Failed to enable interrupt forwarding\n"); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++static int mpcifc_dereg_int_interp(S390PCIBusDevice *pbdev, ZpciFib *fib) ++{ ++ int rc; ++ ++ rc = s390_pci_kvm_aif_disable(pbdev); ++ if (rc) { ++ DPRINTF("Failed to disable interrupt forwarding\n"); ++ return rc; ++ } ++ ++ return 0; ++} ++ + int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + uintptr_t ra) + { +@@ -1120,7 +1146,12 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + + switch (oc) { + case ZPCI_MOD_FC_REG_INT: +- if (pbdev->summary_ind) { ++ if (pbdev->interp) { ++ if (mpcifc_reg_int_interp(pbdev, &fib)) { ++ cc = ZPCI_PCI_LS_ERR; ++ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); ++ } ++ } else if (pbdev->summary_ind) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else if (reg_irqs(env, pbdev, fib)) { +@@ -1129,7 +1160,12 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } + break; + case ZPCI_MOD_FC_DEREG_INT: +- if (!pbdev->summary_ind) { ++ if (pbdev->interp) { ++ if (mpcifc_dereg_int_interp(pbdev, &fib)) { ++ cc = ZPCI_PCI_LS_ERR; ++ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); ++ } ++ } else if (!pbdev->summary_ind) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else { +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index 0f16104a74..9134fe185f 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -11,12 +11,42 @@ + + #include "qemu/osdep.h" + ++#include ++ + #include "kvm/kvm_s390x.h" + #include "hw/s390x/pv.h" ++#include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-kvm.h" ++#include "hw/s390x/s390-pci-inst.h" + #include "cpu_models.h" + + bool s390_pci_kvm_interp_allowed(void) + { + return kvm_s390_get_zpci_op() && !s390_is_pv(); + } ++ ++int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) ++{ ++ struct kvm_s390_zpci_op args = { ++ .fh = pbdev->fh, ++ .op = KVM_S390_ZPCIOP_REG_AEN, ++ .u.reg_aen.ibv = fib->aibv, ++ .u.reg_aen.sb = fib->aisb, ++ .u.reg_aen.noi = FIB_DATA_NOI(fib->data), ++ .u.reg_aen.isc = FIB_DATA_ISC(fib->data), ++ .u.reg_aen.sbo = FIB_DATA_AISBO(fib->data), ++ .u.reg_aen.flags = (assist) ? 
0 : KVM_S390_ZPCIOP_REGAEN_HOST ++ }; ++ ++ return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++} ++ ++int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) ++{ ++ struct kvm_s390_zpci_op args = { ++ .fh = pbdev->fh, ++ .op = KVM_S390_ZPCIOP_DEREG_AEN ++ }; ++ ++ return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index a9843dfe97..5b09f0cf2f 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -351,6 +351,7 @@ struct S390PCIBusDevice { + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; ++ bool forwarding_assist; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h +index 80a2e7d0ca..933814a402 100644 +--- a/include/hw/s390x/s390-pci-kvm.h ++++ b/include/hw/s390x/s390-pci-kvm.h +@@ -12,13 +12,27 @@ + #ifndef HW_S390_PCI_KVM_H + #define HW_S390_PCI_KVM_H + ++#include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-inst.h" ++ + #ifdef CONFIG_KVM + bool s390_pci_kvm_interp_allowed(void); ++int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist); ++int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev); + #else + static inline bool s390_pci_kvm_interp_allowed(void) + { + return false; + } ++static inline int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, ++ bool assist) ++{ ++ return -EINVAL; ++} ++static inline int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) ++{ ++ return -EINVAL; ++} + #endif + + #endif +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch b/SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch new file mode 100644 index 0000000..56f228b --- /dev/null +++ b/SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch @@ -0,0 +1,319 @@ +From 62fbb66d18f598d0896164383aab465e093fb0c1 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:32 -0400 +Subject: [PATCH 07/42] s390x/pci: enable for load/store interpretation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [7/41] 3a96e901e295bb9e0c530638c45b5da5d60c00bd + +If the ZPCI_OP ioctl reports that is is available and usable, then the +underlying KVM host will enable load/store intepretation for any guest +device without a SHM bit in the guest function handle. For a device that +will be using interpretation support, ensure the guest function handle +matches the host function handle; this value is re-checked every time the +guest issues a SET PCI FN to enable the guest device as it is the only +opportunity to reflect function handle changes. + +By default, unless interpret=off is specified, interpretation support will +always be assumed and exploited if the necessary ioctl and features are +available on the host kernel. When these are unavailable, we will silently +revert to the interception model; this allows existing guest configurations +to work unmodified on hosts with and without zPCI interpretation support, +allowing QEMU to choose the best support model available. 
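As a quick orientation before the hunks, the plug-time decision being introduced can be condensed into one standalone function. The struct, the helper name and the numeric mask values below are stand-ins for this sketch only, not the definitions from s390-pci-bus.h.

    #include <stdbool.h>
    #include <stdint.h>

    /* Placeholder bit values -- the real masks are defined in s390-pci-bus.h. */
    #define FH_MASK_ENABLE 0x80000000u
    #define FH_SHM_VFIO    0x00010000u
    #define FH_SHM_EMUL    0x00020000u

    struct zpci_dev { uint32_t fh; bool interp; };

    /* Decide how a freshly plugged zPCI device is presented to the guest. */
    static void choose_handle_mode(struct zpci_dev *d, bool is_vfio,
                                   uint32_t host_fh, bool interp_allowed)
    {
        if (!is_vfio) {
            d->interp = false;
            d->fh |= FH_SHM_EMUL;          /* emulated devices are always intercepted */
        } else if (d->interp && interp_allowed) {
            /* Interpreted: mirror the host handle, but present the function as
             * disabled until the guest enables it via CLP SET PCI FN. */
            d->fh = host_fh & ~FH_MASK_ENABLE;
        } else {
            d->interp = false;
            d->fh |= FH_SHM_VFIO;          /* vfio passthrough, I/O still intercepted */
        }
    }

The key point is the middle branch: an interpreted device's guest handle mirrors the host handle with only the enable bit masked off, whereas intercepted devices keep a shadow handle carrying an SHM bit.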
+ +Signed-off-by: Matthew Rosato +Acked-by: Thomas Huth +Message-Id: <20220902172737.170349-4-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit dd1d5fd9684beeb0c14c39f497ef2aa9ac683aa7) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/meson.build | 1 + + hw/s390x/s390-pci-bus.c | 66 ++++++++++++++++++++++++++++++++- + hw/s390x/s390-pci-inst.c | 16 ++++++++ + hw/s390x/s390-pci-kvm.c | 22 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-kvm.h | 24 ++++++++++++ + target/s390x/kvm/kvm.c | 7 ++++ + target/s390x/kvm/kvm_s390x.h | 1 + + 8 files changed, 137 insertions(+), 1 deletion(-) + create mode 100644 hw/s390x/s390-pci-kvm.c + create mode 100644 include/hw/s390x/s390-pci-kvm.h + +diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build +index 28484256ec..6e6e47fcda 100644 +--- a/hw/s390x/meson.build ++++ b/hw/s390x/meson.build +@@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( + 's390-skeys-kvm.c', + 's390-stattrib-kvm.c', + 'pv.c', ++ 's390-pci-kvm.c', + )) + s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'tod-tcg.c', +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 01b58ebc70..18bfae0465 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -16,6 +16,7 @@ + #include "qapi/visitor.h" + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-kvm.h" + #include "hw/s390x/s390-pci-vfio.h" + #include "hw/pci/pci_bus.h" + #include "hw/qdev-properties.h" +@@ -971,12 +972,51 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr) + } + } + ++static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev) ++{ ++ uint32_t idx, fh; ++ ++ if (!s390_pci_get_host_fh(pbdev, &fh)) { ++ return -EPERM; ++ } ++ ++ /* ++ * The host device is already in an enabled state, but we always present ++ * the initial device state to the guest as disabled (ZPCI_FS_DISABLED). ++ * Therefore, mask off the enable bit from the passthrough handle until ++ * the guest issues a CLP SET PCI FN later to enable the device. ++ */ ++ pbdev->fh = fh & ~FH_MASK_ENABLE; ++ ++ /* Next, see if the idx is already in-use */ ++ idx = pbdev->fh & FH_MASK_INDEX; ++ if (pbdev->idx != idx) { ++ if (s390_pci_find_dev_by_idx(s, idx)) { ++ return -EINVAL; ++ } ++ /* ++ * Update the idx entry with the passed through idx ++ * If the relinquished idx is lower than next_idx, use it ++ * to replace next_idx ++ */ ++ g_hash_table_remove(s->zpci_table, &pbdev->idx); ++ if (idx < s->next_idx) { ++ s->next_idx = idx; ++ } ++ pbdev->idx = idx; ++ g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev); ++ } ++ ++ return 0; ++} ++ + static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { + S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); + PCIDevice *pdev = NULL; + S390PCIBusDevice *pbdev = NULL; ++ int rc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + PCIBridge *pb = PCI_BRIDGE(dev); +@@ -1022,12 +1062,35 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + set_pbdev_info(pbdev); + + if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { +- pbdev->fh |= FH_SHM_VFIO; ++ /* ++ * By default, interpretation is always requested; if the available ++ * facilities indicate it is not available, fallback to the ++ * interception model. 
++ */ ++ if (pbdev->interp) { ++ if (s390_pci_kvm_interp_allowed()) { ++ rc = s390_pci_interp_plug(s, pbdev); ++ if (rc) { ++ error_setg(errp, "Plug failed for zPCI device in " ++ "interpretation mode: %d", rc); ++ return; ++ } ++ } else { ++ DPRINTF("zPCI interpretation facilities missing.\n"); ++ pbdev->interp = false; ++ } ++ } + pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); + /* Fill in CLP information passed via the vfio region */ + s390_pci_get_clp_info(pbdev); ++ if (!pbdev->interp) { ++ /* Do vfio passthrough but intercept for I/O */ ++ pbdev->fh |= FH_SHM_VFIO; ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; ++ /* Always intercept emulated devices */ ++ pbdev->interp = false; + } + + if (s390_pci_msix_init(pbdev)) { +@@ -1360,6 +1423,7 @@ static Property s390_pci_device_properties[] = { + DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED), + DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid), + DEFINE_PROP_STRING("target", S390PCIBusDevice, target), ++ DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 6d400d4147..651ec38635 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -18,6 +18,8 @@ + #include "sysemu/hw_accel.h" + #include "hw/s390x/s390-pci-inst.h" + #include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-kvm.h" ++#include "hw/s390x/s390-pci-vfio.h" + #include "hw/s390x/tod.h" + + #ifndef DEBUG_S390PCI_INST +@@ -246,6 +248,20 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra) + goto out; + } + ++ /* ++ * Take this opportunity to make sure we still have an accurate ++ * host fh. It's possible part of the handle changed while the ++ * device was disabled to the guest (e.g. vfio hot reset for ++ * ISM during plug) ++ */ ++ if (pbdev->interp) { ++ /* Take this opportunity to make sure we are sync'd with host */ ++ if (!s390_pci_get_host_fh(pbdev, &pbdev->fh) || ++ !(pbdev->fh & FH_MASK_ENABLE)) { ++ stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FH); ++ goto out; ++ } ++ } + pbdev->fh |= FH_MASK_ENABLE; + pbdev->state = ZPCI_FS_ENABLED; + stl_p(&ressetpci->fh, pbdev->fh); +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +new file mode 100644 +index 0000000000..0f16104a74 +--- /dev/null ++++ b/hw/s390x/s390-pci-kvm.c +@@ -0,0 +1,22 @@ ++/* ++ * s390 zPCI KVM interfaces ++ * ++ * Copyright 2022 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "kvm/kvm_s390x.h" ++#include "hw/s390x/pv.h" ++#include "hw/s390x/s390-pci-kvm.h" ++#include "cpu_models.h" ++ ++bool s390_pci_kvm_interp_allowed(void) ++{ ++ return kvm_s390_get_zpci_op() && !s390_is_pv(); ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index da3cde2bb4..a9843dfe97 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -350,6 +350,7 @@ struct S390PCIBusDevice { + IndAddr *indicator; + bool pci_unplug_request_processed; + bool unplug_requested; ++ bool interp; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h +new file mode 100644 +index 0000000000..80a2e7d0ca +--- /dev/null ++++ b/include/hw/s390x/s390-pci-kvm.h +@@ -0,0 +1,24 @@ ++/* ++ * s390 PCI KVM interfaces ++ * ++ * Copyright 2022 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#ifndef HW_S390_PCI_KVM_H ++#define HW_S390_PCI_KVM_H ++ ++#ifdef CONFIG_KVM ++bool s390_pci_kvm_interp_allowed(void); ++#else ++static inline bool s390_pci_kvm_interp_allowed(void) ++{ ++ return false; ++} ++#endif ++ ++#endif +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index ba04997da1..30712487d4 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -158,6 +158,7 @@ static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; ++static int cap_zpci_op; + + static bool mem_op_storage_key_support; + +@@ -363,6 +364,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); ++ cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); + + kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); +@@ -2579,3 +2581,8 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++int kvm_s390_get_zpci_op(void) ++{ ++ return cap_zpci_op; ++} +diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h +index 05a5e1e6f4..aaae8570de 100644 +--- a/target/s390x/kvm/kvm_s390x.h ++++ b/target/s390x/kvm/kvm_s390x.h +@@ -27,6 +27,7 @@ void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); + int kvm_s390_get_hpage_1m(void); + int kvm_s390_get_ri(void); ++int kvm_s390_get_zpci_op(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); + int kvm_s390_get_clock_ext(uint8_t *tod_high, uint64_t *tod_clock); + int kvm_s390_set_clock(uint8_t tod_high, uint64_t tod_clock); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch b/SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch new file mode 100644 index 0000000..2778225 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch @@ -0,0 +1,192 @@ +From b98a5bc4c21284dd0a8a1c86b91af81fcb75f060 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:35 -0400 +Subject: [PATCH 10/42] s390x/pci: let intercept devices have separate PCI + groups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [10/41] 1545bdcd2e21386afa9869f0414e96eecb62647d + +Let's use the reserved pool of simulated PCI groups to allow intercept +devices to have separate groups from interpreted devices as some group +values may be different. If we run out of simulated PCI groups, subsequent +intercept devices just get the default group. +Furthermore, if we encounter any PCI groups from hostdevs that are marked +as simulated, let's just assign them to the default group to avoid +conflicts between host simulated groups and our own simulated groups. + +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-7-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 30dcf4f7fd23bef7d72a2454c60881710fd4c785) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 19 ++++++++++++++-- + hw/s390x/s390-pci-vfio.c | 40 ++++++++++++++++++++++++++++++--- + include/hw/s390x/s390-pci-bus.h | 6 ++++- + 3 files changed, 59 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index cd152ce711..d8b1e44a02 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -748,13 +748,14 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) + object_unref(OBJECT(iommu)); + } + +-S390PCIGroup *s390_group_create(int id) ++S390PCIGroup *s390_group_create(int id, int host_id) + { + S390PCIGroup *group; + S390pciState *s = s390_get_phb(); + + group = g_new0(S390PCIGroup, 1); + group->id = id; ++ group->host_id = host_id; + QTAILQ_INSERT_TAIL(&s->zpci_groups, group, link); + return group; + } +@@ -772,12 +773,25 @@ S390PCIGroup *s390_group_find(int id) + return NULL; + } + ++S390PCIGroup *s390_group_find_host_sim(int host_id) ++{ ++ S390PCIGroup *group; ++ S390pciState *s = s390_get_phb(); ++ ++ QTAILQ_FOREACH(group, &s->zpci_groups, link) { ++ if (group->id >= ZPCI_SIM_GRP_START && group->host_id == host_id) { ++ return group; ++ } ++ } ++ return NULL; ++} ++ + static void s390_pci_init_default_group(void) + { + S390PCIGroup *group; + ClpRspQueryPciGrp *resgrp; + +- group = s390_group_create(ZPCI_DEFAULT_FN_GRP); ++ group = s390_group_create(ZPCI_DEFAULT_FN_GRP, ZPCI_DEFAULT_FN_GRP); + resgrp = &group->zpci_group; + resgrp->fr = 1; + resgrp->dasm = 0; +@@ -825,6 +839,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) + NULL, g_free); + s->zpci_table = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, NULL); + s->bus_no = 0; ++ s->next_sim_grp = ZPCI_SIM_GRP_START; + QTAILQ_INIT(&s->pending_sei); + QTAILQ_INIT(&s->zpci_devs); + QTAILQ_INIT(&s->zpci_dma_limit); +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 08bcc55e85..338f436e87 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -150,13 +150,18 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + { + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_group *cap; ++ S390pciState *s = s390_get_phb(); + ClpRspQueryPciGrp *resgrp; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint8_t start_gid = pbdev->zpci_fn.pfgid; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); + +- /* If capability not provided, just use the 
default group */ +- if (hdr == NULL) { ++ /* ++ * If capability not provided or the underlying hostdev is simulated, just ++ * use the default group. ++ */ ++ if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) { + trace_s390_pci_clp_cap(vpci->vbasedev.name, + VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); + pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; +@@ -165,11 +170,40 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + } + cap = (void *) hdr; + ++ /* ++ * For an intercept device, let's use an existing simulated group if one ++ * one was already created for other intercept devices in this group. ++ * If not, create a new simulated group if any are still available. ++ * If all else fails, just fall back on the default group. ++ */ ++ if (!pbdev->interp) { ++ pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid); ++ if (pbdev->pci_group) { ++ /* Use existing simulated group */ ++ pbdev->zpci_fn.pfgid = pbdev->pci_group->id; ++ return; ++ } else { ++ if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) { ++ /* All out of simulated groups, use default */ ++ trace_s390_pci_clp_cap(vpci->vbasedev.name, ++ VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); ++ pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; ++ pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP); ++ return; ++ } else { ++ /* We can assign a new simulated group */ ++ pbdev->zpci_fn.pfgid = s->next_sim_grp; ++ s->next_sim_grp++; ++ /* Fall through to create the new sim group using CLP info */ ++ } ++ } ++ } ++ + /* See if the PCI group is already defined, create if not */ + pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid); + + if (!pbdev->pci_group) { +- pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid); ++ pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid); + + resgrp = &pbdev->pci_group->zpci_group; + if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) { +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 5b09f0cf2f..0605fcea24 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -315,13 +315,16 @@ typedef struct ZpciFmb { + QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + + #define ZPCI_DEFAULT_FN_GRP 0xFF ++#define ZPCI_SIM_GRP_START 0xF0 + typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; ++ int host_id; + QTAILQ_ENTRY(S390PCIGroup) link; + } S390PCIGroup; +-S390PCIGroup *s390_group_create(int id); ++S390PCIGroup *s390_group_create(int id, int host_id); + S390PCIGroup *s390_group_find(int id); ++S390PCIGroup *s390_group_find_host_sim(int host_id); + + struct S390PCIBusDevice { + DeviceState qdev; +@@ -370,6 +373,7 @@ struct S390pciState { + QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; + QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; + QTAILQ_HEAD(, S390PCIGroup) zpci_groups; ++ uint8_t next_sim_grp; + }; + + S390pciState *s390_get_phb(void); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch b/SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch new file mode 100644 index 0000000..bbd9612 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch @@ -0,0 +1,52 @@ +From 65f90bfccf7500978879c15104a79de58173a06b Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:36 -0400 +Subject: [PATCH 11/42] s390x/pci: reflect proper maxstbl for groups of + interpreted devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [11/41] 9ac2f5dedef3d743ef621525eef222a3e09d63b3 + +The maximum supported store block length might be different depending +on whether the instruction is interpretively executed (firmware-reported +maximum) or handled via userspace intercept (host kernel API maximum). +Choose the best available value during group creation. + +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-8-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 9ee8f7e46a7d42ede69a4780200129bf1acb0d01) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 338f436e87..2aefa508a0 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -213,7 +213,11 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + resgrp->msia = cap->msi_addr; + resgrp->mui = cap->mui; + resgrp->i = cap->noi; +- resgrp->maxstbl = cap->maxstbl; ++ if (pbdev->interp && hdr->version >= 2) { ++ resgrp->maxstbl = cap->imaxstbl; ++ } else { ++ resgrp->maxstbl = cap->maxstbl; ++ } + resgrp->version = cap->version; + resgrp->dtsm = ZPCI_DTSM; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch new file mode 100644 index 0000000..215c5dd --- /dev/null +++ b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch @@ -0,0 +1,147 @@ +From 9ec96a236be84e34b16681e658d3910fc3877a44 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 9 Dec 2022 14:57:00 -0500 +Subject: [PATCH 11/11] s390x/pci: reset ISM passthrough devices on shutdown + and system reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] c857d022c7c2f43cdeb66c4f6acfd9272c925b35 + +ISM device firmware stores unique state information that can +can cause a wholesale unmap of the associated IOMMU (e.g. when +we get a termination signal for QEMU) to trigger firmware errors +because firmware believes we are attempting to invalidate entries +that are still in-use by the guest OS (when in fact that guest is +in the process of being terminated or rebooted). +To alleviate this, register both a shutdown notifier (for unexpected +termination cases e.g. virsh destroy) as well as a reset callback +(for cases like guest OS reboot). For each of these scenarios, trigger +PCI device reset; this is enough to indicate to firmware that the IOMMU +is no longer in-use by the guest OS, making it safe to invalidate any +associated IOMMU entries. 
+ +Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") +Signed-off-by: Matthew Rosato +Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] +Signed-off-by: Thomas Huth +(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ + hw/s390x/s390-pci-vfio.c | 2 ++ + include/hw/s390x/s390-pci-bus.h | 5 +++++ + 3 files changed, 35 insertions(+) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index d8b1e44a02..2d92848b0f 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -24,6 +24,8 @@ + #include "hw/pci/msi.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/reset.h" ++#include "sysemu/runstate.h" + + #ifndef DEBUG_S390PCI_BUS + #define DEBUG_S390PCI_BUS 0 +@@ -150,10 +152,30 @@ out: + psccb->header.response_code = cpu_to_be16(rc); + } + ++static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) ++{ ++ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, ++ shutdown_notifier); ++ ++ pci_device_reset(pbdev->pdev); ++} ++ ++static void s390_pci_reset_cb(void *opaque) ++{ ++ S390PCIBusDevice *pbdev = opaque; ++ ++ pci_device_reset(pbdev->pdev); ++} ++ + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) + { + HotplugHandler *hotplug_ctrl; + ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ notifier_remove(&pbdev->shutdown_notifier); ++ qemu_unregister_reset(s390_pci_reset_cb, pbdev); ++ } ++ + /* Unplug the PCI device */ + if (pbdev->pdev) { + DeviceState *pdev = DEVICE(pbdev->pdev); +@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->fh |= FH_SHM_VFIO; + pbdev->forwarding_assist = false; + } ++ /* Register shutdown notifier and reset callback for ISM devices */ ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; ++ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); ++ qemu_register_reset(s390_pci_reset_cb, pbdev); ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 99806e2a84..69af35f4fe 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ /* Store function type separately for type-specific behavior */ ++ pbdev->pft = cap->pft; + + /* + * If appropriate, reduce the size of the supported DMA aperture reported +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 1c46e3a269..e0a9f9385b 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -39,6 +39,9 @@ + #define UID_CHECKING_ENABLED 0x01 + #define ZPCI_DTSM 0x40 + ++/* zPCI Function Types */ ++#define ZPCI_PFT_ISM 5 ++ + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) +@@ -344,6 +347,7 @@ struct S390PCIBusDevice { + uint16_t noi; + uint16_t maxstbl; + uint8_t sum; ++ uint8_t pft; + S390PCIGroup *pci_group; + ClpRspQueryPci zpci_fn; + S390MsixInfo msix; +@@ -352,6 +356,7 @@ struct 
S390PCIBusDevice { + MemoryRegion msix_notify_mr; + IndAddr *summary_ind; + IndAddr *indicator; ++ Notifier shutdown_notifier; + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch new file mode 100644 index 0000000..e1df69f --- /dev/null +++ b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch @@ -0,0 +1,91 @@ +From a0b6c21b555566eb6bc38643269d14c82dfd0226 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:58 -0400 +Subject: [PATCH 10/11] s390x/pci: shrink DMA aperture to be bound by vfio DMA + limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] aa241dd250ad5e696b67c87dddc31ee5aaee9c0e + +Currently, s390x-pci performs accounting against the vfio DMA +limit and triggers the guest to clean up mappings when the limit +is reached. Let's go a step further and also limit the size of +the supported DMA aperture reported to the guest based upon the +initial vfio DMA limit reported for the container (if less than +than the size reported by the firmware/host zPCI layer). This +avoids processing sections of the guest DMA table during global +refresh that, for common use cases, will never be used anway, and +makes exhausting the vfio DMA limit due to mismatch between guest +aperture size and host limit far less likely and more indicitive +of an error. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 11 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + 2 files changed, 12 insertions(+) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 2aefa508a0..99806e2a84 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + cnt->users = 1; + cnt->avail = avail; + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); ++ pbdev->iommu->max_dma_limit = avail; + return cnt; + } + +@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_base *cap; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint64_t vfio_size; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); + +@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ ++ /* ++ * If appropriate, reduce the size of the supported DMA aperture reported ++ * to the guest based upon the vfio DMA limit. 
++ */ ++ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; ++ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { ++ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; ++ } + } + + static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 0605fcea24..1c46e3a269 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -278,6 +278,7 @@ struct S390PCIIOMMU { + uint64_t g_iota; + uint64_t pba; + uint64_t pal; ++ uint64_t max_dma_limit; + GHashTable *iotlb; + S390PCIDMACount *dma_limit; + }; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch b/SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch new file mode 100644 index 0000000..399f115 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch @@ -0,0 +1,49 @@ +From 55294fc4a955491f1fd947e4d98bd6df832c88ba Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:03 -0500 +Subject: [PATCH 01/42] s390x/pci: use a reserved ID for the default PCI group +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [1/41] ad3ed38dec95acf0da04d7669fe772d798d039fc + +The current default PCI group being used can technically collide with a +real group ID passed from a hostdev. Let's instead use a group ID that +comes from a special pool (0xF0-0xFF) that is architected to be reserved +for simulated devices. 
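
The change itself is only the constant bump from 0x20 to 0xFF visible below. Purely as an illustration of the reserved pool, a hypothetical helper (not part of the patch) makes the intent explicit:

    /* The architecture reserves group IDs 0xF0-0xFF for simulated devices,
     * so a default group chosen from this pool can never collide with a
     * group ID reported for a passed-through host device. */
    static inline bool zpci_group_id_is_simulated(uint8_t id)
    {
        return id >= 0xF0;
    }
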
+ +Fixes: 28dc86a072 ("s390x/pci: use a PCI Group structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-2-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit b2892a2b9d45d25b909108ca633d19f9d8d673f5) +Signed-off-by: Cédric Le Goater +--- + include/hw/s390x/s390-pci-bus.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index aa891c178d..2727e7bdef 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -313,7 +313,7 @@ typedef struct ZpciFmb { + } ZpciFmb; + QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + +-#define ZPCI_DEFAULT_FN_GRP 0x20 ++#define ZPCI_DEFAULT_FN_GRP 0xFF + typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch b/SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch new file mode 100644 index 0000000..2fda07a --- /dev/null +++ b/SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch @@ -0,0 +1,59 @@ +From c251a7a16e776072b9c7bbc4e61cfa4f52599b0a Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:05 -0500 +Subject: [PATCH 03/42] s390x/pci: use the passthrough measurement update + interval +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [3/41] bc31ea731fe64e51522f1202e65528311397b919 + +We may have gotten a measurement update interval from the underlying host +via vfio -- Use it to set the interval via which we update the function +measurement block. 
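
Concretely, both FMB timer arm sites switch from the hard-coded DEFAULT_MUI to the measurement update interval (mui) reported for the function's group. A minimal sketch of the resulting pattern, with the wrapper name fmb_rearm() being illustrative only:

    /* Re-arm the function measurement block timer using the host-reported
     * update interval instead of a fixed default. */
    static void fmb_rearm(S390PCIBusDevice *pbdev)
    {
        int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);

        timer_mod(pbdev->fmb_timer,
                  now + pbdev->pci_group->zpci_group.mui);
    }
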
+ +Fixes: 28dc86a072 ("s390x/pci: use a PCI Group structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-4-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit cb6d6a3e6aa1226b67fd218953dcb3866c3a6845) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 11b7f6bfa1..07bab85ce5 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -1046,7 +1046,7 @@ static void fmb_update(void *opaque) + sizeof(pbdev->fmb.last_update))) { + return; + } +- timer_mod(pbdev->fmb_timer, t + DEFAULT_MUI); ++ timer_mod(pbdev->fmb_timer, t + pbdev->pci_group->zpci_group.mui); + } + + int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, +@@ -1204,7 +1204,8 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } + pbdev->fmb_addr = fmb_addr; + timer_mod(pbdev->fmb_timer, +- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + DEFAULT_MUI); ++ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + ++ pbdev->pci_group->zpci_group.mui); + break; + } + default: +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pv-Add-dump-support.patch b/SOURCES/kvm-s390x-pv-Add-dump-support.patch new file mode 100644 index 0000000..dec84fd --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Add-dump-support.patch @@ -0,0 +1,445 @@ +From 86aeb4fd7ff9395afba574e422d83f990ce1f047 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:22 +0000 +Subject: [PATCH 41/42] s390x: pv: Add dump support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [41/41] 2731c2329276e76013e3b3df21e9743bc74edd2b + +Sometimes dumping a guest from the outside is the only way to get the +data that is needed. This can be the case if a dumping mechanism like +KDUMP hasn't been configured or data needs to be fetched at a specific +point. Dumping a protected guest from the outside without help from +fw/hw doesn't yield sufficient data to be useful. Hence we now +introduce PV dump support. + +The PV dump support works by integrating the firmware into the dump +process. New Ultravisor calls are used to initiate the dump process, +dump cpu data, dump memory state and lastly complete the dump process. +The UV calls are exposed by KVM via the new KVM_PV_DUMP command and +its subcommands. The guest's data is fully encrypted and can only be +decrypted by the entity that owns the customer communication key for +the dumped guest. Also dumping needs to be allowed via a flag in the +SE header. + +On the QEMU side of things we store the PV dump data in the newly +introduced architecture ELF sections (storage state and completion +data) and the cpu notes (for cpu dump data). + +Users can use the zgetdump tool to convert the encrypted QEMU dump to an +unencrypted one. 
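
Ordering matters here: the dump must be initialized before any CPU or memory state is requested, and the completion data only becomes valid once everything else has been fetched. A heavily condensed sketch of the sequence the new arch hooks drive (function names as introduced in the diff below; buffer sizing, per-memblock looping and most error handling omitted, and pv_dump_flow_sketch() itself is illustrative):

    static int pv_dump_flow_sketch(S390CPU *cpu, uint8_t *cpu_buf,
                                   uint64_t gaddr, uint64_t len,
                                   uint8_t *mem_buf, uint8_t *compl_buf)
    {
        if (kvm_s390_dump_init()) {                   /* UV "initialize dump" via KVM */
            return -1;                                /* e.g. owner disabled dumping  */
        }
        kvm_s390_dump_cpu(cpu, cpu_buf);              /* encrypted per-CPU note data  */
        kvm_s390_dump_mem_state(gaddr, len, mem_buf); /* storage state, per 1M block  */
        return kvm_s390_dump_completion_data(compl_buf); /* finalize the dump         */
    }
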
+ +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Message-Id: <20221017083822.43118-11-frankja@linux.ibm.com> +(cherry picked from commit 113d8f4e95cf0450bea421263de6ec016c779ad0) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 12 +- + include/sysemu/dump.h | 5 + + target/s390x/arch_dump.c | 262 +++++++++++++++++++++++++++++++++++---- + 3 files changed, 246 insertions(+), 33 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 4aa8fb64d2..5dee060b73 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -709,9 +709,9 @@ static void dump_begin(DumpState *s, Error **errp) + write_elf_notes(s, errp); + } + +-static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, +- int64_t filter_area_start, +- int64_t filter_area_length) ++int64_t dump_filtered_memblock_size(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { + int64_t size, left, right; + +@@ -729,9 +729,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, + return size; + } + +-static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, +- int64_t filter_area_start, +- int64_t filter_area_length) ++int64_t dump_filtered_memblock_start(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { + if (filter_area_length) { + /* return -1 if the block is not within filter area */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 38ccac7190..4ffed0b659 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -215,4 +215,9 @@ typedef struct DumpState { + uint16_t cpu_to_dump16(DumpState *s, uint16_t val); + uint32_t cpu_to_dump32(DumpState *s, uint32_t val); + uint64_t cpu_to_dump64(DumpState *s, uint64_t val); ++ ++int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start, ++ int64_t filter_area_length); ++int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start, ++ int64_t filter_area_length); + #endif +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index f60a14920d..a2329141e8 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -12,11 +12,13 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/units.h" + #include "cpu.h" + #include "s390x-internal.h" + #include "elf.h" + #include "sysemu/dump.h" +- ++#include "hw/s390x/pv.h" ++#include "kvm/kvm_s390x.h" + + struct S390xUserRegsStruct { + uint64_t psw[2]; +@@ -76,9 +78,16 @@ typedef struct noteStruct { + uint64_t todcmp; + uint32_t todpreg; + uint64_t ctrs[16]; ++ uint8_t dynamic[1]; /* ++ * Would be a flexible array member, if ++ * that was legal inside a union. Real ++ * size comes from PV info interface. 
++ */ + } contents; + } QEMU_PACKED Note; + ++static bool pv_dump_initialized; ++ + static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id) + { + int i; +@@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id) + note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa)); + } + ++static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id) ++{ ++ note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA); ++ if (!pv_dump_initialized) { ++ return; ++ } ++ kvm_s390_dump_cpu(cpu, ¬e->contents.dynamic); ++} + + typedef struct NoteFuncDescStruct { + int contents_size; ++ uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */ + void (*note_contents_func)(Note *note, S390CPU *cpu, int id); ++ bool pvonly; + } NoteFuncDesc; + + static const NoteFuncDesc note_core[] = { +- {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus}, +- {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset}, +- { 0, NULL} ++ {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false}, ++ {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false}, ++ { 0, NULL, NULL, false} + }; + + static const NoteFuncDesc note_linux[] = { +- {sizeof_field(Note, contents.prefix), s390x_write_elf64_prefix}, +- {sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs}, +- {sizeof_field(Note, contents.timer), s390x_write_elf64_timer}, +- {sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp}, +- {sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg}, +- {sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo}, +- {sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi}, +- {sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb}, +- { 0, NULL} ++ {sizeof_field(Note, contents.prefix), NULL, s390x_write_elf64_prefix, false}, ++ {sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs, false}, ++ {sizeof_field(Note, contents.timer), NULL, s390x_write_elf64_timer, false}, ++ {sizeof_field(Note, contents.todcmp), NULL, s390x_write_elf64_todcmp, false}, ++ {sizeof_field(Note, contents.todpreg), NULL, s390x_write_elf64_todpreg, false}, ++ {sizeof_field(Note, contents.vregslo), NULL, s390x_write_elf64_vregslo, false}, ++ {sizeof_field(Note, contents.vregshi), NULL, s390x_write_elf64_vregshi, false}, ++ {sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb, false}, ++ {0, kvm_s390_pv_dmp_get_size_cpu, s390x_write_elf64_pv, true}, ++ { 0, NULL, NULL, false} + }; + + static int s390x_write_elf64_notes(const char *note_name, +@@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name, + DumpState *s, + const NoteFuncDesc *funcs) + { +- Note note; ++ Note note, *notep; + const NoteFuncDesc *nf; +- int note_size; ++ int note_size, content_size; + int ret = -1; + + assert(strlen(note_name) < sizeof(note.name)); + + for (nf = funcs; nf->note_contents_func; nf++) { +- memset(¬e, 0, sizeof(note)); +- note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); +- note.hdr.n_descsz = cpu_to_be32(nf->contents_size); +- g_strlcpy(note.name, note_name, sizeof(note.name)); +- (*nf->note_contents_func)(¬e, cpu, id); ++ notep = ¬e; ++ if (nf->pvonly && !s390_is_pv()) { ++ continue; ++ } ++ ++ content_size = nf->note_size_func ? 
nf->note_size_func() : nf->contents_size; ++ note_size = sizeof(note) - sizeof(notep->contents) + content_size; ++ ++ /* Notes with dynamic sizes need to allocate a note */ ++ if (nf->note_size_func) { ++ notep = g_malloc(note_size); ++ } ++ ++ memset(notep, 0, sizeof(note)); + +- note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; +- ret = f(¬e, note_size, s); ++ /* Setup note header data */ ++ notep->hdr.n_descsz = cpu_to_be32(content_size); ++ notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); ++ g_strlcpy(notep->name, note_name, sizeof(notep->name)); ++ ++ /* Get contents and write them out */ ++ (*nf->note_contents_func)(notep, cpu, id); ++ ret = f(notep, note_size, s); ++ ++ if (nf->note_size_func) { ++ g_free(notep); ++ } + + if (ret < 0) { + return -1; +@@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); + } + ++/* PV dump section size functions */ ++static uint64_t get_mem_state_size_from_len(uint64_t len) ++{ ++ return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state(); ++} ++ ++static uint64_t get_size_mem_state(DumpState *s) ++{ ++ return get_mem_state_size_from_len(s->total_size); ++} ++ ++static uint64_t get_size_completion_data(DumpState *s) ++{ ++ return kvm_s390_pv_dmp_get_size_completion_data(); ++} ++ ++/* PV dump section data functions*/ ++static int get_data_completion(DumpState *s, uint8_t *buff) ++{ ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return 0; ++ } ++ rc = kvm_s390_dump_completion_data(buff); ++ if (!rc) { ++ pv_dump_initialized = false; ++ } ++ return rc; ++} ++ ++static int get_mem_state(DumpState *s, uint8_t *buff) ++{ ++ int64_t memblock_size, memblock_start; ++ GuestPhysBlock *block; ++ uint64_t off; ++ int rc; ++ ++ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, ++ s->filter_area_length); ++ if (memblock_start == -1) { ++ continue; ++ } ++ ++ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, ++ s->filter_area_length); ++ ++ off = get_mem_state_size_from_len(block->target_start); ++ ++ rc = kvm_s390_dump_mem_state(block->target_start, ++ get_mem_state_size_from_len(memblock_size), ++ buff + off); ++ if (rc) { ++ return rc; ++ } ++ } ++ ++ return 0; ++} ++ ++static struct sections { ++ uint64_t (*sections_size_func)(DumpState *s); ++ int (*sections_contents_func)(DumpState *s, uint8_t *buff); ++ char sctn_str[12]; ++} sections[] = { ++ { get_size_mem_state, get_mem_state, "pv_mem_meta"}, ++ { get_size_completion_data, get_data_completion, "pv_compl"}, ++ {NULL , NULL, ""} ++}; ++ ++static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff) ++{ ++ Elf64_Shdr *shdr = (void *)buff; ++ struct sections *sctn = sections; ++ uint64_t off = s->section_offset; ++ ++ if (!pv_dump_initialized) { ++ return 0; ++ } ++ ++ for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) { ++ memset(shdr, 0, sizeof(*shdr)); ++ shdr->sh_type = SHT_PROGBITS; ++ shdr->sh_offset = off; ++ shdr->sh_size = sctn->sections_size_func(s); ++ shdr->sh_name = s->string_table_buf->len; ++ g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str)); ++ } ++ ++ return (uintptr_t)shdr - (uintptr_t)buff; ++} ++ ++ ++/* Add arch specific number of sections and their respective sizes */ ++static void arch_sections_add(DumpState *s) ++{ ++ struct sections *sctn = sections; ++ ++ /* ++ * We only do a PV 
dump if we are running a PV guest, KVM supports ++ * the dump API and we got valid dump length information. ++ */ ++ if (!s390_is_pv() || !kvm_s390_get_protected_dump() || ++ !kvm_s390_pv_info_basic_valid()) { ++ return; ++ } ++ ++ /* ++ * Start the UV dump process by doing the initialize dump call via ++ * KVM as the proxy. ++ */ ++ if (!kvm_s390_dump_init()) { ++ pv_dump_initialized = true; ++ } else { ++ /* ++ * Dump init failed, maybe the guest owner disabled dumping. ++ * We'll continue the non-PV dump process since this is no ++ * reason to crash qemu. ++ */ ++ return; ++ } ++ ++ for (; sctn->sections_size_func; sctn++) { ++ s->shdr_num += 1; ++ s->elf_section_data_size += sctn->sections_size_func(s); ++ } ++} ++ ++/* ++ * After the PV dump has been initialized, the CPU data has been ++ * fetched and memory has been dumped, we need to grab the tweak data ++ * and the completion data. ++ */ ++static int arch_sections_write(DumpState *s, uint8_t *buff) ++{ ++ struct sections *sctn = sections; ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return -EINVAL; ++ } ++ ++ for (; sctn->sections_size_func; sctn++) { ++ rc = sctn->sections_contents_func(s, buff); ++ buff += sctn->sections_size_func(s); ++ if (rc) { ++ return rc; ++ } ++ } ++ return 0; ++} ++ + int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks) + { + info->d_machine = EM_S390; + info->d_endian = ELFDATA2MSB; + info->d_class = ELFCLASS64; +- ++ /* ++ * This is evaluated for each dump so we can freely switch ++ * between PV and non-PV. ++ */ ++ if (s390_is_pv() && kvm_s390_get_protected_dump() && ++ kvm_s390_pv_info_basic_valid()) { ++ info->arch_sections_add_fn = *arch_sections_add; ++ info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; ++ info->arch_sections_write_fn = *arch_sections_write; ++ } else { ++ info->arch_sections_add_fn = NULL; ++ info->arch_sections_write_hdr_fn = NULL; ++ info->arch_sections_write_fn = NULL; ++ } + return 0; + } + +@@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + { + int name_size = 8; /* "LINUX" or "CORE" + pad */ + size_t elf_note_size = 0; +- int note_head_size; ++ int note_head_size, content_size; + const NoteFuncDesc *nf; + + assert(class == ELFCLASS64); +@@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + note_head_size = sizeof(Elf64_Nhdr); + + for (nf = note_core; nf->note_contents_func; nf++) { +- elf_note_size = elf_note_size + note_head_size + name_size + +- nf->contents_size; ++ elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size; + } + for (nf = note_linux; nf->note_contents_func; nf++) { ++ if (nf->pvonly && !s390_is_pv()) { ++ continue; ++ } ++ content_size = nf->contents_size ? 
nf->contents_size : nf->note_size_func(); + elf_note_size = elf_note_size + note_head_size + name_size + +- nf->contents_size; ++ content_size; + } + + return (elf_note_size) * nr_cpus; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch new file mode 100644 index 0000000..71dc7dc --- /dev/null +++ b/SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch @@ -0,0 +1,55 @@ +From c7b14d3af7c73a3bf0c00911b85f202840d9b414 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 7 Nov 2022 17:13:49 +0100 +Subject: [PATCH 12/42] s390x/s390-virtio-ccw: Switch off zPCI enhancements on + older machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [12/41] 61e32bab6d68ee9abd6a0751944af82e002b05b4 + +zPCI enhancement features (interpretation and forward assist) were +recently introduced to improve performance on PCI passthrough devices. +To maintain the same behaviour on older Z machines, deactivate the +features with the associated properties. + +Signed-off-by: Cédric Le Goater +Message-Id: <20221107161349.1032730-3-clg@kaod.org> +Reviewed-by: Matthew Rosato +Signed-off-by: Thomas Huth +(cherry picked from commit d3d1a406127f7da482eafbdc871c120c2770bb91) +[ clg: applied zPCI restrictions to rhel8.5.0 machine and below ] +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-virtio-ccw.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index bec270598b..bd80e72cf8 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1130,8 +1130,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch b/SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch new file mode 100644 index 0000000..b07c902 --- /dev/null +++ b/SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch @@ -0,0 +1,70 @@ +From 90fcfe4b07afc5299c5ee69fa663ca46b597fd4b Mon Sep 17 00:00:00 2001 +From: Nico Boehr +Date: Wed, 12 Oct 2022 14:32:29 +0200 +Subject: [PATCH] s390x/tod-kvm: don't save/restore the TOD in PV guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 244: s390x/tod-kvm: don't save/restore the TOD in PV guests +RH-Bugzilla: 2155448 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Commit: [1/1] 3cb3154dd7c1549c54cf8c0483b5f23b235f6db3 + +Under PV, the guest's TOD clock is under control of the ultravisor 
and the +hypervisor cannot change it. + +With upcoming kernel changes[1], the Linux kernel will reject QEMU's +request to adjust the guest's clock in this case, so don't attempt to set +the clock. + +This avoids the following warning message on save/restore of a PV guest: + +warning: Unable to set KVM guest TOD clock: Operation not supported + +[1] https://lore.kernel.org/all/20221011160712.928239-2-nrb@linux.ibm.com/ + +Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") +Signed-off-by: Nico Boehr +Message-Id: <20221012123229.1196007-1-nrb@linux.ibm.com> +[thuth: Add curly braces] +Signed-off-by: Thomas Huth +(cherry picked from commit 38621181ae3cbec62e3490fbc14f6ac01642d07a) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/tod-kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c +index ec855811ae..c804c979b5 100644 +--- a/hw/s390x/tod-kvm.c ++++ b/hw/s390x/tod-kvm.c +@@ -13,6 +13,7 @@ + #include "qemu/module.h" + #include "sysemu/runstate.h" + #include "hw/s390x/tod.h" ++#include "hw/s390x/pv.h" + #include "kvm/kvm_s390x.h" + + static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) +@@ -84,6 +85,14 @@ static void kvm_s390_tod_vm_state_change(void *opaque, bool running, + S390TODState *td = opaque; + Error *local_err = NULL; + ++ /* ++ * Under PV, the clock is under ultravisor control, hence we cannot restore ++ * it on resume. ++ */ ++ if (s390_is_pv()) { ++ return; ++ } ++ + if (running && td->stopped) { + /* Set the old TOD when running the VM - start the TOD clock. */ + kvm_s390_set_tod_raw(&td->base, &local_err); +-- +2.37.3 + diff --git a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch new file mode 100644 index 0000000..1343855 --- /dev/null +++ b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch @@ -0,0 +1,176 @@ +From df836ee4b4e2a69cca5042a3a9daf2c41dc2aa58 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:16 -0500 +Subject: [PATCH 11/13] scsi: protect req->aiocb with AioContext lock + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock +RH-Bugzilla: 2090990 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [1/3] e6a6d4109713e0fd6d6c515535c66196fea98688 + +If requests are being processed in the IOThread when a SCSIDevice is +unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races +with I/O completion callbacks. Both threads load and store req->aiocb. +This can lead to assert(r->req.aiocb == NULL) failures and undefined +behavior. + +Protect r->req.aiocb with the AioContext lock to prevent the race. 
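
The fix is mechanical and repeated in every completion callback touched below: take the AioContext lock first, then load and clear req->aiocb. As a standalone illustration of the required ordering (example_complete() is not one of the modified functions, just a sketch of the shape they all end up with):

    static void example_complete(void *opaque, int ret)
    {
        SCSIDiskReq *r = opaque;
        SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
        AioContext *ctx = blk_get_aio_context(s->qdev.conf.blk);

        aio_context_acquire(ctx);      /* taken before touching r->req.aiocb       */
        assert(r->req.aiocb != NULL);
        r->req.aiocb = NULL;           /* now safe against scsi_req_cancel_async() */

        /* ... normal completion / error accounting ... */

        aio_context_release(ctx);
    }
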
+ +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- + hw/scsi/scsi-generic.c | 11 ++++++----- + 2 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index d4914178ea..179ce22c4a 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -270,9 +270,11 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +@@ -354,10 +356,11 @@ static void scsi_dma_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -390,10 +393,11 @@ static void scsi_read_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -443,10 +447,11 @@ static void scsi_do_read_cb(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -527,10 +532,11 @@ static void scsi_write_complete(void * opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -1659,10 +1665,11 @@ static void scsi_unmap_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (scsi_disk_req_check_error(r, ret, true)) { + scsi_req_unref(&r->req); + g_free(data); +@@ -1738,9 +1745,11 @@ static void scsi_write_same_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ 
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 3742899839..a1a40df64b 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) + SCSIGenericReq *r = (SCSIGenericReq *)opaque; + SCSIDevice *s = r->req.dev; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + scsi_command_complete_noio(r, ret); + aio_context_release(blk_get_aio_context(s->conf.blk)); + } +@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) + SCSIDevice *s = r->req.dev; + int len; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +@@ -387,11 +388,11 @@ static void scsi_write_complete(void * opaque, int ret) + + trace_scsi_generic_write_complete(ret); + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +-- +2.37.3 + diff --git a/SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch b/SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch new file mode 100644 index 0000000..94ff608 --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch @@ -0,0 +1,65 @@ +From 3014c7c11b6e64433fe9f3c463bd91e318ac96b6 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 9 Nov 2022 18:41:18 -0500 +Subject: [PATCH 2/2] target/arm/kvm: Retry KVM_CREATE_VM call if it fails + EINTR + +RH-Author: Jon Maloy +RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes +RH-Bugzilla: 2132609 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Commit: [2/2] 8494bbfb3fcd8693f56312f984d2964d1ca275c2 (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609 +Upstream: Merged + +commit bbde13cd14ad4eec18529ce0bf5876058464e124 +Author: Peter Maydell +Date: Fri Sep 30 12:38:24 2022 +0100 + + target/arm/kvm: Retry KVM_CREATE_VM call if it fails EINTR + + Occasionally the KVM_CREATE_VM ioctl can return EINTR, even though + there is no pending signal to be taken. In commit 94ccff13382055 + we added a retry-on-EINTR loop to the KVM_CREATE_VM call in the + generic KVM code. Adopt the same approach for the use of the + ioctl in the Arm-specific KVM code (where we use it to create a + scratch VM for probing for various things). 
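
The idiom being adopted is the standard retry-on-EINTR loop around the ioctl; in isolation it looks as follows (create_vm_retry() is a hypothetical wrapper, only the do/while loop matches what the hunk below adds):

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int create_vm_retry(int kvmfd, int max_vm_pa_size)
    {
        int vmfd;

        do {
            vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size);
        } while (vmfd == -1 && errno == EINTR);   /* spurious EINTR: just retry */

        return vmfd;   /* still negative on a real error */
    }
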
+ + For more information, see the mailing list thread: + https://lore.kernel.org/qemu-devel/8735e0s1zw.wl-maz@kernel.org/ + + Reported-by: Vitaly Chikunov + Signed-off-by: Peter Maydell + Reviewed-by: Vitaly Chikunov + Reviewed-by: Eric Auger + Acked-by: Marc Zyngier + Message-id: 20220930113824.1933293-1-peter.maydell@linaro.org + +(cherry picked from commit bbde13cd14ad4eec18529ce0bf5876058464e124) +Signed-off-by: Jon Maloy +--- + target/arm/kvm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bbf1ce7ba3..1ae4e51055 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -80,7 +80,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + if (max_vm_pa_size < 0) { + max_vm_pa_size = 0; + } +- vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ do { ++ vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ } while (vmfd == -1 && errno == EINTR); + if (vmfd < 0) { + goto err; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch new file mode 100644 index 0000000..188c6bf --- /dev/null +++ b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch @@ -0,0 +1,50 @@ +From e1870dec813fa6f8482f4f27b7a9bef8c1584b6b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 14 Feb 2023 14:48:37 +0100 +Subject: [PATCH 3/3] target/s390x/arch_dump: Fix memory corruption in + s390x_write_elf64_notes() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 260: target/s390x/arch_dump: Fix memory corruption in s390x_write_elf64_notes() +RH-Bugzilla: 2168187 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] 67b71ed720a1f03d5bda9119969ea95fc4a6106d + +Bugzilla: https://bugzilla.redhat.com/2168187 +Upstream-Status: Posted (and reviewed, but not merged yet) + +"note_size" can be smaller than sizeof(note), so unconditionally calling +memset(notep, 0, sizeof(note)) could cause a memory corruption here in +case notep has been allocated dynamically, thus let's use note_size as +length argument for memset() instead. 
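
Spelled out, since the one-line hunk is easy to gloss over: the note buffer may have been allocated with g_malloc(note_size), and note_size can be smaller than sizeof(Note), so zeroing sizeof(note) bytes writes past the end of the allocation. Illustrative contrast only (alloc_note() is not a function in the patch):

    static Note *alloc_note(size_t note_size)
    {
        Note *notep = g_malloc(note_size);

        memset(notep, 0, note_size);       /* correct: bounded by the allocation */
        /* memset(notep, 0, sizeof(Note))     would overrun whenever note_size   */
        /*                                    is smaller than sizeof(Note)       */
        return notep;
    }
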
+ +Fixes: 113d8f4e95 ("s390x: pv: Add dump support") +Message-Id: <20230214141056.680969-1-thuth@redhat.com> +Reviewed-by: Janosch Frank +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index a2329141e8..a7c44ba49d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, + notep = g_malloc(note_size); + } + +- memset(notep, 0, sizeof(note)); ++ memset(notep, 0, note_size); + + /* Setup note header data */ + notep->hdr.n_descsz = cpu_to_be32(content_size); +-- +2.37.3 + diff --git a/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch new file mode 100644 index 0000000..cfc1e4a --- /dev/null +++ b/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch @@ -0,0 +1,106 @@ +From 8d1a60069cddcc69ef1a6f50f2b55343de348b57 Mon Sep 17 00:00:00 2001 +From: Janis Schoetterl-Glausch +Date: Fri, 6 May 2022 17:39:56 +0200 +Subject: [PATCH 2/3] target/s390x: kvm: Honor storage keys during emulation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 220: s390x: Fix skey test in kvm_unit_test +RH-Bugzilla: 2124757 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [2/2] 980dbb4eba8d2f1da7cf4113230d0a6483cffc4f + +Storage key controlled protection is currently not honored when +emulating instructions. +If available, enable key protection for the MEM_OP ioctl, thereby +enabling it for the s390_cpu_virt_mem_* functions, when using kvm. +As a result, the emulation of the following instructions honors storage +keys: + +* CLP + The Synch I/O CLP command would need special handling in order + to support storage keys, but is currently not supported. +* CHSC + Performing commands asynchronously would require special + handling, but commands are currently always synchronous. +* STSI +* TSCH + Must (and does) not change channel if terminated due to + protection. +* MSCH + Suppressed on protection, works because fetching instruction. +* SSCH + Suppressed on protection, works because fetching instruction. +* STSCH +* STCRW + Suppressed on protection, this works because no partial store is + possible, because the operand cannot span multiple pages. 
+* PCISTB +* MPCIFC +* STPCIFC + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124757 + +Signed-off-by: Janis Schoetterl-Glausch +Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) +Signed-off-by: Cédric Le Goater +--- + target/s390x/kvm/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index c52434985b..ba04997da1 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + static int cap_sync_regs; + static int cap_async_pf; + static int cap_mem_op; ++static int cap_mem_op_extension; + static int cap_s390_irq; + static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + ++static bool mem_op_storage_key_support; ++ + static int active_cmma; + + static int kvm_s390_query_mem_limit(uint64_t *memory_limit) +@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); ++ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); ++ mem_op_storage_key_support = cap_mem_op_extension > 0; + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); +@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + : KVM_S390_MEMOP_LOGICAL_READ, + .buf = (uint64_t)hostbuf, + .ar = ar, ++ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, + }; + int ret; + +@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + if (!hostbuf) { + mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + } ++ if (mem_op_storage_key_support) { ++ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ++ } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); + if (ret < 0) { +-- +2.35.3 + diff --git a/SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch b/SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch new file mode 100644 index 0000000..9ade171 --- /dev/null +++ b/SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch @@ -0,0 +1,80 @@ +From d3602e5afa1e90c5e33625fc528db7f96195bada Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 7 Nov 2022 19:59:46 -0500 +Subject: [PATCH 42/42] ui/vnc-clipboard: fix integer underflow in + vnc_client_cut_text_ext +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 227: ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext +RH-Bugzilla: 2129760 +RH-Acked-by: Mauro Matteo Cascella +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Thomas Huth +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/1] ac19a6c0777e308061bcb6d1de5cc9beaa105a3a (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2129760 +CVE: CVE-2022-3165 +Upstream: Merged + +commit d307040b18bfcb1393b910f1bae753d5c12a4dc7 +Author: Mauro Matteo Cascella +Date: Sun Sep 25 22:45:11 2022 +0200 + + ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext + + Extended ClientCutText messages start with a 4-byte header. 
If len < 4, + an integer underflow occurs in vnc_client_cut_text_ext. The result is + used to decompress data in a while loop in inflate_buffer, leading to + CPU consumption and denial of service. Prevent this by checking dlen in + protocol_client_msg. + + Fixes: CVE-2022-3165 + Fixes: 0bf41cab93e5 ("ui/vnc: clipboard support") + Reported-by: TangPeng + Signed-off-by: Mauro Matteo Cascella + Message-Id: <20220925204511.1103214-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit d307040b18bfcb1393b910f1bae753d5c12a4dc7) +Signed-off-by: Jon Maloy +--- + ui/vnc.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index af02522e84..a14b6861be 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -2442,8 +2442,8 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) + if (len == 1) { + return 8; + } ++ uint32_t dlen = abs(read_s32(data, 4)); + if (len == 8) { +- uint32_t dlen = abs(read_s32(data, 4)); + if (dlen > (1 << 20)) { + error_report("vnc: client_cut_text msg payload has %u bytes" + " which exceeds our limit of 1MB.", dlen); +@@ -2456,8 +2456,13 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) + } + + if (read_s32(data, 4) < 0) { +- vnc_client_cut_text_ext(vs, abs(read_s32(data, 4)), +- read_u32(data, 8), data + 12); ++ if (dlen < 4) { ++ error_report("vnc: malformed payload (header less than 4 bytes)" ++ " in extended clipboard pseudo-encoding."); ++ vnc_client_error(vs); ++ break; ++ } ++ vnc_client_cut_text_ext(vs, dlen, read_u32(data, 8), data + 12); + break; + } + vnc_client_cut_text(vs, read_u32(data, 4), data + 8); +-- +2.37.3 + diff --git a/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch b/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch index 1b8e1e2..ae80c9d 100644 --- a/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch +++ b/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch @@ -1,20 +1,20 @@ -From 05630592885c6c82ba83e197626e773b0d93d608 Mon Sep 17 00:00:00 2001 +From ffdf44cb8b4c743e7ab0ce46d62687d7178f9a49 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 18 Nov 2022 20:26:54 -0500 -Subject: [PATCH 1/6] ui/vnc.c: Fixed a deadlock bug. +Subject: [PATCH 3/3] ui/vnc.c: Fixed a deadlock bug. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 237: ui/vnc.c: Fixed a deadlock bug. -RH-Bugzilla: 2148504 +RH-MergeRequest: 234: ui/vnc.c: Fixed a deadlock bug. +RH-Bugzilla: 2141896 RH-Acked-by: Gerd Hoffmann RH-Acked-by: Marc-André Lureau RH-Acked-by: Dr. 
David Alan Gilbert -RH-Commit: [1/1] 0be7fc3eaa5b41e806adaf75750fee75bb33a92f (jmaloy/jons-qemu-kvm) +RH-Commit: [1/1] d3d1d28d7b621a8ae8a593a5bd5303fa7951c17c (jmaloy/qemu-kvm) -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148504 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141896 Upstream: Merged commit 1dbbe6f172810026c51dc84ed927a3cc23017949 @@ -67,7 +67,7 @@ Signed-off-by: Jon Maloy 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/vnc.c b/ui/vnc.c -index af02522e84..b253e85c65 100644 +index a14b6861be..76372ca1de 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -1354,12 +1354,12 @@ void vnc_disconnect_finish(VncState *vs) diff --git a/SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch b/SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch new file mode 100644 index 0000000..9af491f --- /dev/null +++ b/SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch @@ -0,0 +1,102 @@ +From 56e2aef97e750ffdc572dcecbfc31314728d37a9 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Mon, 7 Mar 2022 12:29:39 +0100 +Subject: [PATCH 2/2] virtio: fix feature negotiation for ACCESS_PLATFORM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 224: virtiofs on s390 secure execution +RH-Bugzilla: 2116302 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] 264d3bdbbde985f16ed6f5a1786547c25fb8cc04 + +Unlike most virtio features ACCESS_PLATFORM is considered mandatory by +QEMU, i.e. the driver must accept it if offered by the device. The +virtio specification says that the driver SHOULD accept the +ACCESS_PLATFORM feature if offered, and that the device MAY fail to +operate if ACCESS_PLATFORM was offered but not negotiated. + +While a SHOULD ain't exactly a MUST, we are certainly allowed to fail +the device when the driver fences ACCESS_PLATFORM. With commit +2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") we already made the +decision to do so whenever the get_dma_as() callback is implemented (by +the bus), which in practice means for the entirety of virtio-pci. + +That means, if the device needs to translate I/O addresses, then +ACCESS_PLATFORM is mandatory. The aforementioned commit tells us in the +commit message that this is for security reasons. More precisely if we +were to allow a less then trusted driver (e.g. an user-space driver, or +a nested guest) to make the device bypass the IOMMU by not negotiating +ACCESS_PLATFORM, then the guest kernel would have no ability to +control/police (by programming the IOMMU) what pieces of guest memory +the driver may manipulate using the device. Which would break security +assumptions within the guest. + +If ACCESS_PLATFORM is offered not because we want the device to utilize +an IOMMU and do address translation, but because the device does not +have access to the entire guest RAM, and needs the driver to grant +access to the bits it needs access to (e.g. confidential guest support), +we still require the guest to have the corresponding logic and to accept +ACCESS_PLATFORM. If the driver does not accept ACCESS_PLATFORM, then +things are bound to go wrong, and we may see failures much less graceful +than failing the device because the driver didn't negotiate +ACCESS_PLATFORM. + +So let us make ACCESS_PLATFORM mandatory for the driver regardless +of whether the get_dma_as() callback is implemented or not. 
+ +Signed-off-by: Halil Pasic +Fixes: 2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") + +Message-Id: <20220307112939.2780117-1-pasic@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Cornelia Huck +(cherry picked from commit 06134e2bc35dc21543d4cbcf31f858c03d383442) +--- + hw/virtio/virtio-bus.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 0f69d1c742..d7ec023adf 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + +- vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +- if (klass->get_dma_as != NULL && has_iommu) { ++ vdev->dma_as = &address_space_memory; ++ if (has_iommu) { ++ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ /* ++ * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and ++ * device operational. If the driver does not accept IOMMU_PLATFORM ++ * we fail the device. ++ */ + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); +- vdev->dma_as = klass->get_dma_as(qbus->parent); +- if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { +- error_setg(errp, ++ if (klass->get_dma_as) { ++ vdev->dma_as = klass->get_dma_as(qbus->parent); ++ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { ++ error_setg(errp, + "iommu_platform=true is not supported by the device"); +- return; ++ return; ++ } + } +- } else { +- vdev->dma_as = &address_space_memory; + } + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch b/SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch new file mode 100644 index 0000000..b5632e1 --- /dev/null +++ b/SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch @@ -0,0 +1,115 @@ +From c731ffdf9faee74e9522dff06e61cda817902088 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Mon, 7 Feb 2022 12:28:57 +0100 +Subject: [PATCH 1/2] virtio: fix the condition for iommu_platform not + supported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 224: virtiofs on s390 secure execution +RH-Bugzilla: 2116302 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] d7edc7e3905a04644c9ff44b0d36122c72068e08 + +The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but +unsupported") claims to fail the device hotplug when iommu_platform +is requested, but not supported by the (vhost) device. On the first +glance the condition for detecting that situation looks perfect, but +because a certain peculiarity of virtio_platform it ain't. + +In fact the aforementioned commit introduces a regression. It breaks +virtio-fs support for Secure Execution, and most likely also for AMD SEV +or any other confidential guest scenario that relies encrypted guest +memory. The same also applies to any other vhost device that does not +support _F_ACCESS_PLATFORM. + +The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates +"device can not access all of the guest RAM" and "iova != gpa, thus +device needs to translate iova". 
+ +Confidential guest technologies currently rely on the device/hypervisor +offering _F_ACCESS_PLATFORM, so that, after the feature has been +negotiated, the guest grants access to the portions of memory the +device needs to see. So in for confidential guests, generally, +_F_ACCESS_PLATFORM is about the restricted access to memory, but not +about the addresses used being something else than guest physical +addresses. + +This is the very reason for which commit f7ef7e6e3b ("vhost: correctly +turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the +vhost device that does not need it, because on the vhost interface it +only means "I/O address translation is needed". + +This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on +VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting the +situation when _F_ACCESS_PLATFORM is requested, but no I/O translation +by the device, and thus no device capability is needed. In this +situation claiming that the device does not support iommu_plattform=on +is counter-productive. So let us stop doing that! + +Signed-off-by: Halil Pasic +Reported-by: Jakob Naucke +Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but +unsupported") +Acked-by: Cornelia Huck +Reviewed-by: Daniel Henrique Barboza +Tested-by: Daniel Henrique Barboza +Cc: Kevin Wolf +Cc: qemu-stable@nongnu.org + +Message-Id: <20220207112857.607829-1-pasic@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Jason Wang +(cherry picked from commit e65902a913bf31ba79a83a3bd3621108b85cf645) +--- + hw/virtio/virtio-bus.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index d23db98c56..0f69d1c742 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ bool vdev_has_iommu; + Error *local_err = NULL; + + DPRINTF("%s: plug device.\n", qbus->name); +@@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + +- if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { +- error_setg(errp, "iommu_platform=true is not supported by the device"); +- return; +- } +- + if (klass->device_plugged != NULL) { + klass->device_plugged(qbus->parent, &local_err); + } +@@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + ++ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + if (klass->get_dma_as != NULL && has_iommu) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); + vdev->dma_as = klass->get_dma_as(qbus->parent); ++ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { ++ error_setg(errp, ++ "iommu_platform=true is not supported by the device"); ++ return; ++ } + } else { + vdev->dma_as = &address_space_memory; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch new file mode 100644 index 0000000..9325d69 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch @@ -0,0 +1,337 @@ +From 31e9e3691789469b93a75d0221387bab3e526094 Mon Sep 17 00:00:00 2001 
+From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:18 -0500 +Subject: [PATCH 13/13] virtio-scsi: reset SCSI devices from main loop thread + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock +RH-Bugzilla: 2090990 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [3/3] 30d7c2bd868efa6694992e75ace22fb48aef161b + +When an IOThread is configured, the ctrl virtqueue is processed in the +IOThread. TMFs that reset SCSI devices are currently called directly +from the IOThread and trigger an assertion failure in blk_drain() from +the following call stack: + +virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset +-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain + + ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. + +The blk_drain() function is not designed to be called from an IOThread +because it needs the Big QEMU Lock (BQL). + +This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) +that runs in the main loop thread under the BQL. This way it's safe to +call blk_drain() and the assertion failure is avoided. + +Introduce s->tmf_bh_list for tracking TMF requests that have been +deferred to the BH. When the BH runs it will grab the entire list and +process all requests. Care must be taken to clear the list when the +virtio-scsi device is reset or unrealized. Otherwise deferred TMF +requests could execute later and lead to use-after-free or other +undefined behavior. + +The s->resetting counter that's used by TMFs that reset SCSI devices is +accessed from multiple threads. This patch makes that explicit by using +atomic accessor functions. With this patch applied the counter is only +modified by the main loop thread under the BQL but can be read by any +thread. + +Reported-by: Qing Wang +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) +Signed-off-by: Stefan Hajnoczi + +Conflicts: +- hw/scsi/virtio-scsi.c + - VirtIOSCSIReq is defined in include/hw/virtio/virtio-scsi.h + downstream instead of hw/scsi/virtio-scsi.c because commit + 3dc584abeef0 ("virtio-scsi: move request-related items from .h to + .c") is missing. Update the struct fields in virtio-scsi.h + downstream. + + - Use qbus_reset_all() downstream instead of bus_cold_reset() because + commit 4a5fc890b1d3 ("scsi: Use device_cold_reset() and + bus_cold_reset()") is missing. + + - Drop GLOBAL_STATE_CODE() because these macros don't exist + downstream. They are assertions/documentation and can be removed + without affecting the code. 
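As an illustrative aside (not part of the patch), the deferral pattern described above can be sketched as a small standalone C program: the fast path only appends a request to a pending list and remembers that a callback is scheduled, while the callback, running in the main loop thread, detaches the entire list in one step and then processes every entry. All names here (tmf_request, defer_request, drain_deferred, pending) are hypothetical stand-ins for the QEMU structures used in the actual change, and the locking, BH and SCSI details are deliberately omitted.

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for a deferred TMF request. */
    struct tmf_request {
        int id;
        struct tmf_request *next;
    };

    static struct tmf_request *pending;   /* filled by the fast path           */
    static int bh_scheduled;              /* "bottom half already queued" flag */

    /* Fast path (the IOThread in the real code): queue and schedule only. */
    static void defer_request(int id)
    {
        struct tmf_request *req = malloc(sizeof(*req));
        if (!req) {
            return;
        }
        req->id = id;
        req->next = pending;              /* ordering simplified vs. the patch */
        pending = req;
        bh_scheduled = 1;                 /* real code: qemu_bh_new/_schedule  */
    }

    /* Slow path (the main loop thread in the real code): detach the whole
     * list first, then process the entries without holding the fast-path
     * lock. */
    static void drain_deferred(void)
    {
        struct tmf_request *list = pending;
        pending = NULL;
        bh_scheduled = 0;
        while (list) {
            struct tmf_request *req = list;
            list = req->next;
            printf("processing deferred TMF %d\n", req->id);
            free(req);
        }
    }

    int main(void)
    {
        defer_request(1);
        defer_request(2);
        drain_deferred();
        return 0;
    }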
+--- + hw/scsi/virtio-scsi.c | 155 +++++++++++++++++++++++++------- + include/hw/virtio/virtio-scsi.h | 21 +++-- + 2 files changed, 139 insertions(+), 37 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index a35257c35a..ef19a9bcd0 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -256,6 +256,118 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) + } + } + ++static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); ++ BusChild *kid; ++ int target; ++ ++ switch (req->req.tmf.subtype) { ++ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: ++ if (!d) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; ++ goto out; ++ } ++ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; ++ goto out; ++ } ++ qatomic_inc(&s->resetting); ++ qdev_reset_all(&d->qdev); ++ qatomic_dec(&s->resetting); ++ break; ++ ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ target = req->req.tmf.lun[1]; ++ qatomic_inc(&s->resetting); ++ ++ rcu_read_lock(); ++ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { ++ SCSIDevice *d1 = SCSI_DEVICE(kid->child); ++ if (d1->channel == 0 && d1->id == target) { ++ qdev_reset_all(&d1->qdev); ++ } ++ } ++ rcu_read_unlock(); ++ ++ qatomic_dec(&s->resetting); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++out: ++ object_unref(OBJECT(d)); ++ ++ virtio_scsi_acquire(s); ++ virtio_scsi_complete_req(req); ++ virtio_scsi_release(s); ++} ++ ++/* Some TMFs must be processed from the main loop thread */ ++static void virtio_scsi_do_tmf_bh(void *opaque) ++{ ++ VirtIOSCSI *s = opaque; ++ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ virtio_scsi_acquire(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ QTAILQ_INSERT_TAIL(&reqs, req, next); ++ } ++ ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ ++ virtio_scsi_release(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { ++ QTAILQ_REMOVE(&reqs, req, next); ++ virtio_scsi_do_one_tmf_bh(req); ++ } ++} ++ ++static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) ++{ ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ virtio_scsi_acquire(s); ++ ++ if (s->tmf_bh) { ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ } ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ ++ /* SAM-6 6.3.2 Hard reset */ ++ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; ++ virtio_scsi_complete_req(req); ++ } ++ ++ virtio_scsi_release(s); ++} ++ ++static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ ++ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); ++ ++ if (!s->tmf_bh) { ++ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); ++ qemu_bh_schedule(s->tmf_bh); ++ } ++} ++ + /* Return 0 if the request is ready to be completed and return to guest; + * -EINPROGRESS if the request is submitted and will be completed later, in the + * case of async cancellation. 
*/ +@@ -263,8 +375,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + { + SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); + SCSIRequest *r, *next; +- BusChild *kid; +- int target; + int ret = 0; + + virtio_scsi_ctx_check(s, d); +@@ -321,15 +431,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: +- if (!d) { +- goto fail; +- } +- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { +- goto incorrect_lun; +- } +- s->resetting++; +- qdev_reset_all(&d->qdev); +- s->resetting--; ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ virtio_scsi_defer_tmf_to_bh(req); ++ ret = -EINPROGRESS; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: +@@ -372,22 +476,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + break; + +- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: +- target = req->req.tmf.lun[1]; +- s->resetting++; +- +- rcu_read_lock(); +- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { +- SCSIDevice *d1 = SCSI_DEVICE(kid->child); +- if (d1->channel == 0 && d1->id == target) { +- qdev_reset_all(&d1->qdev); +- } +- } +- rcu_read_unlock(); +- +- s->resetting--; +- break; +- + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + default: + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; +@@ -603,7 +691,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) + if (!req) { + return; + } +- if (req->dev->resetting) { ++ if (qatomic_read(&req->dev->resetting)) { + req->resp.cmd.response = VIRTIO_SCSI_S_RESET; + } else { + req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; +@@ -784,9 +872,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + assert(!s->dataplane_started); +- s->resetting++; ++ ++ virtio_scsi_reset_tmf_bh(s); ++ ++ qatomic_inc(&s->resetting); + qbus_reset_all(BUS(&s->bus)); +- s->resetting--; ++ qatomic_dec(&s->resetting); + + vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; + vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; +@@ -1018,6 +1109,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) + VirtIOSCSI *s = VIRTIO_SCSI(dev); + Error *err = NULL; + ++ QTAILQ_INIT(&s->tmf_bh_list); ++ + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, + virtio_scsi_handle_event, +@@ -1055,6 +1148,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) + { + VirtIOSCSI *s = VIRTIO_SCSI(dev); + ++ virtio_scsi_reset_tmf_bh(s); ++ + qbus_set_hotplug_handler(BUS(&s->bus), NULL); + virtio_scsi_common_unrealize(dev); + } +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 543681bc18..b0e36f25aa 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -77,13 +77,22 @@ struct VirtIOSCSICommon { + VirtQueue **cmd_vqs; + }; + ++struct VirtIOSCSIReq; ++ + struct VirtIOSCSI { + VirtIOSCSICommon parent_obj; + + SCSIBus bus; +- int resetting; ++ int resetting; /* written from main loop thread, read from any thread */ + bool events_dropped; + ++ /* ++ * TMFs deferred to main loop BH. These fields are protected by ++ * virtio_scsi_acquire(). 
++ */ ++ QEMUBH *tmf_bh; ++ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; ++ + /* Fields for dataplane below */ + AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + +@@ -106,13 +115,11 @@ typedef struct VirtIOSCSIReq { + QEMUSGList qsgl; + QEMUIOVector resp_iov; + +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ /* Used for two-stage request submission and TMFs deferred to BH */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; + +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; ++ /* Used for cancellation of request during TMFs */ ++ int remaining; + + SCSIRequest *sreq; + size_t resp_size; +-- +2.37.3 + diff --git a/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch b/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch index 9047a1e..e6ffec1 100644 --- a/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch +++ b/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch @@ -1,15 +1,19 @@ -From c4cc9d30f656acb7fcdc4d3a4b4a9bb0a34aaf7d Mon Sep 17 00:00:00 2001 +From ebf6be5ba316ffda354af5eb1f1241ad6543b3cd Mon Sep 17 00:00:00 2001 From: Yusuke Okada Date: Thu, 18 Aug 2022 14:46:19 -0400 -Subject: [PATCH] virtiofsd: use g_date_time_get_microsecond to get subsecond +Subject: [PATCH 3/3] virtiofsd: use g_date_time_get_microsecond to get + subsecond +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 223: 8.7.z: virtiofsd: use g_date_time_get_microsecond to get subsecond -RH-Bugzilla: 2132391 -RH-Acked-by: German Maglione -RH-Acked-by: Jano Tomko -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] bf1fe2785996ee9799ae23cc117598517749336c +RH-MergeRequest: 222: virtiofsd: use g_date_time_get_microsecond to get subsecond +RH-Bugzilla: 2018885 +RH-Acked-by: Vivek Goyal +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Sergio Lopez +RH-Commit: [1/1] da8795576acc7029044a801ef42676d66471a577 The "%f" specifier in g_date_time_format() is only available in glib 2.65.2 or later. If combined with older glib, the function returns null @@ -57,5 +61,5 @@ index b3d0674f6d..523d8fbe1e 100644 fmt = localfmt; } -- -2.31.1 +2.35.3 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index b51e40b..b2f9cff 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -83,7 +83,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 22%{?rcrel}%{?dist}.2 +Release: 32%{?rcrel}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -427,26 +427,231 @@ Patch163: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch Patch164: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch # For bz#2120279 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7] Patch165: kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch -# For bz#2116743 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command -Patch166: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch -# For bz#2116743 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command -Patch167: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch -# For bz#2132391 - [virtiofs] virtiofsd debug log's timestamp is NULL [rhel-8.7.0.z] -Patch168: kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch -# For bz#2148504 - VMs hung on vnc_clipboard_send [rhel-8.7.0.z] -Patch169: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch -# For bz#2152085 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] [rhel-8.7.0.z] -Patch170: kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch -# For bz#2152085 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] [rhel-8.7.0.z] -Patch171: kvm-hw-display-qxl-Document-qxl_phys2virt.patch -# For bz#2152085 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] [rhel-8.7.0.z] -Patch172: kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch -# For bz#2152085 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] [rhel-8.7.0.z] -Patch173: kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch -# For bz#2152085 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] [rhel-8.7.0.z] -Patch174: kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch -# For bz#2168217 - while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set [rhel-8.7.0.z] -Patch175: kvm-migration-Read-state-once.patch +# For bz#2117149 - Can't run when memory backing with hugepages and backend type memfd +Patch166: kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch +# For bz#2125271 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0] +Patch167: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +# For bz#2125271 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0] +Patch168: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +# For bz#2124757 - RHEL8: skey test in kvm_unit_test got failed +Patch169: kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +# For bz#2124757 - RHEL8: skey test in kvm_unit_test got failed +Patch170: kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +# For bz#2018885 - [virtiofs] virtiofsd debug log's timestamp is NULL +Patch171: kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch +# For bz#2116302 - RHEL8.6 - virtiofs will not mount fs on secure execution guest +Patch172: kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch 
+# For bz#2116302 - RHEL8.6 - virtiofs will not mount fs on secure execution guest +Patch173: kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch174: kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch175: kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch176: kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch177: kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch178: kvm-Update-linux-headers-to-v6.0-rc4.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch179: kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch180: kvm-s390x-pci-enable-for-load-store-interpretation.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch181: kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch182: kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch183: kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch184: kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch185: kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions 
(kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch186: kvm-dump-Use-ERRP_GUARD.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch187: kvm-dump-Remove-the-sh_info-variable.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch188: kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch189: kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch190: kvm-dump-Add-more-offset-variables.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch191: kvm-dump-Introduce-dump_is_64bit-helper-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch192: kvm-dump-Consolidate-phdr-note-writes.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch193: kvm-dump-Cleanup-dump_begin-write-functions.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch194: kvm-dump-Consolidate-elf-note-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch195: kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch196: kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch197: kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch198: kvm-dump-Rework-get_start_block.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump 
encryption with customer keys - qemu part +Patch199: kvm-dump-Rework-filter-area-variables.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch200: kvm-dump-Rework-dump_calculate_size-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch201: kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch202: kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch203: kvm-dump-simplify-a-bit-kdump-get_next_page.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch204: kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch205: kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch206: kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch207: kvm-dump-Reorder-struct-DumpState.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch208: kvm-dump-Reintroduce-memory_offset-and-section_offset.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch209: kvm-dump-Add-architecture-section-and-section-string-tab.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch210: kvm-s390x-Add-protected-dump-cap.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch211: kvm-s390x-Introduce-PV-query-interface.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part 
+Patch212: kvm-include-elf.h-add-s390x-note-types.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch213: kvm-s390x-Add-KVM-PV-dump-interface.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch214: kvm-s390x-pv-Add-dump-support.patch +# For bz#2129760 - CVE-2022-3165 virt:rhel/qemu-kvm: QEMU: VNC: integer underflow in vnc_client_cut_text_ext leads to CPU exhaustion [rhel-8] +Patch215: kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch +# For bz#2132609 - qemu-kvm: backport some aarch64 fixes +Patch216: kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch +# For bz#2132609 - qemu-kvm: backport some aarch64 fixes +Patch217: kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch +# For bz#2128225 - [s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu +Patch218: kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch +# For bz#2128225 - [s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu +Patch219: kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch +# For bz#2141896 - VMs hung on vnc_clipboard_send +Patch220: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch221: kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch222: kvm-hw-display-qxl-Document-qxl_phys2virt.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch223: kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch224: kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch225: kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch +# For bz#2155448 - RHEL8.8 - KVM: s390: pv: don't allow userspace to set the clock under PV - QEMU part +Patch226: kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch +# For bz#2125119 - Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O +Patch227: kvm-block-mirror-Do-not-wait-for-active-writes.patch +# For bz#2125119 - Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O +Patch228: kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch +# For bz#2125119 - Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O +Patch229: kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch +# For bz#2161188 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch230: kvm-accel-introduce-accelerator-blocker-API.patch +# For bz#2161188 - SVM: non atomic memslot 
updates cause boot failure with seabios and cpu-pm=on +Patch231: kvm-KVM-keep-track-of-running-ioctls.patch +# For bz#2161188 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch232: kvm-kvm-Atomic-memslot-updates.patch +# For bz#2074205 - while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set +Patch233: kvm-migration-Read-state-once.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch234: kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch235: kvm-s390x-pci-coalesce-unmap-operations.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch236: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch237: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch238: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch239: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch240: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch241: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +# For bz#2137740 - Multifd migration fails under a weak network/socket ordering race +Patch242: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +# For bz#2137740 - Multifd migration fails under a weak network/socket ordering race +Patch243: kvm-migration-check-magic-value-for-deciding-the-mapping.patch +# For bz#2168187 - [s390x] qemu-kvm coredumps when SE crashes +Patch244: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch245: kvm-aio_wait_kick-add-missing-memory-barrier.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch246: kvm-qatomic-add-smp_mb__before-after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch247: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch248: kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch249: kvm-edu-add-smp_mb__after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch250: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch251: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch252: kvm-physmem-add-missing-memory-barrier.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch253: kvm-async-update-documentation-of-the-memory-barriers.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch254: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion 
`r->req.aiocb != NULL' failed [8.7.0] +Patch255: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0] +Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0] +Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch BuildRequires: wget BuildRequires: rpm-build @@ -1616,33 +1821,165 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog -* Tue Feb 14 2023 Jon Maloy - 6.2.0-22.el8_7.2 -- kvm-migration-Read-state-once.patch [bz#2168217] -- Resolves: bz#2168217 - (while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set [rhel-8.7.0.z]) - -* Thu Dec 15 2022 Jon Maloy - 6.2.0-21.el8_7.2 -- kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch [bz#2148504] -- kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch [bz#2152085] -- kvm-hw-display-qxl-Document-qxl_phys2virt.patch [bz#2152085] -- kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch [bz#2152085] -- kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch [bz#2152085] -- kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch [bz#2152085] -- Resolves: bz#2148504 - (VMs hung on vnc_clipboard_send [rhel-8.7.0.z]) -- Resolves: bz#2152085 - (CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] [rhel-8.7.0.z]) - -* Wed Oct 12 2022 Miroslav Rezanina - 6.2.0-20.el8_7.2 -- kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch [bz#2132391] -- Resolves: bz#2132391 - ([virtiofs] virtiofsd debug log's timestamp is NULL [rhel-8.7.0.z]) - -* Wed Sep 14 2022 Miroslav Rezanina - 6.2.0-20.el8.1 -- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2116743] -- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2116743] -- Resolves: bz#2116743 - ([RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command) +* Mon Mar 13 2023 Jon Maloy - 6.2.0-32.el8_8 +- kvm-aio_wait_kick-add-missing-memory-barrier.patch [bz#2168472] +- kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2168472] +- kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2168472] +- kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2168472] +- kvm-edu-add-smp_mb__after_rmw.patch [bz#2168472] +- kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2168472] +- kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2168472] +- kvm-physmem-add-missing-memory-barrier.patch [bz#2168472] +- kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2168472] +- kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2168472] +- kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2090990] +- kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2090990] +- kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2090990] +- Resolves: bz#2168472 + (Guest hangs when starting or rebooting) +- Resolves: bz#2090990 + (qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' 
failed [8.7.0]) + +* Wed Feb 15 2023 Jon Maloy - 6.2.0-31 +- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2137740] +- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2137740] +- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168187] +- Resolves: bz#2137740 + (Multifd migration fails under a weak network/socket ordering race) +- Resolves: bz#2168187 + ([s390x] qemu-kvm coredumps when SE crashes) + +* Mon Feb 13 2023 Jon Maloy - 6.2.0-30 +- kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2147617] +- kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2147617] +- kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch [bz#2147617] +- kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch [bz#2147617] +- Resolves: bz#2147617 + (qemu-img finishes successfully while having errors in commit or bitmaps operations) + +* Fri Jan 27 2023 Jon Maloy - 6.2.0-29 +- kvm-block-mirror-Do-not-wait-for-active-writes.patch [bz#2125119] +- kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch [bz#2125119] +- kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch [bz#2125119] +- kvm-accel-introduce-accelerator-blocker-API.patch [bz#2161188] +- kvm-KVM-keep-track-of-running-ioctls.patch [bz#2161188] +- kvm-kvm-Atomic-memslot-updates.patch [bz#2161188] +- kvm-migration-Read-state-once.patch [bz#2074205] +- kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch [bz#2163713] +- kvm-s390x-pci-coalesce-unmap-operations.patch [bz#2163713] +- kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch [bz#2163713] +- kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch [bz#2163713] +- Resolves: bz#2125119 + (Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O) +- Resolves: bz#2161188 + (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on) +- Resolves: bz#2074205 + (while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set) +- Resolves: bz#2163713 + ([s390x] VM fails to start with ISM passed through) + +* Wed Jan 04 2023 Jon Maloy - 6.2.0-28 +- kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch [bz#2155448] +- Resolves: bz#2155448 + (RHEL8.8 - KVM: s390: pv: don't allow userspace to set the clock under PV - QEMU part) + +* Thu Dec 08 2022 Jon Maloy - 6.2.0-27 +- kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch [bz#2148545] +- kvm-hw-display-qxl-Document-qxl_phys2virt.patch [bz#2148545] +- kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch [bz#2148545] +- kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch [bz#2148545] +- kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch [bz#2148545] +- Resolves: bz#2148545 + (CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8]) + +* Wed Nov 23 2022 Jon Maloy - 6.2.0-26 +- kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch [bz#2128225] +- kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch [bz#2128225] +- kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch [bz#2141896] +- Resolves: bz#2128225 + ([s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu) +- Resolves: bz#2141896 + (VMs hung on vnc_clipboard_send) + +* Wed Nov 16 2022 Jon Maloy - 6.2.0-25 +- kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch [bz#2132609] +- 
kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch [bz#2132609] +- Resolves: bz#2132609 + (qemu-kvm: backport some aarch64 fixes) + +* Thu Nov 10 2022 Jon Maloy - 6.2.0-24 +- kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch [bz#1664378 bz#2043909] +- kvm-Update-linux-headers-to-v6.0-rc4.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-enable-for-load-store-interpretation.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch [bz#1664378 bz#2043909] +- kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch [bz#1664378 bz#2043909] +- kvm-dump-Use-ERRP_GUARD.patch [bz#1664378 bz#2043909] +- kvm-dump-Remove-the-sh_info-variable.patch [bz#1664378 bz#2043909] +- kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch [bz#1664378 bz#2043909] +- kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch [bz#1664378 bz#2043909] +- kvm-dump-Add-more-offset-variables.patch [bz#1664378 bz#2043909] +- kvm-dump-Introduce-dump_is_64bit-helper-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Consolidate-phdr-note-writes.patch [bz#1664378 bz#2043909] +- kvm-dump-Cleanup-dump_begin-write-functions.patch [bz#1664378 bz#2043909] +- kvm-dump-Consolidate-elf-note-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch [bz#1664378 bz#2043909] +- kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch [bz#1664378 bz#2043909] +- kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-get_start_block.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-filter-area-variables.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-dump_calculate_size-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch [bz#1664378 bz#2043909] +- kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch [bz#1664378 bz#2043909] +- kvm-dump-simplify-a-bit-kdump-get_next_page.patch [bz#1664378 bz#2043909] +- kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch [bz#1664378 bz#2043909] +- kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch [bz#1664378 bz#2043909] +- kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch [bz#1664378 bz#2043909] +- kvm-dump-Reorder-struct-DumpState.patch [bz#1664378 bz#2043909] +- kvm-dump-Reintroduce-memory_offset-and-section_offset.patch [bz#1664378 bz#2043909] +- kvm-dump-Add-architecture-section-and-section-string-tab.patch [bz#1664378 bz#2043909] +- kvm-s390x-Add-protected-dump-cap.patch [bz#1664378 bz#2043909] +- kvm-s390x-Introduce-PV-query-interface.patch [bz#1664378 bz#2043909] +- kvm-include-elf.h-add-s390x-note-types.patch [bz#1664378 bz#2043909] +- kvm-s390x-Add-KVM-PV-dump-interface.patch [bz#1664378 bz#2043909] +- kvm-s390x-pv-Add-dump-support.patch [bz#1664378 bz#2043909] +- 
kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch [bz#2129760] +- Resolves: bz#1664378 + ([IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part) +- Resolves: bz#2043909 + ([IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part) +- Resolves: bz#2129760 + (CVE-2022-3165 virt:rhel/qemu-kvm: QEMU: VNC: integer underflow in vnc_client_cut_text_ext leads to CPU exhaustion [rhel-8]) + +* Wed Oct 26 2022 Jon Maloy - 6.2.0-23 +- kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch [bz#2116302] +- kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch [bz#2116302] +- Resolves: bz#2116302 + (RHEL8.6 - virtiofs will not mount fs on secure execution guest) + +* Wed Oct 05 2022 Jon Maloy - 6.2.0-22 +- kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch [bz#2124757] +- kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch [bz#2124757] +- kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch [bz#2018885] +- Resolves: bz#2124757 + (RHEL8: skey test in kvm_unit_test got failed) +- Resolves: bz#2018885 + ([virtiofs] virtiofsd debug log's timestamp is NULL) + +* Thu Sep 29 2022 Jon Maloy - 6.2.0-21 +- kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch [bz#2117149] +- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2125271] +- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2125271] +- Resolves: bz#2117149 + (Can't run when memory backing with hugepages and backend type memfd) +- Resolves: bz#2125271 + ([RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0]) * Fri Aug 26 2022 Jon Maloy - 6.2.0-20 - kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120279]