From 6443c29d89f8e96601a60b20dee8f4190bfb0859 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 02 2022 07:09:31 +0000 Subject: import qemu-kvm-6.2.0-11.module+el8.6.0+15668+464a1f31.2 --- diff --git a/SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch b/SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch new file mode 100644 index 0000000..c2df0b1 --- /dev/null +++ b/SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch @@ -0,0 +1,107 @@ +From fb23aa4bc1842bb78e270ee7c4fdd663e45e2317 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 24 Mar 2022 16:04:57 +0100 +Subject: [PATCH 01/11] RHEL: disable "seqpacket" for "vhost-vsock-device" in + rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 146: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 [rhel-8.6.0.z] +RH-Commit: [1/1] 516ee668b84252528de6f4946298089b153846f1 +RH-Bugzilla: 2071103 +RH-Acked-by: Jason Wang +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Dr. David Alan Gilbert + +vhost-vsock device in RHEL 8 kernels doesn't support seqpacket. +To avoid problems when migrating a VM from RHEL 9 host, we need to +disable it in rhel8-* machine types. + +Signed-off-by: Stefano Garzarella +--- + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 5 files changed, 18 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 024b025fc2..76fcabec7a 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,16 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2068202 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. ++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index f03a8f0db8..ab6d03e07a 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 5559261d9e..882fe7a68d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) + m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + } + + DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9795eb9406..bec270598b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1109,6 +1109,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + } + DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 04e8759815..4ddb798144 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -443,6 +443,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +-- +2.27.0 + diff --git a/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch b/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch new file mode 100644 index 0000000..3aac284 --- /dev/null +++ b/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch @@ -0,0 +1,128 @@ +From 27fbe5c920c6680f9dbe994963183ee515f40c12 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 16:45:30 -0400 +Subject: [PATCH 1/2] Revert redhat: Add some devices for exporting upstream + machine types + +RH-Author: Jon Maloy +RH-MergeRequest: 177: Revert redhat: Add some devices for exporting upstream machine types +RH-Commit: [1/1] 9e5dd81cc2e2c8f1df76de9f75d815c91c250c69 (jmaloy/qemu-kvm) +RH-Bugzilla: 2077928 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2077928 +Upstream: no + +Manual revert of commit 70d3924521c9bfd912bcf1a1fc76f49eb377de46, since +the directory structure looks different from rhel-av-8.4.0.z where +this commit is taken from. Besides, x86_64-softmmu.mak looks totally +different and should not be affected by this reversal. + +Signed-off-by: Jon Maloy +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 - + .../devices/x86_64-softmmu/x86_64-upstream-devices.mak | 4 ---- + hw/char/parallel.c | 9 --------- + hw/i386/pc_piix.c | 2 +- + hw/i386/pc_q35.c | 2 +- + hw/timer/hpet.c | 8 -------- + 6 files changed, 2 insertions(+), 24 deletions(-) + delete mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index fdbbdf9742..31ce08edab 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -1,5 +1,4 @@ + include ../rh-virtio.mak +-include x86_64-upstream-devices.mak + + CONFIG_AC97=y + CONFIG_ACPI=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +deleted file mode 100644 +index 2cd20f54d2..0000000000 +--- a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak ++++ /dev/null +@@ -1,4 +0,0 @@ +-# We need "isa-parallel" +-CONFIG_PARALLEL=y +-# We need "hpet" +-CONFIG_HPET=y +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index e5f108211b..b45e67bfbb 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,7 +29,6 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" +-#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -535,14 +534,6 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + +- /* Restricted for Red Hat Enterprise Linux */ +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (strstr(mc->name, "rhel")) { +- error_setg(errp, "Device %s is not supported with machine type %s", +- object_get_typename(OBJECT(dev)), mc->name); +- return; +- } +- + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index ab6d03e07a..5f101c8748 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -966,7 +966,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; +- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; + pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; + m->default_display = "std"; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 882fe7a68d..73b0d0d317 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -633,7 +633,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + pcmc->pci_root_uid = 0; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; +- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 202e032524..9520471be2 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,14 +733,6 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + +- /* Restricted for Red Hat Enterprise Linux */ +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (strstr(mc->name, "rhel")) { +- error_setg(errp, "Device %s is not supported with machine type %s", +- object_get_typename(OBJECT(dev)), mc->name); +- return; +- } +- + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +-- +2.35.3 + diff --git a/SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch b/SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch new file mode 100644 index 0000000..c933542 --- /dev/null +++ b/SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch @@ -0,0 +1,59 @@ +From 08103b26cb393920410e0a943d35e79bbd9f5ce0 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 20:54:45 -0400 +Subject: [PATCH 05/11] display/qxl-render: fix race condition in qxl_cursor + (CVE-2021-4207) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 162: display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) +RH-Commit: [1/1] 055c5a8345ad434a723b8106ba0a7b85fe117547 (jmaloy/qemu-kvm) +RH-Bugzilla: 2075683 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Mauro Matteo Cascella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075683 +Upstream: Merged +CVE: CVE-2021-4207 + +commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895 +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:11:06 2022 +0200 + + display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) + + Avoid fetching 'width' and 'height' a second time to prevent possible + race condition. Refer to security advisory + https://starlabs.sg/advisories/22-4207/ for more information. + + Fixes: CVE-2021-4207 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081106.343235-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index d28849b121..237ed293ba 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -266,7 +266,7 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + } + break; + case SPICE_CURSOR_TYPE_ALPHA: +- size = sizeof(uint32_t) * cursor->header.width * cursor->header.height; ++ size = sizeof(uint32_t) * c->width * c->height; + qxl_unpack_chunks(c->data, size, qxl, &cursor->chunk, group_id); + if (qxl->debug > 2) { + cursor_print_ascii_art(c, "qxl/alpha"); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch b/SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch new file mode 100644 index 0000000..4f7455b --- /dev/null +++ b/SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch @@ -0,0 +1,76 @@ +From ef311c8ca284d11dc812a1004de52125fc9eb194 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 02/11] hw/intc/arm_gicv3: Check for !MEMTX_OK instead of + MEMTX_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 158: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [1/3] ebfd87ea3a2de51c6961569861d5f4fba25890cc (jmaloy/qemu-kvm) +RH-Bugzilla: 2075686 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075686 +Upstream: Merged +CVE: CVE-2021-3750 + +commit b9d383ab797f54ae5fa8746117770709921dc529 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:19 2021 +0100 + + hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR + + Quoting Peter Maydell: + + "These MEMTX_* aren't from the memory transaction + API functions; they're just being used by gicd_readl() and + friends as a way to indicate a success/failure so that the + actual MemoryRegionOps read/write fns like gicv3_dist_read() + can log a guest error." + + We are going to introduce more MemTxResult bits, so it is + safer to check for !MEMTX_OK rather than MEMTX_ERROR. + + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Peter Maydell + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Peter Maydell + +(cherry picked from commit b9d383ab797f54ae5fa8746117770709921dc529) +Signed-off-by: Jon Maloy +--- + hw/intc/arm_gicv3_redist.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c +index c8ff3eca08..99b11ca5ee 100644 +--- a/hw/intc/arm_gicv3_redist.c ++++ b/hw/intc/arm_gicv3_redist.c +@@ -462,7 +462,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest read at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +@@ -521,7 +521,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch new file mode 100644 index 0000000..f50104d --- /dev/null +++ b/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch @@ -0,0 +1,52 @@ +From f663b951b87735bee6b5a4d75b726f609e557f8c Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 21 Apr 2022 16:24:35 +0200 +Subject: [PATCH 11/11] iotests/108: Fix when missing user_allow_other + +RH-Author: Hanna Reitz +RH-MergeRequest: 173: qcow2: Improve refcount structure rebuilding +RH-Commit: [4/4] cea00f9cb38639d4c51fab13c20311d1737d5ca3 +RH-Bugzilla: 2072242 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +FUSE exports' allow-other option defaults to "auto", which means that it +will try passing allow_other as a mount option, and fall back to not +using it when an error occurs. We make no effort to hide fusermount's +error message (because it would be difficult, and because users might +want to know about the fallback occurring), and so when allow_other does +not work (primarily when /etc/fuse.conf does not contain +user_allow_other), this error message will appear and break the +reference output. + +We do not need allow_other here, though, so we can just pass +allow-other=off to fix that. + +Reported-by: Markus Armbruster +Signed-off-by: Hanna Reitz +Message-Id: <20220421142435.569600-1-hreitz@redhat.com> +Tested-by: Markus Armbruster +Tested-by: Eric Blake +(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 23abbeaff0..775ff08eca 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -326,7 +326,7 @@ else + + $QSD \ + --blockdev file,node-name=export-node,filename="$TEST_IMG" \ +- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ + --pidfile "$TEST_DIR/qsd.pid" \ + & + +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch new file mode 100644 index 0000000..288557c --- /dev/null +++ b/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch @@ -0,0 +1,445 @@ +From a5cc7f98669fbde7290fc363cb61dc7bd41718f3 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:51 +0200 +Subject: [PATCH 09/11] iotests/108: Test new refcount rebuild algorithm + +RH-Author: Hanna Reitz +RH-MergeRequest: 173: qcow2: Improve refcount structure rebuilding +RH-Commit: [2/4] db808dcc1c5ee9fd901c0800b3fac1348fab246f +RH-Bugzilla: 2072242 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +One clear problem with how qcow2's refcount structure rebuild algorithm +used to be before "qcow2: Improve refcount structure rebuilding" was +that it is prone to failure for qcow2 images on block devices: There is +generally unused space after the actual image, and if that exceeds what +one refblock covers, the old algorithm would invariably write the +reftable past the block device's end, which cannot work. The new +algorithm does not have this problem. + +Test it with three tests: +(1) Create an image with more empty space at the end than what one + refblock covers, see whether rebuilding the refcount structures + results in a change in the image file length. (It should not.) + +(2) Leave precisely enough space somewhere at the beginning of the image + for the new reftable (and the refblock for that place), see whether + the new algorithm puts the reftable there. (It should.) + +(3) Test the original problem: Create (something like) a block device + with a fixed size, then create a qcow2 image in there, write some + data, and then have qemu-img check rebuild the refcount structures. + Before HEAD^, the reftable would have been written past the image + file end, i.e. outside of what the block device provides, which + cannot work. HEAD^ should have fixed that. + ("Something like a block device" means a loop device if we can use + one ("sudo -n losetup" works), or a FUSE block export with + growable=false otherwise.) + +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-3-hreitz@redhat.com> +(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) + +Conflicts: +- 108: The downstream qemu-storage-daemon does not support --daemonize, + so this switch has been replaced by a loop waiting for the PID file to + appear + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- + tests/qemu-iotests/108.out | 81 ++++++++++++ + 2 files changed, 343 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 8eaef0b8bf..23abbeaff0 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -30,13 +30,20 @@ status=1 # failure is the default! + + _cleanup() + { +- _cleanup_test_img ++ _cleanup_test_img ++ if [ -f "$TEST_DIR/qsd.pid" ]; then ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -KILL "$qsd_pid" ++ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null ++ fi ++ rm -f "$TEST_DIR/fuse-export" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + + # get standard environment, filters and checks + . ./common.rc + . ./common.filter ++. ./common.qemu + + # This tests qcow2-specific low-level functionality + _supported_fmt qcow2 +@@ -47,6 +54,22 @@ _supported_os Linux + # files + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + ++# This test either needs sudo -n losetup or FUSE exports to work ++if sudo -n losetup &>/dev/null; then ++ loopdev=true ++else ++ loopdev=false ++ ++ # QSD --export fuse will either yield "Parameter 'id' is missing" ++ # or "Invalid parameter 'fuse'", depending on whether there is ++ # FUSE support or not. ++ error=$($QSD --export fuse 2>&1) ++ if [[ $error = *"'fuse'"* ]]; then ++ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ ++ 'neither is available' ++ fi ++fi ++ + echo + echo '=== Repairing an image without any refcount table ===' + echo +@@ -138,6 +161,244 @@ _make_test_img 64M + poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" + _check_test_img -r all + ++echo ++echo '=== Check rebuilt reftable location ===' ++ ++# In an earlier version of the refcount rebuild algorithm, the ++# reftable was generally placed at the image end (unless something was ++# allocated in the area covered by the refblock right before the image ++# file end, then we would try to place the reftable in that refblock). ++# This was later changed so the reftable would be placed in the ++# earliest possible location. Test this. ++ ++echo ++echo '--- Does the image size increase? ---' ++echo ++ ++# First test: Just create some image, write some data to it, and ++# resize it so there is free space at the end of the image (enough ++# that it spans at least one full refblock, which for cluster_size=512 ++# images, spans 128k). With the old algorithm, the reftable would ++# have then been placed at the end of the image file, but with the new ++# one, it will be put in that free space. ++# We want to check whether the size of the image file increases due to ++# rebuilding the refcount structures (it should not). ++ ++_make_test_img -o 'cluster_size=512' 1M ++# Write something ++$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io ++ ++# Add free space ++file_len=$(stat -c '%s' "$TEST_IMG") ++truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++# Check whether rebuilding the refcount structures increases the image ++# file size ++file_len=$(stat -c '%s' "$TEST_IMG") ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++post_repair_file_len=$(stat -c '%s' "$TEST_IMG") ++ ++if [[ $file_len -eq $post_repair_file_len ]]; then ++ echo 'OK: Image size did not change' ++else ++ echo 'ERROR: Image size differs' \ ++ "($file_len before, $post_repair_file_len after)" ++fi ++ ++echo ++echo '--- Will the reftable occupy a hole specifically left for it? ---' ++echo ++ ++# Note: With cluster_size=512, every refblock covers 128k. ++# The reftable covers 8M per reftable cluster. ++ ++# Create an image that requires two reftable clusters (just because ++# this is more interesting than a single-clustered reftable). ++_make_test_img -o 'cluster_size=512' 9M ++$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io ++ ++# Writing 8M will have resized the reftable. Unfortunately, doing so ++# will leave holes in the file, so we need to fill them up so we can ++# be sure the whole file is allocated. Do that by writing ++# consecutively smaller chunks starting from 8 MB, until the file ++# length increases even with a chunk size of 512. Then we must have ++# filled all holes. ++ofs=$((8 * 1024 * 1024)) ++block_len=$((16 * 1024)) ++while [[ $block_len -ge 512 ]]; do ++ file_len=$(stat -c '%s' "$TEST_IMG") ++ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do ++ # Do not include this in the reference output, it does not ++ # really matter which qemu-io calls we do here exactly ++ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null ++ ofs=$((ofs + block_len)) ++ done ++ block_len=$((block_len / 2)) ++done ++ ++# Fill up to 9M (do not include this in the reference output either, ++# $ofs is random for all we know) ++$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null ++ ++# Make space as follows: ++# - For the first refblock: Right at the beginning of the image (this ++# refblock is placed in the first place possible), ++# - For the reftable somewhere soon afterwards, still near the ++# beginning of the image (i.e. covered by the first refblock); the ++# reftable too is placed in the first place possible, but only after ++# all refblocks have been placed) ++# No space is needed for the other refblocks, because no refblock is ++# put before the space it covers. In this test case, we do not mind ++# if they are placed at the image file's end. ++ ++# Before we make that space, we have to find out the host offset of ++# the area that belonged to the two data clusters at guest offset 4k, ++# because we expect the reftable to be placed there, and we will have ++# to verify that it is. ++ ++l1_offset=$(peek_file_be "$TEST_IMG" 40 8) ++l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) ++l2_offset=$((l2_offset & 0x00fffffffffffe00)) ++data_4k_offset=$(peek_file_be "$TEST_IMG" \ ++ $((l2_offset + 4096 / 512 * 8)) 8) ++data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) ++ ++$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++ ++# Check whether the reftable was put where we expected ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++if [[ $rt_offset -eq $data_4k_offset ]]; then ++ echo 'OK: Reftable is where we expect it' ++else ++ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" ++fi ++ ++echo ++echo '--- Rebuilding refcount structures on block devices ---' ++echo ++ ++# A block device cannot really grow, at least not during qemu-img ++# check. As mentioned in the above cases, rebuilding the refcount ++# structure may lead to new refcount structures being written after ++# the end of the image, and in the past that happened even if there ++# was more than sufficient space in the image. Such post-EOF writes ++# will not work on block devices, so test that the new algorithm ++# avoids it. ++ ++# If we have passwordless sudo and losetup, we can use those to create ++# a block device. Otherwise, we can resort to qemu's FUSE export to ++# create a file that isn't growable, which effectively tests the same ++# thing. ++ ++_cleanup_test_img ++truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" ++ ++if $loopdev; then ++ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") ++ export_mp_driver=host_device ++ sudo -n chmod go+rw "$export_mp" ++else ++ # Create non-growable FUSE export that is a bit like an empty ++ # block device ++ export_mp="$TEST_DIR/fuse-export" ++ export_mp_driver=file ++ touch "$export_mp" ++ ++ $QSD \ ++ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --pidfile "$TEST_DIR/qsd.pid" \ ++ & ++ ++ while [ ! -f "$TEST_DIR/qsd.pid" ]; do ++ sleep 0.1 ++ done ++fi ++ ++# Now create a qcow2 image on the device -- unfortunately, qemu-img ++# create force-creates the file, so we have to resort to the ++# blockdev-create job. ++_launch_qemu \ ++ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "qmp_capabilities" }' \ ++ 'return' ++ ++# Small cluster size again, so the image needs multiple refblocks ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "qcow2", ++ "file": "file", ++ "size": '$((64 * 1024 * 1024))', ++ "cluster-size": 512 ++ } } }' \ ++ '"concluded"' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ ++ 'return' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "quit" }' \ ++ 'return' ++ ++wait=y _cleanup_qemu ++echo ++ ++# Write some data ++$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$export_mp" 48 8) ++rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) ++poke_file "$export_mp" $rb_offset "\x00\x00" ++ ++# Repairing such a simple case should just work ++# (We used to put the reftable at the end of the image file, which can ++# never work for non-growable devices.) ++echo ++TEST_IMG="$export_mp" _check_test_img -r all \ ++ | grep -v '^Repairing cluster.*refcount=1 reference=0' ++ ++if $loopdev; then ++ sudo -n losetup -d "$export_mp" ++else ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -TERM "$qsd_pid" ++ # Wait for process to exit (cannot `wait` because the QSD is daemonized) ++ while [ -f "$TEST_DIR/qsd.pid" ]; do ++ true ++ done ++fi ++ + # success, all done + echo '*** done' + rm -f $seq.full +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index 75bab8dc84..b5401d788d 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: + 0 leaked clusters + 1 corruptions + ++Double checking the fixed image now... ++No errors were found on the image. ++ ++=== Check rebuilt reftable location === ++ ++--- Does the image size increase? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Image size did not change ++ ++--- Will the reftable occupy a hole specifically left for it? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 ++wrote 8388608/8388608 bytes at offset 0 ++8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 1024/1024 bytes at offset 4096 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Reftable is where we expect it ++ ++--- Rebuilding refcount structures on block devices --- ++ ++{ "execute": "qmp_capabilities" } ++{"return": {}} ++{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "IMGFMT", ++ "file": "file", ++ "size": 67108864, ++ "cluster-size": 512 ++ } } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} ++{ "execute": "job-dismiss", "arguments": { "id": "create" } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} ++{"return": {}} ++{ "execute": "quit" } ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++ ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ + Double checking the fixed image now... + No errors were found on the image. + *** done +-- +2.27.0 + diff --git a/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch new file mode 100644 index 0000000..6cd88e8 --- /dev/null +++ b/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch @@ -0,0 +1,162 @@ +From 6c475f4bc2c323d75d40eb37386fbb86819791ec Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:52 +0200 +Subject: [PATCH 10/11] qcow2: Add errp to rebuild_refcount_structure() + +RH-Author: Hanna Reitz +RH-MergeRequest: 173: qcow2: Improve refcount structure rebuilding +RH-Commit: [3/4] 95a6b7bb558a9ed35afae3fa9fefd761312ecb3b +RH-Bugzilla: 2072242 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Instead of fprint()-ing error messages in rebuild_refcount_structure() +and its rebuild_refcounts_write_refblocks() helper, pass them through an +Error object to qcow2_check_refcounts() (which will then print it). + +Suggested-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-4-hreitz@redhat.com> +Reviewed-by: Eric Blake +(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index 555d8ba5ac..09f8ef4927 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2462,7 +2462,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + static int rebuild_refcounts_write_refblocks( + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, + int64_t first_cluster, int64_t end_cluster, +- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, ++ Error **errp + ) + { + BDRVQcow2State *s = bs->opaque; +@@ -2513,8 +2514,8 @@ static int rebuild_refcounts_write_refblocks( + nb_clusters, + &first_free_cluster); + if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); ++ error_setg_errno(errp, -refblock_offset, ++ "ERROR allocating refblock"); + return refblock_offset; + } + +@@ -2536,6 +2537,7 @@ static int rebuild_refcounts_write_refblocks( + on_disk_reftable_entries * + REFTABLE_ENTRY_SIZE); + if (!on_disk_reftable) { ++ error_setg(errp, "ERROR allocating reftable memory"); + return -ENOMEM; + } + +@@ -2559,7 +2561,7 @@ static int rebuild_refcounts_write_refblocks( + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2575,7 +2577,7 @@ static int rebuild_refcounts_write_refblocks( + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2598,7 +2600,8 @@ static int rebuild_refcounts_write_refblocks( + static int rebuild_refcount_structure(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, +- int64_t *nb_clusters) ++ int64_t *nb_clusters, ++ Error **errp) + { + BDRVQcow2State *s = bs->opaque; + int64_t reftable_offset = -1; +@@ -2649,7 +2652,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + 0, *nb_clusters, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2673,8 +2676,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); ++ error_setg_errno(errp, -reftable_offset, ++ "ERROR allocating reftable"); + res->check_errors++; + ret = reftable_offset; + goto fail; +@@ -2692,7 +2695,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + reftable_start_cluster, + reftable_end_cluster, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2722,7 +2725,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2730,7 +2733,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, + reftable_length); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2743,7 +2746,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + &reftable_offset_and_clusters, + sizeof(reftable_offset_and_clusters)); + if (ret < 0) { +- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR setting reftable"); + goto fail; + } + +@@ -2811,11 +2814,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + if (rebuild && (fix & BDRV_FIX_ERRORS)) { + BdrvCheckResult old_res = *res; + int fresh_leaks = 0; ++ Error *local_err = NULL; + + fprintf(stderr, "Rebuilding refcount structure\n"); + ret = rebuild_refcount_structure(bs, res, &refcount_table, +- &nb_clusters); ++ &nb_clusters, &local_err); + if (ret < 0) { ++ error_report_err(local_err); + goto fail; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch new file mode 100644 index 0000000..b7cab6f --- /dev/null +++ b/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch @@ -0,0 +1,465 @@ +From 3daca05a8f845d2a389a6cf767314bcb72109578 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:50 +0200 +Subject: [PATCH 08/11] qcow2: Improve refcount structure rebuilding + +RH-Author: Hanna Reitz +RH-MergeRequest: 173: qcow2: Improve refcount structure rebuilding +RH-Commit: [1/4] 586e7a0fc3cb7cc2296b544ffcef34d8395fa74c +RH-Bugzilla: 2072242 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +When rebuilding the refcount structures (when qemu-img check -r found +errors with refcount = 0, but reference count > 0), the new refcount +table defaults to being put at the image file end[1]. There is no good +reason for that except that it means we will not have to rewrite any +refblocks we already wrote to disk. + +Changing the code to rewrite those refblocks is not too difficult, +though, so let us do that. That is beneficial for images on block +devices, where we cannot really write beyond the end of the image file. + +Use this opportunity to add extensive comments to the code, and refactor +it a bit, getting rid of the backwards-jumping goto. + +[1] Unless there is something allocated in the area pointed to by the + last refblock, so we have to write that refblock. In that case, we + try to put the reftable in there. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 +Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-2-hreitz@redhat.com> +(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ + 1 file changed, 235 insertions(+), 97 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index 4614572252..555d8ba5ac 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2435,111 +2435,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + } + + /* +- * Creates a new refcount structure based solely on the in-memory information +- * given through *refcount_table. All necessary allocations will be reflected +- * in that array. ++ * Helper function for rebuild_refcount_structure(). + * +- * On success, the old refcount structure is leaked (it will be covered by the +- * new refcount structure). ++ * Scan the range of clusters [first_cluster, end_cluster) for allocated ++ * clusters and write all corresponding refblocks to disk. The refblock ++ * and allocation data is taken from the in-memory refcount table ++ * *refcount_table[] (of size *nb_clusters), which is basically one big ++ * (unlimited size) refblock for the whole image. ++ * ++ * For these refblocks, clusters are allocated using said in-memory ++ * refcount table. Care is taken that these allocations are reflected ++ * in the refblocks written to disk. ++ * ++ * The refblocks' offsets are written into a reftable, which is ++ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If ++ * that reftable is of insufficient size, it will be resized to fit. ++ * This reftable is not written to disk. ++ * ++ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed ++ * to point to existing valid refblocks that do not need to be allocated ++ * again.) ++ * ++ * Return whether the on-disk reftable array was resized (true/false), ++ * or -errno on error. + */ +-static int rebuild_refcount_structure(BlockDriverState *bs, +- BdrvCheckResult *res, +- void **refcount_table, +- int64_t *nb_clusters) ++static int rebuild_refcounts_write_refblocks( ++ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, ++ int64_t first_cluster, int64_t end_cluster, ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ ) + { + BDRVQcow2State *s = bs->opaque; +- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; ++ int64_t cluster; + int64_t refblock_offset, refblock_start, refblock_index; +- uint32_t reftable_size = 0; +- uint64_t *on_disk_reftable = NULL; ++ int64_t first_free_cluster = 0; ++ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; ++ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; + void *on_disk_refblock; +- int ret = 0; +- struct { +- uint64_t reftable_offset; +- uint32_t reftable_clusters; +- } QEMU_PACKED reftable_offset_and_clusters; +- +- qcow2_cache_empty(bs, s->refcount_block_cache); ++ bool reftable_grown = false; ++ int ret; + +-write_refblocks: +- for (; cluster < *nb_clusters; cluster++) { ++ for (cluster = first_cluster; cluster < end_cluster; cluster++) { ++ /* Check all clusters to find refblocks that contain non-zero entries */ + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + ++ /* ++ * This cluster is allocated, so we need to create a refblock ++ * for it. The data we will write to disk is just the ++ * respective slice from *refcount_table, so it will contain ++ * accurate refcounts for all clusters belonging to this ++ * refblock. After we have written it, we will therefore skip ++ * all remaining clusters in this refblock. ++ */ ++ + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + +- /* Don't allocate a cluster in a refblock already written to disk */ +- if (first_free_cluster < refblock_start) { +- first_free_cluster = refblock_start; +- } +- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, +- nb_clusters, &first_free_cluster); +- if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); +- res->check_errors++; +- ret = refblock_offset; +- goto fail; +- } ++ if (on_disk_reftable_entries > refblock_index && ++ on_disk_reftable[refblock_index]) ++ { ++ /* ++ * We can get here after a `goto write_refblocks`: We have a ++ * reftable from a previous run, and the refblock is already ++ * allocated. No need to allocate it again. ++ */ ++ refblock_offset = on_disk_reftable[refblock_index]; ++ } else { ++ int64_t refblock_cluster_index; + +- if (reftable_size <= refblock_index) { +- uint32_t old_reftable_size = reftable_size; +- uint64_t *new_on_disk_reftable; ++ /* Don't allocate a cluster in a refblock already written to disk */ ++ if (first_free_cluster < refblock_start) { ++ first_free_cluster = refblock_start; ++ } ++ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, ++ nb_clusters, ++ &first_free_cluster); ++ if (refblock_offset < 0) { ++ fprintf(stderr, "ERROR allocating refblock: %s\n", ++ strerror(-refblock_offset)); ++ return refblock_offset; ++ } + +- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, +- s->cluster_size) / REFTABLE_ENTRY_SIZE; +- new_on_disk_reftable = g_try_realloc(on_disk_reftable, +- reftable_size * +- REFTABLE_ENTRY_SIZE); +- if (!new_on_disk_reftable) { +- res->check_errors++; +- ret = -ENOMEM; +- goto fail; ++ refblock_cluster_index = refblock_offset / s->cluster_size; ++ if (refblock_cluster_index >= end_cluster) { ++ /* ++ * We must write the refblock that holds this refblock's ++ * refcount ++ */ ++ end_cluster = refblock_cluster_index + 1; + } +- on_disk_reftable = new_on_disk_reftable; + +- memset(on_disk_reftable + old_reftable_size, 0, +- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); ++ if (on_disk_reftable_entries <= refblock_index) { ++ on_disk_reftable_entries = ++ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, ++ s->cluster_size) / REFTABLE_ENTRY_SIZE; ++ on_disk_reftable = ++ g_try_realloc(on_disk_reftable, ++ on_disk_reftable_entries * ++ REFTABLE_ENTRY_SIZE); ++ if (!on_disk_reftable) { ++ return -ENOMEM; ++ } + +- /* The offset we have for the reftable is now no longer valid; +- * this will leak that range, but we can easily fix that by running +- * a leak-fixing check after this rebuild operation */ +- reftable_offset = -1; +- } else { +- assert(on_disk_reftable); +- } +- on_disk_reftable[refblock_index] = refblock_offset; ++ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, ++ (on_disk_reftable_entries - ++ *on_disk_reftable_entries_ptr) * ++ REFTABLE_ENTRY_SIZE); + +- /* If this is apparently the last refblock (for now), try to squeeze the +- * reftable in */ +- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && +- reftable_offset < 0) +- { +- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * +- REFTABLE_ENTRY_SIZE); +- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, +- refcount_table, nb_clusters, +- &first_free_cluster); +- if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); +- res->check_errors++; +- ret = reftable_offset; +- goto fail; ++ *on_disk_reftable_ptr = on_disk_reftable; ++ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; ++ ++ reftable_grown = true; ++ } else { ++ assert(on_disk_reftable); + } ++ on_disk_reftable[refblock_index] = refblock_offset; + } + ++ /* Refblock is allocated, write it to disk */ ++ + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* The size of *refcount_table is always cluster-aligned, therefore the +- * write operation will not overflow */ ++ /* ++ * The refblock is simply a slice of *refcount_table. ++ * Note that the size of *refcount_table is always aligned to ++ * whole clusters, so the write operation will not result in ++ * out-of-bounds accesses. ++ */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + +@@ -2547,23 +2576,99 @@ write_refblocks: + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* Go to the end of this refblock */ ++ /* This refblock is done, skip to its end */ + cluster = refblock_start + s->refcount_block_size - 1; + } + +- if (reftable_offset < 0) { +- uint64_t post_refblock_start, reftable_clusters; ++ return reftable_grown; ++} ++ ++/* ++ * Creates a new refcount structure based solely on the in-memory information ++ * given through *refcount_table (this in-memory information is basically just ++ * the concatenation of all refblocks). All necessary allocations will be ++ * reflected in that array. ++ * ++ * On success, the old refcount structure is leaked (it will be covered by the ++ * new refcount structure). ++ */ ++static int rebuild_refcount_structure(BlockDriverState *bs, ++ BdrvCheckResult *res, ++ void **refcount_table, ++ int64_t *nb_clusters) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ int64_t reftable_offset = -1; ++ int64_t reftable_length = 0; ++ int64_t reftable_clusters; ++ int64_t refblock_index; ++ uint32_t on_disk_reftable_entries = 0; ++ uint64_t *on_disk_reftable = NULL; ++ int ret = 0; ++ int reftable_size_changed = 0; ++ struct { ++ uint64_t reftable_offset; ++ uint32_t reftable_clusters; ++ } QEMU_PACKED reftable_offset_and_clusters; ++ ++ qcow2_cache_empty(bs, s->refcount_block_cache); ++ ++ /* ++ * For each refblock containing entries, we try to allocate a ++ * cluster (in the in-memory refcount table) and write its offset ++ * into on_disk_reftable[]. We then write the whole refblock to ++ * disk (as a slice of the in-memory refcount table). ++ * This is done by rebuild_refcounts_write_refblocks(). ++ * ++ * Once we have scanned all clusters, we try to find space for the ++ * reftable. This will dirty the in-memory refcount table (i.e. ++ * make it differ from the refblocks we have already written), so we ++ * need to run rebuild_refcounts_write_refblocks() again for the ++ * range of clusters where the reftable has been allocated. ++ * ++ * This second run might make the reftable grow again, in which case ++ * we will need to allocate another space for it, which is why we ++ * repeat all this until the reftable stops growing. ++ * ++ * (This loop will terminate, because with every cluster the ++ * reftable grows, it can accomodate a multitude of more refcounts, ++ * so that at some point this must be able to cover the reftable ++ * and all refblocks describing it.) ++ * ++ * We then convert the reftable to big-endian and write it to disk. ++ * ++ * Note that we never free any reftable allocations. Doing so would ++ * needlessly complicate the algorithm: The eventual second check ++ * run we do will clean up all leaks we have caused. ++ */ ++ ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ 0, *nb_clusters, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * There was no reftable before, so rebuild_refcounts_write_refblocks() ++ * must have increased its size (from 0 to something). ++ */ ++ assert(reftable_size_changed); ++ ++ do { ++ int64_t reftable_start_cluster, reftable_end_cluster; ++ int64_t first_free_cluster = 0; ++ ++ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; ++ reftable_clusters = size_to_clusters(s, reftable_length); + +- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); +- reftable_clusters = +- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); +- /* Not pretty but simple */ +- if (first_free_cluster < post_refblock_start) { +- first_free_cluster = post_refblock_start; +- } + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); +@@ -2575,24 +2680,55 @@ write_refblocks: + goto fail; + } + +- goto write_refblocks; +- } ++ /* ++ * We need to update the affected refblocks, so re-run the ++ * write_refblocks loop for the reftable's range of clusters. ++ */ ++ assert(offset_into_cluster(s, reftable_offset) == 0); ++ reftable_start_cluster = reftable_offset / s->cluster_size; ++ reftable_end_cluster = reftable_start_cluster + reftable_clusters; ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ reftable_start_cluster, ++ reftable_end_cluster, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * If the reftable size has changed, we will need to find a new ++ * allocation, repeating the loop. ++ */ ++ } while (reftable_size_changed); + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ /* The above loop must have run at least once */ ++ assert(reftable_offset >= 0); ++ ++ /* ++ * All allocations are done, all refblocks are written, convert the ++ * reftable to big-endian and write it to disk. ++ */ ++ ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + cpu_to_be64s(&on_disk_reftable[refblock_index]); + } + +- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, +- reftable_size * REFTABLE_ENTRY_SIZE, ++ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + +- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); ++ assert(reftable_length < INT_MAX); + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, +- reftable_size * REFTABLE_ENTRY_SIZE); ++ reftable_length); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; +@@ -2601,7 +2737,7 @@ write_refblocks: + /* Enter new reftable into the image header */ + reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); + reftable_offset_and_clusters.reftable_clusters = +- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); ++ cpu_to_be32(reftable_clusters); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &reftable_offset_and_clusters, +@@ -2611,12 +2747,14 @@ write_refblocks: + goto fail; + } + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + be64_to_cpus(&on_disk_reftable[refblock_index]); + } + s->refcount_table = on_disk_reftable; + s->refcount_table_offset = reftable_offset; +- s->refcount_table_size = reftable_size; ++ s->refcount_table_size = on_disk_reftable_entries; + update_max_refcount_table_index(s); + + return 0; +-- +2.27.0 + diff --git a/SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch b/SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch new file mode 100644 index 0000000..8a9d7b4 --- /dev/null +++ b/SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch @@ -0,0 +1,176 @@ +From b1eae36683cdfe63af5749b5fe86b1c08fc0f63e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 04/11] softmmu/physmem: Introduce MemTxAttrs::memory field and + MEMTX_ACCESS_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 158: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [3/3] 84d64fe85a106f8faf579e43266d4349fc8e65b4 (jmaloy/qemu-kvm) +RH-Bugzilla: 2075686 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075686 +Upstream: Merged +CVE: CVE-2021-3750 +Conflicts: memalign.h has not been introduced in this version. Instead, + we include osdep.h where the function prototypes are to be + found. + +commit 3ab6fdc91b72e156da22848f0003ff4225690ced +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:21 2021 +0100 + + softmmu/physmem: Introduce MemTxAttrs::memory field and MEMTX_ACCESS_ERROR + + Add the 'memory' bit to the memory attributes to restrict bus + controller accesses to memories. + + Introduce flatview_access_allowed() to check bus permission + before running any bus transaction. + + Have read/write accessors return MEMTX_ACCESS_ERROR if an access is + restricted. + + There is no change for the default case where 'memory' is not set. + + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-4-philmd@redhat.com> + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + [thuth: Replaced MEMTX_BUS_ERROR with MEMTX_ACCESS_ERROR, remove "inline"] + Signed-off-by: Thomas Huth + +(cherry picked from commit 3ab6fdc91b72e156da22848f0003ff4225690ced) +Signed-off-by: Jon Maloy +--- + include/exec/memattrs.h | 9 +++++++++ + softmmu/physmem.c | 45 +++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h +index 95f2d20d55..9fb98bc1ef 100644 +--- a/include/exec/memattrs.h ++++ b/include/exec/memattrs.h +@@ -35,6 +35,14 @@ typedef struct MemTxAttrs { + unsigned int secure:1; + /* Memory access is usermode (unprivileged) */ + unsigned int user:1; ++ /* ++ * Bus interconnect and peripherals can access anything (memories, ++ * devices) by default. By setting the 'memory' bit, bus transaction ++ * are restricted to "normal" memories (per the AMBA documentation) ++ * versus devices. Access to devices will be logged and rejected ++ * (see MEMTX_ACCESS_ERROR). ++ */ ++ unsigned int memory:1; + /* Requester ID (for MSI for example) */ + unsigned int requester_id:16; + /* Invert endianness for this page */ +@@ -66,6 +74,7 @@ typedef struct MemTxAttrs { + #define MEMTX_OK 0 + #define MEMTX_ERROR (1U << 0) /* device returned an error */ + #define MEMTX_DECODE_ERROR (1U << 1) /* nothing at that address */ ++#define MEMTX_ACCESS_ERROR (1U << 2) /* access denied */ + typedef uint32_t MemTxResult; + + #endif +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 483a31be81..4d0ef5f92f 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -41,6 +41,8 @@ + #include "qemu/config-file.h" + #include "qemu/error-report.h" + #include "qemu/qemu-print.h" ++#include "qemu/log.h" ++#include "qemu/osdep.h" + #include "exec/memory.h" + #include "exec/ioport.h" + #include "sysemu/dma.h" +@@ -2759,6 +2761,33 @@ static bool prepare_mmio_access(MemoryRegion *mr) + return release_lock; + } + ++/** ++ * flatview_access_allowed ++ * @mr: #MemoryRegion to be accessed ++ * @attrs: memory transaction attributes ++ * @addr: address within that memory region ++ * @len: the number of bytes to access ++ * ++ * Check if a memory transaction is allowed. ++ * ++ * Returns: true if transaction is allowed, false if denied. ++ */ ++static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, ++ hwaddr addr, hwaddr len) ++{ ++ if (likely(!attrs.memory)) { ++ return true; ++ } ++ if (memory_region_is_ram(mr)) { ++ return true; ++ } ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Invalid access to non-RAM device at " ++ "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " ++ "region '%s'\n", addr, len, memory_region_name(mr)); ++ return false; ++} ++ + /* Called within RCU critical section. */ + static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, +@@ -2773,7 +2802,10 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + const uint8_t *buf = ptr; + + for (;;) { +- if (!memory_access_is_direct(mr, true)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. */ ++ } else if (!memory_access_is_direct(mr, true)) { + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); + /* XXX: could force current_cpu to NULL to avoid +@@ -2818,6 +2850,9 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_write_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +@@ -2836,7 +2871,10 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, + + fuzz_dma_read_cb(addr, len, mr); + for (;;) { +- if (!memory_access_is_direct(mr, false)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. */ ++ } else if (!memory_access_is_direct(mr, false)) { + /* I/O case */ + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); +@@ -2879,6 +2917,9 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_read_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +-- +2.27.0 + diff --git a/SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch b/SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch new file mode 100644 index 0000000..d554db2 --- /dev/null +++ b/SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch @@ -0,0 +1,81 @@ +From b570eb3a78a3096af55979ce63d96f5fc012f9e5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 03/11] softmmu/physmem: Simplify flatview_write and + address_space_access_valid +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 158: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [2/3] 66281d1c439be549c7890a662a817dd1b9367ef2 (jmaloy/qemu-kvm) +RH-Bugzilla: 2075686 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075686 +Upstream: Merged +CVE: CVE-2021-3750 + +commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:20 2021 +0100 + + softmmu/physmem: Simplify flatview_write and address_space_access_valid + + Remove unuseful local 'result' variables. + + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Alexander Bulekov + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-3-philmd@redhat.com> + Signed-off-by: Thomas Huth + +(cherry picked from commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9) +Signed-off-by: Jon Maloy +--- + softmmu/physmem.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 3524c04c2a..483a31be81 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -2815,14 +2815,11 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + hwaddr l; + hwaddr addr1; + MemoryRegion *mr; +- MemTxResult result = MEMTX_OK; + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); +- result = flatview_write_continue(fv, addr, attrs, buf, len, +- addr1, l, mr); +- +- return result; ++ return flatview_write_continue(fv, addr, attrs, buf, len, ++ addr1, l, mr); + } + + /* Called within RCU critical section. */ +@@ -3119,12 +3116,10 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs) + { + FlatView *fv; +- bool result; + + RCU_READ_LOCK_GUARD(); + fv = address_space_to_flatview(as); +- result = flatview_access_valid(fv, addr, len, is_write, attrs); +- return result; ++ return flatview_access_valid(fv, addr, len, is_write, attrs); + } + + static hwaddr +-- +2.27.0 + diff --git a/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch b/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch new file mode 100644 index 0000000..cdff4fb --- /dev/null +++ b/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch @@ -0,0 +1,105 @@ +From 0a493615833edbe6448bc639200b4a5fa7d492e3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 4 May 2022 10:35:17 -0400 +Subject: [PATCH 2/2] ui/cursor: fix integer overflow in cursor_alloc + (CVE-2021-4206) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 190: ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206) +RH-Commit: [1/1] 80588f646942c345a2491812cb41aacd4c0805ff (jmaloy/qemu-kvm) +RH-Bugzilla: 2082622 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Mauro Matteo Cascella + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2082622 +Upstream: Merged +CVE: CVE-2021-4206 + +commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:17:12 2022 +0200 + + ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206) + + Prevent potential integer overflow by limiting 'width' and 'height' to + 512x512. Also change 'datasize' type to size_t. Refer to security + advisory https://starlabs.sg/advisories/22-4206/ for more information. + + Fixes: CVE-2021-4206 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081712.345609-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 7 +++++++ + hw/display/vmware_vga.c | 2 ++ + ui/cursor.c | 8 +++++++- + 3 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index 237ed293ba..ca217004bf 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -247,6 +247,13 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + size_t size; + + c = cursor_alloc(cursor->header.width, cursor->header.height); ++ ++ if (!c) { ++ qxl_set_guest_bug(qxl, "%s: cursor %ux%u alloc error", __func__, ++ cursor->header.width, cursor->header.height); ++ goto fail; ++ } ++ + c->hot_x = cursor->header.hot_spot_x; + c->hot_y = cursor->header.hot_spot_y; + switch (cursor->header.type) { +diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c +index e2969a6c81..2b81d6122f 100644 +--- a/hw/display/vmware_vga.c ++++ b/hw/display/vmware_vga.c +@@ -509,6 +509,8 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s, + int i, pixels; + + qc = cursor_alloc(c->width, c->height); ++ assert(qc != NULL); ++ + qc->hot_x = c->hot_x; + qc->hot_y = c->hot_y; + switch (c->bpp) { +diff --git a/ui/cursor.c b/ui/cursor.c +index 1d62ddd4d0..835f0802f9 100644 +--- a/ui/cursor.c ++++ b/ui/cursor.c +@@ -46,6 +46,8 @@ static QEMUCursor *cursor_parse_xpm(const char *xpm[]) + + /* parse pixel data */ + c = cursor_alloc(width, height); ++ assert(c != NULL); ++ + for (pixel = 0, y = 0; y < height; y++, line++) { + for (x = 0; x < height; x++, pixel++) { + idx = xpm[line][x]; +@@ -91,7 +93,11 @@ QEMUCursor *cursor_builtin_left_ptr(void) + QEMUCursor *cursor_alloc(int width, int height) + { + QEMUCursor *c; +- int datasize = width * height * sizeof(uint32_t); ++ size_t datasize = width * height * sizeof(uint32_t); ++ ++ if (width > 512 || height > 512) { ++ return NULL; ++ } + + c = g_malloc0(sizeof(QEMUCursor) + datasize); + c->width = width; +-- +2.35.3 + diff --git a/SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch b/SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch new file mode 100644 index 0000000..9378e84 --- /dev/null +++ b/SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch @@ -0,0 +1,78 @@ +From 2abf617accf878bec7f1c0419d5b85c93b0aa426 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 10:38:26 -0400 +Subject: [PATCH 07/11] vhost-vsock: detach the virqueue element in case of + error + +RH-Author: Jon Maloy +RH-MergeRequest: 168: vhost-vsock: detach the virqueue element in case of error +RH-Commit: [1/1] f81a39d26a7165f8b717fdeeb9526081cb73129b (jmaloy/qemu-kvm) +RH-Bugzilla: 2075639 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Jason Wang +RH-Acked-by: Stefan Hajnoczi + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075639 +Upstream: Merged +CVE: CVE-2022-26354 + +commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf +Author: Stefano Garzarella +Date: Mon Feb 28 10:50:58 2022 +0100 + + vhost-vsock: detach the virqueue element in case of error + + In vhost_vsock_common_send_transport_reset(), if an element popped from + the virtqueue is invalid, we should call virtqueue_detach_element() to + detach it from the virtqueue before freeing its memory. + + Fixes: fc0b9b0e1c ("vhost-vsock: add virtio sockets device") + Fixes: CVE-2022-26354 + Cc: qemu-stable@nongnu.org + Reported-by: VictorV + Signed-off-by: Stefano Garzarella + Message-Id: <20220228095058.27899-1-sgarzare@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vsock-common.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index 3f3771274e..ed706681ac 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -153,19 +153,23 @@ static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); +- goto out; ++ goto err; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); +- goto out; ++ goto err; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +-out: ++ g_free(elem); ++ return; ++ ++err: ++ virtqueue_detach_element(vq, elem, 0); + g_free(elem); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch b/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch new file mode 100644 index 0000000..494365a --- /dev/null +++ b/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch @@ -0,0 +1,63 @@ +From ba9806fd38f113deb452156fdc644407f061311c Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 8 Mar 2022 10:42:51 +0800 +Subject: [PATCH 06/11] virtio-net: fix map leaking on error during receive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 165: virtio-net: fix map leaking on error during receive +RH-Commit: [1/1] f0546c74a5bfd37de473703f6a7c6f8f29fb0831 (jmaloy/qemu-kvm) +RH-Bugzilla: 2075637 +RH-Acked-by: Mauro Matteo Cascella +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Gerd Hoffmann + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075637 +Upstream: Merged +CVE: CVE-2022-26353 + +commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37 +Author: Jason Wang +Date: Tue Mar 8 10:42:51 2022 +0800 + + virtio-net: fix map leaking on error during receive + + Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") + tries to fix the use after free of the sg by caching the virtqueue + elements in an array and unmap them at once after receiving the + packets, But it forgot to unmap the cached elements on error which + will lead to leaking of mapping and other unexpected results. + + Fixing this by detaching the cached elements on error. This addresses + CVE-2022-26353. + + Reported-by: Victor Tom + Cc: qemu-stable@nongnu.org + Fixes: CVE-2022-26353 + Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +(cherry picked from commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37) +Signed-off-by: Jon Maloy +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f2014d5ea0..e1f4748831 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1862,6 +1862,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + + err: + for (j = 0; j < i; j++) { ++ virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); + g_free(elems[j]); + } + +-- +2.27.0 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index b199b3e..ca923fd 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -83,7 +83,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 11%{?rcrel}%{?dist} +Release: 11%{?rcrel}%{?dist}.2 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -242,6 +242,32 @@ Patch70: kvm-tests-acpi-update-expected-blobs.patch Patch71: kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch # For bz#1975840 - Windows guest hangs after updating and restarting from the guest OS Patch72: kvm-target-i386-properly-reset-TSC-on-reset.patch +# For bz#2071103 - RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.6.0.z] +Patch73: kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch +# For bz#2075686 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] [rhel-8.6.0.z] +Patch74: kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch +# For bz#2075686 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] [rhel-8.6.0.z] +Patch75: kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch +# For bz#2075686 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] [rhel-8.6.0.z] +Patch76: kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch +# For bz#2075683 - CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8] [rhel-8.6.0.z] +Patch77: kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch +# For bz#2075637 - CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8] [rhel-8.6.0.z] +Patch78: kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch +# For bz#2075639 - CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8] [rhel-8.6.0.z] +Patch79: kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch +# For bz#2072242 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) [rhel-8.6.0.z] +Patch80: kvm-qcow2-Improve-refcount-structure-rebuilding.patch +# For bz#2072242 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) [rhel-8.6.0.z] +Patch81: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +# For bz#2072242 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) [rhel-8.6.0.z] +Patch82: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +# For bz#2072242 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) [rhel-8.6.0.z] +Patch83: kvm-iotests-108-Fix-when-missing-user_allow_other.patch +# For bz#2077928 - Remove upstream-only devices from the qemu-kvm binary [rhel-8.6.0.z] +Patch84: kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch +# For bz#2082622 - CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.6.0.z] +Patch85: kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch BuildRequires: wget @@ -1411,6 +1437,39 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Wed Jun 15 2022 Jon Maloy - 6.2.0-11.el8_6.2 +- kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch [bz#2077928] +- kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch [bz#2082622] +- Resolves: bz#2077928 + (Remove upstream-only devices from the qemu-kvm binary [rhel-8.6.0.z]) +- Resolves: bz#2082622 + (CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.6.0.z]) + +* Wed May 25 2022 Jon Maloy - 6.2.0-11.el8_6.1 +- kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2071103] +- kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch [bz#2075686] +- kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch [bz#2075686] +- kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch [bz#2075686] +- kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch [bz#2075683] +- kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch [bz#2075637] +- kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch [bz#2075639] +- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#2072242] +- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#2072242] +- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#2072242] +- kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#2072242] +- Resolves: bz#2071103 + (RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.6.0.z]) +- Resolves: bz#2075686 + (CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] [rhel-8.6.0.z]) +- Resolves: bz#2075683 + (CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8] [rhel-8.6.0.z]) +- Resolves: bz#2075637 + (CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8] [rhel-8.6.0.z]) +- Resolves: bz#2075639 + (CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8] [rhel-8.6.0.z]) +- Resolves: bz#2072242 + (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) [rhel-8.6.0.z]) + * Tue Apr 05 2022 Jon Maloy - 6.2.0-11 - kvm-target-i386-properly-reset-TSC-on-reset.patch [bz#1975840] - Resolves: bz#1975840