|
|
ae23c9 |
From 467b5e3127faf168fba6ce61a602f82d34a699c7 Mon Sep 17 00:00:00 2001
|
|
|
ae23c9 |
From: Peter Xu <peterx@redhat.com>
|
|
|
ae23c9 |
Date: Thu, 8 Nov 2018 06:29:36 +0000
|
|
|
ae23c9 |
Subject: [PATCH 08/35] intel_iommu: better handling of dmar state switch
|
|
|
ae23c9 |
|
|
|
ae23c9 |
RH-Author: Peter Xu <peterx@redhat.com>
|
|
|
ae23c9 |
Message-id: <20181108062938.21143-6-peterx@redhat.com>
|
|
|
ae23c9 |
Patchwork-id: 82964
|
|
|
ae23c9 |
O-Subject: [RHEL-8 qemu-kvm PATCH 5/7] intel_iommu: better handling of dmar state switch
|
|
|
ae23c9 |
Bugzilla: 1625173
|
|
|
ae23c9 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Bugzilla: 1625173
|
|
|
ae23c9 |
|
|
|
ae23c9 |
QEMU is not handling the global DMAR switch well, especially when from
|
|
|
ae23c9 |
"on" to "off".
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Let's first take the example of system reset.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Assuming that a guest has IOMMU enabled. When it reboots, we will drop
|
|
|
ae23c9 |
all the existing DMAR mappings to handle the system reset, however we'll
|
|
|
ae23c9 |
still keep the existing memory layouts which has the IOMMU memory region
|
|
|
ae23c9 |
enabled. So after the reboot and before the kernel reloads again, there
|
|
|
ae23c9 |
will be no mapping at all for the host device. That's problematic since
|
|
|
ae23c9 |
any software (for example, SeaBIOS) that runs earlier than the kernel
|
|
|
ae23c9 |
after the reboot will assume the IOMMU is disabled, so any DMA from the
|
|
|
ae23c9 |
software will fail.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
For example, a guest that boots on an assigned NVMe device might fail to
|
|
|
ae23c9 |
find the boot device after a system reboot/reset and we'll be able to
|
|
|
ae23c9 |
observe SeaBIOS errors if we capture the debugging log:
|
|
|
ae23c9 |
|
|
|
ae23c9 |
WARNING - Timeout at nvme_wait:144!
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Meanwhile, we should see DMAR errors on the host of that NVMe device.
|
|
|
ae23c9 |
It's the DMA fault that caused a NVMe driver timeout.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
The correct fix should be that we do proper switching of device DMA
|
|
|
ae23c9 |
address spaces when system resets, which will setup correct memory
|
|
|
ae23c9 |
regions and notify the backend of the devices. This might not affect
|
|
|
ae23c9 |
much on non-assigned devices since QEMU VT-d emulation will assume a
|
|
|
ae23c9 |
default passthrough mapping if DMAR is not enabled in the GCMD
|
|
|
ae23c9 |
register (please refer to vtd_iommu_translate). However that's required
|
|
|
ae23c9 |
for an assigned devices, since that'll rebuild the correct GPA to HPA
|
|
|
ae23c9 |
mapping that is needed for any DMA operation during guest bootstrap.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Besides the system reset, we have some other places that might change
|
|
|
ae23c9 |
the global DMAR status and we'd better do the same thing there. For
|
|
|
ae23c9 |
example, when we change the state of GCMD register, or the DMAR root
|
|
|
ae23c9 |
pointer. Do the same refresh for all these places. For these two
|
|
|
ae23c9 |
places we'll also need to explicitly invalidate the context entry cache
|
|
|
ae23c9 |
and iotlb cache.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1625173
|
|
|
ae23c9 |
CC: QEMU Stable <qemu-stable@nongnu.org>
|
|
|
ae23c9 |
Reported-by: Cong Li <coli@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
ae23c9 |
--
|
|
|
ae23c9 |
v2:
|
|
|
ae23c9 |
- do the same for GCMD write, or root pointer update [Alex]
|
|
|
ae23c9 |
- test is carried out by me this time, by observing the
|
|
|
ae23c9 |
vtd_switch_address_space tracepoint after system reboot
|
|
|
ae23c9 |
v3:
|
|
|
ae23c9 |
- rewrite commit message as suggested by Alex
|
|
|
ae23c9 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
ae23c9 |
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
|
|
ae23c9 |
Reviewed-by: Jason Wang <jasowang@redhat.com>
|
|
|
ae23c9 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
ae23c9 |
(cherry picked from commit 2cc9ddccebcaa48b3debfc279a83761fcbb7616c)
|
|
|
ae23c9 |
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
ae23c9 |
---
|
|
|
ae23c9 |
hw/i386/intel_iommu.c | 21 ++++++++++++++-------
|
|
|
ae23c9 |
1 file changed, 14 insertions(+), 7 deletions(-)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
|
|
|
ae23c9 |
index 48d0ba3..a6e87a9 100644
|
|
|
ae23c9 |
--- a/hw/i386/intel_iommu.c
|
|
|
ae23c9 |
+++ b/hw/i386/intel_iommu.c
|
|
|
ae23c9 |
@@ -37,6 +37,8 @@
|
|
|
ae23c9 |
#include "kvm_i386.h"
|
|
|
ae23c9 |
#include "trace.h"
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+static void vtd_address_space_refresh_all(IntelIOMMUState *s);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
|
|
|
ae23c9 |
uint64_t wmask, uint64_t w1cmask)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
@@ -1436,7 +1438,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
|
|
|
ae23c9 |
vtd_reset_context_cache_locked(s);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
vtd_iommu_unlock(s);
|
|
|
ae23c9 |
- vtd_switch_address_space_all(s);
|
|
|
ae23c9 |
+ vtd_address_space_refresh_all(s);
|
|
|
ae23c9 |
/*
|
|
|
ae23c9 |
* From VT-d spec 6.5.2.1, a global context entry invalidation
|
|
|
ae23c9 |
* should be followed by a IOTLB global invalidation, so we should
|
|
|
ae23c9 |
@@ -1727,6 +1729,8 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
|
|
|
ae23c9 |
vtd_root_table_setup(s);
|
|
|
ae23c9 |
/* Ok - report back to driver */
|
|
|
ae23c9 |
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
|
|
|
ae23c9 |
+ vtd_reset_caches(s);
|
|
|
ae23c9 |
+ vtd_address_space_refresh_all(s);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
/* Set Interrupt Remap Table Pointer */
|
|
|
ae23c9 |
@@ -1759,7 +1763,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
|
|
|
ae23c9 |
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- vtd_switch_address_space_all(s);
|
|
|
ae23c9 |
+ vtd_reset_caches(s);
|
|
|
ae23c9 |
+ vtd_address_space_refresh_all(s);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
/* Handle Interrupt Remap Enable/Disable */
|
|
|
ae23c9 |
@@ -3059,6 +3064,12 @@ static void vtd_address_space_unmap_all(IntelIOMMUState *s)
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+static void vtd_address_space_refresh_all(IntelIOMMUState *s)
|
|
|
ae23c9 |
+{
|
|
|
ae23c9 |
+ vtd_address_space_unmap_all(s);
|
|
|
ae23c9 |
+ vtd_switch_address_space_all(s);
|
|
|
ae23c9 |
+}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
memory_region_notify_one((IOMMUNotifier *)private, entry);
|
|
|
ae23c9 |
@@ -3231,11 +3242,7 @@ static void vtd_reset(DeviceState *dev)
|
|
|
ae23c9 |
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
vtd_init(s);
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- /*
|
|
|
ae23c9 |
- * When device reset, throw away all mappings and external caches
|
|
|
ae23c9 |
- */
|
|
|
ae23c9 |
- vtd_address_space_unmap_all(s);
|
|
|
ae23c9 |
+ vtd_address_space_refresh_all(s);
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
|
|
|
ae23c9 |
--
|
|
|
ae23c9 |
1.8.3.1
|
|
|
ae23c9 |
|