|
|
9ae3a8 |
From f3b05560b20866cadb604f0a5a6f4a7698d2e07b Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Alex Williamson <alex.williamson@redhat.com>
|
|
|
9ae3a8 |
Date: Tue, 5 Nov 2013 15:37:35 +0100
|
|
|
9ae3a8 |
Subject: [PATCH 14/25] vfio-pci: Lazy PCI option ROM loading
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
RH-Author: Alex Williamson <alex.williamson@redhat.com>
|
|
|
9ae3a8 |
Message-id: <20131105153734.16057.77668.stgit@bling.home>
|
|
|
9ae3a8 |
Patchwork-id: 55423
|
|
|
9ae3a8 |
O-Subject: [RHEL7 qemu-kvm PATCH 2/5] vfio-pci: Lazy PCI option ROM loading
|
|
|
9ae3a8 |
Bugzilla: 1026550
|
|
|
9ae3a8 |
RH-Acked-by: Bandan Das <bsd@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Bugzilla: 1026550
|
|
|
9ae3a8 |
Upstream commit: 6f864e6ec8812d5a5525a7861ca599c6bcabdebe
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
During vfio-pci initfn, the device is not always in a state where the
|
|
|
9ae3a8 |
option ROM can be read. In the case of graphics cards, there's often
|
|
|
9ae3a8 |
no per-function reset, which means we have host driver state affecting
|
|
|
9ae3a8 |
whether the option ROM is usable. Ideally we want to move reading the
|
|
|
9ae3a8 |
option ROM past any co-assigned device resets to the point where the
|
|
|
9ae3a8 |
guest first tries to read the ROM itself.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
To accomplish this, we switch the memory region for the option ROM to
|
|
|
9ae3a8 |
an I/O region rather than a memory mapped region. This has the side
|
|
|
9ae3a8 |
benefit that we don't waste KVM memory slots for a BAR where we don't
|
|
|
9ae3a8 |
care about performance. This also allows us to delay loading the ROM
|
|
|
9ae3a8 |
from the device until the first read by the guest. We then use the
|
|
|
9ae3a8 |
PCI config space size of the ROM BAR when setting up the BAR through
|
|
|
9ae3a8 |
QEMU PCI.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Another benefit of this approach is that previously when a user set
|
|
|
9ae3a8 |
the ROM to a file using the romfile= option, we still probed VFIO for
|
|
|
9ae3a8 |
the parameters of the ROM, which can result in dmesg errors about an
|
|
|
9ae3a8 |
invalid ROM. We now only probe VFIO to get the ROM contents if the
|
|
|
9ae3a8 |
guest actually tries to read the ROM.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
hw/misc/vfio.c | 184 +++++++++++++++++++++++++++++++++++++-------------------
|
|
|
9ae3a8 |
1 file changed, 122 insertions(+), 62 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
hw/misc/vfio.c | 184 +++++++++++++++++++++++++++++++++++++-------------------
|
|
|
9ae3a8 |
1 files changed, 122 insertions(+), 62 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
|
|
|
9ae3a8 |
index 8e69182..8d84891 100644
|
|
|
9ae3a8 |
--- a/hw/misc/vfio.c
|
|
|
9ae3a8 |
+++ b/hw/misc/vfio.c
|
|
|
9ae3a8 |
@@ -166,6 +166,7 @@ typedef struct VFIODevice {
|
|
|
9ae3a8 |
off_t config_offset; /* Offset of config space region within device fd */
|
|
|
9ae3a8 |
unsigned int rom_size;
|
|
|
9ae3a8 |
off_t rom_offset; /* Offset of ROM region within device fd */
|
|
|
9ae3a8 |
+ void *rom;
|
|
|
9ae3a8 |
int msi_cap_size;
|
|
|
9ae3a8 |
VFIOMSIVector *msi_vectors;
|
|
|
9ae3a8 |
VFIOMSIXInfo *msix;
|
|
|
9ae3a8 |
@@ -1058,6 +1059,125 @@ static const MemoryRegionOps vfio_bar_ops = {
|
|
|
9ae3a8 |
.endianness = DEVICE_LITTLE_ENDIAN,
|
|
|
9ae3a8 |
};
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+static void vfio_pci_load_rom(VFIODevice *vdev)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ struct vfio_region_info reg_info = {
|
|
|
9ae3a8 |
+ .argsz = sizeof(reg_info),
|
|
|
9ae3a8 |
+ .index = VFIO_PCI_ROM_REGION_INDEX
|
|
|
9ae3a8 |
+ };
|
|
|
9ae3a8 |
+ uint64_t size;
|
|
|
9ae3a8 |
+ off_t off = 0;
|
|
|
9ae3a8 |
+ size_t bytes;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
|
|
|
9ae3a8 |
+ error_report("vfio: Error getting ROM info: %m");
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ DPRINTF("Device %04x:%02x:%02x.%x ROM:\n", vdev->host.domain,
|
|
|
9ae3a8 |
+ vdev->host.bus, vdev->host.slot, vdev->host.function);
|
|
|
9ae3a8 |
+ DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
|
|
|
9ae3a8 |
+ (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
|
|
|
9ae3a8 |
+ (unsigned long)reg_info.flags);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ vdev->rom_size = size = reg_info.size;
|
|
|
9ae3a8 |
+ vdev->rom_offset = reg_info.offset;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (!vdev->rom_size) {
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ vdev->rom = g_malloc(size);
|
|
|
9ae3a8 |
+ memset(vdev->rom, 0xff, size);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ while (size) {
|
|
|
9ae3a8 |
+ bytes = pread(vdev->fd, vdev->rom + off, size, vdev->rom_offset + off);
|
|
|
9ae3a8 |
+ if (bytes == 0) {
|
|
|
9ae3a8 |
+ break;
|
|
|
9ae3a8 |
+ } else if (bytes > 0) {
|
|
|
9ae3a8 |
+ off += bytes;
|
|
|
9ae3a8 |
+ size -= bytes;
|
|
|
9ae3a8 |
+ } else {
|
|
|
9ae3a8 |
+ if (errno == EINTR || errno == EAGAIN) {
|
|
|
9ae3a8 |
+ continue;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ error_report("vfio: Error reading device ROM: %m");
|
|
|
9ae3a8 |
+ break;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ VFIODevice *vdev = opaque;
|
|
|
9ae3a8 |
+ uint64_t val = ((uint64_t)1 << (size * 8)) - 1;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /* Load the ROM lazily when the guest tries to read it */
|
|
|
9ae3a8 |
+ if (unlikely(!vdev->rom)) {
|
|
|
9ae3a8 |
+ vfio_pci_load_rom(vdev);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ memcpy(&val, vdev->rom + addr,
|
|
|
9ae3a8 |
+ (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n",
|
|
|
9ae3a8 |
+ __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
|
|
|
9ae3a8 |
+ vdev->host.function, addr, size, val);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return val;
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+static const MemoryRegionOps vfio_rom_ops = {
|
|
|
9ae3a8 |
+ .read = vfio_rom_read,
|
|
|
9ae3a8 |
+ .endianness = DEVICE_LITTLE_ENDIAN,
|
|
|
9ae3a8 |
+};
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+static void vfio_pci_size_rom(VFIODevice *vdev)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK;
|
|
|
9ae3a8 |
+ off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
|
|
|
9ae3a8 |
+ char name[32];
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /*
|
|
|
9ae3a8 |
+ * Use the same size ROM BAR as the physical device. The contents
|
|
|
9ae3a8 |
+ * will get filled in later when the guest tries to read it.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+ if (pread(vdev->fd, &orig, 4, offset) != 4 ||
|
|
|
9ae3a8 |
+ pwrite(vdev->fd, &size, 4, offset) != 4 ||
|
|
|
9ae3a8 |
+ pread(vdev->fd, &size, 4, offset) != 4 ||
|
|
|
9ae3a8 |
+ pwrite(vdev->fd, &orig, 4, offset) != 4) {
|
|
|
9ae3a8 |
+ error_report("%s(%04x:%02x:%02x.%x) failed: %m",
|
|
|
9ae3a8 |
+ __func__, vdev->host.domain, vdev->host.bus,
|
|
|
9ae3a8 |
+ vdev->host.slot, vdev->host.function);
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ size = ~(size & PCI_ROM_ADDRESS_MASK) + 1;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (!size) {
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain,
|
|
|
9ae3a8 |
+ vdev->host.bus, vdev->host.slot, vdev->host.function, size);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
|
|
|
9ae3a8 |
+ vdev->host.domain, vdev->host.bus, vdev->host.slot,
|
|
|
9ae3a8 |
+ vdev->host.function);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ memory_region_init_io(&vdev->pdev.rom,
|
|
|
9ae3a8 |
+ &vfio_rom_ops, vdev, name, size);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
|
|
|
9ae3a8 |
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ vdev->pdev.has_rom = true;
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
static void vfio_vga_write(void *opaque, hwaddr addr,
|
|
|
9ae3a8 |
uint64_t data, unsigned size)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
@@ -2633,51 +2753,6 @@ static int vfio_add_capabilities(VFIODevice *vdev)
|
|
|
9ae3a8 |
return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-static int vfio_load_rom(VFIODevice *vdev)
|
|
|
9ae3a8 |
-{
|
|
|
9ae3a8 |
- uint64_t size = vdev->rom_size;
|
|
|
9ae3a8 |
- char name[32];
|
|
|
9ae3a8 |
- off_t off = 0, voff = vdev->rom_offset;
|
|
|
9ae3a8 |
- ssize_t bytes;
|
|
|
9ae3a8 |
- void *ptr;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- /* If loading ROM from file, pci handles it */
|
|
|
9ae3a8 |
- if (vdev->pdev.romfile || !vdev->pdev.rom_bar || !size) {
|
|
|
9ae3a8 |
- return 0;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
|
|
|
9ae3a8 |
- vdev->host.bus, vdev->host.slot, vdev->host.function);
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
|
|
|
9ae3a8 |
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
|
|
|
9ae3a8 |
- vdev->host.function);
|
|
|
9ae3a8 |
- memory_region_init_ram(&vdev->pdev.rom, name, size);
|
|
|
9ae3a8 |
- ptr = memory_region_get_ram_ptr(&vdev->pdev.rom);
|
|
|
9ae3a8 |
- memset(ptr, 0xff, size);
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- while (size) {
|
|
|
9ae3a8 |
- bytes = pread(vdev->fd, ptr + off, size, voff + off);
|
|
|
9ae3a8 |
- if (bytes == 0) {
|
|
|
9ae3a8 |
- break; /* expect that we could get back less than the ROM BAR */
|
|
|
9ae3a8 |
- } else if (bytes > 0) {
|
|
|
9ae3a8 |
- off += bytes;
|
|
|
9ae3a8 |
- size -= bytes;
|
|
|
9ae3a8 |
- } else {
|
|
|
9ae3a8 |
- if (errno == EINTR || errno == EAGAIN) {
|
|
|
9ae3a8 |
- continue;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- error_report("vfio: Error reading device ROM: %m");
|
|
|
9ae3a8 |
- memory_region_destroy(&vdev->pdev.rom);
|
|
|
9ae3a8 |
- return -errno;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, 0, &vdev->pdev.rom);
|
|
|
9ae3a8 |
- vdev->pdev.has_rom = true;
|
|
|
9ae3a8 |
- return 0;
|
|
|
9ae3a8 |
-}
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
static int vfio_connect_container(VFIOGroup *group)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
VFIOContainer *container;
|
|
|
9ae3a8 |
@@ -2911,22 +2986,6 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
|
|
|
9ae3a8 |
QLIST_INIT(&vdev->bars[i].quirks);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- reg_info.index = VFIO_PCI_ROM_REGION_INDEX;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
|
|
|
9ae3a8 |
- if (ret) {
|
|
|
9ae3a8 |
- error_report("vfio: Error getting ROM info: %m");
|
|
|
9ae3a8 |
- goto error;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- DPRINTF("Device %s ROM:\n", name);
|
|
|
9ae3a8 |
- DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
|
|
|
9ae3a8 |
- (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
|
|
|
9ae3a8 |
- (unsigned long)reg_info.flags);
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- vdev->rom_size = reg_info.size;
|
|
|
9ae3a8 |
- vdev->rom_offset = reg_info.offset;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
|
|
|
9ae3a8 |
@@ -3224,7 +3283,7 @@ static int vfio_initfn(PCIDevice *pdev)
|
|
|
9ae3a8 |
memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
|
|
|
9ae3a8 |
memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- vfio_load_rom(vdev);
|
|
|
9ae3a8 |
+ vfio_pci_size_rom(vdev);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
ret = vfio_early_setup_msix(vdev);
|
|
|
9ae3a8 |
if (ret) {
|
|
|
9ae3a8 |
@@ -3289,6 +3348,7 @@ static void vfio_exitfn(PCIDevice *pdev)
|
|
|
9ae3a8 |
vfio_teardown_msi(vdev);
|
|
|
9ae3a8 |
vfio_unmap_bars(vdev);
|
|
|
9ae3a8 |
g_free(vdev->emulated_config_bits);
|
|
|
9ae3a8 |
+ g_free(vdev->rom);
|
|
|
9ae3a8 |
vfio_put_device(vdev);
|
|
|
9ae3a8 |
vfio_put_group(group);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.7.1
|
|
|
9ae3a8 |
|