|
|
9ae3a8 |
From bbd8cc516329f84b70d38a75820f36f2ecd0abda Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Alex Williamson <alex.williamson@redhat.com>
|
|
|
9ae3a8 |
Date: Fri, 29 Sep 2017 21:46:14 +0200
|
|
|
9ae3a8 |
Subject: [PATCH 15/27] vfio: Enable sparse mmap capability
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
RH-Author: Alex Williamson <alex.williamson@redhat.com>
|
|
|
9ae3a8 |
Message-id: <20170929214614.16765.48627.stgit@gimli.home>
|
|
|
9ae3a8 |
Patchwork-id: 76773
|
|
|
9ae3a8 |
O-Subject: [RHEL-7.5 qemu-kvm PATCH 15/16] vfio: Enable sparse mmap capability
|
|
|
9ae3a8 |
Bugzilla: 1494181
|
|
|
9ae3a8 |
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Upstream: b53b0f696b10828f6393155f44a352c019e673fd
|
|
|
9ae3a8 |
RHEL: Roll in required linux-headers update
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The sparse mmap capability in a vfio region info allows vfio to tell
|
|
|
9ae3a8 |
us which sub-areas of a region may be mmap'd. Thus rather than
|
|
|
9ae3a8 |
assuming a single mmap covers the entire region and later frobbing it
|
|
|
9ae3a8 |
ourselves for things like the PCI MSI-X vector table, we can read that
|
|
|
9ae3a8 |
directly from vfio.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
|
9ae3a8 |
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
|
|
|
9ae3a8 |
Tested-by: Gerd Hoffmann <kraxel@redhat.com>
|
|
|
9ae3a8 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
hw/misc/vfio.c | 67 +++++++++++++++++++++++++++++++++++++++++++---
|
|
|
9ae3a8 |
linux-headers/linux/vfio.h | 53 +++++++++++++++++++++++++++++++++++-
|
|
|
9ae3a8 |
trace-events | 2 ++
|
|
|
9ae3a8 |
3 files changed, 117 insertions(+), 5 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
|
|
|
9ae3a8 |
index d634531..a27698b 100644
|
|
|
9ae3a8 |
--- a/hw/misc/vfio.c
|
|
|
9ae3a8 |
+++ b/hw/misc/vfio.c
|
|
|
9ae3a8 |
@@ -2602,6 +2602,54 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
|
|
|
9ae3a8 |
vfio_region_finalize(&bar->region);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+static struct vfio_info_cap_header *
|
|
|
9ae3a8 |
+vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ struct vfio_info_cap_header *hdr;
|
|
|
9ae3a8 |
+ void *ptr = info;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
|
|
|
9ae3a8 |
+ return NULL;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
|
|
|
9ae3a8 |
+ if (hdr->id == id) {
|
|
|
9ae3a8 |
+ return hdr;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return NULL;
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+static void vfio_setup_region_sparse_mmaps(VFIORegion *region,
|
|
|
9ae3a8 |
+ struct vfio_region_info *info)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ struct vfio_info_cap_header *hdr;
|
|
|
9ae3a8 |
+ struct vfio_region_info_cap_sparse_mmap *sparse;
|
|
|
9ae3a8 |
+ int i;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
|
|
|
9ae3a8 |
+ if (!hdr) {
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
|
|
|
9ae3a8 |
+ region->nr, sparse->nr_areas);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ region->nr_mmaps = sparse->nr_areas;
|
|
|
9ae3a8 |
+ region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ for (i = 0; i < region->nr_mmaps; i++) {
|
|
|
9ae3a8 |
+ region->mmaps[i].offset = sparse->areas[i].offset;
|
|
|
9ae3a8 |
+ region->mmaps[i].size = sparse->areas[i].size;
|
|
|
9ae3a8 |
+ trace_vfio_region_sparse_mmap_entry(i, region->mmaps[i].offset,
|
|
|
9ae3a8 |
+ region->mmaps[i].offset +
|
|
|
9ae3a8 |
+ region->mmaps[i].size);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
static int vfio_region_setup(Object *obj, VFIODevice *vbasedev,
|
|
|
9ae3a8 |
VFIORegion *region, int index, const char *name)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
@@ -2628,11 +2676,14 @@ static int vfio_region_setup(Object *obj, VFIODevice *vbasedev,
|
|
|
9ae3a8 |
region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
|
|
|
9ae3a8 |
!(region->size & ~TARGET_PAGE_MASK)) {
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- region->nr_mmaps = 1;
|
|
|
9ae3a8 |
- region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
|
|
|
9ae3a8 |
+ vfio_setup_region_sparse_mmaps(region, info);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- region->mmaps[0].offset = 0;
|
|
|
9ae3a8 |
- region->mmaps[0].size = region->size;
|
|
|
9ae3a8 |
+ if (!region->nr_mmaps) {
|
|
|
9ae3a8 |
+ region->nr_mmaps = 1;
|
|
|
9ae3a8 |
+ region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
|
|
|
9ae3a8 |
+ region->mmaps[0].offset = 0;
|
|
|
9ae3a8 |
+ region->mmaps[0].size = region->size;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
@@ -3796,6 +3847,7 @@ static int vfio_get_region_info(VFIODevice *vbasedev, int index,
|
|
|
9ae3a8 |
*info = g_malloc0(argsz);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
(*info)->index = index;
|
|
|
9ae3a8 |
+retry:
|
|
|
9ae3a8 |
(*info)->argsz = argsz;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
|
|
|
9ae3a8 |
@@ -3803,6 +3855,13 @@ static int vfio_get_region_info(VFIODevice *vbasedev, int index,
|
|
|
9ae3a8 |
return -errno;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ if ((*info)->argsz > argsz) {
|
|
|
9ae3a8 |
+ argsz = (*info)->argsz;
|
|
|
9ae3a8 |
+ *info = g_realloc(*info, argsz);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ goto retry;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
return 0;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
|
|
|
9ae3a8 |
index d197fd4..8995a34 100644
|
|
|
9ae3a8 |
--- a/linux-headers/linux/vfio.h
|
|
|
9ae3a8 |
+++ b/linux-headers/linux/vfio.h
|
|
|
9ae3a8 |
@@ -38,6 +38,33 @@
|
|
|
9ae3a8 |
#define VFIO_TYPE (';')
|
|
|
9ae3a8 |
#define VFIO_BASE 100
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+/*
|
|
|
9ae3a8 |
+ * For extension of INFO ioctls, VFIO makes use of a capability chain
|
|
|
9ae3a8 |
+ * designed after PCI/e capabilities. A flag bit indicates whether
|
|
|
9ae3a8 |
+ * this capability chain is supported and a field defined in the fixed
|
|
|
9ae3a8 |
+ * structure defines the offset of the first capability in the chain.
|
|
|
9ae3a8 |
+ * This field is only valid when the corresponding bit in the flags
|
|
|
9ae3a8 |
+ * bitmap is set. This offset field is relative to the start of the
|
|
|
9ae3a8 |
+ * INFO buffer, as is the next field within each capability header.
|
|
|
9ae3a8 |
+ * The id within the header is a shared address space per INFO ioctl,
|
|
|
9ae3a8 |
+ * while the version field is specific to the capability id. The
|
|
|
9ae3a8 |
+ * contents following the header are specific to the capability id.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+struct vfio_info_cap_header {
|
|
|
9ae3a8 |
+ __u16 id; /* Identifies capability */
|
|
|
9ae3a8 |
+ __u16 version; /* Version specific to the capability ID */
|
|
|
9ae3a8 |
+ __u32 next; /* Offset of next capability */
|
|
|
9ae3a8 |
+};
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+/*
|
|
|
9ae3a8 |
+ * Callers of INFO ioctls passing insufficiently sized buffers will see
|
|
|
9ae3a8 |
+ * the capability chain flag bit set, a zero value for the first capability
|
|
|
9ae3a8 |
+ * offset (if available within the provided argsz), and argsz will be
|
|
|
9ae3a8 |
+ * updated to report the necessary buffer size. For compatibility, the
|
|
|
9ae3a8 |
+ * INFO ioctl will not report error in this case, but the capability chain
|
|
|
9ae3a8 |
+ * will not be available.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/**
|
|
|
9ae3a8 |
@@ -171,13 +198,37 @@ struct vfio_region_info {
|
|
|
9ae3a8 |
#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
|
|
|
9ae3a8 |
#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
|
|
|
9ae3a8 |
#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
|
|
|
9ae3a8 |
+#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */
|
|
|
9ae3a8 |
__u32 index; /* Region index */
|
|
|
9ae3a8 |
- __u32 resv; /* Reserved for alignment */
|
|
|
9ae3a8 |
+ __u32 cap_offset; /* Offset within info struct of first cap */
|
|
|
9ae3a8 |
__u64 size; /* Region size (bytes) */
|
|
|
9ae3a8 |
__u64 offset; /* Region offset from start of device fd */
|
|
|
9ae3a8 |
};
|
|
|
9ae3a8 |
#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+/*
|
|
|
9ae3a8 |
+ * The sparse mmap capability allows finer granularity of specifying areas
|
|
|
9ae3a8 |
+ * within a region with mmap support. When specified, the user should only
|
|
|
9ae3a8 |
+ * mmap the offset ranges specified by the areas array. mmaps outside of the
|
|
|
9ae3a8 |
+ * areas specified may fail (such as the range covering a PCI MSI-X table) or
|
|
|
9ae3a8 |
+ * may result in improper device behavior.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * The structures below define version 1 of this capability.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+struct vfio_region_sparse_mmap_area {
|
|
|
9ae3a8 |
+ __u64 offset; /* Offset of mmap'able area within region */
|
|
|
9ae3a8 |
+ __u64 size; /* Size of mmap'able area */
|
|
|
9ae3a8 |
+};
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+struct vfio_region_info_cap_sparse_mmap {
|
|
|
9ae3a8 |
+ struct vfio_info_cap_header header;
|
|
|
9ae3a8 |
+ __u32 nr_areas;
|
|
|
9ae3a8 |
+ __u32 reserved;
|
|
|
9ae3a8 |
+ struct vfio_region_sparse_mmap_area areas[];
|
|
|
9ae3a8 |
+};
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/**
|
|
|
9ae3a8 |
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
|
|
|
9ae3a8 |
* struct vfio_irq_info)
|
|
|
9ae3a8 |
diff --git a/trace-events b/trace-events
|
|
|
9ae3a8 |
index cc62b0b..fa2618d 100644
|
|
|
9ae3a8 |
--- a/trace-events
|
|
|
9ae3a8 |
+++ b/trace-events
|
|
|
9ae3a8 |
@@ -1164,3 +1164,5 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg
|
|
|
9ae3a8 |
vfio_region_exit(const char *name, int index) "Device %s, region %d"
|
|
|
9ae3a8 |
vfio_region_finalize(const char *name, int index) "Device %s, region %d"
|
|
|
9ae3a8 |
vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
|
|
|
9ae3a8 |
+vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
|
|
|
9ae3a8 |
+vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.8.3.1
|
|
|
9ae3a8 |
|