|
|
9ae3a8 |
From 04981be7338ba396410cdab91612c17d6383d3a3 Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Max Reitz <mreitz@redhat.com>
|
|
|
9ae3a8 |
Date: Tue, 18 Nov 2014 15:30:14 +0100
|
|
|
9ae3a8 |
Subject: [PATCH 35/41] block/raw-posix: Try both FIEMAP and SEEK_HOLE
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Message-id: <1416324620-16229-2-git-send-email-mreitz@redhat.com>
|
|
|
9ae3a8 |
Patchwork-id: 62436
|
|
|
9ae3a8 |
O-Subject: [RHEL-7.1/7.0.z qemu-kvm PATCH v3 1/7] block/raw-posix: Try both FIEMAP and SEEK_HOLE
|
|
|
9ae3a8 |
Bugzilla: 1160237
|
|
|
9ae3a8 |
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Markus Armbruster <armbru@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The current version of raw-posix always uses ioctl(FS_IOC_FIEMAP) if
|
|
|
9ae3a8 |
FIEMAP is available; lseek with SEEK_HOLE/SEEK_DATA is not even
|
|
|
9ae3a8 |
compiled in, in this case. However, there may be implementations which
|
|
|
9ae3a8 |
support the latter but not the former (e.g., NFSv4.2) as well as vice
|
|
|
9ae3a8 |
versa.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
To cover both cases, try FIEMAP first (as this will return -ENOTSUP if
|
|
|
9ae3a8 |
not supported instead of returning a failsafe value (everything
|
|
|
9ae3a8 |
allocated as a single extent)) and if that does not work, fall back to
|
|
|
9ae3a8 |
SEEK_HOLE/SEEK_DATA.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
|
|
9ae3a8 |
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
9ae3a8 |
(cherry picked from commit 4f11aa8a40351b28c0e67c7276e0003b38cc46ac)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
|
|
9ae3a8 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
block/raw-posix.c | 127 +++++++++++++++++++++++++++++++++---------------------
|
|
|
9ae3a8 |
1 file changed, 77 insertions(+), 50 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/block/raw-posix.c b/block/raw-posix.c
|
|
|
9ae3a8 |
index cfe7452..5f57412 100644
|
|
|
9ae3a8 |
--- a/block/raw-posix.c
|
|
|
9ae3a8 |
+++ b/block/raw-posix.c
|
|
|
9ae3a8 |
@@ -147,6 +147,9 @@ typedef struct BDRVRawState {
|
|
|
9ae3a8 |
bool has_discard:1;
|
|
|
9ae3a8 |
bool has_write_zeroes:1;
|
|
|
9ae3a8 |
bool discard_zeroes:1;
|
|
|
9ae3a8 |
+#ifdef CONFIG_FIEMAP
|
|
|
9ae3a8 |
+ bool skip_fiemap;
|
|
|
9ae3a8 |
+#endif
|
|
|
9ae3a8 |
} BDRVRawState;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
typedef struct BDRVRawReopenState {
|
|
|
9ae3a8 |
@@ -1305,53 +1308,29 @@ out:
|
|
|
9ae3a8 |
return result;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-/*
|
|
|
9ae3a8 |
- * Returns true iff the specified sector is present in the disk image. Drivers
|
|
|
9ae3a8 |
- * not implementing the functionality are assumed to not support backing files,
|
|
|
9ae3a8 |
- * hence all their sectors are reported as allocated.
|
|
|
9ae3a8 |
- *
|
|
|
9ae3a8 |
- * If 'sector_num' is beyond the end of the disk image the return value is 0
|
|
|
9ae3a8 |
- * and 'pnum' is set to 0.
|
|
|
9ae3a8 |
- *
|
|
|
9ae3a8 |
- * 'pnum' is set to the number of sectors (including and immediately following
|
|
|
9ae3a8 |
- * the specified sector) that are known to be in the same
|
|
|
9ae3a8 |
- * allocated/unallocated state.
|
|
|
9ae3a8 |
- *
|
|
|
9ae3a8 |
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
|
|
|
9ae3a8 |
- * beyond the end of the disk image it will be clamped.
|
|
|
9ae3a8 |
- */
|
|
|
9ae3a8 |
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
|
|
|
9ae3a8 |
- int64_t sector_num,
|
|
|
9ae3a8 |
- int nb_sectors, int *pnum)
|
|
|
9ae3a8 |
+static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
|
|
|
9ae3a8 |
+ off_t *hole, int nb_sectors, int *pnum)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
- off_t start, data, hole;
|
|
|
9ae3a8 |
- int64_t ret;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- ret = fd_open(bs);
|
|
|
9ae3a8 |
- if (ret < 0) {
|
|
|
9ae3a8 |
- return ret;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- start = sector_num * BDRV_SECTOR_SIZE;
|
|
|
9ae3a8 |
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
#ifdef CONFIG_FIEMAP
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
BDRVRawState *s = bs->opaque;
|
|
|
9ae3a8 |
+ int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
|
|
|
9ae3a8 |
struct {
|
|
|
9ae3a8 |
struct fiemap fm;
|
|
|
9ae3a8 |
struct fiemap_extent fe;
|
|
|
9ae3a8 |
} f;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ if (s->skip_fiemap) {
|
|
|
9ae3a8 |
+ return -ENOTSUP;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
f.fm.fm_start = start;
|
|
|
9ae3a8 |
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
|
|
|
9ae3a8 |
f.fm.fm_flags = 0;
|
|
|
9ae3a8 |
f.fm.fm_extent_count = 1;
|
|
|
9ae3a8 |
f.fm.fm_reserved = 0;
|
|
|
9ae3a8 |
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
|
|
|
9ae3a8 |
- /* Assume everything is allocated. */
|
|
|
9ae3a8 |
- *pnum = nb_sectors;
|
|
|
9ae3a8 |
- return ret;
|
|
|
9ae3a8 |
+ s->skip_fiemap = true;
|
|
|
9ae3a8 |
+ return -errno;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (f.fm.fm_mapped_extents == 0) {
|
|
|
9ae3a8 |
@@ -1359,44 +1338,92 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
|
|
|
9ae3a8 |
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
off_t length = lseek(s->fd, 0, SEEK_END);
|
|
|
9ae3a8 |
- hole = f.fm.fm_start;
|
|
|
9ae3a8 |
- data = MIN(f.fm.fm_start + f.fm.fm_length, length);
|
|
|
9ae3a8 |
+ *hole = f.fm.fm_start;
|
|
|
9ae3a8 |
+ *data = MIN(f.fm.fm_start + f.fm.fm_length, length);
|
|
|
9ae3a8 |
} else {
|
|
|
9ae3a8 |
- data = f.fe.fe_logical;
|
|
|
9ae3a8 |
- hole = f.fe.fe_logical + f.fe.fe_length;
|
|
|
9ae3a8 |
+ *data = f.fe.fe_logical;
|
|
|
9ae3a8 |
+ *hole = f.fe.fe_logical + f.fe.fe_length;
|
|
|
9ae3a8 |
if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
|
|
|
9ae3a8 |
ret |= BDRV_BLOCK_ZERO;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-#elif defined SEEK_HOLE && defined SEEK_DATA
|
|
|
9ae3a8 |
+ return ret;
|
|
|
9ae3a8 |
+#else
|
|
|
9ae3a8 |
+ return -ENOTSUP;
|
|
|
9ae3a8 |
+#endif
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
|
|
|
9ae3a8 |
+ off_t *hole, int *pnum)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+#if defined SEEK_HOLE && defined SEEK_DATA
|
|
|
9ae3a8 |
BDRVRawState *s = bs->opaque;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- hole = lseek(s->fd, start, SEEK_HOLE);
|
|
|
9ae3a8 |
- if (hole == -1) {
|
|
|
9ae3a8 |
+ *hole = lseek(s->fd, start, SEEK_HOLE);
|
|
|
9ae3a8 |
+ if (*hole == -1) {
|
|
|
9ae3a8 |
/* -ENXIO indicates that sector_num was past the end of the file.
|
|
|
9ae3a8 |
* There is a virtual hole there. */
|
|
|
9ae3a8 |
assert(errno != -ENXIO);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- /* Most likely EINVAL. Assume everything is allocated. */
|
|
|
9ae3a8 |
- *pnum = nb_sectors;
|
|
|
9ae3a8 |
- return ret;
|
|
|
9ae3a8 |
+ return -errno;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- if (hole > start) {
|
|
|
9ae3a8 |
- data = start;
|
|
|
9ae3a8 |
+ if (*hole > start) {
|
|
|
9ae3a8 |
+ *data = start;
|
|
|
9ae3a8 |
} else {
|
|
|
9ae3a8 |
/* On a hole. We need another syscall to find its end. */
|
|
|
9ae3a8 |
- data = lseek(s->fd, start, SEEK_DATA);
|
|
|
9ae3a8 |
- if (data == -1) {
|
|
|
9ae3a8 |
- data = lseek(s->fd, 0, SEEK_END);
|
|
|
9ae3a8 |
+ *data = lseek(s->fd, start, SEEK_DATA);
|
|
|
9ae3a8 |
+ if (*data == -1) {
|
|
|
9ae3a8 |
+ *data = lseek(s->fd, 0, SEEK_END);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
|
|
|
9ae3a8 |
#else
|
|
|
9ae3a8 |
- data = 0;
|
|
|
9ae3a8 |
- hole = start + nb_sectors * BDRV_SECTOR_SIZE;
|
|
|
9ae3a8 |
+ return -ENOTSUP;
|
|
|
9ae3a8 |
#endif
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+/*
|
|
|
9ae3a8 |
+ * Returns true iff the specified sector is present in the disk image. Drivers
|
|
|
9ae3a8 |
+ * not implementing the functionality are assumed to not support backing files,
|
|
|
9ae3a8 |
+ * hence all their sectors are reported as allocated.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
|
|
|
9ae3a8 |
+ * and 'pnum' is set to 0.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * 'pnum' is set to the number of sectors (including and immediately following
|
|
|
9ae3a8 |
+ * the specified sector) that are known to be in the same
|
|
|
9ae3a8 |
+ * allocated/unallocated state.
|
|
|
9ae3a8 |
+ *
|
|
|
9ae3a8 |
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
|
|
|
9ae3a8 |
+ * beyond the end of the disk image it will be clamped.
|
|
|
9ae3a8 |
+ */
|
|
|
9ae3a8 |
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
|
|
|
9ae3a8 |
+ int64_t sector_num,
|
|
|
9ae3a8 |
+ int nb_sectors, int *pnum)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ off_t start, data = 0, hole = 0;
|
|
|
9ae3a8 |
+ int64_t ret;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ ret = fd_open(bs);
|
|
|
9ae3a8 |
+ if (ret < 0) {
|
|
|
9ae3a8 |
+ return ret;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ start = sector_num * BDRV_SECTOR_SIZE;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
|
|
|
9ae3a8 |
+ if (ret < 0) {
|
|
|
9ae3a8 |
+ ret = try_seek_hole(bs, start, &data, &hole, pnum);
|
|
|
9ae3a8 |
+ if (ret < 0) {
|
|
|
9ae3a8 |
+ /* Assume everything is allocated. */
|
|
|
9ae3a8 |
+ data = 0;
|
|
|
9ae3a8 |
+ hole = start + nb_sectors * BDRV_SECTOR_SIZE;
|
|
|
9ae3a8 |
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (data <= start) {
|
|
|
9ae3a8 |
/* On a data extent, compute sectors to the end of the extent. */
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.8.3.1
|
|
|
9ae3a8 |
|