|
|
9ae3a8 |
From 766239ec3a7be7b8bf250a677e478773ac1b96dd Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
Date: Mon, 9 Sep 2013 14:28:02 +0200
|
|
|
9ae3a8 |
Subject: [PATCH 11/38] qcow2: Batch discards
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
Message-id: <1378736903-18489-12-git-send-email-kwolf@redhat.com>
|
|
|
9ae3a8 |
Patchwork-id: 54199
|
|
|
9ae3a8 |
O-Subject: [RHEL-7.0 qemu-kvm PATCH 11/32] qcow2: Batch discards
|
|
|
9ae3a8 |
Bugzilla: 1005818
|
|
|
9ae3a8 |
RH-Acked-by: Fam Zheng <famz@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Bugzilla: 1005818
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
This optimises the discard operation for freed clusters by batching
|
|
|
9ae3a8 |
discard requests (both snapshot deletion and bdrv_discard end up
|
|
|
9ae3a8 |
updating the refcounts cluster by cluster).
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Note that we don't discard asynchronously, but keep s->lock held. This
|
|
|
9ae3a8 |
prevents a freed cluster from being reallocated and written to while the
|
|
|
9ae3a8 |
discard is still in flight.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
9ae3a8 |
(cherry picked from commit 0b919fae31f4f80f78ebeab8388dc3a03cc5e4ed)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
block/qcow2-cluster.c | 22 +++++++++++---
|
|
|
9ae3a8 |
block/qcow2-refcount.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
9ae3a8 |
block/qcow2.c | 1 +
|
|
|
9ae3a8 |
block/qcow2.h | 11 +++++++
|
|
|
9ae3a8 |
4 files changed, 109 insertions(+), 7 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
block/qcow2-cluster.c | 22 ++++++++++--
|
|
|
9ae3a8 |
block/qcow2-refcount.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
9ae3a8 |
block/qcow2.c | 1 +
|
|
|
9ae3a8 |
block/qcow2.h | 11 ++++++
|
|
|
9ae3a8 |
4 files changed, 109 insertions(+), 7 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
|
|
|
9ae3a8 |
index 3191d6b..cca76d4 100644
|
|
|
9ae3a8 |
--- a/block/qcow2-cluster.c
|
|
|
9ae3a8 |
+++ b/block/qcow2-cluster.c
|
|
|
9ae3a8 |
@@ -1377,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
nb_clusters = size_to_clusters(s, end_offset - offset);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ s->cache_discards = true;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* Each L2 table is handled by its own loop iteration */
|
|
|
9ae3a8 |
while (nb_clusters > 0) {
|
|
|
9ae3a8 |
ret = discard_single_l2(bs, offset, nb_clusters);
|
|
|
9ae3a8 |
if (ret < 0) {
|
|
|
9ae3a8 |
- return ret;
|
|
|
9ae3a8 |
+ goto fail;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
nb_clusters -= ret;
|
|
|
9ae3a8 |
offset += (ret * s->cluster_size);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- return 0;
|
|
|
9ae3a8 |
+ ret = 0;
|
|
|
9ae3a8 |
+fail:
|
|
|
9ae3a8 |
+ s->cache_discards = false;
|
|
|
9ae3a8 |
+ qcow2_process_discards(bs, ret);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return ret;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/*
|
|
|
9ae3a8 |
@@ -1450,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
|
|
|
9ae3a8 |
/* Each L2 table is handled by its own loop iteration */
|
|
|
9ae3a8 |
nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ s->cache_discards = true;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
while (nb_clusters > 0) {
|
|
|
9ae3a8 |
ret = zero_single_l2(bs, offset, nb_clusters);
|
|
|
9ae3a8 |
if (ret < 0) {
|
|
|
9ae3a8 |
- return ret;
|
|
|
9ae3a8 |
+ goto fail;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
nb_clusters -= ret;
|
|
|
9ae3a8 |
offset += (ret * s->cluster_size);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- return 0;
|
|
|
9ae3a8 |
+ ret = 0;
|
|
|
9ae3a8 |
+fail:
|
|
|
9ae3a8 |
+ s->cache_discards = false;
|
|
|
9ae3a8 |
+ qcow2_process_discards(bs, ret);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ return ret;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
|
|
|
9ae3a8 |
index 7488988..1244693 100644
|
|
|
9ae3a8 |
--- a/block/qcow2-refcount.c
|
|
|
9ae3a8 |
+++ b/block/qcow2-refcount.c
|
|
|
9ae3a8 |
@@ -420,6 +420,74 @@ fail_block:
|
|
|
9ae3a8 |
return ret;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+void qcow2_process_discards(BlockDriverState *bs, int ret)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ BDRVQcowState *s = bs->opaque;
|
|
|
9ae3a8 |
+ Qcow2DiscardRegion *d, *next;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
|
|
|
9ae3a8 |
+ QTAILQ_REMOVE(&s->discards, d, next);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /* Discard is optional, ignore the return value */
|
|
|
9ae3a8 |
+ if (ret >= 0) {
|
|
|
9ae3a8 |
+ bdrv_discard(bs->file,
|
|
|
9ae3a8 |
+ d->offset >> BDRV_SECTOR_BITS,
|
|
|
9ae3a8 |
+ d->bytes >> BDRV_SECTOR_BITS);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ g_free(d);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+static void update_refcount_discard(BlockDriverState *bs,
|
|
|
9ae3a8 |
+ uint64_t offset, uint64_t length)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ BDRVQcowState *s = bs->opaque;
|
|
|
9ae3a8 |
+ Qcow2DiscardRegion *d, *p, *next;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ QTAILQ_FOREACH(d, &s->discards, next) {
|
|
|
9ae3a8 |
+ uint64_t new_start = MIN(offset, d->offset);
|
|
|
9ae3a8 |
+ uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (new_end - new_start <= length + d->bytes) {
|
|
|
9ae3a8 |
+ /* There can't be any overlap, areas ending up here have no
|
|
|
9ae3a8 |
+ * references any more and therefore shouldn't get freed another
|
|
|
9ae3a8 |
+ * time. */
|
|
|
9ae3a8 |
+ assert(d->bytes + length == new_end - new_start);
|
|
|
9ae3a8 |
+ d->offset = new_start;
|
|
|
9ae3a8 |
+ d->bytes = new_end - new_start;
|
|
|
9ae3a8 |
+ goto found;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ d = g_malloc(sizeof(*d));
|
|
|
9ae3a8 |
+ *d = (Qcow2DiscardRegion) {
|
|
|
9ae3a8 |
+ .bs = bs,
|
|
|
9ae3a8 |
+ .offset = offset,
|
|
|
9ae3a8 |
+ .bytes = length,
|
|
|
9ae3a8 |
+ };
|
|
|
9ae3a8 |
+ QTAILQ_INSERT_TAIL(&s->discards, d, next);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+found:
|
|
|
9ae3a8 |
+ /* Merge discard requests if they are adjacent now */
|
|
|
9ae3a8 |
+ QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
|
|
|
9ae3a8 |
+ if (p == d
|
|
|
9ae3a8 |
+ || p->offset > d->offset + d->bytes
|
|
|
9ae3a8 |
+ || d->offset > p->offset + p->bytes)
|
|
|
9ae3a8 |
+ {
|
|
|
9ae3a8 |
+ continue;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ /* Still no overlap possible */
|
|
|
9ae3a8 |
+ assert(p->offset == d->offset + d->bytes
|
|
|
9ae3a8 |
+ || d->offset == p->offset + p->bytes);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ QTAILQ_REMOVE(&s->discards, p, next);
|
|
|
9ae3a8 |
+ d->offset = MIN(d->offset, p->offset);
|
|
|
9ae3a8 |
+ d->bytes += p->bytes;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* XXX: cache several refcount block clusters ? */
|
|
|
9ae3a8 |
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
|
|
|
9ae3a8 |
int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
|
|
|
9ae3a8 |
@@ -488,15 +556,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
|
|
|
9ae3a8 |
s->free_cluster_index = cluster_index;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
refcount_block[block_index] = cpu_to_be16(refcount);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
if (refcount == 0 && s->discard_passthrough[type]) {
|
|
|
9ae3a8 |
- /* Try discarding, ignore errors */
|
|
|
9ae3a8 |
- /* FIXME Doing this cluster by cluster will be painfully slow */
|
|
|
9ae3a8 |
- bdrv_discard(bs->file, cluster_offset, 1);
|
|
|
9ae3a8 |
+ update_refcount_discard(bs, cluster_offset, s->cluster_size);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
ret = 0;
|
|
|
9ae3a8 |
fail:
|
|
|
9ae3a8 |
+ if (!s->cache_discards) {
|
|
|
9ae3a8 |
+ qcow2_process_discards(bs, ret);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* Write last changed block to disk */
|
|
|
9ae3a8 |
if (refcount_block) {
|
|
|
9ae3a8 |
int wret;
|
|
|
9ae3a8 |
@@ -755,6 +826,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
|
|
|
9ae3a8 |
l1_table = NULL;
|
|
|
9ae3a8 |
l1_size2 = l1_size * sizeof(uint64_t);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ s->cache_discards = true;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* WARNING: qcow2_snapshot_goto relies on this function not using the
|
|
|
9ae3a8 |
* l1_table_offset when it is the current s->l1_table_offset! Be careful
|
|
|
9ae3a8 |
* when changing this! */
|
|
|
9ae3a8 |
@@ -867,6 +940,9 @@ fail:
|
|
|
9ae3a8 |
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ s->cache_discards = false;
|
|
|
9ae3a8 |
+ qcow2_process_discards(bs, ret);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* Update L1 only if it isn't deleted anyway (addend = -1) */
|
|
|
9ae3a8 |
if (ret == 0 && addend >= 0 && l1_modified) {
|
|
|
9ae3a8 |
for (i = 0; i < l1_size; i++) {
|
|
|
9ae3a8 |
diff --git a/block/qcow2.c b/block/qcow2.c
|
|
|
9ae3a8 |
index ef8a2ca..9383990 100644
|
|
|
9ae3a8 |
--- a/block/qcow2.c
|
|
|
9ae3a8 |
+++ b/block/qcow2.c
|
|
|
9ae3a8 |
@@ -486,6 +486,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
QLIST_INIT(&s->cluster_allocs);
|
|
|
9ae3a8 |
+ QTAILQ_INIT(&s->discards);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/* read qcow2 extensions */
|
|
|
9ae3a8 |
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
|
|
|
9ae3a8 |
diff --git a/block/qcow2.h b/block/qcow2.h
|
|
|
9ae3a8 |
index 6f91b9a..3b2d5cd 100644
|
|
|
9ae3a8 |
--- a/block/qcow2.h
|
|
|
9ae3a8 |
+++ b/block/qcow2.h
|
|
|
9ae3a8 |
@@ -147,6 +147,13 @@ typedef struct Qcow2Feature {
|
|
|
9ae3a8 |
char name[46];
|
|
|
9ae3a8 |
} QEMU_PACKED Qcow2Feature;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+typedef struct Qcow2DiscardRegion {
|
|
|
9ae3a8 |
+ BlockDriverState *bs;
|
|
|
9ae3a8 |
+ uint64_t offset;
|
|
|
9ae3a8 |
+ uint64_t bytes;
|
|
|
9ae3a8 |
+ QTAILQ_ENTRY(Qcow2DiscardRegion) next;
|
|
|
9ae3a8 |
+} Qcow2DiscardRegion;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
typedef struct BDRVQcowState {
|
|
|
9ae3a8 |
int cluster_bits;
|
|
|
9ae3a8 |
int cluster_size;
|
|
|
9ae3a8 |
@@ -199,6 +206,8 @@ typedef struct BDRVQcowState {
|
|
|
9ae3a8 |
size_t unknown_header_fields_size;
|
|
|
9ae3a8 |
void* unknown_header_fields;
|
|
|
9ae3a8 |
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
|
|
|
9ae3a8 |
+ QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
|
|
|
9ae3a8 |
+ bool cache_discards;
|
|
|
9ae3a8 |
} BDRVQcowState;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/* XXX: use std qcow open function ? */
|
|
|
9ae3a8 |
@@ -374,6 +383,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
|
|
|
9ae3a8 |
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
|
|
|
9ae3a8 |
BdrvCheckMode fix);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+void qcow2_process_discards(BlockDriverState *bs, int ret);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* qcow2-cluster.c functions */
|
|
|
9ae3a8 |
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
|
|
|
9ae3a8 |
bool exact_size);
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.7.1
|
|
|
9ae3a8 |
|