|
|
ae23c9 |
From a0e14e757707733db8e3b927b6bcae336ae219d8 Mon Sep 17 00:00:00 2001
|
|
|
ae23c9 |
From: Fam Zheng <famz@redhat.com>
|
|
|
ae23c9 |
Date: Fri, 29 Jun 2018 06:11:44 +0200
|
|
|
ae23c9 |
Subject: [PATCH 170/268] qcow2: Implement copy offloading
|
|
|
ae23c9 |
|
|
|
ae23c9 |
RH-Author: Fam Zheng <famz@redhat.com>
|
|
|
ae23c9 |
Message-id: <20180629061153.12687-5-famz@redhat.com>
|
|
|
ae23c9 |
Patchwork-id: 81154
|
|
|
ae23c9 |
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH v2 04/13] qcow2: Implement copy offloading
|
|
|
ae23c9 |
Bugzilla: 1482537
|
|
|
ae23c9 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
ae23c9 |
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
ae23c9 |
|
|
|
ae23c9 |
The two callbacks are implemented quite similarly to the read/write
|
|
|
ae23c9 |
functions: bdrv_co_copy_range_from maps for read and calls into bs->file
|
|
|
ae23c9 |
or bs->backing depending on the allocation status; bdrv_co_copy_range_to
|
|
|
ae23c9 |
maps for write and calls into bs->file.
|
|
|
ae23c9 |
|
|
|
ae23c9 |
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
ae23c9 |
Message-id: 20180601092648.24614-5-famz@redhat.com
|
|
|
ae23c9 |
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
ae23c9 |
(cherry picked from commit fd9fcd37a8645efe322956d94f76e90135522a16)
|
|
|
ae23c9 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
ae23c9 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
ae23c9 |
---
|
|
|
ae23c9 |
block/qcow2.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
|
|
|
ae23c9 |
1 file changed, 199 insertions(+), 30 deletions(-)
|
|
|
ae23c9 |
|
|
|
ae23c9 |
diff --git a/block/qcow2.c b/block/qcow2.c
|
|
|
ae23c9 |
index 092db81..c85ebcb 100644
|
|
|
ae23c9 |
--- a/block/qcow2.c
|
|
|
ae23c9 |
+++ b/block/qcow2.c
|
|
|
ae23c9 |
@@ -1756,6 +1756,39 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
|
|
|
ae23c9 |
return status;
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
|
|
|
ae23c9 |
+ QCowL2Meta **pl2meta,
|
|
|
ae23c9 |
+ bool link_l2)
|
|
|
ae23c9 |
+{
|
|
|
ae23c9 |
+ int ret = 0;
|
|
|
ae23c9 |
+ QCowL2Meta *l2meta = *pl2meta;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ while (l2meta != NULL) {
|
|
|
ae23c9 |
+ QCowL2Meta *next;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ if (!ret && link_l2) {
|
|
|
ae23c9 |
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
|
|
|
ae23c9 |
+ if (ret) {
|
|
|
ae23c9 |
+ goto out;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ /* Take the request off the list of running requests */
|
|
|
ae23c9 |
+ if (l2meta->nb_clusters != 0) {
|
|
|
ae23c9 |
+ QLIST_REMOVE(l2meta, next_in_flight);
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ next = l2meta->next;
|
|
|
ae23c9 |
+ g_free(l2meta);
|
|
|
ae23c9 |
+ l2meta = next;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+out:
|
|
|
ae23c9 |
+ *pl2meta = l2meta;
|
|
|
ae23c9 |
+ return ret;
|
|
|
ae23c9 |
+}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
|
|
|
ae23c9 |
uint64_t bytes, QEMUIOVector *qiov,
|
|
|
ae23c9 |
int flags)
|
|
|
ae23c9 |
@@ -2042,24 +2075,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
- while (l2meta != NULL) {
|
|
|
ae23c9 |
- QCowL2Meta *next;
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
|
|
|
ae23c9 |
- if (ret < 0) {
|
|
|
ae23c9 |
- goto fail;
|
|
|
ae23c9 |
- }
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- /* Take the request off the list of running requests */
|
|
|
ae23c9 |
- if (l2meta->nb_clusters != 0) {
|
|
|
ae23c9 |
- QLIST_REMOVE(l2meta, next_in_flight);
|
|
|
ae23c9 |
- }
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- next = l2meta->next;
|
|
|
ae23c9 |
- g_free(l2meta);
|
|
|
ae23c9 |
- l2meta = next;
|
|
|
ae23c9 |
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
|
|
|
ae23c9 |
+ if (ret) {
|
|
|
ae23c9 |
+ goto fail;
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
bytes -= cur_bytes;
|
|
|
ae23c9 |
@@ -2070,18 +2088,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
|
|
|
ae23c9 |
ret = 0;
|
|
|
ae23c9 |
|
|
|
ae23c9 |
fail:
|
|
|
ae23c9 |
- while (l2meta != NULL) {
|
|
|
ae23c9 |
- QCowL2Meta *next;
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- if (l2meta->nb_clusters != 0) {
|
|
|
ae23c9 |
- QLIST_REMOVE(l2meta, next_in_flight);
|
|
|
ae23c9 |
- }
|
|
|
ae23c9 |
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
|
|
|
ae23c9 |
-
|
|
|
ae23c9 |
- next = l2meta->next;
|
|
|
ae23c9 |
- g_free(l2meta);
|
|
|
ae23c9 |
- l2meta = next;
|
|
|
ae23c9 |
- }
|
|
|
ae23c9 |
+ qcow2_handle_l2meta(bs, &l2meta, false);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
qemu_co_mutex_unlock(&s->lock);
|
|
|
ae23c9 |
|
|
|
ae23c9 |
@@ -3264,6 +3271,166 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
|
|
|
ae23c9 |
return ret;
|
|
|
ae23c9 |
}
|
|
|
ae23c9 |
|
|
|
ae23c9 |
+static int coroutine_fn
|
|
|
ae23c9 |
+qcow2_co_copy_range_from(BlockDriverState *bs,
|
|
|
ae23c9 |
+ BdrvChild *src, uint64_t src_offset,
|
|
|
ae23c9 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
ae23c9 |
+ uint64_t bytes, BdrvRequestFlags flags)
|
|
|
ae23c9 |
+{
|
|
|
ae23c9 |
+ BDRVQcow2State *s = bs->opaque;
|
|
|
ae23c9 |
+ int ret;
|
|
|
ae23c9 |
+ unsigned int cur_bytes; /* number of bytes in current iteration */
|
|
|
ae23c9 |
+ BdrvChild *child = NULL;
|
|
|
ae23c9 |
+ BdrvRequestFlags cur_flags;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ assert(!bs->encrypted);
|
|
|
ae23c9 |
+ qemu_co_mutex_lock(&s->lock);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ while (bytes != 0) {
|
|
|
ae23c9 |
+ uint64_t copy_offset = 0;
|
|
|
ae23c9 |
+ /* prepare next request */
|
|
|
ae23c9 |
+ cur_bytes = MIN(bytes, INT_MAX);
|
|
|
ae23c9 |
+ cur_flags = flags;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
|
|
|
ae23c9 |
+ if (ret < 0) {
|
|
|
ae23c9 |
+ goto out;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ switch (ret) {
|
|
|
ae23c9 |
+ case QCOW2_CLUSTER_UNALLOCATED:
|
|
|
ae23c9 |
+ if (bs->backing && bs->backing->bs) {
|
|
|
ae23c9 |
+ int64_t backing_length = bdrv_getlength(bs->backing->bs);
|
|
|
ae23c9 |
+ if (src_offset >= backing_length) {
|
|
|
ae23c9 |
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
|
|
|
ae23c9 |
+ } else {
|
|
|
ae23c9 |
+ child = bs->backing;
|
|
|
ae23c9 |
+ cur_bytes = MIN(cur_bytes, backing_length - src_offset);
|
|
|
ae23c9 |
+ copy_offset = src_offset;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ } else {
|
|
|
ae23c9 |
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ break;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ case QCOW2_CLUSTER_ZERO_PLAIN:
|
|
|
ae23c9 |
+ case QCOW2_CLUSTER_ZERO_ALLOC:
|
|
|
ae23c9 |
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
|
|
|
ae23c9 |
+ break;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ case QCOW2_CLUSTER_COMPRESSED:
|
|
|
ae23c9 |
+ ret = -ENOTSUP;
|
|
|
ae23c9 |
+ goto out;
|
|
|
ae23c9 |
+ break;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ case QCOW2_CLUSTER_NORMAL:
|
|
|
ae23c9 |
+ child = bs->file;
|
|
|
ae23c9 |
+ copy_offset += offset_into_cluster(s, src_offset);
|
|
|
ae23c9 |
+ if ((copy_offset & 511) != 0) {
|
|
|
ae23c9 |
+ ret = -EIO;
|
|
|
ae23c9 |
+ goto out;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ break;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ default:
|
|
|
ae23c9 |
+ abort();
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ qemu_co_mutex_unlock(&s->lock);
|
|
|
ae23c9 |
+ ret = bdrv_co_copy_range_from(child,
|
|
|
ae23c9 |
+ copy_offset,
|
|
|
ae23c9 |
+ dst, dst_offset,
|
|
|
ae23c9 |
+ cur_bytes, cur_flags);
|
|
|
ae23c9 |
+ qemu_co_mutex_lock(&s->lock);
|
|
|
ae23c9 |
+ if (ret < 0) {
|
|
|
ae23c9 |
+ goto out;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ bytes -= cur_bytes;
|
|
|
ae23c9 |
+ src_offset += cur_bytes;
|
|
|
ae23c9 |
+ dst_offset += cur_bytes;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ ret = 0;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+out:
|
|
|
ae23c9 |
+ qemu_co_mutex_unlock(&s->lock);
|
|
|
ae23c9 |
+ return ret;
|
|
|
ae23c9 |
+}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+static int coroutine_fn
|
|
|
ae23c9 |
+qcow2_co_copy_range_to(BlockDriverState *bs,
|
|
|
ae23c9 |
+ BdrvChild *src, uint64_t src_offset,
|
|
|
ae23c9 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
ae23c9 |
+ uint64_t bytes, BdrvRequestFlags flags)
|
|
|
ae23c9 |
+{
|
|
|
ae23c9 |
+ BDRVQcow2State *s = bs->opaque;
|
|
|
ae23c9 |
+ int offset_in_cluster;
|
|
|
ae23c9 |
+ int ret;
|
|
|
ae23c9 |
+ unsigned int cur_bytes; /* number of bytes in current iteration */
|
|
|
ae23c9 |
+ uint64_t cluster_offset;
|
|
|
ae23c9 |
+ uint8_t *cluster_data = NULL;
|
|
|
ae23c9 |
+ QCowL2Meta *l2meta = NULL;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ assert(!bs->encrypted);
|
|
|
ae23c9 |
+ s->cluster_cache_offset = -1; /* disable compressed cache */
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ qemu_co_mutex_lock(&s->lock);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ while (bytes != 0) {
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ l2meta = NULL;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ offset_in_cluster = offset_into_cluster(s, dst_offset);
|
|
|
ae23c9 |
+ cur_bytes = MIN(bytes, INT_MAX);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ /* TODO:
|
|
|
ae23c9 |
+ * If src->bs == dst->bs, we could simply copy by incrementing
|
|
|
ae23c9 |
+ * the refcnt, without copying user data.
|
|
|
ae23c9 |
+ * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
|
|
|
ae23c9 |
+ ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
|
|
|
ae23c9 |
+ &cluster_offset, &l2meta);
|
|
|
ae23c9 |
+ if (ret < 0) {
|
|
|
ae23c9 |
+ goto fail;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ assert((cluster_offset & 511) == 0);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ ret = qcow2_pre_write_overlap_check(bs, 0,
|
|
|
ae23c9 |
+ cluster_offset + offset_in_cluster, cur_bytes);
|
|
|
ae23c9 |
+ if (ret < 0) {
|
|
|
ae23c9 |
+ goto fail;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ qemu_co_mutex_unlock(&s->lock);
|
|
|
ae23c9 |
+ ret = bdrv_co_copy_range_to(src, src_offset,
|
|
|
ae23c9 |
+ bs->file,
|
|
|
ae23c9 |
+ cluster_offset + offset_in_cluster,
|
|
|
ae23c9 |
+ cur_bytes, flags);
|
|
|
ae23c9 |
+ qemu_co_mutex_lock(&s->lock);
|
|
|
ae23c9 |
+ if (ret < 0) {
|
|
|
ae23c9 |
+ goto fail;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
|
|
|
ae23c9 |
+ if (ret) {
|
|
|
ae23c9 |
+ goto fail;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ bytes -= cur_bytes;
|
|
|
ae23c9 |
+ dst_offset += cur_bytes;
|
|
|
ae23c9 |
+ }
|
|
|
ae23c9 |
+ ret = 0;
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+fail:
|
|
|
ae23c9 |
+ qcow2_handle_l2meta(bs, &l2meta, false);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ qemu_co_mutex_unlock(&s->lock);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ qemu_vfree(cluster_data);
|
|
|
ae23c9 |
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
+ return ret;
|
|
|
ae23c9 |
+}
|
|
|
ae23c9 |
+
|
|
|
ae23c9 |
static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
|
|
|
ae23c9 |
PreallocMode prealloc, Error **errp)
|
|
|
ae23c9 |
{
|
|
|
ae23c9 |
@@ -4522,6 +4689,8 @@ BlockDriver bdrv_qcow2 = {
|
|
|
ae23c9 |
|
|
|
ae23c9 |
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
|
|
|
ae23c9 |
.bdrv_co_pdiscard = qcow2_co_pdiscard,
|
|
|
ae23c9 |
+ .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
|
|
|
ae23c9 |
+ .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
|
|
|
ae23c9 |
.bdrv_truncate = qcow2_truncate,
|
|
|
ae23c9 |
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
|
|
|
ae23c9 |
.bdrv_make_empty = qcow2_make_empty,
|
|
|
ae23c9 |
--
|
|
|
ae23c9 |
1.8.3.1
|
|
|
ae23c9 |
|