05bba0
From 6745478978aed56c72daed821e912a9f9644932a Mon Sep 17 00:00:00 2001
05bba0
From: Max Reitz <mreitz@redhat.com>
05bba0
Date: Sat, 13 Jun 2015 16:22:21 +0200
05bba0
Subject: [PATCH 27/42] qcow2: Do not perform potentially damaging repairs
05bba0
05bba0
Message-id: <1434212556-3927-28-git-send-email-mreitz@redhat.com>
05bba0
Patchwork-id: 66046
05bba0
O-Subject: [RHEL-7.2 qemu-kvm PATCH 27/42] qcow2: Do not perform potentially damaging repairs
05bba0
Bugzilla: 1129893
05bba0
RH-Acked-by: Jeffrey Cody <jcody@redhat.com>
05bba0
RH-Acked-by: Fam Zheng <famz@redhat.com>
05bba0
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
05bba0
05bba0
BZ: 1129893
05bba0
05bba0
If a referenced cluster has a refcount of 0, increasing its refcount may
05bba0
result in clusters being allocated for the refcount structures. This may
05bba0
overwrite the referenced cluster; therefore, we cannot simply increase
05bba0
the refcount in that case.
05bba0
05bba0
In such cases, we can either try to replicate all the refcount
05bba0
operations solely for the check operation, basing the allocations on the
05bba0
in-memory refcount table; or we can simply rebuild the whole refcount
05bba0
structure based on the in-memory refcount table. Since the latter will
05bba0
be much easier, do that.
05bba0
05bba0
To prepare for this, introduce a "rebuild" boolean which should be set
05bba0
to true whenever a fix is rather dangerous or too complicated using the
05bba0
current refcount structures. Another example of this is refcount blocks
05bba0
being referenced more than once.
05bba0
05bba0
Signed-off-by: Max Reitz <mreitz@redhat.com>
05bba0
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
05bba0
(cherry picked from commit f307b2558f61e068ce514f2dde2cad74c62036d6)
05bba0
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
05bba0
05bba0
Conflicts:
05bba0
	block/qcow2-refcount.c
05bba0
05bba0
Some conflicts in the code that is being removed.
05bba0
05bba0
Signed-off-by: Max Reitz <mreitz@redhat.com>
05bba0
---
05bba0
 block/qcow2-refcount.c | 186 +++++++------------------------------------------
05bba0
 1 file changed, 27 insertions(+), 159 deletions(-)
05bba0
05bba0
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
05bba0
index 8ce0447..3d66e7c 100644
05bba0
--- a/block/qcow2-refcount.c
05bba0
+++ b/block/qcow2-refcount.c
05bba0
@@ -1424,125 +1424,12 @@ fail:
05bba0
 }
05bba0
 
05bba0
 /*
05bba0
- * Writes one sector of the refcount table to the disk
05bba0
- */
05bba0
-#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
05bba0
-static int write_reftable_entry(BlockDriverState *bs, int rt_index)
05bba0
-{
05bba0
-    BDRVQcowState *s = bs->opaque;
05bba0
-    uint64_t buf[RT_ENTRIES_PER_SECTOR];
05bba0
-    int rt_start_index;
05bba0
-    int i, ret;
05bba0
-
05bba0
-    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
05bba0
-    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
05bba0
-        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
05bba0
-    }
05bba0
-
05bba0
-    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE,
05bba0
-            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
05bba0
-            sizeof(buf));
05bba0
-    if (ret < 0) {
05bba0
-        return ret;
05bba0
-    }
05bba0
-
05bba0
-    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
05bba0
-    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
05bba0
-            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
05bba0
-    if (ret < 0) {
05bba0
-        return ret;
05bba0
-    }
05bba0
-
05bba0
-    return 0;
05bba0
-}
05bba0
-
05bba0
-/*
05bba0
- * Allocates a new cluster for the given refcount block (represented by its
05bba0
- * offset in the image file) and copies the current content there. This function
05bba0
- * does _not_ decrement the reference count for the currently occupied cluster.
05bba0
- *
05bba0
- * This function prints an informative message to stderr on error (and returns
05bba0
- * -errno); on success, 0 is returned.
05bba0
- */
05bba0
-static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
05bba0
-                                      uint64_t offset)
05bba0
-{
05bba0
-    BDRVQcowState *s = bs->opaque;
05bba0
-    int64_t new_offset = 0;
05bba0
-    void *refcount_block = NULL;
05bba0
-    int ret;
05bba0
-
05bba0
-    /* allocate new refcount block */
05bba0
-    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
05bba0
-    if (new_offset < 0) {
05bba0
-        fprintf(stderr, "Could not allocate new cluster: %s\n",
05bba0
-                strerror(-new_offset));
05bba0
-        ret = new_offset;
05bba0
-        goto fail;
05bba0
-    }
05bba0
-
05bba0
-    /* fetch current refcount block content */
05bba0
-    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
05bba0
-    if (ret < 0) {
05bba0
-        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
05bba0
-        goto fail;
05bba0
-    }
05bba0
-
05bba0
-    /* new block has not yet been entered into refcount table, therefore it is
05bba0
-     * no refcount block yet (regarding this check) */
05bba0
-    ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size);
05bba0
-    if (ret < 0) {
05bba0
-        fprintf(stderr, "Could not write refcount block; metadata overlap "
05bba0
-                "check failed: %s\n", strerror(-ret));
05bba0
-        /* the image will be marked corrupt, so don't even attempt on freeing
05bba0
-         * the cluster */
05bba0
-        new_offset = 0;
05bba0
-        goto fail;
05bba0
-    }
05bba0
-
05bba0
-    /* write to new block */
05bba0
-    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
05bba0
-            s->cluster_sectors);
05bba0
-    if (ret < 0) {
05bba0
-        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
05bba0
-        goto fail;
05bba0
-    }
05bba0
-
05bba0
-    /* update refcount table */
05bba0
-    assert(!(new_offset & (s->cluster_size - 1)));
05bba0
-    s->refcount_table[reftable_index] = new_offset;
05bba0
-    ret = write_reftable_entry(bs, reftable_index);
05bba0
-    if (ret < 0) {
05bba0
-        fprintf(stderr, "Could not update refcount table: %s\n",
05bba0
-                strerror(-ret));
05bba0
-        goto fail;
05bba0
-    }
05bba0
-
05bba0
-fail:
05bba0
-    if (new_offset && (ret < 0)) {
05bba0
-        qcow2_free_clusters(bs, new_offset, s->cluster_size,
05bba0
-                QCOW2_DISCARD_ALWAYS);
05bba0
-    }
05bba0
-    if (refcount_block) {
05bba0
-        if (ret < 0) {
05bba0
-            qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
05bba0
-        } else {
05bba0
-            ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
05bba0
-        }
05bba0
-    }
05bba0
-    if (ret < 0) {
05bba0
-        return ret;
05bba0
-    }
05bba0
-    return new_offset;
05bba0
-}
05bba0
-
05bba0
-/*
05bba0
  * Checks consistency of refblocks and accounts for each refblock in
05bba0
  * *refcount_table.
05bba0
  */
05bba0
 static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
-                           BdrvCheckMode fix, uint16_t **refcount_table,
05bba0
-                           int64_t *nb_clusters)
05bba0
+                           BdrvCheckMode fix, bool *rebuild,
05bba0
+                           uint16_t **refcount_table, int64_t *nb_clusters)
05bba0
 {
05bba0
     BDRVQcowState *s = bs->opaque;
05bba0
     int64_t i, size;
05bba0
@@ -1558,6 +1445,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
             fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
05bba0
                 "cluster aligned; refcount table entry corrupted\n", i);
05bba0
             res->corruptions++;
05bba0
+            *rebuild = true;
05bba0
             continue;
05bba0
         }
05bba0
 
05bba0
@@ -1619,6 +1507,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
 
05bba0
 resize_fail:
05bba0
                 res->corruptions++;
05bba0
+                *rebuild = true;
05bba0
                 fprintf(stderr, "ERROR could not resize image: %s\n",
05bba0
                         strerror(-ret));
05bba0
             } else {
05bba0
@@ -1634,43 +1523,10 @@ resize_fail:
05bba0
                 return ret;
05bba0
             }
05bba0
             if ((*refcount_table)[cluster] != 1) {
05bba0
-                fprintf(stderr, "%s refcount block %" PRId64
05bba0
-                    " refcount=%d\n",
05bba0
-                    fix & BDRV_FIX_ERRORS ? "Repairing" :
05bba0
-                                            "ERROR",
05bba0
-                    i, (*refcount_table)[cluster]);
05bba0
-
05bba0
-                if (fix & BDRV_FIX_ERRORS) {
05bba0
-                    int64_t new_offset;
05bba0
-
05bba0
-                    new_offset = realloc_refcount_block(bs, i, offset);
05bba0
-                    if (new_offset < 0) {
05bba0
-                        res->corruptions++;
05bba0
-                        continue;
05bba0
-                    }
05bba0
-
05bba0
-                    /* update refcounts */
05bba0
-                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
05bba0
-                        /* increase refcount_table size if necessary */
05bba0
-                        int old_nb_clusters = *nb_clusters;
05bba0
-                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
05bba0
-                        *refcount_table = g_renew(uint16_t, *refcount_table,
05bba0
-                                                  *nb_clusters);
05bba0
-                        memset(&(*refcount_table)[old_nb_clusters], 0,
05bba0
-                               (*nb_clusters - old_nb_clusters) *
05bba0
-                               sizeof(**refcount_table));
05bba0
-                    }
05bba0
-                    (*refcount_table)[cluster]--;
05bba0
-                    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
05bba0
-                                        new_offset, s->cluster_size);
05bba0
-                    if (ret < 0) {
05bba0
-                        return ret;
05bba0
-                    }
05bba0
-
05bba0
-                    res->corruptions_fixed++;
05bba0
-                } else {
05bba0
-                    res->corruptions++;
05bba0
-                }
05bba0
+                fprintf(stderr, "ERROR refcount block %" PRId64
05bba0
+                        " refcount=%d\n", i, (*refcount_table)[cluster]);
05bba0
+                res->corruptions++;
05bba0
+                *rebuild = true;
05bba0
             }
05bba0
         }
05bba0
     }
05bba0
@@ -1682,8 +1538,8 @@ resize_fail:
05bba0
  * Calculates an in-memory refcount table.
05bba0
  */
05bba0
 static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
-                               BdrvCheckMode fix, uint16_t **refcount_table,
05bba0
-                               int64_t *nb_clusters)
05bba0
+                               BdrvCheckMode fix, bool *rebuild,
05bba0
+                               uint16_t **refcount_table, int64_t *nb_clusters)
05bba0
 {
05bba0
     BDRVQcowState *s = bs->opaque;
05bba0
     int64_t i;
05bba0
@@ -1735,7 +1591,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
         return ret;
05bba0
     }
05bba0
 
05bba0
-    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
05bba0
+    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
05bba0
 }
05bba0
 
05bba0
 /*
05bba0
@@ -1743,7 +1599,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
  * refcount as reported by the refcount structures on-disk.
05bba0
  */
05bba0
 static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
-                              BdrvCheckMode fix, int64_t *highest_cluster,
05bba0
+                              BdrvCheckMode fix, bool *rebuild,
05bba0
+                              int64_t *highest_cluster,
05bba0
                               uint16_t *refcount_table, int64_t nb_clusters)
05bba0
 {
05bba0
     BDRVQcowState *s = bs->opaque;
05bba0
@@ -1768,7 +1625,9 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
         if (refcount1 != refcount2) {
05bba0
             /* Check if we're allowed to fix the mismatch */
05bba0
             int *num_fixed = NULL;
05bba0
-            if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
05bba0
+            if (refcount1 == 0) {
05bba0
+                *rebuild = true;
05bba0
+            } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
05bba0
                 num_fixed = &res->leaks_fixed;
05bba0
             } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
05bba0
                 num_fixed = &res->corruptions_fixed;
05bba0
@@ -1812,6 +1671,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
     BDRVQcowState *s = bs->opaque;
05bba0
     int64_t size, highest_cluster, nb_clusters;
05bba0
     uint16_t *refcount_table = NULL;
05bba0
+    bool rebuild = false;
05bba0
     int ret;
05bba0
 
05bba0
     size = bdrv_getlength(bs->file);
05bba0
@@ -1829,14 +1689,22 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
05bba0
     res->bfi.total_clusters =
05bba0
         size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
05bba0
 
05bba0
-    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
05bba0
+    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
05bba0
+                              &nb_clusters);
05bba0
     if (ret < 0) {
05bba0
         goto fail;
05bba0
     }
05bba0
 
05bba0
-    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
05bba0
+    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
05bba0
                       nb_clusters);
05bba0
 
05bba0
+    if (rebuild) {
05bba0
+        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
05bba0
+        res->check_errors++;
05bba0
+        /* Just carry on, the rest does not rely on the on-disk refcount
05bba0
+         * structures */
05bba0
+    }
05bba0
+
05bba0
     /* check OFLAG_COPIED */
05bba0
     ret = check_oflag_copied(bs, res, fix);
05bba0
     if (ret < 0) {
05bba0
-- 
05bba0
1.8.3.1
05bba0