|
|
c025cf |
From d7a1fda2769ba272d89de6caeab35d52b73a9c3c Mon Sep 17 00:00:00 2001
|
|
|
c025cf |
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
|
|
|
c025cf |
Date: Wed, 17 Oct 2018 12:11:41 +0200
|
|
|
c025cf |
Subject: [RHEL7.7 PATCH 06/21] imsm: update metadata correctly while raid10
|
|
|
c025cf |
double degradation
|
|
|
c025cf |
|
|
|
c025cf |
Mdmon calls end_migration() when map state changes from normal to
|
|
|
c025cf |
degraded. This is not valid because in the raid10 double degradation case
|
|
|
c025cf |
mdmon breaks checkpointing while the array is still rebuilding.
|
|
|
c025cf |
In this case mdmon has to mark the map as degraded and continue marking
|
|
|
c025cf |
recovery checkpoint in metadata. Migration can be finished only if newly
|
|
|
c025cf |
failed device is a rebuilding device.
|
|
|
c025cf |
|
|
|
c025cf |
Also catch the double degraded to degraded transition. Migration is
|
|
|
c025cf |
finished but map state doesn't change, array is still degraded.
|
|
|
c025cf |
|
|
|
c025cf |
Update failed_disk_num correctly. If double degradation
|
|
|
c025cf |
happens rebuild will start on the lowest slot, but this variable points
|
|
|
c025cf |
to the first failed slot. If a second failure happens during rebuild, this
|
|
|
c025cf |
variable shouldn't be updated until the rebuild is finished.
|
|
|
c025cf |
|
|
|
c025cf |
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
|
|
|
c025cf |
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
|
|
c025cf |
---
|
|
|
c025cf |
super-intel.c | 25 +++++++++++++++++++------
|
|
|
c025cf |
1 file changed, 19 insertions(+), 6 deletions(-)
|
|
|
c025cf |
|
|
|
c025cf |
diff --git a/super-intel.c b/super-intel.c
|
|
|
c025cf |
index 6438987..d2035cc 100644
|
|
|
c025cf |
--- a/super-intel.c
|
|
|
c025cf |
+++ b/super-intel.c
|
|
|
c025cf |
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
|
|
|
c025cf |
set_imsm_ord_tbl_ent(map2, slot2,
|
|
|
c025cf |
idx | IMSM_ORD_REBUILD);
|
|
|
c025cf |
}
|
|
|
c025cf |
- if (map->failed_disk_num == 0xff)
|
|
|
c025cf |
+ if (map->failed_disk_num == 0xff ||
|
|
|
c025cf |
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
|
|
|
c025cf |
map->failed_disk_num = slot;
|
|
|
c025cf |
|
|
|
c025cf |
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
|
|
|
c025cf |
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
|
c025cf |
break;
|
|
|
c025cf |
}
|
|
|
c025cf |
if (is_rebuilding(dev)) {
|
|
|
c025cf |
- dprintf_cont("while rebuilding.");
|
|
|
c025cf |
+ dprintf_cont("while rebuilding ");
|
|
|
c025cf |
if (map->map_state != map_state) {
|
|
|
c025cf |
- dprintf_cont(" Map state change");
|
|
|
c025cf |
- end_migration(dev, super, map_state);
|
|
|
c025cf |
+ dprintf_cont("map state change ");
|
|
|
c025cf |
+ if (n == map->failed_disk_num) {
|
|
|
c025cf |
+ dprintf_cont("end migration");
|
|
|
c025cf |
+ end_migration(dev, super, map_state);
|
|
|
c025cf |
+ } else {
|
|
|
c025cf |
+ dprintf_cont("raid10 double degradation, map state change");
|
|
|
c025cf |
+ map->map_state = map_state;
|
|
|
c025cf |
+ }
|
|
|
c025cf |
super->updates_pending++;
|
|
|
c025cf |
- } else if (!rebuild_done) {
|
|
|
c025cf |
+ } else if (!rebuild_done)
|
|
|
c025cf |
break;
|
|
|
c025cf |
+ else if (n == map->failed_disk_num) {
|
|
|
c025cf |
+ /* r10 double degraded to degraded transition */
|
|
|
c025cf |
+ dprintf_cont("raid10 double degradation end migration");
|
|
|
c025cf |
+ end_migration(dev, super, map_state);
|
|
|
c025cf |
+ a->last_checkpoint = 0;
|
|
|
c025cf |
+ super->updates_pending++;
|
|
|
c025cf |
}
|
|
|
c025cf |
|
|
|
c025cf |
/* check if recovery is really finished */
|
|
|
c025cf |
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
|
c025cf |
}
|
|
|
c025cf |
if (recovery_not_finished) {
|
|
|
c025cf |
dprintf_cont("\n");
|
|
|
c025cf |
- dprintf("Rebuild has not finished yet, state not changed");
|
|
|
c025cf |
+ dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
|
|
|
c025cf |
if (a->last_checkpoint < mdi->recovery_start) {
|
|
|
c025cf |
a->last_checkpoint =
|
|
|
c025cf |
mdi->recovery_start;
|
|
|
c025cf |
--
|
|
|
c025cf |
2.7.5
|
|
|
c025cf |
|