|
|
5d5466 |
From d7a1fda2769ba272d89de6caeab35d52b73a9c3c Mon Sep 17 00:00:00 2001
|
|
|
5d5466 |
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
|
|
|
5d5466 |
Date: Wed, 17 Oct 2018 12:11:41 +0200
|
|
|
5d5466 |
Subject: [RHEL7.7 PATCH 06/24] imsm: update metadata correctly while raid10
|
|
|
5d5466 |
double degradation
|
|
|
5d5466 |
|
|
|
5d5466 |
Mdmon calls end_migration() when map state changes from normal to
|
|
|
5d5466 |
degraded. This is not valid because in the raid10 double degradation case
|
|
|
5d5466 |
mdmon breaks checkpointing but array is still rebuilding.
|
|
|
5d5466 |
In this case mdmon has to mark map as degraded and continue marking
|
|
|
5d5466 |
recovery checkpoint in metadata. Migration can be finished only if newly
|
|
|
5d5466 |
failed device is a rebuilding device.
|
|
|
5d5466 |
|
|
|
5d5466 |
Catch the double degraded to degraded transition. Migration is
|
|
|
5d5466 |
finished but map state doesn't change, array is still degraded.
|
|
|
5d5466 |
|
|
|
5d5466 |
Update failed_disk_num correctly. If double degradation
|
|
|
5d5466 |
happens rebuild will start on the lowest slot, but this variable points
|
|
|
5d5466 |
to the first failed slot. If a second failure happens during rebuild, this
|
|
|
5d5466 |
variable shouldn't be updated until the rebuild is finished.
|
|
|
5d5466 |
|
|
|
5d5466 |
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
|
|
|
5d5466 |
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
|
|
5d5466 |
---
|
|
|
5d5466 |
super-intel.c | 25 +++++++++++++++++++------
|
|
|
5d5466 |
1 file changed, 19 insertions(+), 6 deletions(-)
|
|
|
5d5466 |
|
|
|
5d5466 |
diff --git a/super-intel.c b/super-intel.c
|
|
|
5d5466 |
index 6438987..d2035cc 100644
|
|
|
5d5466 |
--- a/super-intel.c
|
|
|
5d5466 |
+++ b/super-intel.c
|
|
|
5d5466 |
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
|
|
|
5d5466 |
set_imsm_ord_tbl_ent(map2, slot2,
|
|
|
5d5466 |
idx | IMSM_ORD_REBUILD);
|
|
|
5d5466 |
}
|
|
|
5d5466 |
- if (map->failed_disk_num == 0xff)
|
|
|
5d5466 |
+ if (map->failed_disk_num == 0xff ||
|
|
|
5d5466 |
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
|
|
|
5d5466 |
map->failed_disk_num = slot;
|
|
|
5d5466 |
|
|
|
5d5466 |
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
|
|
|
5d5466 |
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
|
5d5466 |
break;
|
|
|
5d5466 |
}
|
|
|
5d5466 |
if (is_rebuilding(dev)) {
|
|
|
5d5466 |
- dprintf_cont("while rebuilding.");
|
|
|
5d5466 |
+ dprintf_cont("while rebuilding ");
|
|
|
5d5466 |
if (map->map_state != map_state) {
|
|
|
5d5466 |
- dprintf_cont(" Map state change");
|
|
|
5d5466 |
- end_migration(dev, super, map_state);
|
|
|
5d5466 |
+ dprintf_cont("map state change ");
|
|
|
5d5466 |
+ if (n == map->failed_disk_num) {
|
|
|
5d5466 |
+ dprintf_cont("end migration");
|
|
|
5d5466 |
+ end_migration(dev, super, map_state);
|
|
|
5d5466 |
+ } else {
|
|
|
5d5466 |
+ dprintf_cont("raid10 double degradation, map state change");
|
|
|
5d5466 |
+ map->map_state = map_state;
|
|
|
5d5466 |
+ }
|
|
|
5d5466 |
super->updates_pending++;
|
|
|
5d5466 |
- } else if (!rebuild_done) {
|
|
|
5d5466 |
+ } else if (!rebuild_done)
|
|
|
5d5466 |
break;
|
|
|
5d5466 |
+ else if (n == map->failed_disk_num) {
|
|
|
5d5466 |
+ /* r10 double degraded to degraded transition */
|
|
|
5d5466 |
+ dprintf_cont("raid10 double degradation end migration");
|
|
|
5d5466 |
+ end_migration(dev, super, map_state);
|
|
|
5d5466 |
+ a->last_checkpoint = 0;
|
|
|
5d5466 |
+ super->updates_pending++;
|
|
|
5d5466 |
}
|
|
|
5d5466 |
|
|
|
5d5466 |
/* check if recovery is really finished */
|
|
|
5d5466 |
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
|
|
5d5466 |
}
|
|
|
5d5466 |
if (recovery_not_finished) {
|
|
|
5d5466 |
dprintf_cont("\n");
|
|
|
5d5466 |
- dprintf("Rebuild has not finished yet, state not changed");
|
|
|
5d5466 |
+ dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
|
|
|
5d5466 |
if (a->last_checkpoint < mdi->recovery_start) {
|
|
|
5d5466 |
a->last_checkpoint =
|
|
|
5d5466 |
mdi->recovery_start;
|
|
|
5d5466 |
--
|
|
|
5d5466 |
2.7.5
|
|
|
5d5466 |
|