dcavalca / rpms / mdadm

Forked from rpms/mdadm 3 years ago
Clone

Blame SOURCES/0073-Manage-imsm-Write-metadata-before-add.patch

2792dd
From 12724c018c964596aa277489fd287d5c3506361a Mon Sep 17 00:00:00 2001
2792dd
From: Tkaczyk Mariusz <mariusz.tkaczyk@intel.com>
2792dd
Date: Fri, 17 Apr 2020 13:55:55 +0200
2792dd
Subject: [RHEL7.9 PATCH 73/77] Manage, imsm: Write metadata before add
2792dd
2792dd
A new drive in a container always appears as a spare. Manager is able to
2792dd
handle that, and queues the appropriate update to monitor.
2792dd
No update from mdadm side has to be processed, just insert the drive and
2792dd
ping the mdmon. Metadata has to be written if no mdmon is running (case
2792dd
for Raid0 or container without arrays).
2792dd
2792dd
If a bare drive is added very early on startup (by custom bare rule),
2792dd
there is a possibility that mdmon was not restarted after switch root. Old
2792dd
one is not able to handle new drive. New one fails because there is
2792dd
drive without metadata in container and metadata cannot be loaded.
2792dd
2792dd
To prevent this, write spare metadata before adding device
2792dd
to container. Mdmon will overwrite it (same case as spare migration,
2792dd
if drive appears it writes the most recent metadata).
2792dd
Metadata has to be written only on new drive before sysfs_add_disk(),
2792dd
don't race with mdmon if running.
2792dd
2792dd
Signed-off-by: Tkaczyk Mariusz <mariusz.tkaczyk@intel.com>
2792dd
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
2792dd
---
2792dd
 Manage.c      |  6 +-----
2792dd
 super-intel.c | 66 ++++++++++++++++++++++++++++++++++++++---------------------
2792dd
 2 files changed, 44 insertions(+), 28 deletions(-)
2792dd
2792dd
diff --git a/Manage.c b/Manage.c
2792dd
index b22c396..0a5f09b 100644
2792dd
--- a/Manage.c
2792dd
+++ b/Manage.c
2792dd
@@ -994,17 +994,13 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
2792dd
 
2792dd
 		Kill(dv->devname, NULL, 0, -1, 0);
2792dd
 		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
2792dd
-		if (mdmon_running(tst->container_devnm))
2792dd
-			tst->update_tail = &tst->updates;
2792dd
 		if (tst->ss->add_to_super(tst, &disc, dfd,
2792dd
 					  dv->devname, INVALID_SECTORS)) {
2792dd
 			close(dfd);
2792dd
 			close(container_fd);
2792dd
 			return -1;
2792dd
 		}
2792dd
-		if (tst->update_tail)
2792dd
-			flush_metadata_updates(tst);
2792dd
-		else
2792dd
+		if (!mdmon_running(tst->container_devnm))
2792dd
 			tst->ss->sync_metadata(tst);
2792dd
 
2792dd
 		sra = sysfs_read(container_fd, NULL, 0);
2792dd
diff --git a/super-intel.c b/super-intel.c
2792dd
index 562a58c..3a73d2b 100644
2792dd
--- a/super-intel.c
2792dd
+++ b/super-intel.c
2792dd
@@ -5809,6 +5809,9 @@ int mark_spare(struct dl *disk)
2792dd
 	return ret_val;
2792dd
 }
2792dd
 
2792dd
+
2792dd
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d);
2792dd
+
2792dd
 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
2792dd
 			     int fd, char *devname,
2792dd
 			     unsigned long long data_offset)
2792dd
@@ -5938,9 +5941,13 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
2792dd
 		dd->next = super->disk_mgmt_list;
2792dd
 		super->disk_mgmt_list = dd;
2792dd
 	} else {
2792dd
+		/* this is called outside of mdmon
2792dd
+		 * write initial spare metadata
2792dd
+		 * mdmon will overwrite it.
2792dd
+		 */
2792dd
 		dd->next = super->disks;
2792dd
 		super->disks = dd;
2792dd
-		super->updates_pending++;
2792dd
+		write_super_imsm_spare(super, dd);
2792dd
 	}
2792dd
 
2792dd
 	return 0;
2792dd
@@ -5979,15 +5986,15 @@ static union {
2792dd
 	struct imsm_super anchor;
2792dd
 } spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE)));
2792dd
 
2792dd
-/* spare records have their own family number and do not have any defined raid
2792dd
- * devices
2792dd
- */
2792dd
-static int write_super_imsm_spares(struct intel_super *super, int doclose)
2792dd
+
2792dd
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d)
2792dd
 {
2792dd
 	struct imsm_super *mpb = super->anchor;
2792dd
 	struct imsm_super *spare = &spare_record.anchor;
2792dd
 	__u32 sum;
2792dd
-	struct dl *d;
2792dd
+
2792dd
+	if (d->index != -1)
2792dd
+		return 1;
2792dd
 
2792dd
 	spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
2792dd
 	spare->generation_num = __cpu_to_le32(1UL);
2792dd
@@ -6000,28 +6007,41 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose)
2792dd
 	snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
2792dd
 		 MPB_SIGNATURE MPB_VERSION_RAID0);
2792dd
 
2792dd
-	for (d = super->disks; d; d = d->next) {
2792dd
-		if (d->index != -1)
2792dd
-			continue;
2792dd
+	spare->disk[0] = d->disk;
2792dd
+	if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
2792dd
+		spare->attributes |= MPB_ATTRIB_2TB_DISK;
2792dd
 
2792dd
-		spare->disk[0] = d->disk;
2792dd
-		if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
2792dd
-			spare->attributes |= MPB_ATTRIB_2TB_DISK;
2792dd
+	if (super->sector_size == 4096)
2792dd
+		convert_to_4k_imsm_disk(&spare->disk[0]);
2792dd
 
2792dd
-		if (super->sector_size == 4096)
2792dd
-			convert_to_4k_imsm_disk(&spare->disk[0]);
2792dd
+	sum = __gen_imsm_checksum(spare);
2792dd
+	spare->family_num = __cpu_to_le32(sum);
2792dd
+	spare->orig_family_num = 0;
2792dd
+	sum = __gen_imsm_checksum(spare);
2792dd
+	spare->check_sum = __cpu_to_le32(sum);
2792dd
 
2792dd
-		sum = __gen_imsm_checksum(spare);
2792dd
-		spare->family_num = __cpu_to_le32(sum);
2792dd
-		spare->orig_family_num = 0;
2792dd
-		sum = __gen_imsm_checksum(spare);
2792dd
-		spare->check_sum = __cpu_to_le32(sum);
2792dd
+	if (store_imsm_mpb(d->fd, spare)) {
2792dd
+		pr_err("failed for device %d:%d %s\n",
2792dd
+			d->major, d->minor, strerror(errno));
2792dd
+		return 1;
2792dd
+	}
2792dd
+
2792dd
+	return 0;
2792dd
+}
2792dd
+/* spare records have their own family number and do not have any defined raid
2792dd
+ * devices
2792dd
+ */
2792dd
+static int write_super_imsm_spares(struct intel_super *super, int doclose)
2792dd
+{
2792dd
+	struct dl *d;
2792dd
+
2792dd
+	for (d = super->disks; d; d = d->next) {
2792dd
+		if (d->index != -1)
2792dd
+			continue;
2792dd
 
2792dd
-		if (store_imsm_mpb(d->fd, spare)) {
2792dd
-			pr_err("failed for device %d:%d %s\n",
2792dd
-				d->major, d->minor, strerror(errno));
2792dd
+		if (write_super_imsm_spare(super, d))
2792dd
 			return 1;
2792dd
-		}
2792dd
+
2792dd
 		if (doclose) {
2792dd
 			close(d->fd);
2792dd
 			d->fd = -1;
2792dd
-- 
2792dd
2.7.5
2792dd