Blame SOURCES/0032-kpartx-read-devices-with-direct-IO.patch

b7337d
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
b7337d
From: Benjamin Marzinski <bmarzins@redhat.com>
b7337d
Date: Fri, 26 Jun 2020 20:06:24 -0500
b7337d
Subject: [PATCH] kpartx: read devices with direct IO
b7337d
b7337d
If kpartx is used on top of shared storage, and a device has its
b7337d
partition table changed on one machine, and then kpartx is run on
b7337d
another, it may not see the new data, because the cache still contains
b7337d
the old data, and there is nothing to tell the machine running kpartx to
b7337d
invalidate it. To solve this, kpartx should read the devices using
b7337d
direct IO.
b7337d
b7337d
One issue with how this code has been updated is that the original code
b7337d
for getblock() always read 1024 bytes. The new code reads a logical
b7337d
sector size chunk of the device, and returns a pointer to the 512 byte
b7337d
sector that the caller asked for, within that (possibly larger) chunk.
b7337d
This means that if the logical sector size is 512, then the code is now
b7337d
only reading 512 bytes.  Looking through the code for the various
b7337d
partition types, I can't see a case where more than 512 bytes is needed
b7337d
and getblock() is used.  If anyone has a reason why this code should be
b7337d
reading 1024 bytes at minimum, I can certainly change this.  But when I
b7337d
looked, I couldn't find a case where reading 512 bytes would cause a
b7337d
problem.
b7337d
b7337d
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
b7337d
---
b7337d
 kpartx/dasd.c   |  7 ++++---
b7337d
 kpartx/gpt.c    | 22 +++++++++----------
b7337d
 kpartx/kpartx.c | 56 +++++++++++++++++++++++++++++++++++++++----------
b7337d
 kpartx/kpartx.h |  2 ++
b7337d
 4 files changed, 61 insertions(+), 26 deletions(-)
b7337d
b7337d
diff --git a/kpartx/dasd.c b/kpartx/dasd.c
b7337d
index 14b9d3aa..f0398645 100644
b7337d
--- a/kpartx/dasd.c
b7337d
+++ b/kpartx/dasd.c
b7337d
@@ -22,6 +22,7 @@
b7337d
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
b7337d
  */
b7337d
 
b7337d
+#define _GNU_SOURCE
b7337d
 #include <stdio.h>
b7337d
 #include <stdlib.h>
b7337d
 #include <unistd.h>
b7337d
@@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
b7337d
 
b7337d
 		sprintf(pathname, "/dev/.kpartx-node-%u-%u",
b7337d
 			(unsigned int)major(dev), (unsigned int)minor(dev));
b7337d
-		if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
b7337d
+		if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) == -1) {
b7337d
 			/* Devicenode does not exist. Try to create one */
b7337d
 			if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) {
b7337d
 				/* Couldn't create a device node */
b7337d
 				return -1;
b7337d
 			}
b7337d
-			fd_dasd = open(pathname, O_RDONLY);
b7337d
+			fd_dasd = open(pathname, O_RDONLY | O_DIRECT);
b7337d
 			/*
b7337d
 			 * The file will vanish when the last process (we)
b7337d
 			 * has ceased to access it.
b7337d
@@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
b7337d
 	 * Get volume label, extract name and type.
b7337d
 	 */
b7337d
 
b7337d
-	if (!(data = (unsigned char *)malloc(blocksize)))
b7337d
+	if (aligned_malloc((void **)&data, blocksize, NULL))
b7337d
 		goto out;
b7337d
 
b7337d
 
b7337d
diff --git a/kpartx/gpt.c b/kpartx/gpt.c
b7337d
index 785b34ea..f7fefb70 100644
b7337d
--- a/kpartx/gpt.c
b7337d
+++ b/kpartx/gpt.c
b7337d
@@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt)
b7337d
 
b7337d
 	if (!count) return NULL;
b7337d
 
b7337d
-	pte = (gpt_entry *)malloc(count);
b7337d
-	if (!pte)
b7337d
+	if (aligned_malloc((void **)&pte, get_sector_size(fd), &count))
b7337d
 		return NULL;
b7337d
 	memset(pte, 0, count);
b7337d
 
b7337d
@@ -269,12 +268,11 @@ static gpt_header *
b7337d
 alloc_read_gpt_header(int fd, uint64_t lba)
b7337d
 {
b7337d
 	gpt_header *gpt;
b7337d
-	gpt = (gpt_header *)
b7337d
-	    malloc(sizeof (gpt_header));
b7337d
-	if (!gpt)
b7337d
+	size_t size = sizeof (gpt_header);
b7337d
+	if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size))
b7337d
 		return NULL;
b7337d
-	memset(gpt, 0, sizeof (*gpt));
b7337d
-	if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) {
b7337d
+	memset(gpt, 0, size);
b7337d
+	if (!read_lba(fd, lba, gpt, size)) {
b7337d
 		free(gpt);
b7337d
 		return NULL;
b7337d
 	}
b7337d
@@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
b7337d
 	gpt_header *pgpt = NULL, *agpt = NULL;
b7337d
 	gpt_entry *pptes = NULL, *aptes = NULL;
b7337d
 	legacy_mbr *legacymbr = NULL;
b7337d
+	size_t size = sizeof(legacy_mbr);
b7337d
 	uint64_t lastlba;
b7337d
 	if (!gpt || !ptes)
b7337d
 		return 0;
b7337d
@@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
b7337d
 	}
b7337d
 
b7337d
 	/* This will be added to the EFI Spec. per Intel after v1.02. */
b7337d
-	legacymbr = malloc(sizeof (*legacymbr));
b7337d
-	if (legacymbr) {
b7337d
-		memset(legacymbr, 0, sizeof (*legacymbr));
b7337d
-		read_lba(fd, 0, (uint8_t *) legacymbr,
b7337d
-			 sizeof (*legacymbr));
b7337d
+	if (aligned_malloc((void **)&legacymbr, get_sector_size(fd),
b7337d
+			   &size) == 0) {
b7337d
+		memset(legacymbr, 0, size);
b7337d
+		read_lba(fd, 0, (uint8_t *) legacymbr, size);
b7337d
 		good_pmbr = is_pmbr_valid(legacymbr);
b7337d
 		free(legacymbr);
b7337d
 		legacymbr=NULL;
b7337d
diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
b7337d
index d3620c5c..c24ad6d9 100644
b7337d
--- a/kpartx/kpartx.c
b7337d
+++ b/kpartx/kpartx.c
b7337d
@@ -19,6 +19,7 @@
b7337d
  * cva, 2002-10-26
b7337d
  */
b7337d
 
b7337d
+#define _GNU_SOURCE
b7337d
 #include <stdio.h>
b7337d
 #include <fcntl.h>
b7337d
 #include <errno.h>
b7337d
@@ -41,7 +42,6 @@
b7337d
 
b7337d
 #define SIZE(a) (sizeof(a)/sizeof((a)[0]))
b7337d
 
b7337d
-#define READ_SIZE	1024
b7337d
 #define MAXTYPES	64
b7337d
 #define MAXSLICES	256
b7337d
 #define DM_TARGET	"linear"
b7337d
@@ -388,7 +388,7 @@ main(int argc, char **argv){
b7337d
 		set_delimiter(mapname, delim);
b7337d
 	}
b7337d
 
b7337d
-	fd = open(device, O_RDONLY);
b7337d
+	fd = open(device, O_RDONLY | O_DIRECT);
b7337d
 
b7337d
 	if (fd == -1) {
b7337d
 		perror(device);
b7337d
@@ -690,9 +690,9 @@ xmalloc (size_t size) {
b7337d
  */
b7337d
 
b7337d
 static int
b7337d
-sseek(int fd, unsigned int secnr) {
b7337d
+sseek(int fd, unsigned int secnr, int secsz) {
b7337d
 	off64_t in, out;
b7337d
-	in = ((off64_t) secnr << 9);
b7337d
+	in = ((off64_t) secnr * secsz);
b7337d
 	out = 1;
b7337d
 
b7337d
 	if ((out = lseek64(fd, in, SEEK_SET)) != in)
b7337d
@@ -703,6 +703,31 @@ sseek(int fd, unsigned int secnr) {
b7337d
 	return 0;
b7337d
 }
b7337d
 
b7337d
+int
b7337d
+aligned_malloc(void **mem_p, size_t align, size_t *size_p)
b7337d
+{
b7337d
+	static size_t pgsize = 0;
b7337d
+	size_t size;
b7337d
+	int err;
b7337d
+
b7337d
+	if (!mem_p || !align || (size_p && !*size_p))
b7337d
+		return EINVAL;
b7337d
+
b7337d
+	if (!pgsize)
b7337d
+		pgsize = getpagesize();
b7337d
+
b7337d
+	if (size_p)
b7337d
+		size = ((*size_p + align - 1) / align) * align;
b7337d
+	else
b7337d
+		size = pgsize;
b7337d
+
b7337d
+	err = posix_memalign(mem_p, pgsize, size);
b7337d
+	if (!err && size_p)
b7337d
+		*size_p = size;
b7337d
+	return err;
b7337d
+}
b7337d
+
b7337d
+/* always in sector size blocks */
b7337d
 static
b7337d
 struct block {
b7337d
 	unsigned int secnr;
b7337d
@@ -710,30 +735,39 @@ struct block {
b7337d
 	struct block *next;
b7337d
 } *blockhead;
b7337d
 
b7337d
+/* blknr is always in 512 byte blocks */
b7337d
 char *
b7337d
-getblock (int fd, unsigned int secnr) {
b7337d
+getblock (int fd, unsigned int blknr) {
b7337d
+	unsigned int secsz = get_sector_size(fd);
b7337d
+	unsigned int blks_per_sec = secsz / 512;
b7337d
+	unsigned int secnr = blknr / blks_per_sec;
b7337d
+	unsigned int blk_off = (blknr % blks_per_sec) * 512;
b7337d
 	struct block *bp;
b7337d
 
b7337d
 	for (bp = blockhead; bp; bp = bp->next)
b7337d
 
b7337d
 		if (bp->secnr == secnr)
b7337d
-			return bp->block;
b7337d
+			return bp->block + blk_off;
b7337d
 
b7337d
-	if (sseek(fd, secnr))
b7337d
+	if (sseek(fd, secnr, secsz))
b7337d
 		return NULL;
b7337d
 
b7337d
 	bp = xmalloc(sizeof(struct block));
b7337d
 	bp->secnr = secnr;
b7337d
 	bp->next = blockhead;
b7337d
 	blockhead = bp;
b7337d
-	bp->block = (char *) xmalloc(READ_SIZE);
b7337d
+	if (aligned_malloc((void **)&bp->block, secsz, NULL)) {
b7337d
+		fprintf(stderr, "aligned_malloc failed\n");
b7337d
+		exit(1);
b7337d
+	}
b7337d
 
b7337d
-	if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
b7337d
+	if (read(fd, bp->block, secsz) != secsz) {
b7337d
 		fprintf(stderr, "read error, sector %d\n", secnr);
b7337d
-		bp->block = NULL;
b7337d
+		blockhead = bp->next;
b7337d
+		return NULL;
b7337d
 	}
b7337d
 
b7337d
-	return bp->block;
b7337d
+	return bp->block + blk_off;
b7337d
 }
b7337d
 
b7337d
 int
b7337d
diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
b7337d
index 67edeb82..727632c1 100644
b7337d
--- a/kpartx/kpartx.h
b7337d
+++ b/kpartx/kpartx.h
b7337d
@@ -1,6 +1,7 @@
b7337d
 #ifndef _KPARTX_H
b7337d
 #define _KPARTX_H
b7337d
 
b7337d
+#include <stddef.h>
b7337d
 #include <stdint.h>
b7337d
 #include <sys/ioctl.h>
b7337d
 
b7337d
@@ -61,6 +62,7 @@ extern ptreader read_mac_pt;
b7337d
 extern ptreader read_sun_pt;
b7337d
 extern ptreader read_ps3_pt;
b7337d
 
b7337d
+int aligned_malloc(void **mem_p, size_t align, size_t *size_p);
b7337d
 char *getblock(int fd, unsigned int secnr);
b7337d
 
b7337d
 static inline unsigned int
b7337d
-- 
b7337d
2.17.2
b7337d