|
|
b7337d |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
b7337d |
From: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
b7337d |
Date: Fri, 26 Jun 2020 20:06:24 -0500
|
|
|
b7337d |
Subject: [PATCH] kpartx: read devices with direct IO
|
|
|
b7337d |
|
|
|
b7337d |
If kpartx is used on top of shared storage, and a device has its
|
|
|
b7337d |
partition table changed on one machine, and then kpartx is run on
|
|
|
b7337d |
another, it may not see the new data, because the cache still contains
|
|
|
b7337d |
the old data, and there is nothing to tell the machine running kpartx to
|
|
|
b7337d |
invalidate it. To solve this, kpartx should read the devices using
|
|
|
b7337d |
direct IO.
|
|
|
b7337d |
|
|
|
b7337d |
One issue with how this code has been updated is that the original code
|
|
|
b7337d |
for getblock() always read 1024 bytes. The new code reads a logical
|
|
|
b7337d |
sector size chunk of the device, and returns a pointer to the 512 byte
|
|
|
b7337d |
sector that the caller asked for, within that (possibly larger) chunk.
|
|
|
b7337d |
This means that if the logical sector size is 512, then the code is now
|
|
|
b7337d |
only reading 512 bytes. Looking through the code for the various
|
|
|
b7337d |
partition types, I can't see a case where more than 512 bytes is needed
|
|
|
b7337d |
and getblock() is used. If anyone has a reason why this code should be
|
|
|
b7337d |
reading 1024 bytes at minimum, I can certainly change this. But when I
|
|
|
b7337d |
looked, I couldn't find a case where reading 512 bytes would cause a
|
|
|
b7337d |
problem.
|
|
|
b7337d |
|
|
|
b7337d |
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
b7337d |
---
|
|
|
b7337d |
kpartx/dasd.c | 7 ++++---
|
|
|
b7337d |
kpartx/gpt.c | 22 +++++++++----------
|
|
|
b7337d |
kpartx/kpartx.c | 56 +++++++++++++++++++++++++++++++++++++++----------
|
|
|
b7337d |
kpartx/kpartx.h | 2 ++
|
|
|
b7337d |
4 files changed, 61 insertions(+), 26 deletions(-)
|
|
|
b7337d |
|
|
|
b7337d |
diff --git a/kpartx/dasd.c b/kpartx/dasd.c
|
|
|
b7337d |
index 14b9d3aa..f0398645 100644
|
|
|
b7337d |
--- a/kpartx/dasd.c
|
|
|
b7337d |
+++ b/kpartx/dasd.c
|
|
|
b7337d |
@@ -22,6 +22,7 @@
|
|
|
b7337d |
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
b7337d |
*/
|
|
|
b7337d |
|
|
|
b7337d |
+#define _GNU_SOURCE
|
|
|
b7337d |
#include <stdio.h>
|
|
|
b7337d |
#include <stdlib.h>
|
|
|
b7337d |
#include <unistd.h>
|
|
|
b7337d |
@@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
|
|
|
b7337d |
|
|
|
b7337d |
sprintf(pathname, "/dev/.kpartx-node-%u-%u",
|
|
|
b7337d |
(unsigned int)major(dev), (unsigned int)minor(dev));
|
|
|
b7337d |
- if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
|
|
|
b7337d |
+ if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) == -1) {
|
|
|
b7337d |
/* Devicenode does not exist. Try to create one */
|
|
|
b7337d |
if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) {
|
|
|
b7337d |
/* Couldn't create a device node */
|
|
|
b7337d |
return -1;
|
|
|
b7337d |
}
|
|
|
b7337d |
- fd_dasd = open(pathname, O_RDONLY);
|
|
|
b7337d |
+ fd_dasd = open(pathname, O_RDONLY | O_DIRECT);
|
|
|
b7337d |
/*
|
|
|
b7337d |
* The file will vanish when the last process (we)
|
|
|
b7337d |
* has ceased to access it.
|
|
|
b7337d |
@@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused)) struct slice all,
|
|
|
b7337d |
* Get volume label, extract name and type.
|
|
|
b7337d |
*/
|
|
|
b7337d |
|
|
|
b7337d |
- if (!(data = (unsigned char *)malloc(blocksize)))
|
|
|
b7337d |
+ if (aligned_malloc((void **)&data, blocksize, NULL))
|
|
|
b7337d |
goto out;
|
|
|
b7337d |
|
|
|
b7337d |
|
|
|
b7337d |
diff --git a/kpartx/gpt.c b/kpartx/gpt.c
|
|
|
b7337d |
index 785b34ea..f7fefb70 100644
|
|
|
b7337d |
--- a/kpartx/gpt.c
|
|
|
b7337d |
+++ b/kpartx/gpt.c
|
|
|
b7337d |
@@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt)
|
|
|
b7337d |
|
|
|
b7337d |
if (!count) return NULL;
|
|
|
b7337d |
|
|
|
b7337d |
- pte = (gpt_entry *)malloc(count);
|
|
|
b7337d |
- if (!pte)
|
|
|
b7337d |
+ if (aligned_malloc((void **)&pte, get_sector_size(fd), &count))
|
|
|
b7337d |
return NULL;
|
|
|
b7337d |
memset(pte, 0, count);
|
|
|
b7337d |
|
|
|
b7337d |
@@ -269,12 +268,11 @@ static gpt_header *
|
|
|
b7337d |
alloc_read_gpt_header(int fd, uint64_t lba)
|
|
|
b7337d |
{
|
|
|
b7337d |
gpt_header *gpt;
|
|
|
b7337d |
- gpt = (gpt_header *)
|
|
|
b7337d |
- malloc(sizeof (gpt_header));
|
|
|
b7337d |
- if (!gpt)
|
|
|
b7337d |
+ size_t size = sizeof (gpt_header);
|
|
|
b7337d |
+ if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size))
|
|
|
b7337d |
return NULL;
|
|
|
b7337d |
- memset(gpt, 0, sizeof (*gpt));
|
|
|
b7337d |
- if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) {
|
|
|
b7337d |
+ memset(gpt, 0, size);
|
|
|
b7337d |
+ if (!read_lba(fd, lba, gpt, size)) {
|
|
|
b7337d |
free(gpt);
|
|
|
b7337d |
return NULL;
|
|
|
b7337d |
}
|
|
|
b7337d |
@@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
|
|
|
b7337d |
gpt_header *pgpt = NULL, *agpt = NULL;
|
|
|
b7337d |
gpt_entry *pptes = NULL, *aptes = NULL;
|
|
|
b7337d |
legacy_mbr *legacymbr = NULL;
|
|
|
b7337d |
+ size_t size = sizeof(legacy_mbr);
|
|
|
b7337d |
uint64_t lastlba;
|
|
|
b7337d |
if (!gpt || !ptes)
|
|
|
b7337d |
return 0;
|
|
|
b7337d |
@@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
|
|
|
b7337d |
}
|
|
|
b7337d |
|
|
|
b7337d |
/* This will be added to the EFI Spec. per Intel after v1.02. */
|
|
|
b7337d |
- legacymbr = malloc(sizeof (*legacymbr));
|
|
|
b7337d |
- if (legacymbr) {
|
|
|
b7337d |
- memset(legacymbr, 0, sizeof (*legacymbr));
|
|
|
b7337d |
- read_lba(fd, 0, (uint8_t *) legacymbr,
|
|
|
b7337d |
- sizeof (*legacymbr));
|
|
|
b7337d |
+ if (aligned_malloc((void **)&legacymbr, get_sector_size(fd),
|
|
|
b7337d |
+ &size) == 0) {
|
|
|
b7337d |
+ memset(legacymbr, 0, size);
|
|
|
b7337d |
+ read_lba(fd, 0, (uint8_t *) legacymbr, size);
|
|
|
b7337d |
good_pmbr = is_pmbr_valid(legacymbr);
|
|
|
b7337d |
free(legacymbr);
|
|
|
b7337d |
legacymbr=NULL;
|
|
|
b7337d |
diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
|
|
|
b7337d |
index d3620c5c..c24ad6d9 100644
|
|
|
b7337d |
--- a/kpartx/kpartx.c
|
|
|
b7337d |
+++ b/kpartx/kpartx.c
|
|
|
b7337d |
@@ -19,6 +19,7 @@
|
|
|
b7337d |
* cva, 2002-10-26
|
|
|
b7337d |
*/
|
|
|
b7337d |
|
|
|
b7337d |
+#define _GNU_SOURCE
|
|
|
b7337d |
#include <stdio.h>
|
|
|
b7337d |
#include <fcntl.h>
|
|
|
b7337d |
#include <errno.h>
|
|
|
b7337d |
@@ -41,7 +42,6 @@
|
|
|
b7337d |
|
|
|
b7337d |
#define SIZE(a) (sizeof(a)/sizeof((a)[0]))
|
|
|
b7337d |
|
|
|
b7337d |
-#define READ_SIZE 1024
|
|
|
b7337d |
#define MAXTYPES 64
|
|
|
b7337d |
#define MAXSLICES 256
|
|
|
b7337d |
#define DM_TARGET "linear"
|
|
|
b7337d |
@@ -388,7 +388,7 @@ main(int argc, char **argv){
|
|
|
b7337d |
set_delimiter(mapname, delim);
|
|
|
b7337d |
}
|
|
|
b7337d |
|
|
|
b7337d |
- fd = open(device, O_RDONLY);
|
|
|
b7337d |
+ fd = open(device, O_RDONLY | O_DIRECT);
|
|
|
b7337d |
|
|
|
b7337d |
if (fd == -1) {
|
|
|
b7337d |
perror(device);
|
|
|
b7337d |
@@ -690,9 +690,9 @@ xmalloc (size_t size) {
|
|
|
b7337d |
*/
|
|
|
b7337d |
|
|
|
b7337d |
static int
|
|
|
b7337d |
-sseek(int fd, unsigned int secnr) {
|
|
|
b7337d |
+sseek(int fd, unsigned int secnr, int secsz) {
|
|
|
b7337d |
off64_t in, out;
|
|
|
b7337d |
- in = ((off64_t) secnr << 9);
|
|
|
b7337d |
+ in = ((off64_t) secnr * secsz);
|
|
|
b7337d |
out = 1;
|
|
|
b7337d |
|
|
|
b7337d |
if ((out = lseek64(fd, in, SEEK_SET)) != in)
|
|
|
b7337d |
@@ -703,6 +703,31 @@ sseek(int fd, unsigned int secnr) {
|
|
|
b7337d |
return 0;
|
|
|
b7337d |
}
|
|
|
b7337d |
|
|
|
b7337d |
+int
|
|
|
b7337d |
+aligned_malloc(void **mem_p, size_t align, size_t *size_p)
|
|
|
b7337d |
+{
|
|
|
b7337d |
+ static size_t pgsize = 0;
|
|
|
b7337d |
+ size_t size;
|
|
|
b7337d |
+ int err;
|
|
|
b7337d |
+
|
|
|
b7337d |
+ if (!mem_p || !align || (size_p && !*size_p))
|
|
|
b7337d |
+ return EINVAL;
|
|
|
b7337d |
+
|
|
|
b7337d |
+ if (!pgsize)
|
|
|
b7337d |
+ pgsize = getpagesize();
|
|
|
b7337d |
+
|
|
|
b7337d |
+ if (size_p)
|
|
|
b7337d |
+ size = ((*size_p + align - 1) / align) * align;
|
|
|
b7337d |
+ else
|
|
|
b7337d |
+ size = pgsize;
|
|
|
b7337d |
+
|
|
|
b7337d |
+ err = posix_memalign(mem_p, pgsize, size);
|
|
|
b7337d |
+ if (!err && size_p)
|
|
|
b7337d |
+ *size_p = size;
|
|
|
b7337d |
+ return err;
|
|
|
b7337d |
+}
|
|
|
b7337d |
+
|
|
|
b7337d |
+/* always in sector size blocks */
|
|
|
b7337d |
static
|
|
|
b7337d |
struct block {
|
|
|
b7337d |
unsigned int secnr;
|
|
|
b7337d |
@@ -710,30 +735,39 @@ struct block {
|
|
|
b7337d |
struct block *next;
|
|
|
b7337d |
} *blockhead;
|
|
|
b7337d |
|
|
|
b7337d |
+/* blknr is always in 512 byte blocks */
|
|
|
b7337d |
char *
|
|
|
b7337d |
-getblock (int fd, unsigned int secnr) {
|
|
|
b7337d |
+getblock (int fd, unsigned int blknr) {
|
|
|
b7337d |
+ unsigned int secsz = get_sector_size(fd);
|
|
|
b7337d |
+ unsigned int blks_per_sec = secsz / 512;
|
|
|
b7337d |
+ unsigned int secnr = blknr / blks_per_sec;
|
|
|
b7337d |
+ unsigned int blk_off = (blknr % blks_per_sec) * 512;
|
|
|
b7337d |
struct block *bp;
|
|
|
b7337d |
|
|
|
b7337d |
for (bp = blockhead; bp; bp = bp->next)
|
|
|
b7337d |
|
|
|
b7337d |
if (bp->secnr == secnr)
|
|
|
b7337d |
- return bp->block;
|
|
|
b7337d |
+ return bp->block + blk_off;
|
|
|
b7337d |
|
|
|
b7337d |
- if (sseek(fd, secnr))
|
|
|
b7337d |
+ if (sseek(fd, secnr, secsz))
|
|
|
b7337d |
return NULL;
|
|
|
b7337d |
|
|
|
b7337d |
bp = xmalloc(sizeof(struct block));
|
|
|
b7337d |
bp->secnr = secnr;
|
|
|
b7337d |
bp->next = blockhead;
|
|
|
b7337d |
blockhead = bp;
|
|
|
b7337d |
- bp->block = (char *) xmalloc(READ_SIZE);
|
|
|
b7337d |
+ if (aligned_malloc((void **)&bp->block, secsz, NULL)) {
|
|
|
b7337d |
+ fprintf(stderr, "aligned_malloc failed\n");
|
|
|
b7337d |
+ exit(1);
|
|
|
b7337d |
+ }
|
|
|
b7337d |
|
|
|
b7337d |
- if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
|
|
|
b7337d |
+ if (read(fd, bp->block, secsz) != secsz) {
|
|
|
b7337d |
fprintf(stderr, "read error, sector %d\n", secnr);
|
|
|
b7337d |
- bp->block = NULL;
|
|
|
b7337d |
+ blockhead = bp->next;
|
|
|
b7337d |
+ return NULL;
|
|
|
b7337d |
}
|
|
|
b7337d |
|
|
|
b7337d |
- return bp->block;
|
|
|
b7337d |
+ return bp->block + blk_off;
|
|
|
b7337d |
}
|
|
|
b7337d |
|
|
|
b7337d |
int
|
|
|
b7337d |
diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
|
|
|
b7337d |
index 67edeb82..727632c1 100644
|
|
|
b7337d |
--- a/kpartx/kpartx.h
|
|
|
b7337d |
+++ b/kpartx/kpartx.h
|
|
|
b7337d |
@@ -1,6 +1,7 @@
|
|
|
b7337d |
#ifndef _KPARTX_H
|
|
|
b7337d |
#define _KPARTX_H
|
|
|
b7337d |
|
|
|
b7337d |
+#include <stddef.h>
|
|
|
b7337d |
#include <stdint.h>
|
|
|
b7337d |
#include <sys/ioctl.h>
|
|
|
b7337d |
|
|
|
b7337d |
@@ -61,6 +62,7 @@ extern ptreader read_mac_pt;
|
|
|
b7337d |
extern ptreader read_sun_pt;
|
|
|
b7337d |
extern ptreader read_ps3_pt;
|
|
|
b7337d |
|
|
|
b7337d |
+int aligned_malloc(void **mem_p, size_t align, size_t *size_p);
|
|
|
b7337d |
char *getblock(int fd, unsigned int secnr);
|
|
|
b7337d |
|
|
|
b7337d |
static inline unsigned int
|
|
|
b7337d |
--
|
|
|
b7337d |
2.17.2
|
|
|
b7337d |
|