|
|
22c213 |
From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001
|
|
|
22c213 |
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
|
|
22c213 |
Date: Mon, 27 Jan 2020 19:01:17 +0100
|
|
|
22c213 |
Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops
|
|
|
22c213 |
MIME-Version: 1.0
|
|
|
22c213 |
Content-Type: text/plain; charset=UTF-8
|
|
|
22c213 |
Content-Transfer-Encoding: 8bit
|
|
|
22c213 |
|
|
|
22c213 |
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
|
22c213 |
Message-id: <20200127190227.40942-43-dgilbert@redhat.com>
|
|
|
22c213 |
Patchwork-id: 93496
|
|
|
22c213 |
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops
|
|
|
22c213 |
Bugzilla: 1694164
|
|
|
22c213 |
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
|
|
22c213 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
22c213 |
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
|
|
|
22c213 |
|
|
|
22c213 |
From: Miklos Szeredi <mszeredi@redhat.com>
|
|
|
22c213 |
|
|
|
22c213 |
We have two operations that cannot be done race-free on a symlink in
|
|
|
22c213 |
certain cases: utimes and link.
|
|
|
22c213 |
|
|
|
22c213 |
Add racy fallback for these if the race-free method doesn't work. We do
|
|
|
22c213 |
our best to avoid races even in this case:
|
|
|
22c213 |
|
|
|
22c213 |
- get absolute path by reading /proc/self/fd/NN symlink
|
|
|
22c213 |
|
|
|
22c213 |
- lookup parent directory: after this we are safe against renames in
|
|
|
22c213 |
ancestors
|
|
|
22c213 |
|
|
|
22c213 |
- lookup name in parent directory, and verify that we got to the original
|
|
|
22c213 |
inode, if not retry the whole thing
|
|
|
22c213 |
|
|
|
22c213 |
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
|
|
|
22c213 |
so a racing rename/delete by this fuse instance is not possible, only from
|
|
|
22c213 |
other entities changing the filesystem.
|
|
|
22c213 |
|
|
|
22c213 |
If the "norace" option is given, then disable the racy fallbacks.
|
|
|
22c213 |
|
|
|
22c213 |
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
|
|
|
22c213 |
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
|
|
|
22c213 |
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
|
22c213 |
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
|
|
|
22c213 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
22c213 |
---
|
|
|
22c213 |
tools/virtiofsd/helper.c | 5 +-
|
|
|
22c213 |
tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++----
|
|
|
22c213 |
2 files changed, 145 insertions(+), 17 deletions(-)
|
|
|
22c213 |
|
|
|
22c213 |
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
|
|
|
22c213 |
index b8ec5ac..5531425 100644
|
|
|
22c213 |
--- a/tools/virtiofsd/helper.c
|
|
|
22c213 |
+++ b/tools/virtiofsd/helper.c
|
|
|
22c213 |
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
|
|
|
22c213 |
" --daemonize run in background\n"
|
|
|
22c213 |
" -o max_idle_threads the maximum number of idle worker "
|
|
|
22c213 |
"threads\n"
|
|
|
22c213 |
- " allowed (default: 10)\n");
|
|
|
22c213 |
+ " allowed (default: 10)\n"
|
|
|
22c213 |
+ " -o norace disable racy fallback\n"
|
|
|
22c213 |
+ " default: false\n"
|
|
|
22c213 |
+ );
|
|
|
22c213 |
}
|
|
|
22c213 |
|
|
|
22c213 |
static int fuse_helper_opt_proc(void *data, const char *arg, int key,
|
|
|
22c213 |
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
|
|
22c213 |
index 9815bfa..ac380ef 100644
|
|
|
22c213 |
--- a/tools/virtiofsd/passthrough_ll.c
|
|
|
22c213 |
+++ b/tools/virtiofsd/passthrough_ll.c
|
|
|
22c213 |
@@ -98,6 +98,7 @@ enum {
|
|
|
22c213 |
struct lo_data {
|
|
|
22c213 |
pthread_mutex_t mutex;
|
|
|
22c213 |
int debug;
|
|
|
22c213 |
+ int norace;
|
|
|
22c213 |
int writeback;
|
|
|
22c213 |
int flock;
|
|
|
22c213 |
int xattr;
|
|
|
22c213 |
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
|
|
|
22c213 |
{ "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
|
|
|
22c213 |
{ "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
|
|
|
22c213 |
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
|
|
|
22c213 |
-
|
|
|
22c213 |
+ { "norace", offsetof(struct lo_data, norace), 1 },
|
|
|
22c213 |
FUSE_OPT_END
|
|
|
22c213 |
};
|
|
|
22c213 |
|
|
|
22c213 |
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
|
|
|
22c213 |
+
|
|
|
22c213 |
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
|
|
|
22c213 |
+
|
|
|
22c213 |
+
|
|
|
22c213 |
static struct lo_data *lo_data(fuse_req_t req)
|
|
|
22c213 |
{
|
|
|
22c213 |
return (struct lo_data *)fuse_req_userdata(req);
|
|
|
22c213 |
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
|
|
|
22c213 |
fuse_reply_attr(req, &buf, lo->timeout);
|
|
|
22c213 |
}
|
|
|
22c213 |
|
|
|
22c213 |
-static int utimensat_empty_nofollow(struct lo_inode *inode,
|
|
|
22c213 |
- const struct timespec *tv)
|
|
|
22c213 |
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
|
|
|
22c213 |
+ char path[PATH_MAX], struct lo_inode **parent)
|
|
|
22c213 |
{
|
|
|
22c213 |
- int res;
|
|
|
22c213 |
char procname[64];
|
|
|
22c213 |
+ char *last;
|
|
|
22c213 |
+ struct stat stat;
|
|
|
22c213 |
+ struct lo_inode *p;
|
|
|
22c213 |
+ int retries = 2;
|
|
|
22c213 |
+ int res;
|
|
|
22c213 |
+
|
|
|
22c213 |
+retry:
|
|
|
22c213 |
+ sprintf(procname, "/proc/self/fd/%i", inode->fd);
|
|
|
22c213 |
+
|
|
|
22c213 |
+ res = readlink(procname, path, PATH_MAX);
|
|
|
22c213 |
+ if (res < 0) {
|
|
|
22c213 |
+ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
|
|
|
22c213 |
+ goto fail_noretry;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+
|
|
|
22c213 |
+ if (res >= PATH_MAX) {
|
|
|
22c213 |
+ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
|
|
|
22c213 |
+ goto fail_noretry;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ path[res] = '\0';
|
|
|
22c213 |
+
|
|
|
22c213 |
+ last = strrchr(path, '/');
|
|
|
22c213 |
+ if (last == NULL) {
|
|
|
22c213 |
+ /* Shouldn't happen */
|
|
|
22c213 |
+ fuse_log(
|
|
|
22c213 |
+ FUSE_LOG_WARNING,
|
|
|
22c213 |
+ "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
|
|
|
22c213 |
+ goto fail_noretry;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ if (last == path) {
|
|
|
22c213 |
+ p = &lo->root;
|
|
|
22c213 |
+ pthread_mutex_lock(&lo->mutex);
|
|
|
22c213 |
+ p->refcount++;
|
|
|
22c213 |
+ pthread_mutex_unlock(&lo->mutex);
|
|
|
22c213 |
+ } else {
|
|
|
22c213 |
+ *last = '\0';
|
|
|
22c213 |
+ res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
|
|
|
22c213 |
+ if (res == -1) {
|
|
|
22c213 |
+ if (!retries) {
|
|
|
22c213 |
+ fuse_log(FUSE_LOG_WARNING,
|
|
|
22c213 |
+ "%s: failed to stat parent: %m\n", __func__);
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ goto fail;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ p = lo_find(lo, &stat);
|
|
|
22c213 |
+ if (p == NULL) {
|
|
|
22c213 |
+ if (!retries) {
|
|
|
22c213 |
+ fuse_log(FUSE_LOG_WARNING,
|
|
|
22c213 |
+ "%s: failed to find parent\n", __func__);
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ goto fail;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ last++;
|
|
|
22c213 |
+ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
|
|
|
22c213 |
+ if (res == -1) {
|
|
|
22c213 |
+ if (!retries) {
|
|
|
22c213 |
+ fuse_log(FUSE_LOG_WARNING,
|
|
|
22c213 |
+ "%s: failed to stat last\n", __func__);
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ goto fail_unref;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
|
|
|
22c213 |
+ if (!retries) {
|
|
|
22c213 |
+ fuse_log(FUSE_LOG_WARNING,
|
|
|
22c213 |
+ "%s: failed to match last\n", __func__);
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ goto fail_unref;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+ *parent = p;
|
|
|
22c213 |
+ memmove(path, last, strlen(last) + 1);
|
|
|
22c213 |
+
|
|
|
22c213 |
+ return 0;
|
|
|
22c213 |
+
|
|
|
22c213 |
+fail_unref:
|
|
|
22c213 |
+ unref_inode(lo, p, 1);
|
|
|
22c213 |
+fail:
|
|
|
22c213 |
+ if (retries) {
|
|
|
22c213 |
+ retries--;
|
|
|
22c213 |
+ goto retry;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+fail_noretry:
|
|
|
22c213 |
+ errno = EIO;
|
|
|
22c213 |
+ return -1;
|
|
|
22c213 |
+}
|
|
|
22c213 |
+
|
|
|
22c213 |
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
|
|
|
22c213 |
+ const struct timespec *tv)
|
|
|
22c213 |
+{
|
|
|
22c213 |
+ int res;
|
|
|
22c213 |
+ struct lo_inode *parent;
|
|
|
22c213 |
+ char path[PATH_MAX];
|
|
|
22c213 |
|
|
|
22c213 |
if (inode->is_symlink) {
|
|
|
22c213 |
- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
|
|
|
22c213 |
+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
|
|
|
22c213 |
if (res == -1 && errno == EINVAL) {
|
|
|
22c213 |
/* Sorry, no race free way to set times on symlink. */
|
|
|
22c213 |
- errno = EPERM;
|
|
|
22c213 |
+ if (lo->norace) {
|
|
|
22c213 |
+ errno = EPERM;
|
|
|
22c213 |
+ } else {
|
|
|
22c213 |
+ goto fallback;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
}
|
|
|
22c213 |
return res;
|
|
|
22c213 |
}
|
|
|
22c213 |
- sprintf(procname, "/proc/self/fd/%i", inode->fd);
|
|
|
22c213 |
+ sprintf(path, "/proc/self/fd/%i", inode->fd);
|
|
|
22c213 |
|
|
|
22c213 |
- return utimensat(AT_FDCWD, procname, tv, 0);
|
|
|
22c213 |
+ return utimensat(AT_FDCWD, path, tv, 0);
|
|
|
22c213 |
+
|
|
|
22c213 |
+fallback:
|
|
|
22c213 |
+ res = lo_parent_and_name(lo, inode, path, &parent);
|
|
|
22c213 |
+ if (res != -1) {
|
|
|
22c213 |
+ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
|
|
|
22c213 |
+ unref_inode(lo, parent, 1);
|
|
|
22c213 |
+ }
|
|
|
22c213 |
+
|
|
|
22c213 |
+ return res;
|
|
|
22c213 |
}
|
|
|
22c213 |
|
|
|
22c213 |
static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
|
|
|
22c213 |
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
|
|
22c213 |
{
|
|
|
22c213 |
int saverr;
|
|
|
22c213 |
char procname[64];
|
|
|
22c213 |
+ struct lo_data *lo = lo_data(req);
|
|
|
22c213 |
struct lo_inode *inode;
|
|
|
22c213 |
int ifd;
|
|
|
22c213 |
int res;
|
|
|
22c213 |
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
|
|
22c213 |
if (fi) {
|
|
|
22c213 |
res = futimens(fd, tv);
|
|
|
22c213 |
} else {
|
|
|
22c213 |
- res = utimensat_empty_nofollow(inode, tv);
|
|
|
22c213 |
+ res = utimensat_empty(lo, inode, tv);
|
|
|
22c213 |
}
|
|
|
22c213 |
if (res == -1) {
|
|
|
22c213 |
goto out_err;
|
|
|
22c213 |
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
|
|
|
22c213 |
lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
|
|
|
22c213 |
}
|
|
|
22c213 |
|
|
|
22c213 |
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
|
|
|
22c213 |
- const char *name)
|
|
|
22c213 |
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
|
|
|
22c213 |
+ int dfd, const char *name)
|
|
|
22c213 |
{
|
|
|
22c213 |
int res;
|
|
|
22c213 |
- char procname[64];
|
|
|
22c213 |
+ struct lo_inode *parent;
|
|
|
22c213 |
+ char path[PATH_MAX];
|
|
|
22c213 |
|
|
|
22c213 |
if (inode->is_symlink) {
|
|
|
22c213 |
res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
|
|
|
22c213 |
if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
|
|
|
22c213 |
/* Sorry, no race free way to hard-link a symlink. */
|
|
|
22c213 |
- errno = EPERM;
|
|
|
22c213 |
+ if (lo->norace) {
|
|
|
22c213 |
+ errno = EPERM;
|
|
|
22c213 |
+ } else {
|
|
|
22c213 |
+ goto fallback;
|
|
|
22c213 |
+ }
|
|
|
22c213 |
}
|
|
|
22c213 |
return res;
|
|
|
22c213 |
}
|
|
|
22c213 |
|
|
|
22c213 |
- sprintf(procname, "/proc/self/fd/%i", inode->fd);
|
|
|
22c213 |
+ sprintf(path, "/proc/self/fd/%i", inode->fd);
|
|
|
22c213 |
+
|
|
|
22c213 |
+ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
|
|
|
22c213 |
+
|
|
|
22c213 |
+fallback:
|
|
|
22c213 |
+ res = lo_parent_and_name(lo, inode, path, &parent);
|
|
|
22c213 |
+ if (res != -1) {
|
|
|
22c213 |
+ res = linkat(parent->fd, path, dfd, name, 0);
|
|
|
22c213 |
+ unref_inode(lo, parent, 1);
|
|
|
22c213 |
+ }
|
|
|
22c213 |
|
|
|
22c213 |
- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
|
|
|
22c213 |
+ return res;
|
|
|
22c213 |
}
|
|
|
22c213 |
|
|
|
22c213 |
static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
|
|
|
22c213 |
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
|
|
|
22c213 |
e.attr_timeout = lo->timeout;
|
|
|
22c213 |
e.entry_timeout = lo->timeout;
|
|
|
22c213 |
|
|
|
22c213 |
- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
|
|
|
22c213 |
+ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
|
|
|
22c213 |
if (res == -1) {
|
|
|
22c213 |
goto out_err;
|
|
|
22c213 |
}
|
|
|
22c213 |
--
|
|
|
22c213 |
1.8.3.1
|
|
|
22c213 |
|