yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch

22c213
From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001
22c213
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
22c213
Date: Mon, 27 Jan 2020 19:01:17 +0100
22c213
Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops
22c213
MIME-Version: 1.0
22c213
Content-Type: text/plain; charset=UTF-8
22c213
Content-Transfer-Encoding: 8bit
22c213
22c213
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
22c213
Message-id: <20200127190227.40942-43-dgilbert@redhat.com>
22c213
Patchwork-id: 93496
22c213
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops
22c213
Bugzilla: 1694164
22c213
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
22c213
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
22c213
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
22c213
22c213
From: Miklos Szeredi <mszeredi@redhat.com>
22c213
22c213
We have two operations that cannot be done race-free on a symlink in
22c213
certain cases: utimes and link.
22c213
22c213
Add racy fallback for these if the race-free method doesn't work.  We do
22c213
our best to avoid races even in this case:
22c213
22c213
  - get absolute path by reading /proc/self/fd/NN symlink
22c213
22c213
  - lookup parent directory: after this we are safe against renames in
22c213
    ancestors
22c213
22c213
  - lookup name in parent directory, and verify that we got to the original
22c213
    inode; if not, retry the whole thing
22c213
22c213
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
22c213
so a racing rename/delete by this fuse instance is not possible, only from
22c213
other entities changing the filesystem.
22c213
22c213
If the "norace" option is given, then disable the racy fallbacks.
22c213
22c213
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
22c213
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
22c213
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
22c213
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
22c213
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
22c213
---
22c213
 tools/virtiofsd/helper.c         |   5 +-
22c213
 tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++----
22c213
 2 files changed, 145 insertions(+), 17 deletions(-)
22c213
22c213
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
22c213
index b8ec5ac..5531425 100644
22c213
--- a/tools/virtiofsd/helper.c
22c213
+++ b/tools/virtiofsd/helper.c
22c213
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
22c213
            "    --daemonize                run in background\n"
22c213
            "    -o max_idle_threads        the maximum number of idle worker "
22c213
            "threads\n"
22c213
-           "                               allowed (default: 10)\n");
22c213
+           "                               allowed (default: 10)\n"
22c213
+           "    -o norace                  disable racy fallback\n"
22c213
+           "                               default: false\n"
22c213
+          );
22c213
 }
22c213
 
22c213
 static int fuse_helper_opt_proc(void *data, const char *arg, int key,
22c213
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
22c213
index 9815bfa..ac380ef 100644
22c213
--- a/tools/virtiofsd/passthrough_ll.c
22c213
+++ b/tools/virtiofsd/passthrough_ll.c
22c213
@@ -98,6 +98,7 @@ enum {
22c213
 struct lo_data {
22c213
     pthread_mutex_t mutex;
22c213
     int debug;
22c213
+    int norace;
22c213
     int writeback;
22c213
     int flock;
22c213
     int xattr;
22c213
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
22c213
     { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
22c213
     { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
22c213
     { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
22c213
-
22c213
+    { "norace", offsetof(struct lo_data, norace), 1 },
22c213
     FUSE_OPT_END
22c213
 };
22c213
 
22c213
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
22c213
+
22c213
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
22c213
+
22c213
+
22c213
 static struct lo_data *lo_data(fuse_req_t req)
22c213
 {
22c213
     return (struct lo_data *)fuse_req_userdata(req);
22c213
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
22c213
     fuse_reply_attr(req, &buf, lo->timeout);
22c213
 }
22c213
 
22c213
-static int utimensat_empty_nofollow(struct lo_inode *inode,
22c213
-                                    const struct timespec *tv)
22c213
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
22c213
+                              char path[PATH_MAX], struct lo_inode **parent)
22c213
 {
22c213
-    int res;
22c213
     char procname[64];
22c213
+    char *last;
22c213
+    struct stat stat;
22c213
+    struct lo_inode *p;
22c213
+    int retries = 2;
22c213
+    int res;
22c213
+
22c213
+retry:
22c213
+    sprintf(procname, "/proc/self/fd/%i", inode->fd);
22c213
+
22c213
+    res = readlink(procname, path, PATH_MAX);
22c213
+    if (res < 0) {
22c213
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
22c213
+        goto fail_noretry;
22c213
+    }
22c213
+
22c213
+    if (res >= PATH_MAX) {
22c213
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
22c213
+        goto fail_noretry;
22c213
+    }
22c213
+    path[res] = '\0';
22c213
+
22c213
+    last = strrchr(path, '/');
22c213
+    if (last == NULL) {
22c213
+        /* Shouldn't happen */
22c213
+        fuse_log(
22c213
+            FUSE_LOG_WARNING,
22c213
+            "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
22c213
+        goto fail_noretry;
22c213
+    }
22c213
+    if (last == path) {
22c213
+        p = &lo->root;
22c213
+        pthread_mutex_lock(&lo->mutex);
22c213
+        p->refcount++;
22c213
+        pthread_mutex_unlock(&lo->mutex);
22c213
+    } else {
22c213
+        *last = '\0';
22c213
+        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
22c213
+        if (res == -1) {
22c213
+            if (!retries) {
22c213
+                fuse_log(FUSE_LOG_WARNING,
22c213
+                         "%s: failed to stat parent: %m\n", __func__);
22c213
+            }
22c213
+            goto fail;
22c213
+        }
22c213
+        p = lo_find(lo, &stat);
22c213
+        if (p == NULL) {
22c213
+            if (!retries) {
22c213
+                fuse_log(FUSE_LOG_WARNING,
22c213
+                         "%s: failed to find parent\n", __func__);
22c213
+            }
22c213
+            goto fail;
22c213
+        }
22c213
+    }
22c213
+    last++;
22c213
+    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
22c213
+    if (res == -1) {
22c213
+        if (!retries) {
22c213
+            fuse_log(FUSE_LOG_WARNING,
22c213
+                     "%s: failed to stat last\n", __func__);
22c213
+        }
22c213
+        goto fail_unref;
22c213
+    }
22c213
+    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
22c213
+        if (!retries) {
22c213
+            fuse_log(FUSE_LOG_WARNING,
22c213
+                     "%s: failed to match last\n", __func__);
22c213
+        }
22c213
+        goto fail_unref;
22c213
+    }
22c213
+    *parent = p;
22c213
+    memmove(path, last, strlen(last) + 1);
22c213
+
22c213
+    return 0;
22c213
+
22c213
+fail_unref:
22c213
+    unref_inode(lo, p, 1);
22c213
+fail:
22c213
+    if (retries) {
22c213
+        retries--;
22c213
+        goto retry;
22c213
+    }
22c213
+fail_noretry:
22c213
+    errno = EIO;
22c213
+    return -1;
22c213
+}
22c213
+
22c213
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
22c213
+                           const struct timespec *tv)
22c213
+{
22c213
+    int res;
22c213
+    struct lo_inode *parent;
22c213
+    char path[PATH_MAX];
22c213
 
22c213
     if (inode->is_symlink) {
22c213
-        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
22c213
+        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
22c213
         if (res == -1 && errno == EINVAL) {
22c213
             /* Sorry, no race free way to set times on symlink. */
22c213
-            errno = EPERM;
22c213
+            if (lo->norace) {
22c213
+                errno = EPERM;
22c213
+            } else {
22c213
+                goto fallback;
22c213
+            }
22c213
         }
22c213
         return res;
22c213
     }
22c213
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
22c213
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
22c213
 
22c213
-    return utimensat(AT_FDCWD, procname, tv, 0);
22c213
+    return utimensat(AT_FDCWD, path, tv, 0);
22c213
+
22c213
+fallback:
22c213
+    res = lo_parent_and_name(lo, inode, path, &parent);
22c213
+    if (res != -1) {
22c213
+        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
22c213
+        unref_inode(lo, parent, 1);
22c213
+    }
22c213
+
22c213
+    return res;
22c213
 }
22c213
 
22c213
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
22c213
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
22c213
 {
22c213
     int saverr;
22c213
     char procname[64];
22c213
+    struct lo_data *lo = lo_data(req);
22c213
     struct lo_inode *inode;
22c213
     int ifd;
22c213
     int res;
22c213
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
22c213
         if (fi) {
22c213
             res = futimens(fd, tv);
22c213
         } else {
22c213
-            res = utimensat_empty_nofollow(inode, tv);
22c213
+            res = utimensat_empty(lo, inode, tv);
22c213
         }
22c213
         if (res == -1) {
22c213
             goto out_err;
22c213
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
22c213
     lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
22c213
 }
22c213
 
22c213
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
22c213
-                                 const char *name)
22c213
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
22c213
+                                 int dfd, const char *name)
22c213
 {
22c213
     int res;
22c213
-    char procname[64];
22c213
+    struct lo_inode *parent;
22c213
+    char path[PATH_MAX];
22c213
 
22c213
     if (inode->is_symlink) {
22c213
         res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
22c213
         if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
22c213
             /* Sorry, no race free way to hard-link a symlink. */
22c213
-            errno = EPERM;
22c213
+            if (lo->norace) {
22c213
+                errno = EPERM;
22c213
+            } else {
22c213
+                goto fallback;
22c213
+            }
22c213
         }
22c213
         return res;
22c213
     }
22c213
 
22c213
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
22c213
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
22c213
+
22c213
+    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
22c213
+
22c213
+fallback:
22c213
+    res = lo_parent_and_name(lo, inode, path, &parent);
22c213
+    if (res != -1) {
22c213
+        res = linkat(parent->fd, path, dfd, name, 0);
22c213
+        unref_inode(lo, parent, 1);
22c213
+    }
22c213
 
22c213
-    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
22c213
+    return res;
22c213
 }
22c213
 
22c213
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
22c213
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
22c213
     e.attr_timeout = lo->timeout;
22c213
     e.entry_timeout = lo->timeout;
22c213
 
22c213
-    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
22c213
+    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
22c213
     if (res == -1) {
22c213
         goto out_err;
22c213
     }
22c213
-- 
22c213
1.8.3.1
22c213