Blame 0048-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch

1d442b
From: Miklos Szeredi <mszeredi@redhat.com>
1d442b
Date: Mon, 27 Jan 2020 19:01:17 +0000
1d442b
Subject: [PATCH] virtiofsd: passthrough_ll: add fallback for racy ops
1d442b
1d442b
We have two operations that cannot be done race-free on a symlink in
1d442b
certain cases: utimes and link.
1d442b
1d442b
Add racy fallback for these if the race-free method doesn't work.  We do
1d442b
our best to avoid races even in this case:
1d442b
1d442b
  - get absolute path by reading /proc/self/fd/NN symlink
1d442b
1d442b
  - lookup parent directory: after this we are safe against renames in
1d442b
    ancestors
1d442b
1d442b
  - lookup name in parent directory, and verify that we got to the original
1d442b
    inode; if not, retry the whole thing
1d442b
1d442b
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
1d442b
so a racing rename/delete by this fuse instance is not possible, only from
1d442b
other entities changing the filesystem.
1d442b
1d442b
If the "norace" option is given, then disable the racy fallbacks.
1d442b
1d442b
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
1d442b
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
1d442b
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442b
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
1d442b
---
1d442b
 tools/virtiofsd/helper.c         |   5 +-
1d442b
 tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++----
1d442b
 2 files changed, 145 insertions(+), 17 deletions(-)
1d442b
1d442b
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
1d442b
index b8ec5ac8dc..5531425223 100644
1d442b
--- a/tools/virtiofsd/helper.c
1d442b
+++ b/tools/virtiofsd/helper.c
1d442b
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
1d442b
            "    --daemonize                run in background\n"
1d442b
            "    -o max_idle_threads        the maximum number of idle worker "
1d442b
            "threads\n"
1d442b
-           "                               allowed (default: 10)\n");
1d442b
+           "                               allowed (default: 10)\n"
1d442b
+           "    -o norace                  disable racy fallback\n"
1d442b
+           "                               default: false\n"
1d442b
+          );
1d442b
 }
1d442b
 
1d442b
 static int fuse_helper_opt_proc(void *data, const char *arg, int key,
1d442b
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
1d442b
index 9815bfa5c5..ac380efcb1 100644
1d442b
--- a/tools/virtiofsd/passthrough_ll.c
1d442b
+++ b/tools/virtiofsd/passthrough_ll.c
1d442b
@@ -98,6 +98,7 @@ enum {
1d442b
 struct lo_data {
1d442b
     pthread_mutex_t mutex;
1d442b
     int debug;
1d442b
+    int norace;
1d442b
     int writeback;
1d442b
     int flock;
1d442b
     int xattr;
1d442b
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
1d442b
     { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
1d442b
     { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
1d442b
     { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
1d442b
-
1d442b
+    { "norace", offsetof(struct lo_data, norace), 1 },
1d442b
     FUSE_OPT_END
1d442b
 };
1d442b
 
1d442b
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
1d442b
+
1d442b
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
1d442b
+
1d442b
+
1d442b
 static struct lo_data *lo_data(fuse_req_t req)
1d442b
 {
1d442b
     return (struct lo_data *)fuse_req_userdata(req);
1d442b
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
1d442b
     fuse_reply_attr(req, &buf, lo->timeout);
1d442b
 }
1d442b
 
1d442b
-static int utimensat_empty_nofollow(struct lo_inode *inode,
1d442b
-                                    const struct timespec *tv)
1d442b
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
1d442b
+                              char path[PATH_MAX], struct lo_inode **parent)
1d442b
 {
1d442b
-    int res;
1d442b
     char procname[64];
1d442b
+    char *last;
1d442b
+    struct stat stat;
1d442b
+    struct lo_inode *p;
1d442b
+    int retries = 2;
1d442b
+    int res;
1d442b
+
1d442b
+retry:
1d442b
+    sprintf(procname, "/proc/self/fd/%i", inode->fd);
1d442b
+
1d442b
+    res = readlink(procname, path, PATH_MAX);
1d442b
+    if (res < 0) {
1d442b
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
1d442b
+        goto fail_noretry;
1d442b
+    }
1d442b
+
1d442b
+    if (res >= PATH_MAX) {
1d442b
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
1d442b
+        goto fail_noretry;
1d442b
+    }
1d442b
+    path[res] = '\0';
1d442b
+
1d442b
+    last = strrchr(path, '/');
1d442b
+    if (last == NULL) {
1d442b
+        /* Shouldn't happen */
1d442b
+        fuse_log(
1d442b
+            FUSE_LOG_WARNING,
1d442b
+            "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
1d442b
+        goto fail_noretry;
1d442b
+    }
1d442b
+    if (last == path) {
1d442b
+        p = &lo->root;
1d442b
+        pthread_mutex_lock(&lo->mutex);
1d442b
+        p->refcount++;
1d442b
+        pthread_mutex_unlock(&lo->mutex);
1d442b
+    } else {
1d442b
+        *last = '\0';
1d442b
+        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
1d442b
+        if (res == -1) {
1d442b
+            if (!retries) {
1d442b
+                fuse_log(FUSE_LOG_WARNING,
1d442b
+                         "%s: failed to stat parent: %m\n", __func__);
1d442b
+            }
1d442b
+            goto fail;
1d442b
+        }
1d442b
+        p = lo_find(lo, &stat);
1d442b
+        if (p == NULL) {
1d442b
+            if (!retries) {
1d442b
+                fuse_log(FUSE_LOG_WARNING,
1d442b
+                         "%s: failed to find parent\n", __func__);
1d442b
+            }
1d442b
+            goto fail;
1d442b
+        }
1d442b
+    }
1d442b
+    last++;
1d442b
+    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
1d442b
+    if (res == -1) {
1d442b
+        if (!retries) {
1d442b
+            fuse_log(FUSE_LOG_WARNING,
1d442b
+                     "%s: failed to stat last\n", __func__);
1d442b
+        }
1d442b
+        goto fail_unref;
1d442b
+    }
1d442b
+    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
1d442b
+        if (!retries) {
1d442b
+            fuse_log(FUSE_LOG_WARNING,
1d442b
+                     "%s: failed to match last\n", __func__);
1d442b
+        }
1d442b
+        goto fail_unref;
1d442b
+    }
1d442b
+    *parent = p;
1d442b
+    memmove(path, last, strlen(last) + 1);
1d442b
+
1d442b
+    return 0;
1d442b
+
1d442b
+fail_unref:
1d442b
+    unref_inode(lo, p, 1);
1d442b
+fail:
1d442b
+    if (retries) {
1d442b
+        retries--;
1d442b
+        goto retry;
1d442b
+    }
1d442b
+fail_noretry:
1d442b
+    errno = EIO;
1d442b
+    return -1;
1d442b
+}
1d442b
+
1d442b
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
1d442b
+                           const struct timespec *tv)
1d442b
+{
1d442b
+    int res;
1d442b
+    struct lo_inode *parent;
1d442b
+    char path[PATH_MAX];
1d442b
 
1d442b
     if (inode->is_symlink) {
1d442b
-        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
1d442b
+        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
1d442b
         if (res == -1 && errno == EINVAL) {
1d442b
             /* Sorry, no race free way to set times on symlink. */
1d442b
-            errno = EPERM;
1d442b
+            if (lo->norace) {
1d442b
+                errno = EPERM;
1d442b
+            } else {
1d442b
+                goto fallback;
1d442b
+            }
1d442b
         }
1d442b
         return res;
1d442b
     }
1d442b
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
1d442b
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
1d442b
 
1d442b
-    return utimensat(AT_FDCWD, procname, tv, 0);
1d442b
+    return utimensat(AT_FDCWD, path, tv, 0);
1d442b
+
1d442b
+fallback:
1d442b
+    res = lo_parent_and_name(lo, inode, path, &parent);
1d442b
+    if (res != -1) {
1d442b
+        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
1d442b
+        unref_inode(lo, parent, 1);
1d442b
+    }
1d442b
+
1d442b
+    return res;
1d442b
 }
1d442b
 
1d442b
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
1d442b
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
1d442b
 {
1d442b
     int saverr;
1d442b
     char procname[64];
1d442b
+    struct lo_data *lo = lo_data(req);
1d442b
     struct lo_inode *inode;
1d442b
     int ifd;
1d442b
     int res;
1d442b
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
1d442b
         if (fi) {
1d442b
             res = futimens(fd, tv);
1d442b
         } else {
1d442b
-            res = utimensat_empty_nofollow(inode, tv);
1d442b
+            res = utimensat_empty(lo, inode, tv);
1d442b
         }
1d442b
         if (res == -1) {
1d442b
             goto out_err;
1d442b
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
1d442b
     lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
1d442b
 }
1d442b
 
1d442b
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
1d442b
-                                 const char *name)
1d442b
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
1d442b
+                                 int dfd, const char *name)
1d442b
 {
1d442b
     int res;
1d442b
-    char procname[64];
1d442b
+    struct lo_inode *parent;
1d442b
+    char path[PATH_MAX];
1d442b
 
1d442b
     if (inode->is_symlink) {
1d442b
         res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
1d442b
         if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
1d442b
             /* Sorry, no race free way to hard-link a symlink. */
1d442b
-            errno = EPERM;
1d442b
+            if (lo->norace) {
1d442b
+                errno = EPERM;
1d442b
+            } else {
1d442b
+                goto fallback;
1d442b
+            }
1d442b
         }
1d442b
         return res;
1d442b
     }
1d442b
 
1d442b
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
1d442b
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
1d442b
+
1d442b
+    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
1d442b
+
1d442b
+fallback:
1d442b
+    res = lo_parent_and_name(lo, inode, path, &parent);
1d442b
+    if (res != -1) {
1d442b
+        res = linkat(parent->fd, path, dfd, name, 0);
1d442b
+        unref_inode(lo, parent, 1);
1d442b
+    }
1d442b
 
1d442b
-    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
1d442b
+    return res;
1d442b
 }
1d442b
 
1d442b
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
1d442b
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
1d442b
     e.attr_timeout = lo->timeout;
1d442b
     e.entry_timeout = lo->timeout;
1d442b
 
1d442b
-    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
1d442b
+    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
1d442b
     if (res == -1) {
1d442b
         goto out_err;
1d442b
     }