Blob Blame History Raw
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 27 Jan 2020 19:02:11 +0000
Subject: [PATCH] virtiofsd: introduce inode refcount to prevent use-after-free

If thread A is using an inode it must not be deleted by thread B when
processing a FUSE_FORGET request.

The FUSE protocol itself already has a counter called nlookup that is
used in FUSE_FORGET messages.  We cannot trust this counter since the
untrusted client can manipulate it via FUSE_FORGET messages.

Introduce a new refcount to keep inodes alive for the required lifespan.
lo_inode_put() must be called to release a reference.  FUSE's nlookup
counter holds exactly one reference so that the inode stays alive as
long as the client still wants to remember it.

Note that the lo_inode->is_symlink field is moved to avoid creating a
hole in the struct due to struct field alignment.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3)
---
 tools/virtiofsd/passthrough_ll.c | 169 ++++++++++++++++++++++++++-----
 1 file changed, 146 insertions(+), 23 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e3a6d6b611..ab1613586e 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -97,7 +97,13 @@ struct lo_key {
 
 struct lo_inode {
     int fd;
-    bool is_symlink;
+
+    /*
+     * Atomic reference count for this object.  The nlookup field holds a
+     * reference and releases it when nlookup reaches 0.
+     */
+    gint refcount;
+
     struct lo_key key;
 
     /*
@@ -116,6 +122,8 @@ struct lo_inode {
     fuse_ino_t fuse_ino;
     pthread_mutex_t plock_mutex;
     GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
+
+    bool is_symlink;
 };
 
 struct lo_cred {
@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
     return elem - lo_data(req)->ino_map.elems;
 }
 
+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
+{
+    struct lo_inode *inode = *inodep;
+
+    if (!inode) {
+        return;
+    }
+
+    *inodep = NULL;
+
+    if (g_atomic_int_dec_and_test(&inode->refcount)) {
+        close(inode->fd);
+        free(inode);
+    }
+}
+
+/* Caller must release refcount using lo_inode_put() */
 static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 {
     struct lo_data *lo = lo_data(req);
@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 
     pthread_mutex_lock(&lo->mutex);
     elem = lo_map_get(&lo->ino_map, ino);
+    if (elem) {
+        g_atomic_int_inc(&elem->inode->refcount);
+    }
     pthread_mutex_unlock(&lo->mutex);
 
     if (!elem) {
@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
     return elem->inode;
 }
 
+/*
+ * TODO Remove this helper and force callers to hold an inode refcount until
+ * they are done with the fd.  This will be done in a later patch to make
+ * review easier.
+ */
 static int lo_fd(fuse_req_t req, fuse_ino_t ino)
 {
     struct lo_inode *inode = lo_inode(req, ino);
-    return inode ? inode->fd : -1;
+    int fd;
+
+    if (!inode) {
+        return -1;
+    }
+
+    fd = inode->fd;
+    lo_inode_put(lo_data(req), &inode);
+    return fd;
 }
 
 static void lo_init(void *userdata, struct fuse_conn_info *conn)
@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
     fuse_reply_attr(req, &buf, lo->timeout);
 }
 
+/*
+ * Increments parent->nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
 static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
                               char path[PATH_MAX], struct lo_inode **parent)
 {
@@ -582,6 +627,7 @@ retry:
         p = &lo->root;
         pthread_mutex_lock(&lo->mutex);
         p->nlookup++;
+        g_atomic_int_inc(&p->refcount);
         pthread_mutex_unlock(&lo->mutex);
     } else {
         *last = '\0';
@@ -625,6 +671,7 @@ retry:
 
 fail_unref:
     unref_inode_lolocked(lo, p, 1);
+    lo_inode_put(lo, &p);
 fail:
     if (retries) {
         retries--;
@@ -663,6 +710,7 @@ fallback:
     if (res != -1) {
         res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
         unref_inode_lolocked(lo, parent, 1);
+        lo_inode_put(lo, &parent);
     }
 
     return res;
@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
             goto out_err;
         }
     }
+    lo_inode_put(lo, &inode);
 
     return lo_getattr(req, ino, fi);
 
 out_err:
     saverr = errno;
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
     if (p) {
         assert(p->nlookup > 0);
         p->nlookup++;
+        g_atomic_int_inc(&p->refcount);
     }
     pthread_mutex_unlock(&lo->mutex);
 
@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data)
     free(plock);
 }
 
+/*
+ * Increments nlookup on the inode that name resolves to.  The temporary
+ * inode refcounts taken here are dropped with lo_inode_put() on return.
+ */
 static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
                         struct fuse_entry_param *e)
 {
@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
     int res;
     int saverr;
     struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode, *dir = lo_inode(req, parent);
+    struct lo_inode *inode = NULL;
+    struct lo_inode *dir = lo_inode(req, parent);
 
     /*
      * name_to_handle_at() and open_by_handle_at() can reach here with fuse
@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
         }
 
         inode->is_symlink = S_ISLNK(e->attr.st_mode);
+
+        /*
+         * One for the caller and one for nlookup (released in
+         * unref_inode_lolocked())
+         */
+        g_atomic_int_set(&inode->refcount, 2);
+
         inode->nlookup = 1;
         inode->fd = newfd;
         newfd = -1;
@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
         pthread_mutex_unlock(&lo->mutex);
     }
     e->ino = inode->fuse_ino;
+    lo_inode_put(lo, &inode);
+    lo_inode_put(lo, &dir);
 
     fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
              name, (unsigned long long)e->ino);
@@ -894,6 +959,8 @@ out_err:
     if (newfd != -1) {
         close(newfd);
     }
+    lo_inode_put(lo, &inode);
+    lo_inode_put(lo, &dir);
     return saverr;
 }
 
@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
 {
     int res;
     int saverr;
+    struct lo_data *lo = lo_data(req);
     struct lo_inode *dir;
     struct fuse_entry_param e;
     struct lo_cred old = {};
@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
              name, (unsigned long long)e.ino);
 
     fuse_reply_entry(req, &e);
+    lo_inode_put(lo, &dir);
     return;
 
 out:
+    lo_inode_put(lo, &dir);
     fuse_reply_err(req, saverr);
 }
 
@@ -1085,6 +1155,7 @@ fallback:
     if (res != -1) {
         res = linkat(parent->fd, path, dfd, name, 0);
         unref_inode_lolocked(lo, parent, 1);
+        lo_inode_put(lo, &parent);
     }
 
     return res;
@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
 {
     int res;
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *parent_inode;
     struct lo_inode *inode;
     struct fuse_entry_param e;
     int saverr;
@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
         return;
     }
 
+    parent_inode = lo_inode(req, parent);
     inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
+    if (!parent_inode || !inode) {
+        errno = EBADF;
+        goto out_err;
     }
 
     memset(&e, 0, sizeof(struct fuse_entry_param));
     e.attr_timeout = lo->timeout;
     e.entry_timeout = lo->timeout;
 
-    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
+    res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name);
     if (res == -1) {
         goto out_err;
     }
@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
              name, (unsigned long long)e.ino);
 
     fuse_reply_entry(req, &e);
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &inode);
     return;
 
 out_err:
     saverr = errno;
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
+/* Increments nlookup and caller must release refcount using lo_inode_put() */
 static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
                                     const char *name)
 {
@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
 
     fuse_reply_err(req, res == -1 ? errno : 0);
     unref_inode_lolocked(lo, inode, 1);
+    lo_inode_put(lo, &inode);
 }
 
 static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
                       unsigned int flags)
 {
     int res;
-    struct lo_inode *oldinode;
-    struct lo_inode *newinode;
+    struct lo_inode *parent_inode;
+    struct lo_inode *newparent_inode;
+    struct lo_inode *oldinode = NULL;
+    struct lo_inode *newinode = NULL;
     struct lo_data *lo = lo_data(req);
 
     if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
         return;
     }
 
+    parent_inode = lo_inode(req, parent);
+    newparent_inode = lo_inode(req, newparent);
+    if (!parent_inode || !newparent_inode) {
+        fuse_reply_err(req, EBADF);
+        goto out;
+    }
+
     oldinode = lookup_name(req, parent, name);
     newinode = lookup_name(req, newparent, newname);
 
@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
 #ifndef SYS_renameat2
         fuse_reply_err(req, EINVAL);
 #else
-        res = syscall(SYS_renameat2, lo_fd(req, parent), name,
-                       lo_fd(req, newparent), newname, flags);
+        res = syscall(SYS_renameat2, parent_inode->fd, name,
+                        newparent_inode->fd, newname, flags);
         if (res == -1 && errno == ENOSYS) {
             fuse_reply_err(req, EINVAL);
         } else {
@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
         goto out;
     }
 
-    res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
+    res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
 
     fuse_reply_err(req, res == -1 ? errno : 0);
 out:
     unref_inode_lolocked(lo, oldinode, 1);
     unref_inode_lolocked(lo, newinode, 1);
+    lo_inode_put(lo, &oldinode);
+    lo_inode_put(lo, &newinode);
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &newparent_inode);
 }
 
 static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
 
     fuse_reply_err(req, res == -1 ? errno : 0);
     unref_inode_lolocked(lo, inode, 1);
+    lo_inode_put(lo, &inode);
 }
 
 static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
         g_hash_table_destroy(inode->posix_locks);
         pthread_mutex_destroy(&inode->plock_mutex);
         pthread_mutex_unlock(&lo->mutex);
-        close(inode->fd);
-        free(inode);
+
+        /* Drop our refcount from lo_do_lookup() */
+        lo_inode_put(lo, &inode);
     } else {
         pthread_mutex_unlock(&lo->mutex);
     }
@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
     inode->nlookup = 0;
     lo_map_remove(&lo->ino_map, inode->fuse_ino);
     close(inode->fd);
+    lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */
 
     return TRUE;
 }
@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
              (unsigned long long)nlookup);
 
     unref_inode_lolocked(lo, inode, nlookup);
+    lo_inode_put(lo, &inode);
 }
 
 static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
     err = 0;
 error:
     lo_dirp_put(&d);
+    lo_inode_put(lo, &dinode);
 
     /*
      * If there's an error, we can only signal it if we haven't stored
@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 {
     int fd;
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *parent_inode;
     struct fuse_entry_param e;
     int err;
     struct lo_cred old = {};
@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
         return;
     }
 
+    parent_inode = lo_inode(req, parent);
+    if (!parent_inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
     err = lo_change_cred(req, &old);
     if (err) {
         goto out;
     }
 
-    fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
+    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
                 mode);
     err = fd == -1 ? errno : 0;
     lo_restore_cred(&old);
@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
         pthread_mutex_unlock(&lo->mutex);
         if (fh == -1) {
             close(fd);
-            fuse_reply_err(req, ENOMEM);
-            return;
+            err = ENOMEM;
+            goto out;
         }
 
         fi->fh = fh;
@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
     }
 
 out:
+    lo_inode_put(lo, &parent_inode);
+
     if (err) {
         fuse_reply_err(req, err);
     } else {
@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
     plock =
         lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
     if (!plock) {
-        pthread_mutex_unlock(&inode->plock_mutex);
-        fuse_reply_err(req, ret);
-        return;
+        saverr = ret;
+        goto out;
     }
 
     ret = fcntl(plock->fd, F_OFD_GETLK, lock);
     if (ret == -1) {
         saverr = errno;
     }
+
+out:
     pthread_mutex_unlock(&inode->plock_mutex);
+    lo_inode_put(lo, &inode);
 
     if (saverr) {
         fuse_reply_err(req, saverr);
@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
         lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
 
     if (!plock) {
-        pthread_mutex_unlock(&inode->plock_mutex);
-        fuse_reply_err(req, ret);
-        return;
+        saverr = ret;
+        goto out;
     }
 
     /* TODO: Is it alright to modify flock? */
@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
     if (ret == -1) {
         saverr = errno;
     }
+
+out:
     pthread_mutex_unlock(&inode->plock_mutex);
+    lo_inode_put(lo, &inode);
+
     fuse_reply_err(req, saverr);
 }
 
@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
     pthread_mutex_unlock(&inode->plock_mutex);
 
     res = close(dup(lo_fi_fd(req, fi)));
+    lo_inode_put(lo_data(req), &inode);
     fuse_reply_err(req, res == -1 ? errno : 0);
 }
 
@@ -2115,11 +2227,14 @@ out_free:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     return;
 
 out_err:
     saverr = errno;
 out:
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
     goto out_free;
 }
@@ -2190,11 +2305,14 @@ out_free:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     return;
 
 out_err:
     saverr = errno;
 out:
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
     goto out_free;
 }
@@ -2243,6 +2361,8 @@ out:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
@@ -2289,6 +2409,8 @@ out:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
     root->key.ino = stat.st_ino;
     root->key.dev = stat.st_dev;
     root->nlookup = 2;
+    g_atomic_int_set(&root->refcount, 2);
 }
 
 static guint lo_key_hash(gconstpointer key)