Blame 0058-virtiofsd-sandbox-mount-namespace.patch

1d442b
From: Stefan Hajnoczi <stefanha@redhat.com>
1d442b
Date: Mon, 27 Jan 2020 19:01:27 +0000
1d442b
Subject: [PATCH] virtiofsd: sandbox mount namespace
1d442b
MIME-Version: 1.0
1d442b
Content-Type: text/plain; charset=UTF-8
1d442b
Content-Transfer-Encoding: 8bit
1d442b
1d442b
Use a mount namespace with the shared directory tree mounted at "/" and
1d442b
no other mounts.
1d442b
1d442b
This prevents symlink escape attacks because symlink targets are
1d442b
resolved only against the shared directory and cannot go outside it.
1d442b
1d442b
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
1d442b
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
1d442b
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
1d442b
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442b
(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc)
1d442b
---
1d442b
 tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++
1d442b
 1 file changed, 89 insertions(+)
1d442b
1d442b
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
1d442b
index e2e2211ea1..0570453eef 100644
1d442b
--- a/tools/virtiofsd/passthrough_ll.c
1d442b
+++ b/tools/virtiofsd/passthrough_ll.c
1d442b
@@ -50,6 +50,7 @@
1d442b
 #include <stdlib.h>
1d442b
 #include <string.h>
1d442b
 #include <sys/file.h>
1d442b
+#include <sys/mount.h>
1d442b
 #include <sys/syscall.h>
1d442b
 #include <sys/xattr.h>
1d442b
 #include <unistd.h>
1d442b
@@ -1943,6 +1944,58 @@ static void print_capabilities(void)
1d442b
     printf("}\n");
1d442b
 }
1d442b
 
1d442b
+/* Make source the root, detach old root; based on lxc's lxc_pivot_root() */
1d442b
+static void setup_pivot_root(const char *source)
1d442b
+{
1d442b
+    int oldroot;
1d442b
+    int newroot;
1d442b
+
1d442b
+    oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
1d442b
+    if (oldroot < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
1d442b
+    if (newroot < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (fchdir(newroot) < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (syscall(__NR_pivot_root, ".", ".") < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (fchdir(oldroot) < 0) { /* cwd = old root for the next two calls */
1d442b
+        fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (umount2(".", MNT_DETACH) < 0) { /* detach the old root mount */
1d442b
+        fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (fchdir(newroot) < 0) { /* back to the source directory */
1d442b
+        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    close(newroot);
1d442b
+    close(oldroot);
1d442b
+}
1d442b
+
1d442b
 static void setup_proc_self_fd(struct lo_data *lo)
1d442b
 {
1d442b
     lo->proc_self_fd = open("/proc/self/fd", O_PATH);
1d442b
@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo)
1d442b
     }
1d442b
 }
1d442b
 
1d442b
+/*
1d442b
+ * Unshare the mount namespace and make a bind mount of source our root so
1d442b
+ * symlinks cannot escape and no other files are accessible.  Exits on error.
1d442b
+ */
1d442b
+static void setup_mount_namespace(const char *source)
1d442b
+{
1d442b
+    if (unshare(CLONE_NEWNS) != 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
1d442b
+        exit(1);
1d442b
+    }
1d442b
+
1d442b
+    setup_pivot_root(source);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Lock down this process to prevent access to files outside the source
1d442b
+ * directory.  This reduces the impact of arbitrary code execution bugs.
1d442b
+ */
1d442b
+static void setup_sandbox(struct lo_data *lo)
1d442b
+{
1d442b
+    setup_mount_namespace(lo->source);
1d442b
+}
1d442b
+
1d442b
 int main(int argc, char *argv[])
1d442b
 {
1d442b
     struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
1d442b
@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[])
1d442b
     }
1d442b
 
1d442b
     lo.root.fd = open(lo.source, O_PATH);
1d442b
+
1d442b
     if (lo.root.fd == -1) {
1d442b
         fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source);
1d442b
         exit(1);
1d442b
@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[])
1d442b
     /* Must be after daemonize to get the right /proc/self/fd */
1d442b
     setup_proc_self_fd(&lo);
1d442b
 
1d442b
+    setup_sandbox(&lo);
1d442b
+
1d442b
     /* Block until ctrl+c or fusermount -u */
1d442b
     ret = virtio_loop(se);
1d442b