render / rpms / qemu

Forked from rpms/qemu 10 months ago
Clone

Blame 0001-linux-user-add-openat2-support-in-linux-user.patch

Michael Vogt 0cab45
From 9651cead2f1bb34b9b72f9c2c5dc81baea2b082e Mon Sep 17 00:00:00 2001
Michael Vogt 0cab45
From: Michael Vogt <mvogt@redhat.com>
Michael Vogt 0cab45
Date: Tue, 1 Oct 2024 17:14:53 +0200
Michael Vogt 0cab45
Subject: [PATCH] linux-user: add openat2 support in linux-user
Michael Vogt 0cab45
Michael Vogt 0cab45
This commit adds support for the `openat2()` syscall in the
Michael Vogt 0cab45
`linux-user` userspace emulator.
Michael Vogt 0cab45
Michael Vogt 0cab45
It is implemented by extracting a new helper `maybe_do_fake_open()`
Michael Vogt 0cab45
out of the existing `do_guest_openat()` and share that with the
Michael Vogt 0cab45
new `do_guest_openat2()`. Unfortunately we cannot just make
Michael Vogt 0cab45
do_guest_openat2() a superset of do_guest_openat() because the
Michael Vogt 0cab45
openat2() syscall is stricter with the argument checking and
Michael Vogt 0cab45
will return an error for invalid flags or mode combinations (which
Michael Vogt 0cab45
open()/openat() will ignore).
Michael Vogt 0cab45
Michael Vogt 0cab45
The implementation is similar to SYSCALL_DEFINE(openat2), i.e.
Michael Vogt 0cab45
a new `copy_struct_from_user()` is used that works the same
Michael Vogt 0cab45
as the kernel's version to support backwards-compatibility
Michael Vogt 0cab45
for struct syscall arguments.
Michael Vogt 0cab45
Michael Vogt 0cab45
Instead of including openat2.h we create a copy of `open_how`
Michael Vogt 0cab45
as `open_how_ver0` to ensure that if the structure grows we
Michael Vogt 0cab45
can log a LOG_UNIMP warning.
Michael Vogt 0cab45
Michael Vogt 0cab45
Note that in this commit using openat2() for a "faked" file in
Michael Vogt 0cab45
/proc will honor the "resolve" flags for
Michael Vogt 0cab45
RESOLVE_NO_{MAGIC,SYM}LINKS for path based access to /proc/self/exe
Michael Vogt 0cab45
(which is the only magic link we support for faked files).
Michael Vogt 0cab45
Note it will not catch special access via e.g. dirfd. This is not
Michael Vogt 0cab45
great but it seems similar to the existing behavior when openat()
Michael Vogt 0cab45
is called with a dirfd to "/proc". Here too the fake file lookup
Michael Vogt 0cab45
may not catch the special file because no dirfd is used to
Michael Vogt 0cab45
determine if the path is in /proc.
Michael Vogt 0cab45
Michael Vogt 0cab45
Signed-off-by: Michael Vogt <mvogt@redhat.com>
Michael Vogt 0cab45
Buglink: https://github.com/osbuild/bootc-image-builder/issues/619
Michael Vogt 0cab45
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Michael Vogt 0cab45
Message-ID: <1c2c8c9db3731ed4c6fd9b10c63637c3e4caf8f5.1727795334.git.mvogt@redhat.com>
Michael Vogt 0cab45
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Michael Vogt 0cab45
---
Michael Vogt 0cab45
 linux-user/syscall.c      | 105 +++++++++++++++++++++++++++++++++++++-
Michael Vogt 0cab45
 linux-user/syscall_defs.h |  13 +++++
Michael Vogt 0cab45
 2 files changed, 116 insertions(+), 2 deletions(-)
Michael Vogt 0cab45
Michael Vogt 0cab45
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
Michael Vogt 0cab45
index a666986189..2febc3bc3f 100644
Michael Vogt 0cab45
--- a/linux-user/syscall.c
Michael Vogt 0cab45
+++ b/linux-user/syscall.c
Michael Vogt 0cab45
@@ -602,6 +602,34 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize)
Michael Vogt 0cab45
     return 1;
Michael Vogt 0cab45
 }
Michael Vogt 0cab45
 
Michael Vogt 0cab45
+/*
Michael Vogt 0cab45
+ * Copies a target struct to a host struct, in a way that guarantees
Michael Vogt 0cab45
+ * backwards-compatibility for struct syscall arguments.
Michael Vogt 0cab45
+ *
Michael Vogt 0cab45
+ * Similar to kernel's uaccess.h:copy_struct_from_user()
Michael Vogt 0cab45
+ */
Michael Vogt 0cab45
+static int
Michael Vogt 0cab45
+copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize)
Michael Vogt 0cab45
+{
Michael Vogt 0cab45
+    size_t size = MIN(ksize, usize);
Michael Vogt 0cab45
+    size_t rest = MAX(ksize, usize) - size;
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+    /* Deal with trailing bytes. */
Michael Vogt 0cab45
+    if (usize < ksize) {
Michael Vogt 0cab45
+        memset(dst + size, 0, rest);
Michael Vogt 0cab45
+    } else if (usize > ksize) {
Michael Vogt 0cab45
+        int ret = check_zeroed_user(src, ksize, usize);
Michael Vogt 0cab45
+        if (ret <= 0) {
Michael Vogt 0cab45
+            return ret ?: -TARGET_E2BIG;
Michael Vogt 0cab45
+        }
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+    /* Copy the interoperable parts of the struct. */
Michael Vogt 0cab45
+    if (copy_from_user(dst, src, size)) {
Michael Vogt 0cab45
+        return -TARGET_EFAULT;
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+    return 0;
Michael Vogt 0cab45
+}
Michael Vogt 0cab45
+
Michael Vogt 0cab45
 #define safe_syscall0(type, name) \
Michael Vogt 0cab45
 static type safe_##name(void) \
Michael Vogt 0cab45
 { \
Michael Vogt 0cab45
@@ -653,6 +681,15 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count)
Michael Vogt 0cab45
 safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count)
Michael Vogt 0cab45
 safe_syscall4(int, openat, int, dirfd, const char *, pathname, \
Michael Vogt 0cab45
               int, flags, mode_t, mode)
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+struct open_how_ver0 {
Michael Vogt 0cab45
+    __u64 flags;
Michael Vogt 0cab45
+    __u64 mode;
Michael Vogt 0cab45
+    __u64 resolve;
Michael Vogt 0cab45
+};
Michael Vogt 0cab45
+safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \
Michael Vogt 0cab45
+              const struct open_how_ver0 *, how, size_t, size)
Michael Vogt 0cab45
+
Michael Vogt 0cab45
 #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid)
Michael Vogt 0cab45
 safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \
Michael Vogt 0cab45
               struct rusage *, rusage)
Michael Vogt 0cab45
@@ -8332,8 +8369,9 @@ static int open_net_route(CPUArchState *cpu_env, int fd)
Michael Vogt 0cab45
 }
Michael Vogt 0cab45
 #endif
Michael Vogt 0cab45
 
Michael Vogt 0cab45
-int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
Michael Vogt 0cab45
-                    int flags, mode_t mode, bool safe)
Michael Vogt 0cab45
+static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd,
Michael Vogt 0cab45
+                              const char *fname, int flags, mode_t mode,
Michael Vogt 0cab45
+                              int openat2_resolve, bool safe)
Michael Vogt 0cab45
 {
Michael Vogt 0cab45
     g_autofree char *proc_name = NULL;
Michael Vogt 0cab45
     const char *pathname;
Michael Vogt 0cab45
@@ -8370,6 +8408,12 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
Michael Vogt 0cab45
     }
Michael Vogt 0cab45
 
Michael Vogt 0cab45
     if (is_proc_myself(pathname, "exe")) {
Michael Vogt 0cab45
+        /* Honor openat2 resolve flags */
Michael Vogt 0cab45
+        if ((openat2_resolve & RESOLVE_NO_MAGICLINKS) ||
Michael Vogt 0cab45
+            (openat2_resolve & RESOLVE_NO_SYMLINKS)) {
Michael Vogt 0cab45
+            errno = ELOOP;
Michael Vogt 0cab45
+            return -1;
Michael Vogt 0cab45
+        }
Michael Vogt 0cab45
         if (safe) {
Michael Vogt 0cab45
             return safe_openat(dirfd, exec_path, flags, mode);
Michael Vogt 0cab45
         } else {
Michael Vogt 0cab45
@@ -8416,6 +8460,17 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
Michael Vogt 0cab45
         return fd;
Michael Vogt 0cab45
     }
Michael Vogt 0cab45
 
Michael Vogt 0cab45
+    return -2;
Michael Vogt 0cab45
+}
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname,
Michael Vogt 0cab45
+                    int flags, mode_t mode, bool safe)
Michael Vogt 0cab45
+{
Michael Vogt 0cab45
+    int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, flags, mode, 0, safe);
Michael Vogt 0cab45
+    if (fd > -2) {
Michael Vogt 0cab45
+        return fd;
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+
Michael Vogt 0cab45
     if (safe) {
Michael Vogt 0cab45
         return safe_openat(dirfd, path(pathname), flags, mode);
Michael Vogt 0cab45
     } else {
Michael Vogt 0cab45
@@ -8423,6 +8478,49 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
Michael Vogt 0cab45
     }
Michael Vogt 0cab45
 }
Michael Vogt 0cab45
 
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+static int do_openat2(CPUArchState *cpu_env, abi_long dirfd,
Michael Vogt 0cab45
+                      abi_ptr guest_pathname, abi_ptr guest_open_how,
Michael Vogt 0cab45
+                      abi_ulong guest_size)
Michael Vogt 0cab45
+{
Michael Vogt 0cab45
+    struct open_how_ver0 how = {0};
Michael Vogt 0cab45
+    char *pathname;
Michael Vogt 0cab45
+    int ret;
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+    if (guest_size < sizeof(struct target_open_how_ver0)) {
Michael Vogt 0cab45
+        return -TARGET_EINVAL;
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+    ret = copy_struct_from_user(&how, sizeof(how), guest_open_how, guest_size);
Michael Vogt 0cab45
+    if (ret) {
Michael Vogt 0cab45
+        if (ret == -TARGET_E2BIG) {
Michael Vogt 0cab45
+            qemu_log_mask(LOG_UNIMP,
Michael Vogt 0cab45
+                          "Unimplemented openat2 open_how size: "
Michael Vogt 0cab45
+                          TARGET_ABI_FMT_lu "\n", guest_size);
Michael Vogt 0cab45
+        }
Michael Vogt 0cab45
+        return ret;
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+    pathname = lock_user_string(guest_pathname);
Michael Vogt 0cab45
+    if (!pathname) {
Michael Vogt 0cab45
+        return -TARGET_EFAULT;
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+    how.flags = target_to_host_bitmask(tswap64(how.flags), fcntl_flags_tbl);
Michael Vogt 0cab45
+    how.mode = tswap64(how.mode);
Michael Vogt 0cab45
+    how.resolve = tswap64(how.resolve);
Michael Vogt 0cab45
+    int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, how.flags, how.mode,
Michael Vogt 0cab45
+                                how.resolve, true);
Michael Vogt 0cab45
+    if (fd > -2) {
Michael Vogt 0cab45
+        ret = get_errno(fd);
Michael Vogt 0cab45
+    } else {
Michael Vogt 0cab45
+        ret = get_errno(safe_openat2(dirfd, pathname, &how,
Michael Vogt 0cab45
+                                     sizeof(struct open_how_ver0)));
Michael Vogt 0cab45
+    }
Michael Vogt 0cab45
+
Michael Vogt 0cab45
+    fd_trans_unregister(ret);
Michael Vogt 0cab45
+    unlock_user(pathname, guest_pathname, 0);
Michael Vogt 0cab45
+    return ret;
Michael Vogt 0cab45
+}
Michael Vogt 0cab45
+
Michael Vogt 0cab45
 ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz)
Michael Vogt 0cab45
 {
Michael Vogt 0cab45
     ssize_t ret;
Michael Vogt 0cab45
@@ -9195,6 +9293,9 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
Michael Vogt 0cab45
         fd_trans_unregister(ret);
Michael Vogt 0cab45
         unlock_user(p, arg2, 0);
Michael Vogt 0cab45
         return ret;
Michael Vogt 0cab45
+    case TARGET_NR_openat2:
Michael Vogt 0cab45
+        ret = do_openat2(cpu_env, arg1, arg2, arg3, arg4);
Michael Vogt 0cab45
+        return ret;
Michael Vogt 0cab45
 #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE)
Michael Vogt 0cab45
     case TARGET_NR_name_to_handle_at:
Michael Vogt 0cab45
         ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5);
Michael Vogt 0cab45
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
Michael Vogt 0cab45
index e08d088740..de5091c977 100644
Michael Vogt 0cab45
--- a/linux-user/syscall_defs.h
Michael Vogt 0cab45
+++ b/linux-user/syscall_defs.h
Michael Vogt 0cab45
@@ -2748,4 +2748,17 @@ struct target_sched_param {
Michael Vogt 0cab45
     abi_int sched_priority;
Michael Vogt 0cab45
 };
Michael Vogt 0cab45
 
Michael Vogt 0cab45
+/* from kernel's include/uapi/linux/openat2.h */
Michael Vogt 0cab45
+struct target_open_how_ver0 {
Michael Vogt 0cab45
+    abi_ullong flags;
Michael Vogt 0cab45
+    abi_ullong mode;
Michael Vogt 0cab45
+    abi_ullong resolve;
Michael Vogt 0cab45
+};
Michael Vogt 0cab45
+#ifndef RESOLVE_NO_MAGICLINKS
Michael Vogt 0cab45
+#define RESOLVE_NO_MAGICLINKS   0x02
Michael Vogt 0cab45
+#endif
Michael Vogt 0cab45
+#ifndef RESOLVE_NO_SYMLINKS
Michael Vogt 0cab45
+#define RESOLVE_NO_SYMLINKS     0x04
Michael Vogt 0cab45
+#endif
Michael Vogt 0cab45
+
Michael Vogt 0cab45
 #endif
Michael Vogt 0cab45
-- 
Michael Vogt 0cab45
2.47.0
Michael Vogt 0cab45