thebeanogamer / rpms / qemu-kvm

Forked from rpms/qemu-kvm 7 months ago
Clone
7f1c5b
From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001
7f1c5b
From: Peter Xu <peterx@redhat.com>
7f1c5b
Date: Tue, 7 Feb 2023 15:57:11 -0500
7f1c5b
Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd
7f1c5b
7f1c5b
RH-Author: Peter Xu <peterx@redhat.com>
7f1c5b
RH-MergeRequest: 149: Support /dev/userfaultfd
7f1c5b
RH-Bugzilla: 2158704
7f1c5b
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
7f1c5b
RH-Acked-by: quintela1 <quintela@redhat.com>
7f1c5b
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
7f1c5b
RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm)
7f1c5b
7f1c5b
Teach QEMU to use /dev/userfaultfd when it exists, and fall back to the
7f1c5b
system call if the device is not there or QEMU lacks permission to open it.
7f1c5b
7f1c5b
Firstly, as long as the app has permission to access /dev/userfaultfd, it
7f1c5b
always has the ability to trap kernel faults, which is what QEMU mostly wants.
7f1c5b
Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be
7f1c5b
forbidden, so /dev/userfaultfd can be the main way to use postcopy in a restricted
7f1c5b
environment with strict seccomp setup.
7f1c5b
7f1c5b
Signed-off-by: Peter Xu <peterx@redhat.com>
7f1c5b
Reviewed-by: Juan Quintela <quintela@redhat.com>
7f1c5b
Signed-off-by: Juan Quintela <quintela@redhat.com>
7f1c5b
(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828)
7f1c5b
Signed-off-by: Peter Xu <peterx@redhat.com>
7f1c5b
---
7f1c5b
 util/trace-events  |  1 +
7f1c5b
 util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++
7f1c5b
 2 files changed, 33 insertions(+)
7f1c5b
7f1c5b
diff --git a/util/trace-events b/util/trace-events
7f1c5b
index c8f53d7d9f..16f78d8fe5 100644
7f1c5b
--- a/util/trace-events
7f1c5b
+++ b/util/trace-events
7f1c5b
@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz
7f1c5b
 qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
7f1c5b
 
7f1c5b
 #userfaultfd.c
7f1c5b
+uffd_detect_open_mode(int mode) "%d"
7f1c5b
 uffd_query_features_nosys(int err) "errno: %i"
7f1c5b
 uffd_query_features_api_failed(int err) "errno: %i"
7f1c5b
 uffd_create_fd_nosys(int err) "errno: %i"
7f1c5b
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
7f1c5b
index 4953b3137d..fdff4867e8 100644
7f1c5b
--- a/util/userfaultfd.c
7f1c5b
+++ b/util/userfaultfd.c
7f1c5b
@@ -18,10 +18,42 @@
7f1c5b
 #include <poll.h>
7f1c5b
 #include <sys/syscall.h>
7f1c5b
 #include <sys/ioctl.h>
7f1c5b
+#include <fcntl.h>
7f1c5b
+
7f1c5b
+typedef enum {
7f1c5b
+    UFFD_UNINITIALIZED = 0,
7f1c5b
+    UFFD_USE_DEV_PATH,
7f1c5b
+    UFFD_USE_SYSCALL,
7f1c5b
+} uffd_open_mode;
7f1c5b
 
7f1c5b
 int uffd_open(int flags)
7f1c5b
 {
7f1c5b
 #if defined(__NR_userfaultfd)
7f1c5b
+    static uffd_open_mode open_mode;
7f1c5b
+    static int uffd_dev;
7f1c5b
+
7f1c5b
+    /* Detect how to generate uffd desc when run the 1st time */
7f1c5b
+    if (open_mode == UFFD_UNINITIALIZED) {
7f1c5b
+        /*
7f1c5b
+         * Make /dev/userfaultfd the default approach because it has better
7f1c5b
+         * permission controls, meanwhile allows kernel faults without any
7f1c5b
+         * privilege requirement (e.g. CAP_SYS_PTRACE).
7f1c5b
+         */
7f1c5b
+        uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
7f1c5b
+        if (uffd_dev >= 0) {
7f1c5b
+            open_mode = UFFD_USE_DEV_PATH;
7f1c5b
+        } else {
7f1c5b
+            /* Fallback to the system call */
7f1c5b
+            open_mode = UFFD_USE_SYSCALL;
7f1c5b
+        }
7f1c5b
+        trace_uffd_detect_open_mode(open_mode);
7f1c5b
+    }
7f1c5b
+
7f1c5b
+    if (open_mode == UFFD_USE_DEV_PATH) {
7f1c5b
+        assert(uffd_dev >= 0);
7f1c5b
+        return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
7f1c5b
+    }
7f1c5b
+
7f1c5b
     return syscall(__NR_userfaultfd, flags);
7f1c5b
 #else
7f1c5b
     return -EINVAL;
7f1c5b
-- 
7f1c5b
2.31.1
7f1c5b