ac3a84
From e86a03c4f201745a683cfe1549a202d5ae636b07 Mon Sep 17 00:00:00 2001
ac3a84
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
ac3a84
Date: Mon, 28 Nov 2022 12:12:55 +0100
ac3a84
Subject: [PATCH] coredump: do not allow user to access coredumps with changed
ac3a84
 uid/gid/capabilities
ac3a84
ac3a84
When the user starts a program which elevates its permissions via setuid,
ac3a84
setgid, or capabilities set on the file, it may access additional information
ac3a84
which would then be visible in the coredump. We shouldn't make the coredump
ac3a84
visible to the user in such cases.
ac3a84
ac3a84
Reported-by: Matthias Gerstner <mgerstner@suse.de>
ac3a84
ac3a84
This reads the /proc/<pid>/auxv file and attaches it to the process metadata as
ac3a84
PROC_AUXV. Before the coredump is submitted, it is parsed and if either
ac3a84
at_secure was set (which the kernel will do for processes that are setuid,
ac3a84
setgid, or setcap), or if the effective uid/gid don't match uid/gid, the file
ac3a84
is not made accessible to the user. If we can't access this data, we assume the
ac3a84
file should not be made accessible either. In principle we could also access
ac3a84
the auxv data from a note in the core file, but that is much more complex and
ac3a84
it seems better to use the stand-alone file that is provided by the kernel.
ac3a84
ac3a84
Attaching auxv is both convenient for this patch (because this way it's passed
ac3a84
between the stages along with other fields), and I think it makes sense to save
ac3a84
it in general.
ac3a84
ac3a84
We use the information early in the core file to figure out if the program was
ac3a84
32-bit or 64-bit and its endianness. This way we don't need heuristics to guess
ac3a84
the format of the auxv structure. This test might reject some cases on
ac3a84
fringe architectures. But the impact would be limited: we just won't grant the
ac3a84
user permissions to view the coredump file. If people report that we're missing
ac3a84
some cases, we can always enhance this to support more architectures.
ac3a84
ac3a84
I tested auxv parsing on amd64, 32-bit program on amd64, arm64, arm32, and
ac3a84
ppc64el, but not the whole coredump handling.
ac3a84
ac3a84
(cherry picked from commit 3e4d0f6cf99f8677edd6a237382a65bfe758de03)
ac3a84
ac3a84
Resolves: #2155517
ac3a84
---
ac3a84
 src/basic/io-util.h     |   9 ++
ac3a84
 src/coredump/coredump.c | 196 +++++++++++++++++++++++++++++++++++++---
ac3a84
 2 files changed, 192 insertions(+), 13 deletions(-)
ac3a84
ac3a84
diff --git a/src/basic/io-util.h b/src/basic/io-util.h
ac3a84
index 39728e06bc..3afb134266 100644
ac3a84
--- a/src/basic/io-util.h
ac3a84
+++ b/src/basic/io-util.h
ac3a84
@@ -91,7 +91,16 @@ struct iovec_wrapper *iovw_new(void);
ac3a84
 struct iovec_wrapper *iovw_free(struct iovec_wrapper *iovw);
ac3a84
 struct iovec_wrapper *iovw_free_free(struct iovec_wrapper *iovw);
ac3a84
 void iovw_free_contents(struct iovec_wrapper *iovw, bool free_vectors);
ac3a84
+
ac3a84
 int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len);
ac3a84
+static inline int iovw_consume(struct iovec_wrapper *iovw, void *data, size_t len) {
ac3a84
+        /* Move data into iovw or free on error */
ac3a84
+        int r = iovw_put(iovw, data, len);
ac3a84
+        if (r < 0)
ac3a84
+                free(data);
ac3a84
+        return r;
ac3a84
+}
ac3a84
+
ac3a84
 int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const char *value);
ac3a84
 int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value);
ac3a84
 void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new);
ac3a84
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
ac3a84
index 7a181bdeeb..ea3d8c415a 100644
ac3a84
--- a/src/coredump/coredump.c
ac3a84
+++ b/src/coredump/coredump.c
ac3a84
@@ -4,6 +4,7 @@
ac3a84
 #include <stdio.h>
ac3a84
 #include <sys/prctl.h>
ac3a84
 #include <sys/statvfs.h>
ac3a84
+#include <sys/auxv.h>
ac3a84
 #include <sys/xattr.h>
ac3a84
 #include <unistd.h>
ac3a84
 
ac3a84
@@ -106,6 +107,7 @@ enum {
ac3a84
 
ac3a84
         META_EXE = _META_MANDATORY_MAX,
ac3a84
         META_UNIT,
ac3a84
+        META_PROC_AUXV,
ac3a84
         _META_MAX
ac3a84
 };
ac3a84
 
ac3a84
@@ -120,10 +122,12 @@ static const char * const meta_field_names[_META_MAX] = {
ac3a84
         [META_COMM]           = "COREDUMP_COMM=",
ac3a84
         [META_EXE]            = "COREDUMP_EXE=",
ac3a84
         [META_UNIT]           = "COREDUMP_UNIT=",
ac3a84
+        [META_PROC_AUXV]      = "COREDUMP_PROC_AUXV=",
ac3a84
 };
ac3a84
 
ac3a84
 typedef struct Context {
ac3a84
         const char *meta[_META_MAX];
ac3a84
+        size_t meta_size[_META_MAX];
ac3a84
         pid_t pid;
ac3a84
         bool is_pid1;
ac3a84
         bool is_journald;
ac3a84
@@ -185,13 +189,16 @@ static uint64_t storage_size_max(void) {
ac3a84
         return 0;
ac3a84
 }
ac3a84
 
ac3a84
-static int fix_acl(int fd, uid_t uid) {
ac3a84
+static int fix_acl(int fd, uid_t uid, bool allow_user) {
ac3a84
+        assert(fd >= 0);
ac3a84
+        assert(uid_is_valid(uid));
ac3a84
 
ac3a84
 #if HAVE_ACL
ac3a84
         int r;
ac3a84
 
ac3a84
-        assert(fd >= 0);
ac3a84
-        assert(uid_is_valid(uid));
ac3a84
+        /* We don't allow users to read coredumps if the uid or capabilities were changed. */
ac3a84
+        if (!allow_user)
ac3a84
+                return 0;
ac3a84
 
ac3a84
         if (uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY)
ac3a84
                 return 0;
ac3a84
@@ -251,7 +258,8 @@ static int fix_permissions(
ac3a84
                 const char *filename,
ac3a84
                 const char *target,
ac3a84
                 const Context *context,
ac3a84
-                uid_t uid) {
ac3a84
+                uid_t uid,
ac3a84
+                bool allow_user) {
ac3a84
 
ac3a84
         int r;
ac3a84
 
ac3a84
@@ -261,7 +269,7 @@ static int fix_permissions(
ac3a84
 
ac3a84
         /* Ignore errors on these */
ac3a84
         (void) fchmod(fd, 0640);
ac3a84
-        (void) fix_acl(fd, uid);
ac3a84
+        (void) fix_acl(fd, uid, allow_user);
ac3a84
         (void) fix_xattr(fd, context);
ac3a84
 
ac3a84
         r = fsync_full(fd);
ac3a84
@@ -331,6 +339,153 @@ static int make_filename(const Context *context, char **ret) {
ac3a84
         return 0;
ac3a84
 }
ac3a84
 
ac3a84
+static int parse_auxv64(
ac3a84
+                const uint64_t *auxv,
ac3a84
+                size_t size_bytes,
ac3a84
+                int *at_secure,
ac3a84
+                uid_t *uid,
ac3a84
+                uid_t *euid,
ac3a84
+                gid_t *gid,
ac3a84
+                gid_t *egid) {
ac3a84
+
ac3a84
+        assert(auxv || size_bytes == 0);
ac3a84
+
ac3a84
+        if (size_bytes % (2 * sizeof(uint64_t)) != 0)
ac3a84
+                return log_warning_errno(SYNTHETIC_ERRNO(EIO), "Incomplete auxv structure (%zu bytes).", size_bytes);
ac3a84
+
ac3a84
+        size_t words = size_bytes / sizeof(uint64_t);
ac3a84
+
ac3a84
+        /* Note that we set output variables even on error. */
ac3a84
+
ac3a84
+        for (size_t i = 0; i + 1 < words; i += 2)
ac3a84
+                switch (auxv[i]) {
ac3a84
+                case AT_SECURE:
ac3a84
+                        *at_secure = auxv[i + 1] != 0;
ac3a84
+                        break;
ac3a84
+                case AT_UID:
ac3a84
+                        *uid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_EUID:
ac3a84
+                        *euid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_GID:
ac3a84
+                        *gid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_EGID:
ac3a84
+                        *egid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_NULL:
ac3a84
+                        if (auxv[i + 1] != 0)
ac3a84
+                                goto error;
ac3a84
+                        return 0;
ac3a84
+                }
ac3a84
+ error:
ac3a84
+        return log_warning_errno(SYNTHETIC_ERRNO(ENODATA),
ac3a84
+                                 "AT_NULL terminator not found, cannot parse auxv structure.");
ac3a84
+}
ac3a84
+
ac3a84
+static int parse_auxv32(
ac3a84
+                const uint32_t *auxv,
ac3a84
+                size_t size_bytes,
ac3a84
+                int *at_secure,
ac3a84
+                uid_t *uid,
ac3a84
+                uid_t *euid,
ac3a84
+                gid_t *gid,
ac3a84
+                gid_t *egid) {
ac3a84
+
ac3a84
+        assert(auxv || size_bytes == 0);
ac3a84
+
ac3a84
+        size_t words = size_bytes / sizeof(uint32_t);
ac3a84
+
ac3a84
+        if (size_bytes % (2 * sizeof(uint32_t)) != 0)
ac3a84
+                return log_warning_errno(SYNTHETIC_ERRNO(EIO), "Incomplete auxv structure (%zu bytes).", size_bytes);
ac3a84
+
ac3a84
+        /* Note that we set output variables even on error. */
ac3a84
+
ac3a84
+        for (size_t i = 0; i + 1 < words; i += 2)
ac3a84
+                switch (auxv[i]) {
ac3a84
+                case AT_SECURE:
ac3a84
+                        *at_secure = auxv[i + 1] != 0;
ac3a84
+                        break;
ac3a84
+                case AT_UID:
ac3a84
+                        *uid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_EUID:
ac3a84
+                        *euid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_GID:
ac3a84
+                        *gid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_EGID:
ac3a84
+                        *egid = auxv[i + 1];
ac3a84
+                        break;
ac3a84
+                case AT_NULL:
ac3a84
+                        if (auxv[i + 1] != 0)
ac3a84
+                                goto error;
ac3a84
+                        return 0;
ac3a84
+                }
ac3a84
+ error:
ac3a84
+        return log_warning_errno(SYNTHETIC_ERRNO(ENODATA),
ac3a84
+                                 "AT_NULL terminator not found, cannot parse auxv structure.");
ac3a84
+}
ac3a84
+
ac3a84
+static int grant_user_access(int core_fd, const Context *context) {
ac3a84
+        int at_secure = -1;
ac3a84
+        uid_t uid = UID_INVALID, euid = UID_INVALID;
ac3a84
+        uid_t gid = GID_INVALID, egid = GID_INVALID;
ac3a84
+        int r;
ac3a84
+
ac3a84
+        assert(core_fd >= 0);
ac3a84
+        assert(context);
ac3a84
+
ac3a84
+        if (!context->meta[META_PROC_AUXV])
ac3a84
+                return log_warning_errno(SYNTHETIC_ERRNO(ENODATA), "No auxv data, not adjusting permissions.");
ac3a84
+
ac3a84
+        uint8_t elf[EI_NIDENT];
ac3a84
+        errno = 0;
ac3a84
+        if (pread(core_fd, &elf, sizeof(elf), 0) != sizeof(elf))
ac3a84
+                return log_warning_errno(errno_or_else(EIO),
ac3a84
+                                         "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno));
ac3a84
+
ac3a84
+        if (elf[EI_MAG0] != ELFMAG0 ||
ac3a84
+            elf[EI_MAG1] != ELFMAG1 ||
ac3a84
+            elf[EI_MAG2] != ELFMAG2 ||
ac3a84
+            elf[EI_MAG3] != ELFMAG3 ||
ac3a84
+            elf[EI_VERSION] != EV_CURRENT)
ac3a84
+                return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN),
ac3a84
+                                      "Core file does not have ELF header, not adjusting permissions.");
ac3a84
+        if (!IN_SET(elf[EI_CLASS], ELFCLASS32, ELFCLASS64) ||
ac3a84
+            !IN_SET(elf[EI_DATA], ELFDATA2LSB, ELFDATA2MSB))
ac3a84
+                return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN),
ac3a84
+                                      "Core file has strange ELF class, not adjusting permissions.");
ac3a84
+
ac3a84
+        if ((elf[EI_DATA] == ELFDATA2LSB) != (__BYTE_ORDER == __LITTLE_ENDIAN))
ac3a84
+                return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN),
ac3a84
+                                      "Core file has non-native endianness, not adjusting permissions.");
ac3a84
+
ac3a84
+        if (elf[EI_CLASS] == ELFCLASS64)
ac3a84
+                r = parse_auxv64((const uint64_t*) context->meta[META_PROC_AUXV],
ac3a84
+                                 context->meta_size[META_PROC_AUXV],
ac3a84
+                                 &at_secure, &uid, &euid, &gid, &egid);
ac3a84
+        else
ac3a84
+                r = parse_auxv32((const uint32_t*) context->meta[META_PROC_AUXV],
ac3a84
+                                 context->meta_size[META_PROC_AUXV],
ac3a84
+                                 &at_secure, &uid, &euid, &gid, &egid);
ac3a84
+        if (r < 0)
ac3a84
+                return r;
ac3a84
+
ac3a84
+        /* We allow access if we got all the data and at_secure is not set and
ac3a84
+         * the uid/gid matches euid/egid. */
ac3a84
+        bool ret =
ac3a84
+                at_secure == 0 &&
ac3a84
+                uid != UID_INVALID && euid != UID_INVALID && uid == euid &&
ac3a84
+                gid != GID_INVALID && egid != GID_INVALID && gid == egid;
ac3a84
+        log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)",
ac3a84
+                  ret ? "permit" : "restrict",
ac3a84
+                  uid, euid, gid, egid, yes_no(at_secure));
ac3a84
+        return ret;
ac3a84
+}
ac3a84
+
ac3a84
 static int save_external_coredump(
ac3a84
                 const Context *context,
ac3a84
                 int input_fd,
ac3a84
@@ -453,6 +608,8 @@ static int save_external_coredump(
ac3a84
                                 context->meta[META_ARGV_PID], context->meta[META_COMM]);
ac3a84
         truncated = r == 1;
ac3a84
 
ac3a84
+        bool allow_user = grant_user_access(fd, context) > 0;
ac3a84
+
ac3a84
 #if HAVE_COMPRESSION
ac3a84
         if (arg_compress) {
ac3a84
                 _cleanup_(unlink_and_freep) char *tmp_compressed = NULL;
ac3a84
@@ -490,7 +647,7 @@ static int save_external_coredump(
ac3a84
                         uncompressed_size += partial_uncompressed_size;
ac3a84
                 }
ac3a84
 
ac3a84
-                r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid);
ac3a84
+                r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid, allow_user);
ac3a84
                 if (r < 0)
ac3a84
                         return r;
ac3a84
 
ac3a84
@@ -517,7 +674,7 @@ static int save_external_coredump(
ac3a84
                            "SIZE_LIMIT=%"PRIu64, max_size,
ac3a84
                            "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR);
ac3a84
 
ac3a84
-        r = fix_permissions(fd, tmp, fn, context, uid);
ac3a84
+        r = fix_permissions(fd, tmp, fn, context, uid, allow_user);
ac3a84
         if (r < 0)
ac3a84
                 return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn);
ac3a84
 
ac3a84
@@ -765,7 +922,7 @@ static int change_uid_gid(const Context *context) {
ac3a84
 }
ac3a84
 
ac3a84
 static int submit_coredump(
ac3a84
-                Context *context,
ac3a84
+                const Context *context,
ac3a84
                 struct iovec_wrapper *iovw,
ac3a84
                 int input_fd) {
ac3a84
 
ac3a84
@@ -944,16 +1101,15 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) {
ac3a84
                 struct iovec *iovec = iovw->iovec + n;
ac3a84
 
ac3a84
                 for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) {
ac3a84
-                        char *p;
ac3a84
-
ac3a84
                         /* Note that these strings are NUL terminated, because we made sure that a
ac3a84
                          * trailing NUL byte is in the buffer, though not included in the iov_len
ac3a84
                          * count (see process_socket() and gather_pid_metadata_*()) */
ac3a84
                         assert(((char*) iovec->iov_base)[iovec->iov_len] == 0);
ac3a84
 
ac3a84
-                        p = startswith(iovec->iov_base, meta_field_names[i]);
ac3a84
+                        const char *p = startswith(iovec->iov_base, meta_field_names[i]);
ac3a84
                         if (p) {
ac3a84
                                 context->meta[i] = p;
ac3a84
+                                context->meta_size[i] = iovec->iov_len - strlen(meta_field_names[i]);
ac3a84
                                 break;
ac3a84
                         }
ac3a84
                 }
ac3a84
@@ -1190,6 +1346,7 @@ static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) {
ac3a84
         uid_t owner_uid;
ac3a84
         pid_t pid;
ac3a84
         char *t;
ac3a84
+        size_t size;
ac3a84
         const char *p;
ac3a84
         int r;
ac3a84
 
ac3a84
@@ -1254,13 +1411,26 @@ static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) {
ac3a84
                 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t);
ac3a84
 
ac3a84
         p = procfs_file_alloca(pid, "cgroup");
ac3a84
-        if (read_full_virtual_file(p, &t, NULL) >=0)
ac3a84
+        if (read_full_virtual_file(p, &t, NULL) >= 0)
ac3a84
                 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t);
ac3a84
 
ac3a84
         p = procfs_file_alloca(pid, "mountinfo");
ac3a84
-        if (read_full_virtual_file(p, &t, NULL) >=0)
ac3a84
+        if (read_full_virtual_file(p, &t, NULL) >= 0)
ac3a84
                 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t);
ac3a84
 
ac3a84
+        /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */
ac3a84
+        p = procfs_file_alloca(pid, "auxv");
ac3a84
+        if (read_full_virtual_file(p, &t, &size) >= 0) {
ac3a84
+                char *buf = malloc(strlen("COREDUMP_PROC_AUXV=") + size + 1);
ac3a84
+                if (buf) {
ac3a84
+                        /* Add a dummy terminator to make save_context() happy. */
ac3a84
+                        *((uint8_t*) mempcpy(stpcpy(buf, "COREDUMP_PROC_AUXV="), t, size)) = '\0';
ac3a84
+                        (void) iovw_consume(iovw, buf, size + strlen("COREDUMP_PROC_AUXV="));
ac3a84
+                }
ac3a84
+
ac3a84
+                free(t);
ac3a84
+        }
ac3a84
+
ac3a84
         if (get_process_cwd(pid, &t) >= 0)
ac3a84
                 (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t);
ac3a84