923a60
From 5ccae46f2a192a9347feb604901127c55ce1e039 Mon Sep 17 00:00:00 2001
923a60
From: Kyle Walker <kwalker@redhat.com>
923a60
Date: Wed, 13 Dec 2017 12:49:26 -0500
923a60
Subject: [PATCH] core: Implement timeout based umount/remount limit
923a60
923a60
Remount, and subsequent umount, attempts can hang for inaccessible network
923a60
based mount points. This can leave a system in a hard hang state that
923a60
requires a hard reset in order to recover. This change moves the remount,
923a60
and umount attempts into separate child processes. The remount and umount
923a60
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
923a60
those waits fail, the parent will issue a SIGKILL to the child and continue
923a60
with the shutdown efforts.
923a60
923a60
In addition, instead of only reporting some additional errors on the final
923a60
attempt, failures are reported as they occur.
923a60
923a60
(cherry picked from commit d5641e0d7e8f55937fbc3a7ecd667e42c5836d80)
923a60
923a60
Related: #1571098
923a60
---
923a60
 src/core/umount.c         | 112 ++++++++++++++++++++++++++++++--------
923a60
 src/shared/def.h          |   2 -
923a60
 src/shared/login-shared.c |   1 +
923a60
 src/shared/util.c         |  61 +++++++++++++++++++++
923a60
 src/shared/util.h         |  16 ++++++
923a60
 5 files changed, 168 insertions(+), 24 deletions(-)
923a60
923a60
diff --git a/src/core/umount.c b/src/core/umount.c
923a60
index 91d67c06ca..bd38966124 100644
923a60
--- a/src/core/umount.c
923a60
+++ b/src/core/umount.c
923a60
@@ -363,7 +363,84 @@ static int delete_dm(dev_t devnum) {
923a60
         return r >= 0 ? 0 : -errno;
923a60
 }
923a60
 
923a60
-static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_error) {
923a60
+static int remount_with_timeout(MountPoint *m, char *options, int *n_failed) {
923a60
+        pid_t pid;
923a60
+        int r;
923a60
+
923a60
+        BLOCK_SIGNALS(SIGCHLD);
923a60
+
923a60
+        /* Due to the possiblity of a remount operation hanging, we
923a60
+         * fork a child process and set a timeout. If the timeout
923a60
+         * lapses, the assumption is that that particular remount
923a60
+         * failed. */
923a60
+        pid = fork();
923a60
+        if (pid < 0)
923a60
+                return log_error_errno(errno, "Failed to fork: %m");
923a60
+
923a60
+        if (pid == 0) {
923a60
+                log_info("Remounting '%s' read-only in with options '%s'.", m->path, options);
923a60
+
923a60
+                /* Start the mount operation here in the child */
923a60
+                r = mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
923a60
+                if (r < 0)
923a60
+                        log_error_errno(errno, "Failed to remount '%s' read-only: %m", m->path);
923a60
+
923a60
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
923a60
+        }
923a60
+
923a60
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
923a60
+        if (r == -ETIMEDOUT) {
923a60
+                log_error_errno(errno, "Remounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
923a60
+                (void) kill(pid, SIGKILL);
923a60
+        } else if (r < 0)
923a60
+                log_error_errno(r, "Failed to wait for process: %m");
923a60
+
923a60
+        return r;
923a60
+}
923a60
+
923a60
+static int umount_with_timeout(MountPoint *m, bool *changed) {
923a60
+        pid_t pid;
923a60
+        int r;
923a60
+
923a60
+        BLOCK_SIGNALS(SIGCHLD);
923a60
+
923a60
+        /* Due to the possiblity of a umount operation hanging, we
923a60
+         * fork a child process and set a timeout. If the timeout
923a60
+         * lapses, the assumption is that that particular umount
923a60
+         * failed. */
923a60
+        pid = fork();
923a60
+        if (pid < 0)
923a60
+                return log_error_errno(errno, "Failed to fork: %m");
923a60
+
923a60
+        if (pid == 0) {
923a60
+                log_info("Unmounting '%s'.", m->path);
923a60
+
923a60
+                /* Start the mount operation here in the child Using MNT_FORCE
923a60
+                 * causes some filesystems (e.g. FUSE and NFS and other network
923a60
+                 * filesystems) to abort any pending requests and return -EIO
923a60
+                 * rather than blocking indefinitely. If the filesysten is
923a60
+                 * "busy", this may allow processes to die, thus making the
923a60
+                 * filesystem less busy so the unmount might succeed (rather
923a60
+                 * then return EBUSY).*/
923a60
+                r = umount2(m->path, MNT_FORCE);
923a60
+                if (r < 0)
923a60
+                        log_error_errno(errno, "Failed to unmount %s: %m", m->path);
923a60
+
923a60
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
923a60
+        }
923a60
+
923a60
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
923a60
+        if (r == -ETIMEDOUT) {
923a60
+                log_error_errno(errno, "Unmounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
923a60
+                (void) kill(pid, SIGKILL);
923a60
+        } else if (r < 0)
923a60
+                log_error_errno(r, "Failed to wait for process: %m");
923a60
+
923a60
+        return r;
923a60
+}
923a60
+
923a60
+
923a60
+static int mount_points_list_umount(MountPoint **head, bool *changed) {
923a60
         MountPoint *m, *n;
923a60
         int n_failed = 0;
923a60
 
923a60
@@ -405,9 +482,13 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
923a60
                          * explicitly remount the super block of that
923a60
                          * alias read-only we hence should be
923a60
                          * relatively safe regarding keeping the fs we
923a60
-                         * can otherwise not see dirty. */
923a60
-                        log_info("Remounting '%s' read-only with options '%s'.", m->path, options);
923a60
-                        (void) mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
923a60
+                         * can otherwise not see dirty.
923a60
+                         *
923a60
+                         * Since the remount can hang in the instance of
923a60
+                         * remote filesystems, we remount asynchronously
923a60
+                         * and skip the subsequent umount if it fails */
923a60
+                        if (remount_with_timeout(m, options, &n_failed) < 0)
923a60
+                                continue;
923a60
                 }
923a60
 
923a60
                 /* Skip / and /usr since we cannot unmount that
923a60
@@ -420,22 +501,14 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
923a60
                 )
923a60
                         continue;
923a60
 
923a60
-                /* Trying to umount. Using MNT_FORCE causes some
923a60
-                 * filesystems (e.g. FUSE and NFS and other network
923a60
-                 * filesystems) to abort any pending requests and
923a60
-                 * return -EIO rather than blocking indefinitely.
923a60
-                 * If the filesysten is "busy", this may allow processes
923a60
-                 * to die, thus making the filesystem less busy so
923a60
-                 * the unmount might succeed (rather then return EBUSY).*/
923a60
-                log_info("Unmounting %s.", m->path);
923a60
-                if (umount2(m->path, MNT_FORCE) == 0) {
923a60
+                /* Trying to umount */
923a60
+                if (umount_with_timeout(m, changed) < 0)
923a60
+                        n_failed++;
923a60
+                else {
923a60
                         if (changed)
923a60
                                 *changed = true;
923a60
 
923a60
                         mount_point_free(head, m);
923a60
-                } else if (log_error) {
923a60
-                        log_warning_errno(errno, "Could not unmount %s: %m", m->path);
923a60
-                        n_failed++;
923a60
                 }
923a60
         }
923a60
 
923a60
@@ -550,17 +623,12 @@ int umount_all(bool *changed) {
923a60
         do {
923a60
                 umount_changed = false;
923a60
 
923a60
-                mount_points_list_umount(&mp_list_head, &umount_changed, false);
923a60
+                mount_points_list_umount(&mp_list_head, &umount_changed);
923a60
                 if (umount_changed)
923a60
                         *changed = true;
923a60
 
923a60
         } while (umount_changed);
923a60
 
923a60
-        /* umount one more time with logging enabled */
923a60
-        r = mount_points_list_umount(&mp_list_head, &umount_changed, true);
923a60
-        if (r <= 0)
923a60
-                goto end;
923a60
-
923a60
   end:
923a60
         mount_points_list_free(&mp_list_head);
923a60
 
923a60
diff --git a/src/shared/def.h b/src/shared/def.h
923a60
index 9e008a6d2d..f193ab1f9f 100644
923a60
--- a/src/shared/def.h
923a60
+++ b/src/shared/def.h
923a60
@@ -21,8 +21,6 @@
923a60
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
923a60
 ***/
923a60
 
923a60
-#include "util.h"
923a60
-
923a60
 #define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC)
923a60
 #define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC)
923a60
 #define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC)
923a60
diff --git a/src/shared/login-shared.c b/src/shared/login-shared.c
923a60
index 054c77503b..5da0f05838 100644
923a60
--- a/src/shared/login-shared.c
923a60
+++ b/src/shared/login-shared.c
923a60
@@ -21,6 +21,7 @@
923a60
 
923a60
 #include "login-shared.h"
923a60
 #include "def.h"
923a60
+#include "util.h"
923a60
 
923a60
 bool session_id_valid(const char *id) {
923a60
         assert(id);
923a60
diff --git a/src/shared/util.c b/src/shared/util.c
923a60
index 982f5e044f..3216f004ad 100644
923a60
--- a/src/shared/util.c
923a60
+++ b/src/shared/util.c
923a60
@@ -9049,3 +9049,64 @@ try_dev_shm_without_o_tmpfile:
923a60
 
923a60
         return -EOPNOTSUPP;
923a60
 }
923a60
+
923a60
+/*
923a60
+ * Return values:
923a60
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
923a60
+ *       process, the process timed out, the process was terminated by a
923a60
+ *       signal, or failed for an unknown reason.
923a60
+ * >=0 : The process terminated normally with no failures.
923a60
+ *
923a60
+ * Success is indicated by a return value of zero, a timeout is indicated
923a60
+ * by ETIMEDOUT, and all other child failure states are indicated by error
923a60
+ * is indicated by a non-zero value.
923a60
+*/
923a60
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
923a60
+        sigset_t mask;
923a60
+        int r;
923a60
+        usec_t until;
923a60
+
923a60
+        assert_se(sigemptyset(&mask) == 0);
923a60
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
923a60
+
923a60
+        /* Drop into a sigtimewait-based timeout. Waiting for the
923a60
+         * pid to exit. */
923a60
+        until = now(CLOCK_MONOTONIC) + timeout;
923a60
+        for (;;) {
923a60
+                usec_t n;
923a60
+                siginfo_t status = {};
923a60
+                struct timespec ts;
923a60
+
923a60
+                n = now(CLOCK_MONOTONIC);
923a60
+                if (n >= until)
923a60
+                        break;
923a60
+
923a60
+                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
923a60
+                /* Assuming we woke due to the child exiting. */
923a60
+                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
923a60
+                        if (status.si_pid == pid) {
923a60
+                                /* This is the correct child.*/
923a60
+                                if (status.si_code == CLD_EXITED)
923a60
+                                        return (status.si_status == 0) ? 0 : -EPROTO;
923a60
+                                else
923a60
+                                        return -EPROTO;
923a60
+                        }
923a60
+                }
923a60
+                /* Not the child, check for errors and proceed appropriately */
923a60
+                if (r < 0) {
923a60
+                        switch (r) {
923a60
+                        case -EAGAIN:
923a60
+                                /* Timed out, child is likely hung. */
923a60
+                                return -ETIMEDOUT;
923a60
+                        case -EINTR:
923a60
+                                /* Received a different signal and should retry */
923a60
+                                continue;
923a60
+                        default:
923a60
+                                /* Return any unexpected errors */
923a60
+                                return r;
923a60
+                        }
923a60
+                }
923a60
+        }
923a60
+
923a60
+        return -EPROTO;
923a60
+}
923a60
diff --git a/src/shared/util.h b/src/shared/util.h
923a60
index 9c4be02566..998f882bbb 100644
923a60
--- a/src/shared/util.h
923a60
+++ b/src/shared/util.h
923a60
@@ -22,6 +22,7 @@
923a60
 ***/
923a60
 
923a60
 #include <alloca.h>
923a60
+#include <def.h>
923a60
 #include <fcntl.h>
923a60
 #include <inttypes.h>
923a60
 #include <time.h>
923a60
@@ -1122,3 +1123,18 @@ enum {
923a60
 };
923a60
 
923a60
 int acquire_data_fd(const void *data, size_t size, unsigned flags);
923a60
+
923a60
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
923a60
+
923a60
+static inline void block_signals_reset(sigset_t *ss) {
923a60
+        assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0);
923a60
+}
923a60
+
923a60
+#define BLOCK_SIGNALS(...)                                                         \
923a60
+        _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({        \
923a60
+                sigset_t _t;                                                       \
923a60
+                assert_se(sigprocmask(SIG_SETMASK, NULL, &_t) == 0);               \
923a60
+                assert_se(sigprocmask_many(SIG_BLOCK, __VA_ARGS__, -1) >= 0);      \
923a60
+                _t;                                                                \
923a60
+        })
923a60
+