dryang / rpms / systemd

Forked from rpms/systemd 2 years ago
Clone
Blob Blame History Raw
From 5ccae46f2a192a9347feb604901127c55ce1e039 Mon Sep 17 00:00:00 2001
From: Kyle Walker <kwalker@redhat.com>
Date: Wed, 13 Dec 2017 12:49:26 -0500
Subject: [PATCH] core: Implement timeout based umount/remount limit

Remount, and subsequent umount, attempts can hang for inaccessible network
based mount points. This can leave a system in a hard hang state that
requires a hard reset in order to recover. This change moves the remount,
and umount attempts into separate child processes. The remount and umount
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
those waits fail, the parent will issue a SIGKILL to the child and continue
with the shutdown efforts.

In addition, instead of only reporting some additional errors on the final
attempt, failures are reported as they occur.

(cherry picked from commit d5641e0d7e8f55937fbc3a7ecd667e42c5836d80)

Related: #1571098
---
 src/core/umount.c         | 112 ++++++++++++++++++++++++++++++--------
 src/shared/def.h          |   2 -
 src/shared/login-shared.c |   1 +
 src/shared/util.c         |  61 +++++++++++++++++++++
 src/shared/util.h         |  16 ++++++
 5 files changed, 168 insertions(+), 24 deletions(-)

diff --git a/src/core/umount.c b/src/core/umount.c
index 91d67c06ca..bd38966124 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -363,7 +363,84 @@ static int delete_dm(dev_t devnum) {
         return r >= 0 ? 0 : -errno;
 }
 
-static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_error) {
+static int remount_with_timeout(MountPoint *m, char *options, int *n_failed) {
+        pid_t pid;
+        int r;
+
+        BLOCK_SIGNALS(SIGCHLD);
+
+        /* Due to the possiblity of a remount operation hanging, we
+         * fork a child process and set a timeout. If the timeout
+         * lapses, the assumption is that that particular remount
+         * failed. */
+        pid = fork();
+        if (pid < 0)
+                return log_error_errno(errno, "Failed to fork: %m");
+
+        if (pid == 0) {
+                log_info("Remounting '%s' read-only in with options '%s'.", m->path, options);
+
+                /* Start the mount operation here in the child */
+                r = mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
+                if (r < 0)
+                        log_error_errno(errno, "Failed to remount '%s' read-only: %m", m->path);
+
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+        }
+
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+        if (r == -ETIMEDOUT) {
+                log_error_errno(errno, "Remounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
+                (void) kill(pid, SIGKILL);
+        } else if (r < 0)
+                log_error_errno(r, "Failed to wait for process: %m");
+
+        return r;
+}
+
+static int umount_with_timeout(MountPoint *m, bool *changed) {
+        pid_t pid;
+        int r;
+
+        BLOCK_SIGNALS(SIGCHLD);
+
+        /* Due to the possiblity of a umount operation hanging, we
+         * fork a child process and set a timeout. If the timeout
+         * lapses, the assumption is that that particular umount
+         * failed. */
+        pid = fork();
+        if (pid < 0)
+                return log_error_errno(errno, "Failed to fork: %m");
+
+        if (pid == 0) {
+                log_info("Unmounting '%s'.", m->path);
+
+                /* Start the mount operation here in the child Using MNT_FORCE
+                 * causes some filesystems (e.g. FUSE and NFS and other network
+                 * filesystems) to abort any pending requests and return -EIO
+                 * rather than blocking indefinitely. If the filesysten is
+                 * "busy", this may allow processes to die, thus making the
+                 * filesystem less busy so the unmount might succeed (rather
+                 * then return EBUSY).*/
+                r = umount2(m->path, MNT_FORCE);
+                if (r < 0)
+                        log_error_errno(errno, "Failed to unmount %s: %m", m->path);
+
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+        }
+
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+        if (r == -ETIMEDOUT) {
+                log_error_errno(errno, "Unmounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
+                (void) kill(pid, SIGKILL);
+        } else if (r < 0)
+                log_error_errno(r, "Failed to wait for process: %m");
+
+        return r;
+}
+
+
+static int mount_points_list_umount(MountPoint **head, bool *changed) {
         MountPoint *m, *n;
         int n_failed = 0;
 
@@ -405,9 +482,13 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
                          * explicitly remount the super block of that
                          * alias read-only we hence should be
                          * relatively safe regarding keeping the fs we
-                         * can otherwise not see dirty. */
-                        log_info("Remounting '%s' read-only with options '%s'.", m->path, options);
-                        (void) mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
+                         * can otherwise not see dirty.
+                         *
+                         * Since the remount can hang in the instance of
+                         * remote filesystems, we remount asynchronously
+                         * and skip the subsequent umount if it fails */
+                        if (remount_with_timeout(m, options, &n_failed) < 0)
+                                continue;
                 }
 
                 /* Skip / and /usr since we cannot unmount that
@@ -420,22 +501,14 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
                 )
                         continue;
 
-                /* Trying to umount. Using MNT_FORCE causes some
-                 * filesystems (e.g. FUSE and NFS and other network
-                 * filesystems) to abort any pending requests and
-                 * return -EIO rather than blocking indefinitely.
-                 * If the filesysten is "busy", this may allow processes
-                 * to die, thus making the filesystem less busy so
-                 * the unmount might succeed (rather then return EBUSY).*/
-                log_info("Unmounting %s.", m->path);
-                if (umount2(m->path, MNT_FORCE) == 0) {
+                /* Trying to umount */
+                if (umount_with_timeout(m, changed) < 0)
+                        n_failed++;
+                else {
                         if (changed)
                                 *changed = true;
 
                         mount_point_free(head, m);
-                } else if (log_error) {
-                        log_warning_errno(errno, "Could not unmount %s: %m", m->path);
-                        n_failed++;
                 }
         }
 
@@ -550,17 +623,12 @@ int umount_all(bool *changed) {
         do {
                 umount_changed = false;
 
-                mount_points_list_umount(&mp_list_head, &umount_changed, false);
+                mount_points_list_umount(&mp_list_head, &umount_changed);
                 if (umount_changed)
                         *changed = true;
 
         } while (umount_changed);
 
-        /* umount one more time with logging enabled */
-        r = mount_points_list_umount(&mp_list_head, &umount_changed, true);
-        if (r <= 0)
-                goto end;
-
   end:
         mount_points_list_free(&mp_list_head);
 
diff --git a/src/shared/def.h b/src/shared/def.h
index 9e008a6d2d..f193ab1f9f 100644
--- a/src/shared/def.h
+++ b/src/shared/def.h
@@ -21,8 +21,6 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
-#include "util.h"
-
 #define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC)
 #define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC)
 #define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC)
diff --git a/src/shared/login-shared.c b/src/shared/login-shared.c
index 054c77503b..5da0f05838 100644
--- a/src/shared/login-shared.c
+++ b/src/shared/login-shared.c
@@ -21,6 +21,7 @@
 
 #include "login-shared.h"
 #include "def.h"
+#include "util.h"
 
 bool session_id_valid(const char *id) {
         assert(id);
diff --git a/src/shared/util.c b/src/shared/util.c
index 982f5e044f..3216f004ad 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -9049,3 +9049,64 @@ try_dev_shm_without_o_tmpfile:
 
         return -EOPNOTSUPP;
 }
+
+/*
+ * Return values:
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
+ *       process, the process timed out, the process was terminated by a
+ *       signal, or failed for an unknown reason.
+ * >=0 : The process terminated normally with no failures.
+ *
+ * Success is indicated by a return value of zero, a timeout is indicated
+ * by ETIMEDOUT, and all other child failure states are indicated by error
+ * is indicated by a non-zero value.
+*/
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
+        sigset_t mask;
+        int r;
+        usec_t until;
+
+        assert_se(sigemptyset(&mask) == 0);
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
+
+        /* Drop into a sigtimewait-based timeout. Waiting for the
+         * pid to exit. */
+        until = now(CLOCK_MONOTONIC) + timeout;
+        for (;;) {
+                usec_t n;
+                siginfo_t status = {};
+                struct timespec ts;
+
+                n = now(CLOCK_MONOTONIC);
+                if (n >= until)
+                        break;
+
+                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
+                /* Assuming we woke due to the child exiting. */
+                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
+                        if (status.si_pid == pid) {
+                                /* This is the correct child.*/
+                                if (status.si_code == CLD_EXITED)
+                                        return (status.si_status == 0) ? 0 : -EPROTO;
+                                else
+                                        return -EPROTO;
+                        }
+                }
+                /* Not the child, check for errors and proceed appropriately */
+                if (r < 0) {
+                        switch (r) {
+                        case -EAGAIN:
+                                /* Timed out, child is likely hung. */
+                                return -ETIMEDOUT;
+                        case -EINTR:
+                                /* Received a different signal and should retry */
+                                continue;
+                        default:
+                                /* Return any unexpected errors */
+                                return r;
+                        }
+                }
+        }
+
+        return -EPROTO;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index 9c4be02566..998f882bbb 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -22,6 +22,7 @@
 ***/
 
 #include <alloca.h>
+#include <def.h>
 #include <fcntl.h>
 #include <inttypes.h>
 #include <time.h>
@@ -1122,3 +1123,18 @@ enum {
 };
 
 int acquire_data_fd(const void *data, size_t size, unsigned flags);
+
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
+
+static inline void block_signals_reset(sigset_t *ss) {
+        assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0);
+}
+
+#define BLOCK_SIGNALS(...)                                                         \
+        _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({        \
+                sigset_t _t;                                                       \
+                assert_se(sigprocmask(SIG_SETMASK, NULL, &_t) == 0);               \
+                assert_se(sigprocmask_many(SIG_BLOCK, __VA_ARGS__, -1) >= 0);      \
+                _t;                                                                \
+        })
+