|
|
a3944b |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
a3944b |
From: Martin Wilck <mwilck@suse.com>
|
|
|
a3944b |
Date: Wed, 24 Apr 2019 11:07:59 +0200
|
|
|
a3944b |
Subject: [PATCH] BZ 1700451: test socket connection in non-blocking mode
|
|
|
a3944b |
|
|
|
a3944b |
Since commit d7188fcd "multipathd: start daemon after udev trigger",
|
|
|
a3944b |
multipathd startup is delayed during boot until after "udev settle"
|
|
|
a3944b |
terminates. But "multipath -u" is run by udev workers for storage devices,
|
|
|
a3944b |
and attempts to connect to the multipathd socket. This causes a start job
|
|
|
a3944b |
for multipathd to be scheduled by systemd, but that job won't be started
|
|
|
a3944b |
until "udev settle" finishes. This is not a problem on systems with 129 or
|
|
|
a3944b |
less storage units, because the connect() call of "multipath -u" will
|
|
|
a3944b |
succeed anyway. But on larger systems, the listen backlog of the systemd
|
|
|
a3944b |
socket can be exceeded, which causes connect() calls for the socket to
|
|
|
a3944b |
block until multipathd starts up and begins calling accept(). This creates
|
|
|
a3944b |
a deadlock situation, because "multipath -u" (called by udev workers)
|
|
|
a3944b |
blocks, and thus "udev settle" doesn't finish, delaying multipathd
|
|
|
a3944b |
startup. This situation then persists until either the workers or "udev
|
|
|
a3944b |
settle" time out. In the former case, path devices might be misclassified
|
|
|
a3944b |
as non-multipath devices by "multipath -u".
|
|
|
a3944b |
|
|
|
a3944b |
Fix this by using a non-blocking socket fd for connect() and interpret the
|
|
|
a3944b |
errno appropriately.
|
|
|
a3944b |
|
|
|
a3944b |
This patch reverts most of the changes from commit 8cdf6661 "multipath:
|
|
|
a3944b |
check on multipathd without starting it". Instead, "multipath -u" does
|
|
|
a3944b |
access the socket and start multipath again (which is what we want IMO),
|
|
|
a3944b |
but it is now able to detect and handle the "full backlog" situation.
|
|
|
a3944b |
|
|
|
a3944b |
Signed-off-by: Martin Wilck <mwilck@suse.com>
|
|
|
a3944b |
|
|
|
a3944b |
V2:
|
|
|
a3944b |
|
|
|
a3944b |
Use same error reporting convention in __mpath_connect() as in
|
|
|
a3944b |
mpath_connect() (Hannes Reinecke). We can't easily change the latter,
|
|
|
a3944b |
because it's part of the "public" libmpathcmd API.
|
|
|
a3944b |
|
|
|
a3944b |
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
a3944b |
---
|
|
|
a3944b |
libmpathcmd/mpath_cmd.c | 24 +++++++++++++-
|
|
|
a3944b |
libmpathcmd/mpath_cmd.h | 15 +++++++++
|
|
|
a3944b |
multipath/main.c | 70 +++++++++++++----------------------------
|
|
|
a3944b |
3 files changed, 60 insertions(+), 49 deletions(-)
|
|
|
a3944b |
|
|
|
a3944b |
diff --git a/libmpathcmd/mpath_cmd.c b/libmpathcmd/mpath_cmd.c
|
|
|
a1c519 |
index df4ca54..b681311 100644
|
|
|
a3944b |
--- a/libmpathcmd/mpath_cmd.c
|
|
|
a3944b |
+++ b/libmpathcmd/mpath_cmd.c
|
|
|
a3944b |
@@ -26,6 +26,7 @@
|
|
|
a3944b |
#include <poll.h>
|
|
|
a3944b |
#include <string.h>
|
|
|
a3944b |
#include <errno.h>
|
|
|
a3944b |
+#include <fcntl.h>
|
|
|
a3944b |
|
|
|
a3944b |
#include "mpath_cmd.h"
|
|
|
a3944b |
|
|
|
a3944b |
@@ -93,10 +94,11 @@ static size_t write_all(int fd, const void *buf, size_t len)
|
|
|
a3944b |
/*
|
|
|
a3944b |
* connect to a unix domain socket
|
|
|
a3944b |
*/
|
|
|
a3944b |
-int mpath_connect(void)
|
|
|
a3944b |
+int __mpath_connect(int nonblocking)
|
|
|
a3944b |
{
|
|
|
a3944b |
int fd, len;
|
|
|
a3944b |
struct sockaddr_un addr;
|
|
|
a3944b |
+ int flags = 0;
|
|
|
a3944b |
|
|
|
a3944b |
memset(&addr, 0, sizeof(addr));
|
|
|
a3944b |
addr.sun_family = AF_LOCAL;
|
|
|
a3944b |
@@ -108,14 +110,34 @@ int mpath_connect(void)
|
|
|
a3944b |
if (fd == -1)
|
|
|
a3944b |
return -1;
|
|
|
a3944b |
|
|
|
a3944b |
+ if (nonblocking) {
|
|
|
a3944b |
+ flags = fcntl(fd, F_GETFL, 0);
|
|
|
a3944b |
+ if (flags != -1)
|
|
|
a3944b |
+ (void)fcntl(fd, F_SETFL, flags|O_NONBLOCK);
|
|
|
a3944b |
+ }
|
|
|
a3944b |
+
|
|
|
a3944b |
if (connect(fd, (struct sockaddr *)&addr, len) == -1) {
|
|
|
a3944b |
+ int err = errno;
|
|
|
a3944b |
+
|
|
|
a3944b |
close(fd);
|
|
|
a3944b |
+ errno = err;
|
|
|
a3944b |
return -1;
|
|
|
a3944b |
}
|
|
|
a3944b |
|
|
|
a3944b |
+ if (nonblocking && flags != -1)
|
|
|
a3944b |
+ (void)fcntl(fd, F_SETFL, flags);
|
|
|
a3944b |
+
|
|
|
a3944b |
return fd;
|
|
|
a3944b |
}
|
|
|
a3944b |
|
|
|
a3944b |
+/*
|
|
|
a3944b |
+ * connect to a unix domain socket
|
|
|
a3944b |
+ */
|
|
|
a3944b |
+int mpath_connect(void)
|
|
|
a3944b |
+{
|
|
|
a3944b |
+ return __mpath_connect(0);
|
|
|
a3944b |
+}
|
|
|
a3944b |
+
|
|
|
a3944b |
int mpath_disconnect(int fd)
|
|
|
a3944b |
{
|
|
|
a3944b |
return close(fd);
|
|
|
a3944b |
diff --git a/libmpathcmd/mpath_cmd.h b/libmpathcmd/mpath_cmd.h
|
|
|
a1c519 |
index 15aeb06..ccfd35f 100644
|
|
|
a3944b |
--- a/libmpathcmd/mpath_cmd.h
|
|
|
a3944b |
+++ b/libmpathcmd/mpath_cmd.h
|
|
|
a1c519 |
@@ -34,6 +34,21 @@ extern "C" {
|
|
|
a3944b |
#define DEFAULT_REPLY_TIMEOUT 4000
|
|
|
a3944b |
|
|
|
a3944b |
|
|
|
a3944b |
+/*
|
|
|
a3944b |
+ * DESCRIPTION:
|
|
|
a3944b |
+ * Same as mpath_connect() (see below) except for the "nonblocking"
|
|
|
a3944b |
+ * parameter.
|
|
|
a3944b |
+ * If "nonblocking" is set, connects in non-blocking mode. This is
|
|
|
a3944b |
+ * useful to avoid blocking if the listening socket's backlog is
|
|
|
a3944b |
+ * exceeded. In this case, errno will be set to EAGAIN.
|
|
|
a3944b |
+ * In case of success, the returned file descriptor is in in blocking
|
|
|
a3944b |
+ * mode, even if "nonblocking" was true.
|
|
|
a3944b |
+ *
|
|
|
a3944b |
+ * RETURNS:
|
|
|
a3944b |
+ * A file descriptor on success. -1 on failure (with errno set).
|
|
|
a3944b |
+ */
|
|
|
a3944b |
+int __mpath_connect(int nonblocking);
|
|
|
a3944b |
+
|
|
|
a3944b |
/*
|
|
|
a3944b |
* DESCRIPTION:
|
|
|
a3944b |
* Connect to the running multipathd daemon. On systems with the
|
|
|
a3944b |
diff --git a/multipath/main.c b/multipath/main.c
|
|
|
a1c519 |
index 632ce4d..3fb6699 100644
|
|
|
a3944b |
--- a/multipath/main.c
|
|
|
a3944b |
+++ b/multipath/main.c
|
|
|
a1c519 |
@@ -852,55 +852,29 @@ out:
|
|
|
a3944b |
return r;
|
|
|
a3944b |
}
|
|
|
a3944b |
|
|
|
a3944b |
-int is_multipathd_running(void)
|
|
|
a3944b |
+static int test_multipathd_socket(void)
|
|
|
a3944b |
{
|
|
|
a3944b |
- FILE *f = NULL;
|
|
|
a3944b |
- char buf[16];
|
|
|
a3944b |
- char path[PATH_MAX];
|
|
|
a3944b |
- int pid;
|
|
|
a3944b |
- char *end;
|
|
|
a3944b |
+ int fd;
|
|
|
a3944b |
+ /*
|
|
|
a3944b |
+ * "multipath -u" may be run before the daemon is started. In this
|
|
|
a3944b |
+ * case, systemd might own the socket but might delay multipathd
|
|
|
a3944b |
+ * startup until some other unit (udev settle!) has finished
|
|
|
a3944b |
+ * starting. With many LUNs, the listen backlog may be exceeded, which
|
|
|
a3944b |
+ * would cause connect() to block. This causes udev workers calling
|
|
|
a3944b |
+ * "multipath -u" to hang, and thus creates a deadlock, until "udev
|
|
|
a3944b |
+ * settle" times out. To avoid this, call connect() in non-blocking
|
|
|
a3944b |
+ * mode here, and take EAGAIN as indication for a filled-up systemd
|
|
|
a3944b |
+ * backlog.
|
|
|
a3944b |
+ */
|
|
|
a3944b |
|
|
|
a3944b |
- f = fopen(DEFAULT_PIDFILE, "r");
|
|
|
a3944b |
- if (!f) {
|
|
|
a3944b |
- if (errno != ENOENT)
|
|
|
a3944b |
- condlog(4, "can't open " DEFAULT_PIDFILE ": %s",
|
|
|
a3944b |
- strerror(errno));
|
|
|
a3944b |
- return 0;
|
|
|
a3944b |
- }
|
|
|
a3944b |
- if (!fgets(buf, sizeof(buf), f)) {
|
|
|
a3944b |
- if (ferror(f))
|
|
|
a3944b |
- condlog(4, "read of " DEFAULT_PIDFILE " failed: %s",
|
|
|
a3944b |
- strerror(errno));
|
|
|
a3944b |
- fclose(f);
|
|
|
a3944b |
- return 0;
|
|
|
a3944b |
- }
|
|
|
a3944b |
- fclose(f);
|
|
|
a3944b |
- errno = 0;
|
|
|
a3944b |
- strchop(buf);
|
|
|
a3944b |
- pid = strtol(buf, &end, 10);
|
|
|
a3944b |
- if (errno != 0 || pid <= 0 || *end != '\0') {
|
|
|
a3944b |
- condlog(4, "invalid contents in " DEFAULT_PIDFILE ": '%s'",
|
|
|
a3944b |
- buf);
|
|
|
a3944b |
- return 0;
|
|
|
a3944b |
- }
|
|
|
a3944b |
- snprintf(path, sizeof(path), "/proc/%d/comm", pid);
|
|
|
a3944b |
- f = fopen(path, "r");
|
|
|
a3944b |
- if (!f) {
|
|
|
a3944b |
- if (errno != ENOENT)
|
|
|
a3944b |
- condlog(4, "can't open %s: %s", path, strerror(errno));
|
|
|
a3944b |
- return 0;
|
|
|
a3944b |
- }
|
|
|
a3944b |
- if (!fgets(buf, sizeof(buf), f)) {
|
|
|
a3944b |
- if (ferror(f))
|
|
|
a3944b |
- condlog(4, "read of %s failed: %s", path,
|
|
|
a3944b |
- strerror(errno));
|
|
|
a3944b |
- fclose(f);
|
|
|
a3944b |
- return 0;
|
|
|
a3944b |
- }
|
|
|
a3944b |
- fclose(f);
|
|
|
a3944b |
- strchop(buf);
|
|
|
a3944b |
- if (strcmp(buf, "multipathd") != 0)
|
|
|
a3944b |
- return 0;
|
|
|
a3944b |
+ fd = __mpath_connect(1);
|
|
|
a3944b |
+ if (fd == -1) {
|
|
|
a3944b |
+ if (errno == EAGAIN)
|
|
|
a3944b |
+ condlog(3, "daemon backlog exceeded");
|
|
|
a3944b |
+ else
|
|
|
a3944b |
+ return 0;
|
|
|
a3944b |
+ } else
|
|
|
a3944b |
+ close(fd);
|
|
|
a3944b |
return 1;
|
|
|
a3944b |
}
|
|
|
a3944b |
|
|
|
a1c519 |
@@ -1086,7 +1060,7 @@ main (int argc, char *argv[])
|
|
|
a3944b |
}
|
|
|
a3944b |
if (cmd == CMD_VALID_PATH &&
|
|
|
a3944b |
dev_type == DEV_UEVENT) {
|
|
|
a3944b |
- if (!is_multipathd_running()) {
|
|
|
a3944b |
+ if (!test_multipathd_socket()) {
|
|
|
a3944b |
condlog(3, "%s: daemon is not running", dev);
|
|
|
a3944b |
if (!systemd_service_enabled(dev)) {
|
|
|
a1c519 |
r = print_cmd_valid(RTVL_NO, NULL, conf);
|
|
|
a3944b |
--
|
|
|
a3944b |
2.17.2
|
|
|
a3944b |
|