|
 |
5c2e41 |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
 |
5c2e41 |
From: Benjamin Marzinski <bmarzins@redhat.com>
|
|
 |
a1c519 |
Date: Fri, 15 Feb 2019 17:19:46 -0600
|
|
 |
a1c519 |
Subject: [PATCH] multipathd: Fix miscounting active paths
|
|
 |
5c2e41 |
|
|
 |
5c2e41 |
When multipathd gets a change uevent, it calls pathinfo with DI_NOIO.
|
|
 |
5c2e41 |
This sets the path state to the return value of path_offline(). If a
|
|
 |
5c2e41 |
path is in the PATH_DOWN state but path_offline() returns PATH_UP, when
|
|
 |
5c2e41 |
that path gets a change event, its state will get moved to PATH_UP
|
|
 |
5c2e41 |
without either reinstating the path, or reloading the map. The next
|
|
 |
5c2e41 |
call to check_path() will move the path back to PATH_DOWN. Since
|
|
 |
5c2e41 |
check_path() simply increments and decrements nr_active instead of
|
|
 |
5c2e41 |
calculating it based on the actual number of active paths, nr_active
|
|
 |
5c2e41 |
will get decremented a second time for this failed path, potentially
|
|
 |
5c2e41 |
putting the multipath device into recovery mode.
|
|
 |
5c2e41 |
|
|
 |
a1c519 |
This commit does two things to avoid this situation. It makes the
|
|
 |
a1c519 |
DI_NOIO flag only set pp->state in pathinfo() if DI_CHECKER is also set.
|
|
 |
a1c519 |
DI_CHECKER isn't set in uev_update_path() to avoid changing the path state in
|
|
 |
a1c519 |
this case. Also, to guard against pp->state getting changed in some
|
|
 |
a1c519 |
other code path without properly updating the map state, check_path()
|
|
 |
a1c519 |
now calls set_no_path_retry(), which recalculates nr_active based on the
|
|
 |
a1c519 |
actual number of active paths, and makes sure that the queue_if_no_path
|
|
 |
a1c519 |
value in the features line is correct.
|
|
 |
5c2e41 |
|
|
 |
5c2e41 |
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
|
 |
5c2e41 |
---
|
|
 |
5c2e41 |
libmultipath/discovery.c | 11 ++++++-----
|
|
 |
5c2e41 |
multipath/main.c | 2 +-
|
|
 |
a1c519 |
multipathd/main.c | 4 +++-
|
|
 |
a1c519 |
3 files changed, 10 insertions(+), 7 deletions(-)
|
|
 |
5c2e41 |
|
|
 |
5c2e41 |
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
|
|
 |
a1c519 |
index 10bd8cd..729bcb9 100644
|
|
 |
5c2e41 |
--- a/libmultipath/discovery.c
|
|
 |
5c2e41 |
+++ b/libmultipath/discovery.c
|
|
 |
5c2e41 |
@@ -1914,11 +1914,12 @@ int pathinfo(struct path *pp, struct config *conf, int mask)
|
|
 |
5c2e41 |
if (path_state == PATH_REMOVED)
|
|
 |
5c2e41 |
goto blank;
|
|
 |
5c2e41 |
else if (mask & DI_NOIO) {
|
|
 |
5c2e41 |
- /*
|
|
 |
5c2e41 |
- * Avoid any IO on the device itself.
|
|
 |
5c2e41 |
- * Behave like DI_CHECKER in the "path unavailable" case.
|
|
 |
5c2e41 |
- */
|
|
 |
5c2e41 |
- pp->chkrstate = pp->state = path_state;
|
|
 |
5c2e41 |
+ if (mask & DI_CHECKER)
|
|
 |
5c2e41 |
+ /*
|
|
 |
5c2e41 |
+ * Avoid any IO on the device itself.
|
|
 |
5c2e41 |
+ * simply use the path_offline() return as its state
|
|
 |
5c2e41 |
+ */
|
|
 |
5c2e41 |
+ pp->chkrstate = pp->state = path_state;
|
|
 |
5c2e41 |
return PATHINFO_OK;
|
|
 |
5c2e41 |
}
|
|
 |
5c2e41 |
|
|
 |
5c2e41 |
diff --git a/multipath/main.c b/multipath/main.c
|
|
 |
a1c519 |
index 5abb118..69141db 100644
|
|
 |
5c2e41 |
--- a/multipath/main.c
|
|
 |
5c2e41 |
+++ b/multipath/main.c
|
|
 |
a1c519 |
@@ -356,7 +356,7 @@ static int check_usable_paths(struct config *conf,
|
|
 |
5c2e41 |
pp->udev = get_udev_device(pp->dev_t, DEV_DEVT);
|
|
 |
5c2e41 |
if (pp->udev == NULL)
|
|
 |
5c2e41 |
continue;
|
|
 |
5c2e41 |
- if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK)
|
|
 |
5c2e41 |
+ if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO|DI_CHECKER) != PATHINFO_OK)
|
|
 |
5c2e41 |
continue;
|
|
 |
5c2e41 |
|
|
 |
5c2e41 |
if (pp->state == PATH_UP &&
|
|
 |
a1c519 |
diff --git a/multipathd/main.c b/multipathd/main.c
|
|
 |
a1c519 |
index 43830e8..678ecf8 100644
|
|
 |
a1c519 |
--- a/multipathd/main.c
|
|
 |
a1c519 |
+++ b/multipathd/main.c
|
|
 |
a1c519 |
@@ -392,7 +392,8 @@ static void set_no_path_retry(struct multipath *mpp)
|
|
 |
a1c519 |
default:
|
|
 |
a1c519 |
if (mpp->nr_active > 0) {
|
|
 |
a1c519 |
mpp->retry_tick = 0;
|
|
 |
a1c519 |
- dm_queue_if_no_path(mpp->alias, 1);
|
|
 |
a1c519 |
+ if (!is_queueing)
|
|
 |
a1c519 |
+ dm_queue_if_no_path(mpp->alias, 1);
|
|
 |
a1c519 |
} else if (is_queueing && mpp->retry_tick == 0)
|
|
 |
a1c519 |
enter_recovery_mode(mpp);
|
|
 |
a1c519 |
break;
|
|
 |
a1c519 |
@@ -2072,6 +2073,7 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
|
|
 |
a1c519 |
/* if update_multipath_strings orphaned the path, quit early */
|
|
 |
a1c519 |
if (!pp->mpp)
|
|
 |
a1c519 |
return 0;
|
|
 |
a1c519 |
+ set_no_path_retry(pp->mpp);
|
|
 |
a1c519 |
|
|
 |
a1c519 |
if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
|
|
 |
a1c519 |
check_path_reinstate_state(pp)) {
|
|
 |
5c2e41 |
--
|
|
 |
5c2e41 |
2.17.2
|
|
 |
5c2e41 |
|