|
|
05be62 |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
05be62 |
From: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
05be62 |
Date: Thu, 14 Jan 2021 20:20:23 -0600
|
|
|
05be62 |
Subject: [PATCH] multipathd: avoid io_err_stat crash during shutdown
|
|
|
05be62 |
|
|
|
05be62 |
The checker thread is reponsible for enqueueing paths for the
|
|
|
05be62 |
io_err_stat thread to check. During shutdown, the io_err_stat thread is
|
|
|
05be62 |
shut down and cleaned up before the checker thread. There is no code
|
|
|
05be62 |
to make sure that the checker thread isn't accessing the io_err_stat
|
|
|
05be62 |
pathvec or its mutex while they are being freed, which can lead to
|
|
|
05be62 |
memory corruption crashes.
|
|
|
05be62 |
|
|
|
05be62 |
To solve this, get rid of the io_err_stat_pathvec structure, and
|
|
|
05be62 |
statically define the mutex. This means that the mutex is always valid
|
|
|
05be62 |
to access, and the io_err_stat pathvec can only be accessed while
|
|
|
05be62 |
holding it. If the io_err_stat thread has already been cleaned up
|
|
|
05be62 |
when the checker tries to access the pathvec, it will be NULL, and the
|
|
|
05be62 |
checker will simply fail to enqueue the path.
|
|
|
05be62 |
|
|
|
05be62 |
This change also fixes a bug in free_io_err_pathvec(), which previously
|
|
|
05be62 |
only attempted to free the pathvec if it was not set, instead of when it
|
|
|
05be62 |
was set.
|
|
|
05be62 |
|
|
|
05be62 |
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
05be62 |
Reviewed-by: Martin Wilck <mwilck@suse.com>
|
|
|
05be62 |
---
|
|
|
05be62 |
libmultipath/io_err_stat.c | 112 +++++++++++++++----------------------
|
|
|
05be62 |
libmultipath/util.c | 5 ++
|
|
|
05be62 |
libmultipath/util.h | 1 +
|
|
|
05be62 |
3 files changed, 50 insertions(+), 68 deletions(-)
|
|
|
05be62 |
|
|
|
05be62 |
diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
|
|
|
05be62 |
index 449760a0..f6c564f0 100644
|
|
|
05be62 |
--- a/libmultipath/io_err_stat.c
|
|
|
05be62 |
+++ b/libmultipath/io_err_stat.c
|
|
|
05be62 |
@@ -34,6 +34,7 @@
|
|
|
05be62 |
#include "lock.h"
|
|
|
05be62 |
#include "time-util.h"
|
|
|
05be62 |
#include "io_err_stat.h"
|
|
|
05be62 |
+#include "util.h"
|
|
|
05be62 |
|
|
|
05be62 |
#define IOTIMEOUT_SEC 60
|
|
|
05be62 |
#define TIMEOUT_NO_IO_NSEC 10000000 /*10ms = 10000000ns*/
|
|
|
05be62 |
@@ -46,12 +47,6 @@
|
|
|
05be62 |
#define io_err_stat_log(prio, fmt, args...) \
|
|
|
05be62 |
condlog(prio, "io error statistic: " fmt, ##args)
|
|
|
05be62 |
|
|
|
05be62 |
-
|
|
|
05be62 |
-struct io_err_stat_pathvec {
|
|
|
05be62 |
- pthread_mutex_t mutex;
|
|
|
05be62 |
- vector pathvec;
|
|
|
05be62 |
-};
|
|
|
05be62 |
-
|
|
|
05be62 |
struct dio_ctx {
|
|
|
05be62 |
struct timespec io_starttime;
|
|
|
05be62 |
unsigned int blksize;
|
|
|
05be62 |
@@ -76,9 +71,10 @@ pthread_attr_t io_err_stat_attr;
|
|
|
05be62 |
|
|
|
05be62 |
static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
05be62 |
static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER;
|
|
|
05be62 |
+static pthread_mutex_t io_err_pathvec_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
05be62 |
static int io_err_thread_running = 0;
|
|
|
05be62 |
|
|
|
05be62 |
-static struct io_err_stat_pathvec *paths;
|
|
|
05be62 |
+static vector io_err_pathvec;
|
|
|
05be62 |
struct vectors *vecs;
|
|
|
05be62 |
io_context_t ioctx;
|
|
|
05be62 |
|
|
|
05be62 |
@@ -208,46 +204,23 @@ static void free_io_err_stat_path(struct io_err_stat_path *p)
|
|
|
05be62 |
FREE(p);
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
-static struct io_err_stat_pathvec *alloc_pathvec(void)
|
|
|
05be62 |
-{
|
|
|
05be62 |
- struct io_err_stat_pathvec *p;
|
|
|
05be62 |
- int r;
|
|
|
05be62 |
-
|
|
|
05be62 |
- p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p));
|
|
|
05be62 |
- if (!p)
|
|
|
05be62 |
- return NULL;
|
|
|
05be62 |
- p->pathvec = vector_alloc();
|
|
|
05be62 |
- if (!p->pathvec)
|
|
|
05be62 |
- goto out_free_struct_pathvec;
|
|
|
05be62 |
- r = pthread_mutex_init(&p->mutex, NULL);
|
|
|
05be62 |
- if (r)
|
|
|
05be62 |
- goto out_free_member_pathvec;
|
|
|
05be62 |
-
|
|
|
05be62 |
- return p;
|
|
|
05be62 |
-
|
|
|
05be62 |
-out_free_member_pathvec:
|
|
|
05be62 |
- vector_free(p->pathvec);
|
|
|
05be62 |
-out_free_struct_pathvec:
|
|
|
05be62 |
- FREE(p);
|
|
|
05be62 |
- return NULL;
|
|
|
05be62 |
-}
|
|
|
05be62 |
-
|
|
|
05be62 |
-static void free_io_err_pathvec(struct io_err_stat_pathvec *p)
|
|
|
05be62 |
+static void free_io_err_pathvec(void)
|
|
|
05be62 |
{
|
|
|
05be62 |
struct io_err_stat_path *path;
|
|
|
05be62 |
int i;
|
|
|
05be62 |
|
|
|
05be62 |
- if (!p)
|
|
|
05be62 |
- return;
|
|
|
05be62 |
- pthread_mutex_destroy(&p->mutex);
|
|
|
05be62 |
- if (!p->pathvec) {
|
|
|
05be62 |
- vector_foreach_slot(p->pathvec, path, i) {
|
|
|
05be62 |
- destroy_directio_ctx(path);
|
|
|
05be62 |
- free_io_err_stat_path(path);
|
|
|
05be62 |
- }
|
|
|
05be62 |
- vector_free(p->pathvec);
|
|
|
05be62 |
+ pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ pthread_cleanup_push(cleanup_mutex, &io_err_pathvec_lock);
|
|
|
05be62 |
+ if (!io_err_pathvec)
|
|
|
05be62 |
+ goto out;
|
|
|
05be62 |
+ vector_foreach_slot(io_err_pathvec, path, i) {
|
|
|
05be62 |
+ destroy_directio_ctx(path);
|
|
|
05be62 |
+ free_io_err_stat_path(path);
|
|
|
05be62 |
}
|
|
|
05be62 |
- FREE(p);
|
|
|
05be62 |
+ vector_free(io_err_pathvec);
|
|
|
05be62 |
+ io_err_pathvec = NULL;
|
|
|
05be62 |
+out:
|
|
|
05be62 |
+ pthread_cleanup_pop(1);
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
/*
|
|
|
05be62 |
@@ -259,13 +232,13 @@ static int enqueue_io_err_stat_by_path(struct path *path)
|
|
|
05be62 |
{
|
|
|
05be62 |
struct io_err_stat_path *p;
|
|
|
05be62 |
|
|
|
05be62 |
- pthread_mutex_lock(&paths->mutex);
|
|
|
05be62 |
- p = find_err_path_by_dev(paths->pathvec, path->dev);
|
|
|
05be62 |
+ pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ p = find_err_path_by_dev(io_err_pathvec, path->dev);
|
|
|
05be62 |
if (p) {
|
|
|
05be62 |
- pthread_mutex_unlock(&paths->mutex);
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
return 0;
|
|
|
05be62 |
}
|
|
|
05be62 |
- pthread_mutex_unlock(&paths->mutex);
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
|
|
|
05be62 |
p = alloc_io_err_stat_path();
|
|
|
05be62 |
if (!p)
|
|
|
05be62 |
@@ -277,18 +250,18 @@ static int enqueue_io_err_stat_by_path(struct path *path)
|
|
|
05be62 |
|
|
|
05be62 |
if (setup_directio_ctx(p))
|
|
|
05be62 |
goto free_ioerr_path;
|
|
|
05be62 |
- pthread_mutex_lock(&paths->mutex);
|
|
|
05be62 |
- if (!vector_alloc_slot(paths->pathvec))
|
|
|
05be62 |
+ pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ if (!vector_alloc_slot(io_err_pathvec))
|
|
|
05be62 |
goto unlock_destroy;
|
|
|
05be62 |
- vector_set_slot(paths->pathvec, p);
|
|
|
05be62 |
- pthread_mutex_unlock(&paths->mutex);
|
|
|
05be62 |
+ vector_set_slot(io_err_pathvec, p);
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
|
|
|
05be62 |
io_err_stat_log(2, "%s: enqueue path %s to check",
|
|
|
05be62 |
path->mpp->alias, path->dev);
|
|
|
05be62 |
return 0;
|
|
|
05be62 |
|
|
|
05be62 |
unlock_destroy:
|
|
|
05be62 |
- pthread_mutex_unlock(&paths->mutex);
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
destroy_directio_ctx(p);
|
|
|
05be62 |
free_ioerr_path:
|
|
|
05be62 |
free_io_err_stat_path(p);
|
|
|
05be62 |
@@ -421,9 +394,9 @@ static int delete_io_err_stat_by_addr(struct io_err_stat_path *p)
|
|
|
05be62 |
{
|
|
|
05be62 |
int i;
|
|
|
05be62 |
|
|
|
05be62 |
- i = find_slot(paths->pathvec, p);
|
|
|
05be62 |
+ i = find_slot(io_err_pathvec, p);
|
|
|
05be62 |
if (i != -1)
|
|
|
05be62 |
- vector_del_slot(paths->pathvec, i);
|
|
|
05be62 |
+ vector_del_slot(io_err_pathvec, i);
|
|
|
05be62 |
|
|
|
05be62 |
destroy_directio_ctx(p);
|
|
|
05be62 |
free_io_err_stat_path(p);
|
|
|
05be62 |
@@ -594,7 +567,7 @@ static void poll_async_io_timeout(void)
|
|
|
05be62 |
|
|
|
05be62 |
if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
|
|
|
05be62 |
return;
|
|
|
05be62 |
- vector_foreach_slot(paths->pathvec, pp, i) {
|
|
|
05be62 |
+ vector_foreach_slot(io_err_pathvec, pp, i) {
|
|
|
05be62 |
for (j = 0; j < CONCUR_NR_EVENT; j++) {
|
|
|
05be62 |
rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j,
|
|
|
05be62 |
&curr_time, pp->devname);
|
|
|
05be62 |
@@ -640,7 +613,7 @@ static void handle_async_io_done_event(struct io_event *io_evt)
|
|
|
05be62 |
int rc = PATH_UNCHECKED;
|
|
|
05be62 |
int i, j;
|
|
|
05be62 |
|
|
|
05be62 |
- vector_foreach_slot(paths->pathvec, pp, i) {
|
|
|
05be62 |
+ vector_foreach_slot(io_err_pathvec, pp, i) {
|
|
|
05be62 |
for (j = 0; j < CONCUR_NR_EVENT; j++) {
|
|
|
05be62 |
ct = pp->dio_ctx_array + j;
|
|
|
05be62 |
if (&ct->io == io_evt->obj) {
|
|
|
05be62 |
@@ -674,19 +647,14 @@ static void service_paths(void)
|
|
|
05be62 |
struct io_err_stat_path *pp;
|
|
|
05be62 |
int i;
|
|
|
05be62 |
|
|
|
05be62 |
- pthread_mutex_lock(&paths->mutex);
|
|
|
05be62 |
- vector_foreach_slot(paths->pathvec, pp, i) {
|
|
|
05be62 |
+ pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ vector_foreach_slot(io_err_pathvec, pp, i) {
|
|
|
05be62 |
send_batch_async_ios(pp);
|
|
|
05be62 |
process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname);
|
|
|
05be62 |
poll_async_io_timeout();
|
|
|
05be62 |
poll_io_err_stat(vecs, pp);
|
|
|
05be62 |
}
|
|
|
05be62 |
- pthread_mutex_unlock(&paths->mutex);
|
|
|
05be62 |
-}
|
|
|
05be62 |
-
|
|
|
05be62 |
-static void cleanup_unlock(void *arg)
|
|
|
05be62 |
-{
|
|
|
05be62 |
- pthread_mutex_unlock((pthread_mutex_t*) arg);
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
static void cleanup_exited(__attribute__((unused)) void *arg)
|
|
|
05be62 |
@@ -744,12 +712,17 @@ int start_io_err_stat_thread(void *data)
|
|
|
05be62 |
io_err_stat_log(4, "io_setup failed");
|
|
|
05be62 |
return 1;
|
|
|
05be62 |
}
|
|
|
05be62 |
- paths = alloc_pathvec();
|
|
|
05be62 |
- if (!paths)
|
|
|
05be62 |
+
|
|
|
05be62 |
+ pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ io_err_pathvec = vector_alloc();
|
|
|
05be62 |
+ if (!io_err_pathvec) {
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
goto destroy_ctx;
|
|
|
05be62 |
+ }
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
|
|
|
05be62 |
pthread_mutex_lock(&io_err_thread_lock);
|
|
|
05be62 |
- pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock);
|
|
|
05be62 |
+ pthread_cleanup_push(cleanup_mutex, &io_err_thread_lock);
|
|
|
05be62 |
|
|
|
05be62 |
ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr,
|
|
|
05be62 |
io_err_stat_loop, data);
|
|
|
05be62 |
@@ -769,7 +742,10 @@ int start_io_err_stat_thread(void *data)
|
|
|
05be62 |
return 0;
|
|
|
05be62 |
|
|
|
05be62 |
out_free:
|
|
|
05be62 |
- free_io_err_pathvec(paths);
|
|
|
05be62 |
+ pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ vector_free(io_err_pathvec);
|
|
|
05be62 |
+ io_err_pathvec = NULL;
|
|
|
05be62 |
+ pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
destroy_ctx:
|
|
|
05be62 |
io_destroy(ioctx);
|
|
|
05be62 |
io_err_stat_log(0, "failed to start io_error statistic thread");
|
|
|
05be62 |
@@ -785,6 +761,6 @@ void stop_io_err_stat_thread(void)
|
|
|
05be62 |
pthread_cancel(io_err_stat_thr);
|
|
|
05be62 |
|
|
|
05be62 |
pthread_join(io_err_stat_thr, NULL);
|
|
|
05be62 |
- free_io_err_pathvec(paths);
|
|
|
05be62 |
+ free_io_err_pathvec();
|
|
|
05be62 |
io_destroy(ioctx);
|
|
|
05be62 |
}
|
|
|
05be62 |
diff --git a/libmultipath/util.c b/libmultipath/util.c
|
|
|
05be62 |
index 51c38c87..dd30a46e 100644
|
|
|
05be62 |
--- a/libmultipath/util.c
|
|
|
05be62 |
+++ b/libmultipath/util.c
|
|
|
05be62 |
@@ -469,3 +469,8 @@ void close_fd(void *arg)
|
|
|
05be62 |
{
|
|
|
05be62 |
close((long)arg);
|
|
|
05be62 |
}
|
|
|
05be62 |
+
|
|
|
05be62 |
+void cleanup_mutex(void *arg)
|
|
|
05be62 |
+{
|
|
|
05be62 |
+ pthread_mutex_unlock(arg);
|
|
|
05be62 |
+}
|
|
|
05be62 |
diff --git a/libmultipath/util.h b/libmultipath/util.h
|
|
|
05be62 |
index 56bd78c7..ce277680 100644
|
|
|
05be62 |
--- a/libmultipath/util.h
|
|
|
05be62 |
+++ b/libmultipath/util.h
|
|
|
05be62 |
@@ -44,6 +44,7 @@ void set_max_fds(rlim_t max_fds);
|
|
|
05be62 |
pthread_cleanup_push(((void (*)(void *))&f), (arg))
|
|
|
05be62 |
|
|
|
05be62 |
void close_fd(void *arg);
|
|
|
05be62 |
+void cleanup_mutex(void *arg);
|
|
|
05be62 |
|
|
|
05be62 |
struct scandir_result {
|
|
|
05be62 |
struct dirent **di;
|
|
|
05be62 |
--
|
|
|
05be62 |
2.17.2
|
|
|
05be62 |
|