From a385bac6870d647e52f8135e92018256662e1805 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 06 2019 11:09:35 +0000 Subject: import device-mapper-multipath-0.4.9-127.el7 --- diff --git a/SOURCES/0250-RHBZ-1610867-rescan-change.patch b/SOURCES/0250-RHBZ-1610867-rescan-change.patch new file mode 100644 index 0000000..4ac655b --- /dev/null +++ b/SOURCES/0250-RHBZ-1610867-rescan-change.patch @@ -0,0 +1,268 @@ +--- + libmultipath/structs_vec.c | 103 +++++++++------------------------------------ + libmultipath/structs_vec.h | 6 ++ + multipathd/main.c | 50 +++++++++++++++++++++ + 3 files changed, 75 insertions(+), 84 deletions(-) + +Index: multipath-tools-130222/libmultipath/structs_vec.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/structs_vec.c ++++ multipath-tools-130222/libmultipath/structs_vec.c +@@ -103,7 +103,7 @@ orphan_paths (vector pathvec, struct mul + } + } + +-static void ++void + set_multipath_wwid (struct multipath * mpp) + { + if (strlen(mpp->wwid)) +@@ -188,57 +188,36 @@ remove_maps_and_stop_waiters (struct vec + _remove_maps(vecs, STOP_WAITER); + } + +-static struct hwentry * ++void + extract_hwe_from_path(struct multipath * mpp) + { + struct path * pp = NULL; +- int pg_num = -1, p_num = -1, i; +- struct pathgroup * pgp = NULL; +- +- condlog(3, "%s: searching paths for valid hwe", mpp->alias); ++ int i; + +- if (mpp && mpp->pg) { +- vector_foreach_slot(mpp->pg, pgp, i) { +- if (pgp->status == PGSTATE_ACTIVE || +- pgp->status == PGSTATE_ENABLED) { +- pg_num = i; +- break; +- } +- } +- if (pg_num >= 0) +- pgp = VECTOR_SLOT(mpp->pg, pg_num); +- } ++ if (mpp->hwe || !mpp->paths) ++ return; + +- if (pgp && pgp->paths) { +- vector_foreach_slot(pgp->paths, pp, i) { +- if (pp->dmstate == PSTATE_FAILED) +- continue; +- if (strlen(pp->vendor_id) > 0 && +- strlen(pp->product_id) > 0 && +- strlen(pp->rev) > 0) { +- p_num = i; +- break; +- } ++ condlog(3, "%s: searching paths for valid hwe", mpp->alias); ++ /* doing this in two passes seems like paranoia to me */ ++ vector_foreach_slot(mpp->paths, pp, i) { ++ if (pp->state != PATH_UP) ++ continue; ++ if (pp->hwe) { ++ mpp->hwe = pp->hwe; ++ return; + } +- if (p_num >= 0) +- pp = VECTOR_SLOT(pgp->paths, i); + } +- +- if (pp) { +- condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id); +- condlog(3, "%s: product = %s", pp->dev, pp->product_id); +- condlog(3, "%s: rev = %s", pp->dev, pp->rev); +- if (!pp->hwe) { +- condlog(3, "searching hwtable"); +- pp->hwe = find_hwe(conf->hwtable, pp->vendor_id, +- pp->product_id, pp->rev); ++ vector_foreach_slot(mpp->paths, pp, i) { ++ if (pp->state == PATH_UP) ++ continue; ++ if (pp->hwe) { ++ mpp->hwe = pp->hwe; ++ return; + } + } +- +- return pp?pp->hwe:NULL; + } + +-static int ++int + update_multipath_table (struct multipath *mpp, vector pathvec) + { + char params[PARAMS_SIZE] = {0}; +@@ -259,7 +238,7 @@ update_multipath_table (struct multipath + return 0; + } + +-static int ++int + update_multipath_status (struct multipath *mpp) + { + char status[PARAMS_SIZE] = {0}; +@@ -371,21 +350,11 @@ __setup_multipath (struct vectors * vecs + goto out; + } + +- set_multipath_wwid(mpp); +- mpp->mpe = find_mpe(mpp->wwid); +- condlog(3, "%s: discover", mpp->alias); +- + if (update_multipath_strings(mpp, vecs->pathvec)) { + condlog(0, "%s: failed to setup multipath", mpp->alias); + goto out; + } + +- if (!mpp->hwe) +- mpp->hwe = extract_hwe_from_path(mpp); +- if (!mpp->hwe) { +- condlog(3, "%s: no hardware entry found, using defaults", +- 
mpp->alias); +- } + if (reset) { + select_rr_weight(mpp); + select_pgfailback(mpp); +@@ -402,36 +371,6 @@ out: + return 1; + } + +-extern struct multipath * +-add_map_without_path (struct vectors * vecs, char * alias) +-{ +- struct multipath * mpp = alloc_multipath(); +- +- if (!mpp || !alias) +- return NULL; +- +- mpp->alias = STRDUP(alias); +- +- if (setup_multipath(vecs, mpp)) +- return NULL; /* mpp freed in setup_multipath */ +- +- if (adopt_paths(vecs->pathvec, mpp, 1)) +- goto out; +- +- if (!vector_alloc_slot(vecs->mpvec)) +- goto out; +- +- vector_set_slot(vecs->mpvec, mpp); +- +- if (start_waiter_thread(mpp, vecs)) +- goto out; +- +- return mpp; +-out: +- remove_map(mpp, vecs, PURGE_VEC); +- return NULL; +-} +- + static void + find_existing_alias (struct multipath * mpp, + struct vectors *vecs) +Index: multipath-tools-130222/libmultipath/structs_vec.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/structs_vec.h ++++ multipath-tools-130222/libmultipath/structs_vec.h +@@ -31,11 +31,15 @@ void remove_map_and_stop_waiter (struct + void remove_maps (struct vectors * vecs); + void remove_maps_and_stop_waiters (struct vectors * vecs); + +-struct multipath * add_map_without_path (struct vectors * vecs, char * alias); + struct multipath * add_map_with_path (struct vectors * vecs, + struct path * pp, int add_vec); + int update_multipath (struct vectors *vecs, char *mapname, int reset); + void update_queue_mode_del_path(struct multipath *mpp); + void update_queue_mode_add_path(struct multipath *mpp); + ++void extract_hwe_from_path(struct multipath * mpp); ++void set_multipath_wwid (struct multipath * mpp); ++int update_multipath_table (struct multipath *mpp, vector pathvec); ++int update_multipath_status (struct multipath *mpp); ++ + #endif /* _STRUCTS_VEC_H */ +Index: multipath-tools-130222/multipathd/main.c +=================================================================== +--- multipath-tools-130222.orig/multipathd/main.c ++++ multipath-tools-130222/multipathd/main.c +@@ -273,6 +273,7 @@ retry: + mpp->flush_on_last_del = FLUSH_UNDEF; + mpp->action = ACT_RELOAD; + ++ extract_hwe_from_path(mpp); + if (setup_map(mpp, params, PARAMS_SIZE)) { + condlog(0, "%s: failed to setup new map in update", mpp->alias); + retries = -1; +@@ -296,6 +297,49 @@ fail: + return 0; + } + ++static struct multipath * ++add_map_without_path (struct vectors * vecs, char * alias) ++{ ++ struct multipath * mpp = alloc_multipath(); ++ ++ if (!mpp) ++ return NULL; ++ if (!alias) { ++ FREE(mpp); ++ return NULL; ++ } ++ ++ mpp->alias = STRDUP(alias); ++ ++ if (dm_get_info(mpp->alias, &mpp->dmi)) { ++ condlog(3, "%s: cannot access table", mpp->alias); ++ goto out; ++ } ++ set_multipath_wwid(mpp); ++ mpp->mpe = find_mpe(mpp->wwid); ++ ++ if (update_multipath_table(mpp, vecs->pathvec)) ++ goto out; ++ if (update_multipath_status(mpp)) ++ goto out; ++ ++ if (!vector_alloc_slot(vecs->mpvec)) ++ goto out; ++ ++ vector_set_slot(vecs->mpvec, mpp); ++ ++ if (update_map(mpp, vecs) != 0) /* map removed */ ++ return NULL; ++ ++ if (start_waiter_thread(mpp, vecs)) ++ goto out; ++ ++ return mpp; ++out: ++ remove_map(mpp, vecs, 1); ++ return NULL; ++} ++ + static int + uev_add_map (struct uevent * uev, struct vectors * vecs) + { +@@ -569,6 +613,7 @@ rescan: + verify_paths(mpp, vecs, NULL); + mpp->flush_on_last_del = FLUSH_UNDEF; + mpp->action = ACT_RELOAD; ++ extract_hwe_from_path(mpp); + } else { + if (!should_multipath(pp, vecs->pathvec)) { + orphan_path(pp); +@@ 
-855,8 +900,11 @@ map_discovery (struct vectors * vecs) + return 1; + + vector_foreach_slot (vecs->mpvec, mpp, i) +- if (setup_multipath(vecs, mpp)) ++ if (update_multipath_table(mpp, vecs->pathvec) || ++ update_multipath_status(mpp)) { ++ remove_map(mpp, vecs, 1); + i--; ++ } + + return 0; + } diff --git a/SOURCES/0251-RHBZ-1614011-discovery-timeout.patch b/SOURCES/0251-RHBZ-1614011-discovery-timeout.patch new file mode 100644 index 0000000..0a7bda7 --- /dev/null +++ b/SOURCES/0251-RHBZ-1614011-discovery-timeout.patch @@ -0,0 +1,34 @@ +--- + libmultipath/discovery.c | 5 ++++- + libmultipath/discovery.h | 2 +- + 2 files changed, 5 insertions(+), 2 deletions(-) + +Index: multipath-tools-130222/libmultipath/discovery.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/discovery.c ++++ multipath-tools-130222/libmultipath/discovery.c +@@ -749,7 +749,10 @@ do_inq(int sg_fd, int cmddt, int evpd, u + io_hdr.dxferp = resp; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sense_b; +- io_hdr.timeout = DEF_TIMEOUT; ++ if (conf->checker_timeout) ++ io_hdr.timeout = conf->checker_timeout * 1000; ++ else ++ io_hdr.timeout = DEF_TIMEOUT; + + if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) + return -1; +Index: multipath-tools-130222/libmultipath/discovery.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/discovery.h ++++ multipath-tools-130222/libmultipath/discovery.h +@@ -14,7 +14,7 @@ + #endif + + #ifndef DEF_TIMEOUT +-#define DEF_TIMEOUT 300000 ++#define DEF_TIMEOUT 60000 + #endif + + /* diff --git a/SOURCES/0252-RHBZ-1623595-cmd-error-status.patch b/SOURCES/0252-RHBZ-1623595-cmd-error-status.patch new file mode 100644 index 0000000..aaf06c4 --- /dev/null +++ b/SOURCES/0252-RHBZ-1623595-cmd-error-status.patch @@ -0,0 +1,85 @@ +--- + multipathd/main.c | 6 ++---- + multipathd/uxclnt.c | 22 +++++++++++++--------- + 2 files changed, 15 insertions(+), 13 deletions(-) + +Index: multipath-tools-130222/multipathd/main.c +=================================================================== +--- multipath-tools-130222.orig/multipathd/main.c ++++ multipath-tools-130222/multipathd/main.c +@@ -2234,8 +2234,7 @@ main (int argc, char *argv[]) + conf->verbosity = atoi(optarg); + break; + case 'k': +- uxclnt(optarg); +- exit(0); ++ return(uxclnt(optarg)); + case 'B': + conf->bindings_read_only = 1; + break; +@@ -2256,8 +2255,7 @@ main (int argc, char *argv[]) + optind++; + } + c += snprintf(c, s + CMDSIZE - c, "\n"); +- uxclnt(s); +- exit(0); ++ return(uxclnt(s)); + } + + if (!logsink) +Index: multipath-tools-130222/multipathd/uxclnt.c +=================================================================== +--- multipath-tools-130222.orig/multipathd/uxclnt.c ++++ multipath-tools-130222/multipathd/uxclnt.c +@@ -74,20 +74,24 @@ static void process(int fd) + } + } + +-static void process_req(int fd, char * inbuf) ++static int process_req(int fd, char * inbuf) + { + char *reply; ++ int ret; + + if (send_packet(fd, inbuf) != 0) { + printf("cannot send packet\n"); +- return; ++ return 1; + } +- if (recv_packet(fd, &reply) != 0) ++ if (recv_packet(fd, &reply) != 0) { + printf("error receiving packet\n"); +- else { +- printf("%s", reply); +- FREE(reply); ++ return 1; + } ++ printf("%s", reply); ++ ret = (strcmp(reply, "fail\n") == 0); ++ FREE(reply); ++ /* Need to do better about getting return value */ ++ return ret; + } + + /* +@@ -95,7 +99,7 @@ static void process_req(int fd, char * i + */ + int uxclnt(char * 
inbuf) + { +- int fd; ++ int fd, ret = 0; + + fd = mpath_connect(); + if (fd == -1) { +@@ -104,9 +108,9 @@ int uxclnt(char * inbuf) + } + + if (inbuf) +- process_req(fd, inbuf); ++ ret = process_req(fd, inbuf); + else + process(fd); + +- return 0; ++ return ret; + } diff --git a/SOURCES/0253-RHBZ-1618549-mix-hw-handler.patch b/SOURCES/0253-RHBZ-1618549-mix-hw-handler.patch new file mode 100644 index 0000000..0ff9346 --- /dev/null +++ b/SOURCES/0253-RHBZ-1618549-mix-hw-handler.patch @@ -0,0 +1,104 @@ +--- + libmultipath/configure.c | 2 - + libmultipath/propsel.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 59 insertions(+), 1 deletion(-) + +Index: multipath-tools-130222/libmultipath/configure.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/configure.c ++++ multipath-tools-130222/libmultipath/configure.c +@@ -282,6 +282,7 @@ setup_map (struct multipath * mpp, char + select_pgpolicy(mpp); + select_selector(mpp); + select_features(mpp); ++ select_retain_hwhandler(mpp); + select_hwhandler(mpp); + select_rr_weight(mpp); + select_minio(mpp); +@@ -293,7 +294,6 @@ setup_map (struct multipath * mpp, char + select_fast_io_fail(mpp); + select_dev_loss(mpp); + select_reservation_key(mpp); +- select_retain_hwhandler(mpp); + select_deferred_remove(mpp); + select_delay_watch_checks(mpp); + select_delay_wait_checks(mpp); +Index: multipath-tools-130222/libmultipath/propsel.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/propsel.c ++++ multipath-tools-130222/libmultipath/propsel.c +@@ -19,6 +19,8 @@ + #include "discovery.h" + #include "prioritizers/alua_rtpg.h" + #include "prkey.h" ++#include "sysfs.h" ++#include "util.h" + #include + #include + #include +@@ -317,9 +319,65 @@ select_features (struct multipath * mp) + return 0; + } + ++static int get_dh_state(struct path *pp, char *value, size_t value_len) ++{ ++ int ret; ++ struct udev_device *ud; ++ ++ if (pp->udev == NULL) ++ return -1; ++ ++ ud = udev_device_get_parent_with_subsystem_devtype(pp->udev, "scsi", ++ "scsi_device"); ++ if (ud == NULL) ++ return -1; ++ ++ ret = sysfs_attr_get_value(ud, "dh_state", value, value_len); ++ if (ret > 0) ++ strchop(value); ++ return ret; ++} ++ ++static int ++use_attached_hwhandler(struct multipath * mp) ++{ ++ int i; ++ struct path *pp; ++ int attached_hwhandler = 0; ++ /* dh_state is no longer than "detached" */ ++ char dh_state[10]; ++ ++ vector_foreach_slot (mp->paths, pp, i) { ++ if (get_dh_state(pp, dh_state, sizeof(dh_state)) > 0 && ++ strcmp(dh_state, "detached") != 0) { ++ if (!attached_hwhandler) { ++ if (asprintf(&mp->hwhandler, "1 %s", ++ dh_state) < 0) ++ return 0; ++ attached_hwhandler = 1; ++ /* if we find 2 different hardware handlers, disable ++ * retain_attached_hw_handler, and use the configured ++ * handler */ ++ } else if (strcmp(dh_state, &mp->hwhandler[2]) != 0) { ++ FREE(mp->hwhandler); ++ mp->hwhandler = NULL; ++ mp->retain_hwhandler = RETAIN_HWHANDLER_OFF; ++ condlog(0, "%s: retain_attached_hw_hander disabled (inconsistent handlers on paths)", mp->alias); ++ return 0; ++ } ++ } ++ } ++ return attached_hwhandler; ++} ++ + extern int + select_hwhandler (struct multipath * mp) + { ++ if (mp->retain_hwhandler == RETAIN_HWHANDLER_ON && ++ use_attached_hwhandler(mp)) { ++ condlog(3, "%s: hwhandler = %s (setting: retained by kernel driver)", mp->alias, mp->hwhandler); ++ return 0; ++ } + if (mp->hwe && mp->hwe->hwhandler) { + 
mp->hwhandler = mp->hwe->hwhandler; + condlog(3, "%s: hwhandler = %s (controller setting)", diff --git a/SOURCES/0254-RHBZ-1635819-fix-mpathpersist-crash.patch b/SOURCES/0254-RHBZ-1635819-fix-mpathpersist-crash.patch new file mode 100644 index 0000000..5a41381 --- /dev/null +++ b/SOURCES/0254-RHBZ-1635819-fix-mpathpersist-crash.patch @@ -0,0 +1,36 @@ +--- + libmpathpersist/mpath_persist.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +Index: multipath-tools-130222/libmpathpersist/mpath_persist.c +=================================================================== +--- multipath-tools-130222.orig/libmpathpersist/mpath_persist.c ++++ multipath-tools-130222/libmpathpersist/mpath_persist.c +@@ -524,10 +524,10 @@ int mpath_prout_reg(struct multipath *mp + if (!rollback && (thread[i].param.status == MPATH_PR_RESERV_CONFLICT)){ + rollback = 1; + sa_key = 0; +- for (i = 0; i < 8; ++i){ +- if (i > 0) ++ for (j = 0; j < 8; ++j){ ++ if (j > 0) + sa_key <<= 8; +- sa_key |= paramp->sa_key[i]; ++ sa_key |= paramp->sa_key[j]; + } + status = MPATH_PR_RESERV_CONFLICT ; + } +@@ -537,11 +537,10 @@ int mpath_prout_reg(struct multipath *mp + } + if (rollback && ((rq_servact == MPATH_PROUT_REG_SA) && sa_key != 0 )){ + condlog (3, "%s: ERROR: initiating pr out rollback", mpp->wwid); ++ memcpy(¶mp->key, ¶mp->sa_key, 8); ++ memset(¶mp->sa_key, 0, 8); + for( i=0 ; i < count ; i++){ + if (thread[i].param.status == MPATH_PR_SUCCESS) { +- memcpy(&thread[i].param.paramp->key, &thread[i].param.paramp->sa_key, 8); +- memset(&thread[i].param.paramp->sa_key, 0, 8); +- thread[i].param.status = MPATH_PR_SUCCESS; + rc = pthread_create(&thread[i].id, &attr, mpath_prout_pthread_fn, + (void *)(&thread[i].param)); + if (rc){ diff --git a/SOURCES/0255-RHBZ-1638651-marginal-path.patch b/SOURCES/0255-RHBZ-1638651-marginal-path.patch new file mode 100644 index 0000000..93521b4 --- /dev/null +++ b/SOURCES/0255-RHBZ-1638651-marginal-path.patch @@ -0,0 +1,2028 @@ +--- + libmultipath/Makefile | 7 + libmultipath/config.h | 12 + libmultipath/configure.c | 18 - + libmultipath/configure.h | 3 + libmultipath/defaults.h | 1 + libmultipath/dict.c | 410 ++++++++++++++++++++++++ + libmultipath/io_err_stat.c | 763 +++++++++++++++++++++++++++++++++++++++++++++ + libmultipath/io_err_stat.h | 15 + libmultipath/propsel.c | 98 +++++ + libmultipath/propsel.h | 4 + libmultipath/structs.h | 14 + libmultipath/time-util.c | 42 ++ + libmultipath/time-util.h | 13 + libmultipath/uevent.c | 38 ++ + libmultipath/uevent.h | 2 + multipath/multipath.conf.5 | 108 ++++++ + multipathd/cli_handlers.c | 2 + multipathd/main.c | 64 +++ + 18 files changed, 1599 insertions(+), 15 deletions(-) + +Index: multipath-tools-130222/libmultipath/Makefile +=================================================================== +--- multipath-tools-130222.orig/libmultipath/Makefile ++++ multipath-tools-130222/libmultipath/Makefile +@@ -7,16 +7,17 @@ include ../Makefile.inc + SONAME=0 + DEVLIB = libmultipath.so + LIBS = $(DEVLIB).$(SONAME) +-LIBDEPS = -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd ++LIBDEPS = -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -laio + CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir) + + OBJS = memory.o parser.o vector.o devmapper.o \ + hwtable.o blacklist.o util.o dmparser.o config.o \ + structs.o discovery.o propsel.o dict.o \ +- pgpolicies.o debug.o regex.o defaults.o uevent.o \ ++ pgpolicies.o debug.o regex.o defaults.o uevent.o time-util.o \ + switchgroup.o uxsock.o print.o alias.o log_pthread.o \ + 
log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \ +- lock.o waiter.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o ++ lock.o waiter.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \ ++ io_err_stat.o + + LIBDM_API_FLUSH = $(shell grep -Ecs '^[a-z]*[[:space:]]+dm_task_no_flush' /usr/include/libdevmapper.h) + +Index: multipath-tools-130222/libmultipath/config.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/config.h ++++ multipath-tools-130222/libmultipath/config.h +@@ -67,6 +67,10 @@ struct hwentry { + int deferred_remove; + int delay_watch_checks; + int delay_wait_checks; ++ int marginal_path_err_sample_time; ++ int marginal_path_err_rate_threshold; ++ int marginal_path_err_recheck_gap_time; ++ int marginal_path_double_failed_time; + int skip_kpartx; + int max_sectors_kb; + int unpriv_sgio; +@@ -100,6 +104,10 @@ struct mpentry { + int deferred_remove; + int delay_watch_checks; + int delay_wait_checks; ++ int marginal_path_err_sample_time; ++ int marginal_path_err_rate_threshold; ++ int marginal_path_err_recheck_gap_time; ++ int marginal_path_double_failed_time; + int skip_kpartx; + int max_sectors_kb; + int unpriv_sgio; +@@ -153,6 +161,10 @@ struct config { + int processed_main_config; + int delay_watch_checks; + int delay_wait_checks; ++ int marginal_path_err_sample_time; ++ int marginal_path_err_rate_threshold; ++ int marginal_path_err_recheck_gap_time; ++ int marginal_path_double_failed_time; + int retrigger_tries; + int retrigger_delay; + int new_bindings_in_boot; +Index: multipath-tools-130222/libmultipath/configure.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/configure.c ++++ multipath-tools-130222/libmultipath/configure.c +@@ -42,6 +42,7 @@ + #include "uxsock.h" + #include "wwids.h" + #include "sysfs.h" ++#include "io_err_stat.h" + + /* group paths in pg by host adapter + */ +@@ -257,7 +258,8 @@ int rr_optimize_path_order(struct pathgr + } + + extern int +-setup_map (struct multipath * mpp, char * params, int params_size) ++setup_map (struct multipath * mpp, char * params, int params_size, ++ struct vectors *vecs) + { + struct pathgroup * pgp; + int i, old_nr_active; +@@ -297,11 +299,21 @@ setup_map (struct multipath * mpp, char + select_deferred_remove(mpp); + select_delay_watch_checks(mpp); + select_delay_wait_checks(mpp); ++ select_marginal_path_err_sample_time(mpp); ++ select_marginal_path_err_rate_threshold(mpp); ++ select_marginal_path_err_recheck_gap_time(mpp); ++ select_marginal_path_double_failed_time(mpp); + select_skip_kpartx(mpp); + select_max_sectors_kb(mpp); + select_unpriv_sgio(mpp); + + sysfs_set_scsi_tmo(mpp); ++ ++ if (mpp->marginal_path_double_failed_time > 0 && ++ mpp->marginal_path_err_sample_time > 0 && ++ mpp->marginal_path_err_recheck_gap_time > 0 && ++ mpp->marginal_path_err_rate_threshold >= 0) ++ start_io_err_stat_thread(vecs); + /* + * assign paths to path groups -- start with no groups and all paths + * in mpp->paths +@@ -867,7 +879,7 @@ coalesce_paths (struct vectors * vecs, v + verify_paths(mpp, vecs, NULL); + + params[0] = '\0'; +- if (setup_map(mpp, params, PARAMS_SIZE)) { ++ if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + remove_map(mpp, vecs, 0); + continue; + } +@@ -1118,7 +1130,7 @@ extern int reload_map(struct vectors *ve + vector_foreach_slot (mpp->paths, pp, i) + pathinfo(pp, conf->hwtable, DI_PRIO); + } +- if (setup_map(mpp, params, PARAMS_SIZE)) { ++ if (setup_map(mpp, params, 
PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup map", mpp->alias); + return 1; + } +Index: multipath-tools-130222/libmultipath/configure.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/configure.h ++++ multipath-tools-130222/libmultipath/configure.h +@@ -24,7 +24,8 @@ enum actions { + #define FLUSH_ONE 1 + #define FLUSH_ALL 2 + +-int setup_map (struct multipath * mpp, char * params, int params_size ); ++int setup_map (struct multipath * mpp, char * params, int params_size, ++ struct vectors *vecs); + int domap (struct multipath * mpp, char * params); + int reinstate_paths (struct multipath *mpp); + int check_daemon(void); +Index: multipath-tools-130222/libmultipath/defaults.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/defaults.h ++++ multipath-tools-130222/libmultipath/defaults.h +@@ -22,6 +22,7 @@ + #define DEFAULT_DETECT_CHECKER DETECT_CHECKER_OFF + #define DEFAULT_DEFERRED_REMOVE DEFERRED_REMOVE_OFF + #define DEFAULT_DELAY_CHECKS DELAY_CHECKS_OFF ++#define DEFAULT_MARGINAL_PATH MARGINAL_PATH_OFF + #define DEFAULT_RETRIGGER_DELAY 10 + #define DEFAULT_RETRIGGER_TRIES 3 + #define DEFAULT_UEV_WAIT_TIMEOUT 30 +Index: multipath-tools-130222/libmultipath/dict.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/dict.c ++++ multipath-tools-130222/libmultipath/dict.c +@@ -1077,6 +1077,81 @@ def_all_tg_pt_handler(vector strvec) + return 0; + } + ++static int ++def_marginal_path_err_sample_time_handler(vector strvec) ++{ ++ char * buff; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ conf->marginal_path_err_sample_time = MARGINAL_PATH_OFF; ++ else if ((conf->marginal_path_err_sample_time = atoi(buff)) < 1) ++ conf->marginal_path_err_sample_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++def_marginal_path_err_rate_threshold_handler(vector strvec) ++{ ++ char * buff; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ conf->marginal_path_err_rate_threshold = MARGINAL_PATH_OFF; ++ else if ((conf->marginal_path_err_rate_threshold = atoi(buff)) < 1) ++ conf->marginal_path_err_rate_threshold = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++def_marginal_path_err_recheck_gap_time_handler(vector strvec) ++{ ++ char * buff; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ conf->marginal_path_err_recheck_gap_time = MARGINAL_PATH_OFF; ++ else if ((conf->marginal_path_err_recheck_gap_time = atoi(buff)) < 1) ++ conf->marginal_path_err_recheck_gap_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++def_marginal_path_double_failed_time_handler(vector strvec) ++{ ++ char * buff; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ conf->marginal_path_double_failed_time = MARGINAL_PATH_OFF; ++ else if ((conf->marginal_path_double_failed_time = atoi(buff)) < 1) ++ conf->marginal_path_double_failed_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} + + /* + * blacklist 
block handlers +@@ -2055,6 +2130,98 @@ hw_all_tg_pt_handler(vector strvec) + return 0; + } + ++static int ++hw_marginal_path_err_sample_time_handler(vector strvec) ++{ ++ struct hwentry *hwe = VECTOR_LAST_SLOT(conf->hwtable); ++ char * buff; ++ ++ if (!hwe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ hwe->marginal_path_err_sample_time = MARGINAL_PATH_OFF; ++ else if ((hwe->marginal_path_err_sample_time = atoi(buff)) < 1) ++ hwe->marginal_path_err_sample_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++hw_marginal_path_err_rate_threshold_handler(vector strvec) ++{ ++ struct hwentry *hwe = VECTOR_LAST_SLOT(conf->hwtable); ++ char * buff; ++ ++ if (!hwe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ hwe->marginal_path_err_rate_threshold = MARGINAL_PATH_OFF; ++ else if ((hwe->marginal_path_err_rate_threshold = atoi(buff)) < 1) ++ hwe->marginal_path_err_rate_threshold = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++hw_marginal_path_err_recheck_gap_time_handler(vector strvec) ++{ ++ struct hwentry *hwe = VECTOR_LAST_SLOT(conf->hwtable); ++ char * buff; ++ ++ if (!hwe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ hwe->marginal_path_err_recheck_gap_time = MARGINAL_PATH_OFF; ++ else if ((hwe->marginal_path_err_recheck_gap_time = atoi(buff)) < 1) ++ hwe->marginal_path_err_recheck_gap_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++hw_marginal_path_double_failed_time_handler(vector strvec) ++{ ++ struct hwentry *hwe = VECTOR_LAST_SLOT(conf->hwtable); ++ char * buff; ++ ++ if (!hwe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ hwe->marginal_path_double_failed_time = MARGINAL_PATH_OFF; ++ else if ((hwe->marginal_path_double_failed_time = atoi(buff)) < 1) ++ hwe->marginal_path_double_failed_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ + /* + * multipaths block handlers + */ +@@ -2659,6 +2826,98 @@ mp_ghost_delay_handler(vector strvec) + return 0; + } + ++static int ++mp_marginal_path_err_sample_time_handler(vector strvec) ++{ ++ struct mpentry *mpe = VECTOR_LAST_SLOT(conf->mptable); ++ char * buff; ++ ++ if (!mpe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ mpe->marginal_path_err_sample_time = MARGINAL_PATH_OFF; ++ else if ((mpe->marginal_path_err_sample_time = atoi(buff)) < 1) ++ mpe->marginal_path_err_sample_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++mp_marginal_path_err_rate_threshold_handler(vector strvec) ++{ ++ struct mpentry *mpe = VECTOR_LAST_SLOT(conf->mptable); ++ char * buff; ++ ++ if (!mpe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ mpe->marginal_path_err_rate_threshold = MARGINAL_PATH_OFF; ++ else if ((mpe->marginal_path_err_rate_threshold = atoi(buff)) < 1) ++ 
mpe->marginal_path_err_rate_threshold = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++mp_marginal_path_err_recheck_gap_time_handler(vector strvec) ++{ ++ struct mpentry *mpe = VECTOR_LAST_SLOT(conf->mptable); ++ char * buff; ++ ++ if (!mpe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ mpe->marginal_path_err_recheck_gap_time = MARGINAL_PATH_OFF; ++ else if ((mpe->marginal_path_err_recheck_gap_time = atoi(buff)) < 1) ++ mpe->marginal_path_err_recheck_gap_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ ++static int ++mp_marginal_path_double_failed_time_handler(vector strvec) ++{ ++ struct mpentry *mpe = VECTOR_LAST_SLOT(conf->mptable); ++ char * buff; ++ ++ if (!mpe) ++ return 1; ++ ++ buff = set_value(strvec); ++ if (!buff) ++ return 1; ++ ++ if ((strlen(buff) == 2 && !strcmp(buff, "no")) || ++ (strlen(buff) == 1 && !strcmp(buff, "0"))) ++ mpe->marginal_path_double_failed_time = MARGINAL_PATH_OFF; ++ else if ((mpe->marginal_path_double_failed_time = atoi(buff)) < 1) ++ mpe->marginal_path_double_failed_time = MARGINAL_PATH_OFF; ++ ++ FREE(buff); ++ return 0; ++} ++ + /* + * config file keywords printing + */ +@@ -2989,6 +3248,56 @@ snprint_mp_ghost_delay (char * buff, int + } + + static int ++snprint_mp_marginal_path_err_sample_time (char * buff, int len, void * data) ++{ ++ struct mpentry * mpe = (struct mpentry *)data; ++ ++ if (mpe->marginal_path_err_sample_time == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (mpe->marginal_path_err_sample_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", mpe->marginal_path_err_sample_time); ++} ++ ++static int ++snprint_mp_marginal_path_err_rate_threshold (char * buff, int len, void * data) ++{ ++ struct mpentry * mpe = (struct mpentry *)data; ++ ++ if (mpe->marginal_path_err_rate_threshold == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (mpe->marginal_path_err_rate_threshold == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", mpe->marginal_path_err_rate_threshold); ++} ++ ++static int ++snprint_mp_marginal_path_err_recheck_gap_time (char * buff, int len, ++ void * data) ++{ ++ struct mpentry * mpe = (struct mpentry *)data; ++ ++ if (mpe->marginal_path_err_recheck_gap_time == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (mpe->marginal_path_err_recheck_gap_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", ++ mpe->marginal_path_err_recheck_gap_time); ++} ++ ++static int ++snprint_mp_marginal_path_double_failed_time (char * buff, int len, void * data) ++{ ++ struct mpentry * mpe = (struct mpentry *)data; ++ ++ if (mpe->marginal_path_double_failed_time == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (mpe->marginal_path_double_failed_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", mpe->marginal_path_double_failed_time); ++} ++ ++static int + snprint_hw_fast_io_fail(char * buff, int len, void * data) + { + struct hwentry * hwe = (struct hwentry *)data; +@@ -3429,6 +3738,55 @@ snprint_hw_all_tg_pt(char * buff, int le + } + + static int ++snprint_hw_marginal_path_err_sample_time(char * buff, int len, void * data) ++{ ++ struct hwentry * hwe = (struct hwentry *)data; ++ ++ if (hwe->marginal_path_err_sample_time == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (hwe->marginal_path_err_sample_time == MARGINAL_PATH_OFF) ++ return 
snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", hwe->marginal_path_err_sample_time); ++} ++ ++static int ++snprint_hw_marginal_path_err_rate_threshold(char * buff, int len, void * data) ++{ ++ struct hwentry * hwe = (struct hwentry *)data; ++ ++ if (hwe->marginal_path_err_rate_threshold == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (hwe->marginal_path_err_rate_threshold == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", hwe->marginal_path_err_rate_threshold); ++} ++ ++static int ++snprint_hw_marginal_path_err_recheck_gap_time(char * buff, int len, void * data) ++{ ++ struct hwentry * hwe = (struct hwentry *)data; ++ ++ if (hwe->marginal_path_err_recheck_gap_time == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (hwe->marginal_path_err_recheck_gap_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", ++ hwe->marginal_path_err_recheck_gap_time); ++} ++ ++static int ++snprint_hw_marginal_path_double_failed_time(char * buff, int len, void * data) ++{ ++ struct hwentry * hwe = (struct hwentry *)data; ++ ++ if (hwe->marginal_path_double_failed_time == MARGINAL_PATH_UNDEF) ++ return 0; ++ if (hwe->marginal_path_double_failed_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", hwe->marginal_path_double_failed_time); ++} ++ ++static int + snprint_def_polling_interval (char * buff, int len, void * data) + { + return snprintf(buff, len, "%i", conf->checkint); +@@ -3945,6 +4303,46 @@ snprint_def_all_tg_pt(char * buff, int l + } + + static int ++snprint_def_marginal_path_err_sample_time(char * buff, int len, void * data) ++{ ++ if (conf->marginal_path_err_sample_time == MARGINAL_PATH_UNDEF || ++ conf->marginal_path_err_sample_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", conf->marginal_path_err_sample_time); ++} ++ ++static int ++snprint_def_marginal_path_err_rate_threshold(char * buff, int len, void * data) ++{ ++ if (conf->marginal_path_err_rate_threshold == MARGINAL_PATH_UNDEF || ++ conf->marginal_path_err_rate_threshold == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", ++ conf->marginal_path_err_rate_threshold); ++} ++ ++static int ++snprint_def_marginal_path_err_recheck_gap_time(char * buff, int len, ++ void * data) ++{ ++ if (conf->marginal_path_err_recheck_gap_time == MARGINAL_PATH_UNDEF || ++ conf->marginal_path_err_recheck_gap_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", ++ conf->marginal_path_err_recheck_gap_time); ++} ++ ++static int ++snprint_def_marginal_path_double_failed_time(char * buff, int len, void * data) ++{ ++ if (conf->marginal_path_double_failed_time == MARGINAL_PATH_UNDEF || ++ conf->marginal_path_double_failed_time == MARGINAL_PATH_OFF) ++ return snprintf(buff, len, "no"); ++ return snprintf(buff, len, "%d", ++ conf->marginal_path_double_failed_time); ++} ++ ++static int + snprint_ble_simple (char * buff, int len, void * data) + { + struct blentry * ble = (struct blentry *)data; +@@ -4043,6 +4441,10 @@ init_keywords(void) + install_keyword("unpriv_sgio", &def_unpriv_sgio_handler, &snprint_def_unpriv_sgio); + install_keyword("ghost_delay", &def_ghost_delay_handler, &snprint_def_ghost_delay); + install_keyword("all_tg_pt", &def_all_tg_pt_handler, &snprint_def_all_tg_pt); ++ install_keyword("marginal_path_err_sample_time", &def_marginal_path_err_sample_time_handler, 
&snprint_def_marginal_path_err_sample_time); ++ install_keyword("marginal_path_err_rate_threshold", &def_marginal_path_err_rate_threshold_handler, &snprint_def_marginal_path_err_rate_threshold); ++ install_keyword("marginal_path_err_recheck_gap_time", &def_marginal_path_err_recheck_gap_time_handler, &snprint_def_marginal_path_err_recheck_gap_time); ++ install_keyword("marginal_path_double_failed_time", &def_marginal_path_double_failed_time_handler, &snprint_def_marginal_path_double_failed_time); + __deprecated install_keyword("default_selector", &def_selector_handler, NULL); + __deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL); + __deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL); +@@ -4120,6 +4522,10 @@ init_keywords(void) + install_keyword("unpriv_sgio", &hw_unpriv_sgio_handler, &snprint_hw_unpriv_sgio); + install_keyword("ghost_delay", &hw_ghost_delay_handler, &snprint_hw_ghost_delay); + install_keyword("all_tg_pt", &hw_all_tg_pt_handler, &snprint_hw_all_tg_pt); ++ install_keyword("marginal_path_err_sample_time", &hw_marginal_path_err_sample_time_handler, &snprint_hw_marginal_path_err_sample_time); ++ install_keyword("marginal_path_err_rate_threshold", &hw_marginal_path_err_rate_threshold_handler, &snprint_hw_marginal_path_err_rate_threshold); ++ install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time); ++ install_keyword("marginal_path_double_failed_time", &hw_marginal_path_double_failed_time_handler, &snprint_hw_marginal_path_double_failed_time); + install_sublevel_end(); + + install_keyword_root("overrides", &nop_handler); +@@ -4184,5 +4590,9 @@ init_keywords(void) + install_keyword("max_sectors_kb", &mp_max_sectors_kb_handler, &snprint_mp_max_sectors_kb); + install_keyword("unpriv_sgio", &mp_unpriv_sgio_handler, &snprint_mp_unpriv_sgio); + install_keyword("ghost_delay", &mp_ghost_delay_handler, &snprint_mp_ghost_delay); ++ install_keyword("marginal_path_err_sample_time", &mp_marginal_path_err_sample_time_handler, &snprint_mp_marginal_path_err_sample_time); ++ install_keyword("marginal_path_err_rate_threshold", &mp_marginal_path_err_rate_threshold_handler, &snprint_mp_marginal_path_err_rate_threshold); ++ install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time); ++ install_keyword("marginal_path_double_failed_time", &mp_marginal_path_double_failed_time_handler, &snprint_mp_marginal_path_double_failed_time); + install_sublevel_end(); + } +Index: multipath-tools-130222/libmultipath/io_err_stat.c +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/io_err_stat.c +@@ -0,0 +1,763 @@ ++/* ++ * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved. ++ * ++ * io_err_stat.c ++ * version 1.0 ++ * ++ * IO error stream statistic process for path failure event from kernel ++ * ++ * Author(s): Guan Junxiong 2017 ++ * ++ * This file is released under the GPL version 2, or any later version. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "vector.h" ++#include "memory.h" ++#include "checkers.h" ++#include "config.h" ++#include "structs.h" ++#include "structs_vec.h" ++#include "devmapper.h" ++#include "debug.h" ++#include "lock.h" ++#include "time-util.h" ++#include "io_err_stat.h" ++ ++#define IOTIMEOUT_SEC 60 ++#define TIMEOUT_NO_IO_NSEC 10000000 /*10ms = 10000000ns*/ ++#define FLAKY_PATHFAIL_THRESHOLD 2 ++#define CONCUR_NR_EVENT 32 ++ ++#define PATH_IO_ERR_IN_CHECKING -1 ++#define PATH_IO_ERR_WAITING_TO_CHECK -2 ++ ++#define io_err_stat_log(prio, fmt, args...) \ ++ condlog(prio, "io error statistic: " fmt, ##args) ++ ++ ++struct io_err_stat_pathvec { ++ pthread_mutex_t mutex; ++ vector pathvec; ++}; ++ ++struct dio_ctx { ++ struct timespec io_starttime; ++ int blksize; ++ void *buf; ++ struct iocb io; ++}; ++ ++struct io_err_stat_path { ++ char devname[FILE_NAME_SIZE]; ++ int fd; ++ struct dio_ctx *dio_ctx_array; ++ int io_err_nr; ++ int io_nr; ++ struct timespec start_time; ++ ++ int total_time; ++ int err_rate_threshold; ++}; ++ ++pthread_t io_err_stat_thr; ++pthread_attr_t io_err_stat_attr; ++ ++static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER; ++static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER; ++static int io_err_thread_running = 0; ++ ++#define uatomic_read(ptr) __atomic_load_n((ptr), __ATOMIC_SEQ_CST) ++#define uatomic_set(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_SEQ_CST) ++ ++static struct io_err_stat_pathvec *paths; ++struct vectors *vecs; ++io_context_t ioctx; ++ ++static void cancel_inflight_io(struct io_err_stat_path *pp); ++ ++struct io_err_stat_path *find_err_path_by_dev(vector pathvec, char *dev) ++{ ++ int i; ++ struct io_err_stat_path *pp; ++ ++ if (!pathvec) ++ return NULL; ++ vector_foreach_slot(pathvec, pp, i) ++ if (!strcmp(pp->devname, dev)) ++ return pp; ++ ++ io_err_stat_log(4, "%s: not found in check queue", dev); ++ ++ return NULL; ++} ++ ++static int init_each_dio_ctx(struct dio_ctx *ct, int blksize, ++ unsigned long pgsize) ++{ ++ ct->blksize = blksize; ++ if (posix_memalign(&ct->buf, pgsize, blksize)) ++ return 1; ++ memset(ct->buf, 0, blksize); ++ ct->io_starttime.tv_sec = 0; ++ ct->io_starttime.tv_nsec = 0; ++ ++ return 0; ++} ++ ++static void deinit_each_dio_ctx(struct dio_ctx *ct) ++{ ++ if (ct->buf) ++ free(ct->buf); ++} ++ ++static int setup_directio_ctx(struct io_err_stat_path *p) ++{ ++ unsigned long pgsize = getpagesize(); ++ char fpath[PATH_MAX]; ++ int blksize = 0; ++ int i; ++ ++ if (snprintf(fpath, PATH_MAX, "/dev/%s", p->devname) >= PATH_MAX) ++ return 1; ++ if (p->fd < 0) ++ p->fd = open(fpath, O_RDONLY | O_DIRECT); ++ if (p->fd < 0) ++ return 1; ++ ++ p->dio_ctx_array = MALLOC(sizeof(struct dio_ctx) * CONCUR_NR_EVENT); ++ if (!p->dio_ctx_array) ++ goto fail_close; ++ ++ if (ioctl(p->fd, BLKBSZGET, &blksize) < 0) { ++ io_err_stat_log(4, "%s:cannot get blocksize, set default 512", ++ p->devname); ++ blksize = 512; ++ } ++ if (!blksize) ++ goto free_pdctx; ++ ++ for (i = 0; i < CONCUR_NR_EVENT; i++) { ++ if (init_each_dio_ctx(p->dio_ctx_array + i, blksize, pgsize)) ++ goto deinit; ++ } ++ return 0; ++ ++deinit: ++ for (i = 0; i < CONCUR_NR_EVENT; i++) ++ deinit_each_dio_ctx(p->dio_ctx_array + i); ++free_pdctx: ++ FREE(p->dio_ctx_array); ++fail_close: ++ close(p->fd); ++ ++ return 1; ++} ++ ++static void destroy_directio_ctx(struct io_err_stat_path *p) ++{ ++ int i; ++ ++ if (!p || !p->dio_ctx_array) ++ 
return; ++ cancel_inflight_io(p); ++ ++ for (i = 0; i < CONCUR_NR_EVENT; i++) ++ deinit_each_dio_ctx(p->dio_ctx_array + i); ++ FREE(p->dio_ctx_array); ++ ++ if (p->fd > 0) ++ close(p->fd); ++} ++ ++static struct io_err_stat_path *alloc_io_err_stat_path(void) ++{ ++ struct io_err_stat_path *p; ++ ++ p = (struct io_err_stat_path *)MALLOC(sizeof(*p)); ++ if (!p) ++ return NULL; ++ ++ memset(p->devname, 0, sizeof(p->devname)); ++ p->io_err_nr = 0; ++ p->io_nr = 0; ++ p->total_time = 0; ++ p->start_time.tv_sec = 0; ++ p->start_time.tv_nsec = 0; ++ p->err_rate_threshold = 0; ++ p->fd = -1; ++ ++ return p; ++} ++ ++static void free_io_err_stat_path(struct io_err_stat_path *p) ++{ ++ FREE(p); ++} ++ ++static struct io_err_stat_pathvec *alloc_pathvec(void) ++{ ++ struct io_err_stat_pathvec *p; ++ int r; ++ ++ p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p)); ++ if (!p) ++ return NULL; ++ p->pathvec = vector_alloc(); ++ if (!p->pathvec) ++ goto out_free_struct_pathvec; ++ r = pthread_mutex_init(&p->mutex, NULL); ++ if (r) ++ goto out_free_member_pathvec; ++ ++ return p; ++ ++out_free_member_pathvec: ++ vector_free(p->pathvec); ++out_free_struct_pathvec: ++ FREE(p); ++ return NULL; ++} ++ ++static void free_io_err_pathvec(struct io_err_stat_pathvec *p) ++{ ++ struct io_err_stat_path *path; ++ int i; ++ ++ if (!p) ++ return; ++ pthread_mutex_destroy(&p->mutex); ++ if (!p->pathvec) { ++ vector_foreach_slot(p->pathvec, path, i) { ++ destroy_directio_ctx(path); ++ free_io_err_stat_path(path); ++ } ++ vector_free(p->pathvec); ++ } ++ FREE(p); ++} ++ ++/* ++ * return value ++ * 0: enqueue OK ++ * 1: fails because of internal error ++ */ ++static int enqueue_io_err_stat_by_path(struct path *path) ++{ ++ struct io_err_stat_path *p; ++ ++ pthread_mutex_lock(&paths->mutex); ++ p = find_err_path_by_dev(paths->pathvec, path->dev); ++ if (p) { ++ pthread_mutex_unlock(&paths->mutex); ++ return 0; ++ } ++ pthread_mutex_unlock(&paths->mutex); ++ ++ p = alloc_io_err_stat_path(); ++ if (!p) ++ return 1; ++ ++ memcpy(p->devname, path->dev, sizeof(p->devname)); ++ p->total_time = path->mpp->marginal_path_err_sample_time; ++ p->err_rate_threshold = path->mpp->marginal_path_err_rate_threshold; ++ ++ if (setup_directio_ctx(p)) ++ goto free_ioerr_path; ++ pthread_mutex_lock(&paths->mutex); ++ if (!vector_alloc_slot(paths->pathvec)) ++ goto unlock_destroy; ++ vector_set_slot(paths->pathvec, p); ++ pthread_mutex_unlock(&paths->mutex); ++ ++ io_err_stat_log(2, "%s: enqueue path %s to check", ++ path->mpp->alias, path->dev); ++ return 0; ++ ++unlock_destroy: ++ pthread_mutex_unlock(&paths->mutex); ++ destroy_directio_ctx(p); ++free_ioerr_path: ++ free_io_err_stat_path(p); ++ ++ return 1; ++} ++ ++int io_err_stat_handle_pathfail(struct path *path) ++{ ++ struct timespec curr_time; ++ ++ if (uatomic_read(&io_err_thread_running) == 0) ++ return 1; ++ ++ if (path->io_err_disable_reinstate) { ++ io_err_stat_log(3, "%s: reinstate is already disabled", ++ path->dev); ++ return 1; ++ } ++ if (path->io_err_pathfail_cnt < 0) ++ return 1; ++ ++ if (!path->mpp) ++ return 1; ++ if (path->mpp->marginal_path_double_failed_time <= 0 || ++ path->mpp->marginal_path_err_sample_time <= 0 || ++ path->mpp->marginal_path_err_recheck_gap_time <= 0 || ++ path->mpp->marginal_path_err_rate_threshold < 0) { ++ io_err_stat_log(4, "%s: parameter not set", path->mpp->alias); ++ return 1; ++ } ++ if (path->mpp->marginal_path_err_sample_time < (2 * IOTIMEOUT_SEC)) { ++ io_err_stat_log(2, "%s: marginal_path_err_sample_time should not less than %d", ++ 
path->mpp->alias, 2 * IOTIMEOUT_SEC); ++ return 1; ++ } ++ /* ++ * The test should only be started for paths that have failed ++ * repeatedly in a certain time frame, so that we have reason ++ * to assume they're flaky. Without bother the admin to configure ++ * the repeated count threshold and time frame, we assume a path ++ * which fails at least twice within 60 seconds is flaky. ++ */ ++ if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0) ++ return 1; ++ if (path->io_err_pathfail_cnt == 0) { ++ path->io_err_pathfail_cnt++; ++ path->io_err_pathfail_starttime = curr_time.tv_sec; ++ io_err_stat_log(5, "%s: start path flakiness pre-checking", ++ path->dev); ++ return 0; ++ } ++ if ((curr_time.tv_sec - path->io_err_pathfail_starttime) > ++ path->mpp->marginal_path_double_failed_time) { ++ path->io_err_pathfail_cnt = 0; ++ path->io_err_pathfail_starttime = curr_time.tv_sec; ++ io_err_stat_log(5, "%s: restart path flakiness pre-checking", ++ path->dev); ++ } ++ path->io_err_pathfail_cnt++; ++ if (path->io_err_pathfail_cnt >= FLAKY_PATHFAIL_THRESHOLD) { ++ path->io_err_disable_reinstate = 1; ++ path->io_err_pathfail_cnt = PATH_IO_ERR_WAITING_TO_CHECK; ++ /* enqueue path as soon as it comes up */ ++ path->io_err_dis_reinstate_time = 0; ++ if (path->state != PATH_DOWN) { ++ int oldstate = path->state; ++ io_err_stat_log(2, "%s: mark as failed", path->dev); ++ path->mpp->stat_path_failures++; ++ path->state = PATH_DOWN; ++ path->dmstate = PSTATE_FAILED; ++ if (oldstate == PATH_UP || oldstate == PATH_GHOST) ++ update_queue_mode_del_path(path->mpp); ++ if (path->tick > conf->checkint) ++ path->tick = conf->checkint; ++ } ++ } ++ ++ return 0; ++} ++ ++int need_io_err_check(struct path *pp) ++{ ++ struct timespec curr_time; ++ int r; ++ ++ if (uatomic_read(&io_err_thread_running) == 0) ++ return 0; ++ if (pp->mpp->nr_active <= 0) { ++ io_err_stat_log(2, "%s: recover path early", pp->dev); ++ goto recover; ++ } ++ if (pp->io_err_pathfail_cnt != PATH_IO_ERR_WAITING_TO_CHECK) ++ return 1; ++ if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0 || ++ (curr_time.tv_sec - pp->io_err_dis_reinstate_time) > ++ pp->mpp->marginal_path_err_recheck_gap_time) { ++ io_err_stat_log(4, "%s: reschedule checking after %d seconds", ++ pp->dev, ++ pp->mpp->marginal_path_err_recheck_gap_time); ++ r = enqueue_io_err_stat_by_path(pp); ++ /* ++ * Enqueue fails because of internal error. 
++ * In this case , we recover this path ++ * Or else, return 1 to set path state to PATH_SHAKY ++ */ ++ if (r == 1) { ++ io_err_stat_log(3, "%s: enqueue fails, recovering", ++ pp->dev); ++ goto recover; ++ } else ++ pp->io_err_pathfail_cnt = PATH_IO_ERR_IN_CHECKING; ++ } ++ ++ return 1; ++ ++recover: ++ pp->io_err_pathfail_cnt = 0; ++ pp->io_err_disable_reinstate = 0; ++ return 0; ++} ++ ++static int delete_io_err_stat_by_addr(struct io_err_stat_path *p) ++{ ++ int i; ++ ++ i = find_slot(paths->pathvec, p); ++ if (i != -1) ++ vector_del_slot(paths->pathvec, i); ++ ++ destroy_directio_ctx(p); ++ free_io_err_stat_path(p); ++ ++ return 0; ++} ++ ++static void account_async_io_state(struct io_err_stat_path *pp, int rc) ++{ ++ switch (rc) { ++ case PATH_DOWN: ++ pp->io_err_nr++; ++ break; ++ case PATH_UNCHECKED: ++ case PATH_UP: ++ case PATH_PENDING: ++ break; ++ default: ++ break; ++ } ++} ++ ++static int poll_io_err_stat(struct vectors *vecs, struct io_err_stat_path *pp) ++{ ++ struct timespec currtime, difftime; ++ struct path *path; ++ double err_rate; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0) ++ return 1; ++ timespecsub(&currtime, &pp->start_time, &difftime); ++ if (difftime.tv_sec < pp->total_time) ++ return 0; ++ ++ io_err_stat_log(4, "%s: check end", pp->devname); ++ ++ err_rate = pp->io_nr == 0 ? 0 : (pp->io_err_nr * 1000.0f) / pp->io_nr; ++ io_err_stat_log(3, "%s: IO error rate (%.1f/1000)", ++ pp->devname, err_rate); ++ pthread_cleanup_push(cleanup_lock, &vecs->lock); ++ lock(vecs->lock); ++ pthread_testcancel(); ++ path = find_path_by_dev(vecs->pathvec, pp->devname); ++ if (!path) { ++ io_err_stat_log(4, "path %s not found'", pp->devname); ++ } else if (err_rate <= pp->err_rate_threshold) { ++ path->io_err_pathfail_cnt = 0; ++ path->io_err_disable_reinstate = 0; ++ io_err_stat_log(3, "%s: (%d/%d) good to enable reinstating", ++ pp->devname, pp->io_err_nr, pp->io_nr); ++ /* ++ * schedule path check as soon as possible to ++ * update path state. 
Do NOT reinstate dm path here ++ */ ++ path->tick = 1; ++ ++ } else if (path->mpp && path->mpp->nr_active > 0) { ++ io_err_stat_log(3, "%s: keep failing the dm path %s", ++ path->mpp->alias, path->dev); ++ path->io_err_pathfail_cnt = PATH_IO_ERR_WAITING_TO_CHECK; ++ path->io_err_disable_reinstate = 1; ++ path->io_err_dis_reinstate_time = currtime.tv_sec; ++ io_err_stat_log(3, "%s: disable reinstating of %s", ++ path->mpp->alias, path->dev); ++ } else { ++ path->io_err_pathfail_cnt = 0; ++ path->io_err_disable_reinstate = 0; ++ io_err_stat_log(3, "%s: there is orphan path, enable reinstating", ++ pp->devname); ++ } ++ lock_cleanup_pop(vecs->lock); ++ ++ delete_io_err_stat_by_addr(pp); ++ ++ return 0; ++} ++ ++static int send_each_async_io(struct dio_ctx *ct, int fd, char *dev) ++{ ++ int rc = -1; ++ ++ if (ct->io_starttime.tv_nsec == 0 && ++ ct->io_starttime.tv_sec == 0) { ++ struct iocb *ios[1] = { &ct->io }; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &ct->io_starttime) != 0) { ++ ct->io_starttime.tv_sec = 0; ++ ct->io_starttime.tv_nsec = 0; ++ return rc; ++ } ++ io_prep_pread(&ct->io, fd, ct->buf, ct->blksize, 0); ++ if (io_submit(ioctx, 1, ios) != 1) { ++ io_err_stat_log(5, "%s: io_submit error %i", ++ dev, errno); ++ return rc; ++ } ++ rc = 0; ++ } ++ ++ return rc; ++} ++ ++static void send_batch_async_ios(struct io_err_stat_path *pp) ++{ ++ int i; ++ struct dio_ctx *ct; ++ struct timespec currtime, difftime; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0) ++ return; ++ /* ++ * Give a free time for all IO to complete or timeout ++ */ ++ if (pp->start_time.tv_sec != 0) { ++ timespecsub(&currtime, &pp->start_time, &difftime); ++ if (difftime.tv_sec + IOTIMEOUT_SEC >= pp->total_time) ++ return; ++ } ++ ++ for (i = 0; i < CONCUR_NR_EVENT; i++) { ++ ct = pp->dio_ctx_array + i; ++ if (!send_each_async_io(ct, pp->fd, pp->devname)) ++ pp->io_nr++; ++ } ++ if (pp->start_time.tv_sec == 0 && pp->start_time.tv_nsec == 0 && ++ clock_gettime(CLOCK_MONOTONIC, &pp->start_time)) { ++ pp->start_time.tv_sec = 0; ++ pp->start_time.tv_nsec = 0; ++ } ++} ++ ++static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t, ++ char *dev) ++{ ++ struct timespec difftime; ++ struct io_event event; ++ int rc = PATH_UNCHECKED; ++ int r; ++ ++ if (ct->io_starttime.tv_sec == 0) ++ return rc; ++ timespecsub(t, &ct->io_starttime, &difftime); ++ if (difftime.tv_sec > IOTIMEOUT_SEC) { ++ struct iocb *ios[1] = { &ct->io }; ++ ++ io_err_stat_log(5, "%s: abort check on timeout", dev); ++ r = io_cancel(ioctx, ios[0], &event); ++ if (r) ++ io_err_stat_log(5, "%s: io_cancel error %i", ++ dev, errno); ++ ct->io_starttime.tv_sec = 0; ++ ct->io_starttime.tv_nsec = 0; ++ rc = PATH_DOWN; ++ } else { ++ rc = PATH_PENDING; ++ } ++ ++ return rc; ++} ++ ++static void poll_async_io_timeout(void) ++{ ++ struct io_err_stat_path *pp; ++ struct timespec curr_time; ++ int rc = PATH_UNCHECKED; ++ int i, j; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0) ++ return; ++ vector_foreach_slot(paths->pathvec, pp, i) { ++ for (j = 0; j < CONCUR_NR_EVENT; j++) { ++ rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j, ++ &curr_time, pp->devname); ++ account_async_io_state(pp, rc); ++ } ++ } ++} ++ ++static void cancel_inflight_io(struct io_err_stat_path *pp) ++{ ++ struct io_event event; ++ int i, r; ++ ++ for (i = 0; i < CONCUR_NR_EVENT; i++) { ++ struct dio_ctx *ct = pp->dio_ctx_array + i; ++ struct iocb *ios[1] = { &ct->io }; ++ ++ if (ct->io_starttime.tv_sec == 0 ++ && ct->io_starttime.tv_nsec == 0) ++ continue; ++ 
io_err_stat_log(5, "%s: abort infligh io", ++ pp->devname); ++ r = io_cancel(ioctx, ios[0], &event); ++ if (r) ++ io_err_stat_log(5, "%s: io_cancel error %d, %i", ++ pp->devname, r, errno); ++ ct->io_starttime.tv_sec = 0; ++ ct->io_starttime.tv_nsec = 0; ++ } ++} ++ ++static inline int handle_done_dio_ctx(struct dio_ctx *ct, struct io_event *ev) ++{ ++ ct->io_starttime.tv_sec = 0; ++ ct->io_starttime.tv_nsec = 0; ++ return (ev->res == ct->blksize) ? PATH_UP : PATH_DOWN; ++} ++ ++static void handle_async_io_done_event(struct io_event *io_evt) ++{ ++ struct io_err_stat_path *pp; ++ struct dio_ctx *ct; ++ int rc = PATH_UNCHECKED; ++ int i, j; ++ ++ vector_foreach_slot(paths->pathvec, pp, i) { ++ for (j = 0; j < CONCUR_NR_EVENT; j++) { ++ ct = pp->dio_ctx_array + j; ++ if (&ct->io == io_evt->obj) { ++ rc = handle_done_dio_ctx(ct, io_evt); ++ account_async_io_state(pp, rc); ++ return; ++ } ++ } ++ } ++} ++ ++static void process_async_ios_event(int timeout_nsecs, char *dev) ++{ ++ struct io_event events[CONCUR_NR_EVENT]; ++ int i, n; ++ struct timespec timeout = { .tv_nsec = timeout_nsecs }; ++ ++ errno = 0; ++ n = io_getevents(ioctx, 1L, CONCUR_NR_EVENT, events, &timeout); ++ if (n < 0) { ++ io_err_stat_log(3, "%s: async io events returned %d (errno=%s)", ++ dev, n, strerror(errno)); ++ } else { ++ for (i = 0; i < n; i++) ++ handle_async_io_done_event(&events[i]); ++ } ++} ++ ++static void service_paths(void) ++{ ++ struct io_err_stat_path *pp; ++ int i; ++ ++ pthread_mutex_lock(&paths->mutex); ++ vector_foreach_slot(paths->pathvec, pp, i) { ++ send_batch_async_ios(pp); ++ process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname); ++ poll_async_io_timeout(); ++ poll_io_err_stat(vecs, pp); ++ } ++ pthread_mutex_unlock(&paths->mutex); ++} ++ ++static void cleanup_unlock(void *arg) ++{ ++ pthread_mutex_unlock((pthread_mutex_t*) arg); ++} ++ ++static void cleanup_exited(void *arg) ++{ ++ uatomic_set(&io_err_thread_running, 0); ++} ++ ++static void *io_err_stat_loop(void *data) ++{ ++ vecs = (struct vectors *)data; ++ ++ pthread_cleanup_push(cleanup_exited, NULL); ++ ++ mlockall(MCL_CURRENT | MCL_FUTURE); ++ ++ pthread_mutex_lock(&io_err_thread_lock); ++ uatomic_set(&io_err_thread_running, 1); ++ pthread_cond_broadcast(&io_err_thread_cond); ++ pthread_mutex_unlock(&io_err_thread_lock); ++ ++ while (1) { ++ service_paths(); ++ usleep(100000); ++ } ++ ++ pthread_cleanup_pop(1); ++ return NULL; ++} ++ ++int start_io_err_stat_thread(void *data) ++{ ++ int ret; ++ ++ if (uatomic_read(&io_err_thread_running) == 1) ++ return 0; ++ ++ if (io_setup(CONCUR_NR_EVENT, &ioctx) != 0) { ++ io_err_stat_log(4, "io_setup failed"); ++ return 1; ++ } ++ paths = alloc_pathvec(); ++ if (!paths) ++ goto destroy_ctx; ++ ++ pthread_mutex_lock(&io_err_thread_lock); ++ pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock); ++ ++ ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr, ++ io_err_stat_loop, data); ++ ++ while (!ret && !uatomic_read(&io_err_thread_running) && ++ pthread_cond_wait(&io_err_thread_cond, ++ &io_err_thread_lock) == 0); ++ ++ pthread_cleanup_pop(1); ++ ++ if (ret) { ++ io_err_stat_log(0, "cannot create io_error statistic thread"); ++ goto out_free; ++ } ++ ++ io_err_stat_log(2, "io_error statistic thread started"); ++ return 0; ++ ++out_free: ++ free_io_err_pathvec(paths); ++destroy_ctx: ++ io_destroy(ioctx); ++ io_err_stat_log(0, "failed to start io_error statistic thread"); ++ return 1; ++} ++ ++void stop_io_err_stat_thread(void) ++{ ++ if (io_err_stat_thr == (pthread_t)0) ++ return; ++ ++ if 
(uatomic_read(&io_err_thread_running) == 1) ++ pthread_cancel(io_err_stat_thr); ++ ++ pthread_join(io_err_stat_thr, NULL); ++ free_io_err_pathvec(paths); ++ io_destroy(ioctx); ++} +Index: multipath-tools-130222/libmultipath/io_err_stat.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/io_err_stat.h +@@ -0,0 +1,15 @@ ++#ifndef _IO_ERR_STAT_H ++#define _IO_ERR_STAT_H ++ ++#include "vector.h" ++#include "lock.h" ++ ++ ++extern pthread_attr_t io_err_stat_attr; ++ ++int start_io_err_stat_thread(void *data); ++void stop_io_err_stat_thread(void); ++int io_err_stat_handle_pathfail(struct path *path); ++int need_io_err_check(struct path *pp); ++ ++#endif /* _IO_ERR_STAT_H */ +Index: multipath-tools-130222/libmultipath/propsel.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/propsel.c ++++ multipath-tools-130222/libmultipath/propsel.c +@@ -956,6 +956,104 @@ select_delay_wait_checks (struct multipa + } + + extern int ++select_marginal_path_err_sample_time(struct multipath * mp) ++{ ++ if (mp->mpe && ++ mp->mpe->marginal_path_err_sample_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_sample_time = mp->mpe->marginal_path_err_sample_time; ++ condlog(3, "marginal_path_err_sample_time = %i (multipath setting)", mp->marginal_path_err_sample_time); ++ return 0; ++ } ++ if (mp->hwe && ++ mp->hwe->marginal_path_err_sample_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_sample_time = mp->hwe->marginal_path_err_sample_time; ++ condlog(3, "marginal_path_err_sample_time = %i (controler setting)", mp->marginal_path_err_sample_time); ++ return 0; ++ } ++ if (conf->marginal_path_err_sample_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_sample_time = conf->marginal_path_err_sample_time; ++ condlog(3, "marginal_path_err_sample_time = %i (config file default)", mp->marginal_path_err_sample_time); ++ return 0; ++ } ++ mp->marginal_path_err_sample_time = DEFAULT_DELAY_CHECKS; ++ condlog(3, "marginal_path_err_sample_time = DISABLED (internal default)"); ++ return 0; ++} ++ ++extern int ++select_marginal_path_err_rate_threshold(struct multipath * mp) ++{ ++ if (mp->mpe && ++ mp->mpe->marginal_path_err_rate_threshold != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_rate_threshold = mp->mpe->marginal_path_err_rate_threshold; ++ condlog(3, "marginal_path_err_rate_threshold = %i (multipath setting)", mp->marginal_path_err_rate_threshold); ++ return 0; ++ } ++ if (mp->hwe && ++ mp->hwe->marginal_path_err_rate_threshold != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_rate_threshold = mp->hwe->marginal_path_err_rate_threshold; ++ condlog(3, "marginal_path_err_rate_threshold = %i (controler setting)", mp->marginal_path_err_rate_threshold); ++ return 0; ++ } ++ if (conf->marginal_path_err_rate_threshold != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_rate_threshold = conf->marginal_path_err_rate_threshold; ++ condlog(3, "marginal_path_err_rate_threshold = %i (config file default)", mp->marginal_path_err_rate_threshold); ++ return 0; ++ } ++ mp->marginal_path_err_rate_threshold = DEFAULT_DELAY_CHECKS; ++ condlog(3, "marginal_path_err_rate_threshold = DISABLED (internal default)"); ++ return 0; ++} ++ ++extern int ++select_marginal_path_err_recheck_gap_time(struct multipath * mp) ++{ ++ if (mp->mpe && mp->mpe->marginal_path_err_recheck_gap_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_recheck_gap_time = mp->mpe->marginal_path_err_recheck_gap_time; 
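[Editor's note: every select_marginal_path_*() helper above follows the same precedence chain: the multipaths-section value (mpe) wins over the devices/controller value (hwe), which wins over the config-file default, which falls back to a compiled-in "disabled" default. The sketch below isolates that chain; the struct is trimmed to one field and MARGINAL_PATH_OFF stands in for the internal "disabled" fallback.]

	#include <stdio.h>

	#define MARGINAL_PATH_UNDEF  0
	#define MARGINAL_PATH_OFF   -1

	struct entry { int marginal_path_err_sample_time; };

	static int pick(const struct entry *mpe, const struct entry *hwe, int conf_val)
	{
		if (mpe && mpe->marginal_path_err_sample_time != MARGINAL_PATH_UNDEF)
			return mpe->marginal_path_err_sample_time; /* multipaths section */
		if (hwe && hwe->marginal_path_err_sample_time != MARGINAL_PATH_UNDEF)
			return hwe->marginal_path_err_sample_time; /* devices/controller section */
		if (conf_val != MARGINAL_PATH_UNDEF)
			return conf_val;                           /* defaults section */
		return MARGINAL_PATH_OFF;                          /* internal default: disabled */
	}

	int main(void)
	{
		struct entry hwe = { .marginal_path_err_sample_time = 300 };

		printf("%d\n", pick(NULL, &hwe, MARGINAL_PATH_UNDEF)); /* -> 300 */
		printf("%d\n", pick(NULL, NULL, MARGINAL_PATH_UNDEF)); /* -> -1 (off) */
		return 0;
	}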
++ condlog(3, "marginal_path_err_recheck_gap_time = %i (multipath setting)", mp->marginal_path_err_recheck_gap_time); ++ return 0; ++ } ++ if (mp->hwe && mp->hwe->marginal_path_err_recheck_gap_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_recheck_gap_time = mp->hwe->marginal_path_err_recheck_gap_time; ++ condlog(3, "marginal_path_err_recheck_gap_time = %i (controler setting)", mp->marginal_path_err_recheck_gap_time); ++ return 0; ++ } ++ if (conf->marginal_path_err_recheck_gap_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_err_recheck_gap_time = conf->marginal_path_err_recheck_gap_time; ++ condlog(3, "marginal_path_err_recheck_gap_time = %i (config file default)", mp->marginal_path_err_recheck_gap_time); ++ return 0; ++ } ++ mp->marginal_path_err_recheck_gap_time = DEFAULT_DELAY_CHECKS; ++ condlog(3, "marginal_path_err_recheck_gap_time = DISABLED (internal default)"); ++ return 0; ++} ++ ++extern int ++select_marginal_path_double_failed_time(struct multipath * mp) ++{ ++ if (mp->mpe && ++ mp->mpe->marginal_path_double_failed_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_double_failed_time = mp->mpe->marginal_path_double_failed_time; ++ condlog(3, "marginal_path_double_failed_time = %i (multipath setting)", mp->marginal_path_double_failed_time); ++ return 0; ++ } ++ if (mp->hwe && ++ mp->hwe->marginal_path_double_failed_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_double_failed_time = mp->hwe->marginal_path_double_failed_time; ++ condlog(3, "marginal_path_double_failed_time = %i (controler setting)", mp->marginal_path_double_failed_time); ++ return 0; ++ } ++ if (conf->marginal_path_double_failed_time != MARGINAL_PATH_UNDEF) { ++ mp->marginal_path_double_failed_time = conf->marginal_path_double_failed_time; ++ condlog(3, "marginal_path_double_failed_time = %i (config file default)", mp->marginal_path_double_failed_time); ++ return 0; ++ } ++ mp->marginal_path_double_failed_time = DEFAULT_DELAY_CHECKS; ++ condlog(3, "marginal_path_double_failed_time = DISABLED (internal default)"); ++ return 0; ++} ++ ++extern int + select_skip_kpartx (struct multipath * mp) + { + if (mp->mpe && mp->mpe->skip_kpartx != SKIP_KPARTX_UNDEF) { +Index: multipath-tools-130222/libmultipath/propsel.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/propsel.h ++++ multipath-tools-130222/libmultipath/propsel.h +@@ -24,6 +24,10 @@ int select_detect_checker(struct path * + int select_deferred_remove(struct multipath *mp); + int select_delay_watch_checks (struct multipath * mp); + int select_delay_wait_checks (struct multipath * mp); ++int select_marginal_path_err_sample_time(struct multipath *mp); ++int select_marginal_path_err_rate_threshold(struct multipath *mp); ++int select_marginal_path_err_recheck_gap_time(struct multipath *mp); ++int select_marginal_path_double_failed_time(struct multipath *mp); + int select_skip_kpartx (struct multipath * mp); + int select_max_sectors_kb (struct multipath * mp); + int select_unpriv_sgio (struct multipath * mp); +Index: multipath-tools-130222/libmultipath/structs.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/structs.h ++++ multipath-tools-130222/libmultipath/structs.h +@@ -3,6 +3,7 @@ + + #include + #include ++#include + + #include "prio.h" + #include "byteorder.h" +@@ -176,6 +177,11 @@ enum delay_checks_states { + DELAY_CHECKS_UNDEF = 0, + }; + ++enum marginal_path_states { ++ MARGINAL_PATH_OFF = -1, ++ 
MARGINAL_PATH_UNDEF = 0, ++}; ++ + enum missing_udev_info_states { + INFO_OK, + INFO_MISSING, +@@ -252,6 +258,10 @@ struct path { + int missing_udev_info; + int retriggers; + int wwid_changed; ++ time_t io_err_dis_reinstate_time; ++ int io_err_disable_reinstate; ++ int io_err_pathfail_cnt; ++ int io_err_pathfail_starttime; + + /* configlet pointers */ + struct hwentry * hwe; +@@ -285,6 +295,10 @@ struct multipath { + int deferred_remove; + int delay_watch_checks; + int delay_wait_checks; ++ int marginal_path_err_sample_time; ++ int marginal_path_err_rate_threshold; ++ int marginal_path_err_recheck_gap_time; ++ int marginal_path_double_failed_time; + int force_udev_reload; + int skip_kpartx; + int max_sectors_kb; +Index: multipath-tools-130222/libmultipath/time-util.c +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/time-util.c +@@ -0,0 +1,42 @@ ++#include ++#include ++#include ++#include "time-util.h" ++ ++/* Initialize @cond as a condition variable that uses the monotonic clock */ ++void pthread_cond_init_mono(pthread_cond_t *cond) ++{ ++ pthread_condattr_t attr; ++ int res; ++ ++ res = pthread_condattr_init(&attr); ++ assert(res == 0); ++ res = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); ++ assert(res == 0); ++ res = pthread_cond_init(cond, &attr); ++ assert(res == 0); ++ res = pthread_condattr_destroy(&attr); ++ assert(res == 0); ++} ++ ++/* Ensure that 0 <= ts->tv_nsec && ts->tv_nsec < 1000 * 1000 * 1000. */ ++void normalize_timespec(struct timespec *ts) ++{ ++ while (ts->tv_nsec < 0) { ++ ts->tv_nsec += 1000UL * 1000 * 1000; ++ ts->tv_sec--; ++ } ++ while (ts->tv_nsec >= 1000UL * 1000 * 1000) { ++ ts->tv_nsec -= 1000UL * 1000 * 1000; ++ ts->tv_sec++; ++ } ++} ++ ++/* Compute *res = *a - *b */ ++void timespecsub(const struct timespec *a, const struct timespec *b, ++ struct timespec *res) ++{ ++ res->tv_sec = a->tv_sec - b->tv_sec; ++ res->tv_nsec = a->tv_nsec - b->tv_nsec; ++ normalize_timespec(res); ++} +Index: multipath-tools-130222/libmultipath/time-util.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/time-util.h +@@ -0,0 +1,13 @@ ++#ifndef _TIME_UTIL_H_ ++#define _TIME_UTIL_H_ ++ ++#include ++ ++struct timespec; ++ ++void pthread_cond_init_mono(pthread_cond_t *cond); ++void normalize_timespec(struct timespec *ts); ++void timespecsub(const struct timespec *a, const struct timespec *b, ++ struct timespec *res); ++ ++#endif /* _TIME_UTIL_H_ */ +Index: multipath-tools-130222/libmultipath/uevent.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/uevent.c ++++ multipath-tools-130222/libmultipath/uevent.c +@@ -616,12 +616,46 @@ uevent_get_dm_name(struct uevent *uev) + int i; + + for (i = 0; uev->envp[i] != NULL; i++) { +- if (!strncmp(uev->envp[i], "DM_NAME", 6) && +- strlen(uev->envp[i]) > 7) { ++ if (!strncmp(uev->envp[i], "DM_NAME", 7) && ++ strlen(uev->envp[i]) > 8) { + p = MALLOC(strlen(uev->envp[i] + 8) + 1); + strcpy(p, uev->envp[i] + 8); + break; + } + } ++ return p; ++} ++ ++extern char * ++uevent_get_dm_path(struct uevent *uev) ++{ ++ char *p = NULL; ++ int i; ++ ++ for (i = 0; uev->envp[i] != NULL; i++) { ++ if (!strncmp(uev->envp[i], "DM_PATH", 7) && ++ strlen(uev->envp[i]) > 8) { ++ p = MALLOC(strlen(uev->envp[i] + 8) + 1); ++ strcpy(p, uev->envp[i] + 8); ++ break; ++ } ++ } ++ return p; ++} ++ ++extern char * ++uevent_get_dm_action(struct uevent *uev) 
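[Editor's note: a tiny usage example for the time-util.c helpers introduced above, showing how timespecsub() borrows from tv_sec when the nanosecond difference goes negative. The two helpers are copied verbatim from the patch so the example is self-contained; the timestamps are made up.]

	#include <stdio.h>
	#include <time.h>

	static void normalize_timespec(struct timespec *ts)
	{
		while (ts->tv_nsec < 0) {
			ts->tv_nsec += 1000UL * 1000 * 1000;
			ts->tv_sec--;
		}
		while (ts->tv_nsec >= 1000UL * 1000 * 1000) {
			ts->tv_nsec -= 1000UL * 1000 * 1000;
			ts->tv_sec++;
		}
	}

	static void timespecsub(const struct timespec *a, const struct timespec *b,
				struct timespec *res)
	{
		res->tv_sec = a->tv_sec - b->tv_sec;
		res->tv_nsec = a->tv_nsec - b->tv_nsec;
		normalize_timespec(res);
	}

	int main(void)
	{
		struct timespec start = { .tv_sec = 10, .tv_nsec = 900000000 };
		struct timespec end   = { .tv_sec = 12, .tv_nsec = 100000000 };
		struct timespec diff;

		timespecsub(&end, &start, &diff);  /* 1.200000000: nsec borrow handled */
		printf("%ld.%09ld\n", (long)diff.tv_sec, diff.tv_nsec);
		return 0;
	}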
++{ ++ char *p = NULL; ++ int i; ++ ++ for (i = 0; uev->envp[i] != NULL; i++) { ++ if (!strncmp(uev->envp[i], "DM_ACTION", 9) && ++ strlen(uev->envp[i]) > 10) { ++ p = MALLOC(strlen(uev->envp[i] + 10) + 1); ++ strcpy(p, uev->envp[i] + 10); ++ break; ++ } ++ } + return p; + } +Index: multipath-tools-130222/libmultipath/uevent.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/uevent.h ++++ multipath-tools-130222/libmultipath/uevent.h +@@ -36,5 +36,7 @@ int uevent_get_major(struct uevent *uev) + int uevent_get_minor(struct uevent *uev); + int uevent_get_disk_ro(struct uevent *uev); + char *uevent_get_dm_name(struct uevent *uev); ++char *uevent_get_dm_path(struct uevent *uev); ++char *uevent_get_dm_action(struct uevent *uev); + + #endif /* _UEVENT_H */ +Index: multipath-tools-130222/multipath/multipath.conf.5 +=================================================================== +--- multipath-tools-130222.orig/multipath/multipath.conf.5 ++++ multipath-tools-130222/multipath/multipath.conf.5 +@@ -527,7 +527,7 @@ recently become valid for this many chec + being watched, when they next become valid, they will not be used until they + have stayed up for + .I delay_wait_checks +-checks. Default is ++checks. See "Shaky paths detection" below. Default is + .I no + .TP + .B delay_wait_checks +@@ -537,9 +537,56 @@ online fails again within + checks, the next time it comes back online, it will marked and delayed, and not + used until it has passed + .I delay_wait_checks +-checks. Default is ++checks. See "Shaky paths detection" below. Default is + .I no + .TP ++.B marginal_path_double_failed_time ++One of the four parameters of supporting path check based on accounting IO ++error such as intermittent error. When a path failed event occurs twice in ++\fImarginal_path_double_failed_time\fR seconds due to an IO error and all the ++other three parameters are set, multipathd will fail the path and enqueue ++this path into a queue of which members are sent a couple of continuous ++direct reading asynchronous IOs at a fixed sample rate of 10HZ to start IO ++error accounting process. See "Shaky paths detection" below. Default is ++\fIno\fR ++.TP ++.B marginal_path_err_sample_time ++One of the four parameters of supporting path check based on accounting IO ++error such as intermittent error. If it is set to a value no less than 120, ++when a path fail event occurs twice in \fImarginal_path_double_failed_time\fR ++second due to an IO error, multipathd will fail the path and enqueue this ++path into a queue of which members are sent a couple of continuous direct ++reading asynchronous IOs at a fixed sample rate of 10HZ to start the IO ++accounting process for the path will last for ++\fImarginal_path_err_sample_time\fR. ++If the rate of IO error on a particular path is greater than the ++\fImarginal_path_err_rate_threshold\fR, then the path will not reinstate for ++\fImarginal_path_err_recheck_gap_time\fR seconds unless there is only one ++active path. After \fImarginal_path_err_recheck_gap_time\fR expires, the path ++will be requeueed for rechecking. If checking result is good enough, the ++path will be reinstated. See "Shaky paths detection" below. Default is ++\fIno\fR ++.TP ++.B marginal_path_err_rate_threshold ++The error rate threshold as a permillage (1/1000). One of the four parameters ++of supporting path check based on accounting IO error such as intermittent ++error. Refer to \fImarginal_path_err_sample_time\fR. 
If the rate of IO errors ++on a particular path is greater than this parameter, then the path will not ++reinstate for \fImarginal_path_err_recheck_gap_time\fR seconds unless there is ++only one active path. See "Shaky paths detection" below. Default is \fIno\fR ++.TP ++.B marginal_path_err_recheck_gap_time ++One of the four parameters of supporting path check based on accounting IO ++error such as intermittent error. Refer to ++\fImarginal_path_err_sample_time\fR. If this parameter is set to a positive ++value, the failed path of which the IO error rate is larger than ++\fImarginal_path_err_rate_threshold\fR will be kept in failed state for ++\fImarginal_path_err_recheck_gap_time\fR seconds. When ++\fImarginal_path_err_recheck_gap_time\fR seconds expires, the path will be ++requeueed for checking. If checking result is good enough, the path will be ++reinstated, or else it will keep failed. See "Shaky paths detection" below. ++Default is \fIno\fR ++.TP + .B missing_uev_wait_timeout + Controls how many seconds multipathd will wait, after a new multipath device + is created, to receive a change event from udev for the device, before +@@ -771,6 +818,14 @@ section: + .TP + .B delay_wait_checks + .TP ++.B marginal_path_err_sample_time ++.TP ++.B marginal_path_err_rate_threshold ++.TP ++.B marginal_path_err_recheck_gap_time ++.TP ++.B marginal_path_double_failed_time ++.TP + .B skip_kpartx + .TP + .B max_sectors_kb +@@ -877,6 +932,14 @@ section: + .TP + .B delay_wait_checks + .TP ++.B marginal_path_err_sample_time ++.TP ++.B marginal_path_err_rate_threshold ++.TP ++.B marginal_path_err_recheck_gap_time ++.TP ++.B marginal_path_double_failed_time ++.TP + .B skip_kpartx + .TP + .B max_sectors_kb +@@ -887,6 +950,47 @@ section: + .RE + .PD + .LP ++.SH "Shaky paths detection" ++A common problem in SAN setups is the occurence of intermittent errors: a ++path is unreachable, then reachable again for a short time, disappears again, ++and so forth. This happens typically on unstable interconnects. It is ++undesirable to switch pathgroups unnecessarily on such frequent, unreliable ++events. \fImultipathd\fR supports two different methods for detecting this ++situation and dealing with it. Both methods share the same basic mode of ++operation: If a path is found to be \(dqshaky\(dq or \(dqflipping\(dq, ++and appears to be in healthy status, it is not reinstated (put back to use) ++immediately. Instead, it is watched for some time, and only reinstated ++if the healthy state appears to be stable. The logic of determining ++\(dqshaky\(dq condition, as well as the logic when to reinstate, ++differs between the two methods. ++.TP 8 ++.B \(dqdelay_checks\(dq failure tracking ++If a path fails again within a ++\fIdelay_watch_checks\fR interval after a failure, don't ++reinstate it until it passes a \fIdelay_wait_checks\fR interval ++in always good status. ++The intervals are measured in \(dqticks\(dq, i.e. the ++time between path checks by multipathd, which is variable and controlled by the ++\fIpolling_interval\fR and \fImax_polling_interval\fR parameters. ++.TP ++.B \(dqmarginal_path\(dq failure tracking ++If a second failure event (good->bad transition) occurs within ++\fImarginal_path_double_failed_time\fR seconds after a failure, high-frequency ++monitoring is started for the affected path: I/O is sent at a rate of 10 per ++second. This is done for \fImarginal_path_err_sample_time\fR seconds. During ++this period, the path is not reinstated. 
If the ++rate of errors remains below \fImarginal_path_err_rate_threshold\fR during the ++monitoring period, the path is reinstated. Otherwise, it ++is kept in failed state for \fImarginal_path_err_recheck_gap_time\fR, and ++after that, it is monitored again. For this method, time intervals are measured ++in seconds. ++.RE ++.LP ++See the documentation ++of the individual options above for details. ++It is \fBstrongly discouraged\fR to use more than one of these methods for any ++given multipath map, because the two concurrent methods may interact in ++unpredictable ways. + .SH "KNOWN ISSUES" + The usage of + .B queue_if_no_path +Index: multipath-tools-130222/multipathd/cli_handlers.c +=================================================================== +--- multipath-tools-130222.orig/multipathd/cli_handlers.c ++++ multipath-tools-130222/multipathd/cli_handlers.c +@@ -721,7 +721,7 @@ int resize_map(struct multipath *mpp, un + + mpp->size = size; + update_mpp_paths(mpp, vecs->pathvec); +- setup_map(mpp, params, PARAMS_SIZE); ++ setup_map(mpp, params, PARAMS_SIZE, vecs); + mpp->action = ACT_RESIZE; + if (domap(mpp, params) <= 0) { + condlog(0, "%s: failed to resize map : %s", mpp->alias, +Index: multipath-tools-130222/multipathd/main.c +=================================================================== +--- multipath-tools-130222.orig/multipathd/main.c ++++ multipath-tools-130222/multipathd/main.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include "main.h" + #include "pidfile.h" +@@ -274,7 +275,7 @@ retry: + mpp->action = ACT_RELOAD; + + extract_hwe_from_path(mpp); +- if (setup_map(mpp, params, PARAMS_SIZE)) { ++ if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup new map in update", mpp->alias); + retries = -1; + goto fail; +@@ -638,7 +639,7 @@ rescan: + /* + * push the map to the device-mapper + */ +- if (setup_map(mpp, params, PARAMS_SIZE)) { ++ if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup map for addition of new " + "path %s", mpp->alias, pp->dev); + goto fail_map; +@@ -771,7 +772,7 @@ ev_remove_path (struct path *pp, struct + */ + } + +- if (setup_map(mpp, params, PARAMS_SIZE)) { ++ if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup map for" + " removal of path %s", mpp->alias, pp->dev); + goto fail; +@@ -891,6 +892,41 @@ uev_update_path (struct uevent *uev, str + } + + static int ++uev_pathfail_check(struct uevent *uev, struct vectors *vecs) ++{ ++ char *action = NULL, *devt = NULL; ++ struct path *pp; ++ int r = 1; ++ ++ action = uevent_get_dm_action(uev); ++ if (!action) ++ return 1; ++ if (strncmp(action, "PATH_FAILED", 11)) ++ goto out; ++ devt = uevent_get_dm_path(uev); ++ if (!devt) { ++ condlog(3, "%s: No DM_PATH in uevent", uev->kernel); ++ goto out; ++ } ++ ++ pp = find_path_by_devt(vecs->pathvec, devt); ++ if (!pp) ++ goto out_devt; ++ r = io_err_stat_handle_pathfail(pp); ++ ++ if (r) ++ condlog(3, "io_err_stat: %s: cannot handle pathfail uevent", ++ pp->dev); ++out_devt: ++ FREE(devt); ++ FREE(action); ++ return r; ++out: ++ FREE(action); ++ return 1; ++} ++ ++static int + map_discovery (struct vectors * vecs) + { + struct multipath * mpp; +@@ -974,6 +1010,14 @@ uev_trigger (struct uevent * uev, void * + if (!strncmp(uev->kernel, "dm-", 3)) { + if (!strncmp(uev->action, "change", 6)) { + r = uev_add_map(uev, vecs); ++ ++ /* ++ * the kernel-side dm-mpath issues a PATH_FAILED event ++ * when it encounters a path IO error. 
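[Editor's note: multipath.conf.5 above describes marginal_path_err_rate_threshold as a permillage (1/1000) measured over probes sent at 10 per second for marginal_path_err_sample_time seconds. The actual bookkeeping lives in io_err_stat.c and is more involved; the snippet below is only a back-of-envelope illustration of that comparison, with made-up numbers.]

	#include <stdio.h>

	int main(void)
	{
		int sample_time = 120;                 /* marginal_path_err_sample_time (s) */
		int rate_hz = 10;                      /* fixed probe rate from the docs    */
		int total_ios = sample_time * rate_hz; /* 1200 probe IOs                    */
		int failed_ios = 30;                   /* hypothetical failures observed    */
		int threshold = 20;                    /* marginal_path_err_rate_threshold  */

		int err_rate = failed_ios * 1000 / total_ios;  /* permillage: 25 */

		if (err_rate > threshold)
			printf("err_rate %d/1000 > %d/1000: keep path failed, recheck after gap time\n",
			       err_rate, threshold);
		else
			printf("err_rate %d/1000 <= %d/1000: reinstate path\n", err_rate, threshold);
		return 0;
	}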
It is reason- ++ * able be the entry of path IO error accounting pro- ++ * cess. ++ */ ++ uev_pathfail_check(uev, vecs); + goto out; + } + if (!strncmp(uev->action, "remove", 6)) { +@@ -1405,6 +1449,17 @@ check_path (struct vectors * vecs, struc + return; + + if ((newstate == PATH_UP || newstate == PATH_GHOST) && ++ pp->io_err_disable_reinstate && need_io_err_check(pp)) { ++ pp->state = PATH_SHAKY; ++ /* ++ * to reschedule as soon as possible,so that this path can ++ * be recoverd in time ++ */ ++ pp->tick = 1; ++ return; ++ } ++ ++ if ((newstate == PATH_UP || newstate == PATH_GHOST) && + pp->wait_checks > 0) { + if (pp->mpp && pp->mpp->nr_active > 0) { + pp->state = PATH_DELAYED; +@@ -1955,6 +2010,7 @@ child (void * param) + setup_thread_attr(&misc_attr, 64 * 1024, 1); + setup_thread_attr(&uevent_attr, 128 * 1024, 1); + setup_thread_attr(&waiter_attr, 32 * 1024, 1); ++ setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0); + + if (logsink) { + setup_thread_attr(&log_attr, 64 * 1024, 0); +@@ -2097,6 +2153,8 @@ child (void * param) + */ + cleanup_checkers(); + cleanup_prio(); ++ stop_io_err_stat_thread(); ++ pthread_attr_destroy(&io_err_stat_attr); + + dm_lib_release(); + dm_lib_exit(); diff --git a/SOURCES/0256-RHBZ-1672175-retry-no-fd-paths.patch b/SOURCES/0256-RHBZ-1672175-retry-no-fd-paths.patch new file mode 100644 index 0000000..a4e9632 --- /dev/null +++ b/SOURCES/0256-RHBZ-1672175-retry-no-fd-paths.patch @@ -0,0 +1,215 @@ +--- + libmultipath/discovery.c | 10 ++++++-- + libmultipath/structs.h | 1 + libmultipath/structs_vec.c | 4 ++- + multipathd/main.c | 52 ++++++++++++++++++++++++++++++++------------- + 4 files changed, 49 insertions(+), 18 deletions(-) + +Index: multipath-tools-130222/libmultipath/discovery.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/discovery.c ++++ multipath-tools-130222/libmultipath/discovery.c +@@ -1425,10 +1425,13 @@ pathinfo (struct path *pp, vector hwtabl + pp->fd = open(udev_device_get_devnode(pp->udev), O_RDONLY); + + if (pp->fd < 0) { ++ pp->missing_udev_info = INFO_REINIT; + condlog(4, "Couldn't open node for %s: %s", + pp->dev, strerror(errno)); + goto blank; + } ++ if (pp->missing_udev_info == INFO_REINIT) ++ pp->missing_udev_info = INFO_OK; + + if (mask & DI_SERIAL) + get_geometry(pp); +@@ -1443,8 +1446,11 @@ pathinfo (struct path *pp, vector hwtabl + + if (mask & DI_CHECKER) { + if (path_state == PATH_UP) { +- pp->chkrstate = pp->state = get_state(pp, 0, +- path_state); ++ int newstate = get_state(pp, 0, path_state); ++ if (newstate != PATH_PENDING || ++ pp->state == PATH_UNCHECKED || ++ pp->state == PATH_WILD) ++ pp->chkrstate = pp->state = newstate; + if (pp->state == PATH_UNCHECKED || + pp->state == PATH_WILD) + goto blank; +Index: multipath-tools-130222/libmultipath/structs.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/structs.h ++++ multipath-tools-130222/libmultipath/structs.h +@@ -184,6 +184,7 @@ enum marginal_path_states { + + enum missing_udev_info_states { + INFO_OK, ++ INFO_REINIT, + INFO_MISSING, + INFO_REQUESTED, + }; +Index: multipath-tools-130222/multipathd/main.c +=================================================================== +--- multipath-tools-130222.orig/multipathd/main.c ++++ multipath-tools-130222/multipathd/main.c +@@ -1381,7 +1381,7 @@ int update_path_groups(struct multipath + return 0; + } + +-void ++int + check_path (struct vectors * vecs, struct path * pp) + { + int newstate; +@@ 
-1390,19 +1390,20 @@ check_path (struct vectors * vecs, struc + int disable_reinstate = 0; + int oldchkrstate = pp->chkrstate; + +- if (!pp->mpp && (pp->missing_udev_info != INFO_MISSING || +- pp->retriggers >= conf->retrigger_tries)) +- return; ++ if (!pp->mpp && pp->missing_udev_info != INFO_REINIT && ++ (pp->missing_udev_info != INFO_MISSING || ++ pp->retriggers >= conf->retrigger_tries)) ++ return 0; + + if (pp->tick && --pp->tick) +- return; /* don't check this path yet */ ++ return 0; /* don't check this path yet */ + +- if (!pp->mpp) { ++ if (!pp->mpp && pp->missing_udev_info == INFO_MISSING) { + pp->missing_udev_info = INFO_REQUESTED; + pp->retriggers++; + sysfs_attr_set_value(pp->udev, "uevent", "change", + strlen("change")); +- return; ++ return 0; + } + + /* +@@ -1412,6 +1413,21 @@ check_path (struct vectors * vecs, struc + pp->tick = conf->checkint; + + newstate = path_offline(pp); ++ if (!pp->mpp) { ++ if (newstate == PATH_UP && ++ pp->missing_udev_info == INFO_REINIT) { ++ int ret; ++ condlog(3, "%s: add missing path", pp->dev); ++ ret = pathinfo(pp, conf->hwtable, ++ DI_ALL | DI_BLACKLIST); ++ if (ret == PATHINFO_OK && strlen(pp->wwid)) { ++ ev_add_path(pp, vecs); ++ pp->tick = 1; ++ } else if (ret == PATHINFO_SKIPPED) ++ return -1; ++ } ++ return 0; ++ } + if (newstate == PATH_UP) + newstate = get_state(pp, 1, newstate); + else +@@ -1426,7 +1442,7 @@ check_path (struct vectors * vecs, struc + if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) { + condlog(2, "%s: unusable path", pp->dev); + pathinfo(pp, conf->hwtable, 0); +- return; ++ return 0; + } + /* + * Async IO in flight. Keep the previous path state +@@ -1434,7 +1450,7 @@ check_path (struct vectors * vecs, struc + */ + if (newstate == PATH_PENDING) { + pp->tick = 1; +- return; ++ return 0; + } + /* + * Synchronize with kernel state +@@ -1446,7 +1462,7 @@ check_path (struct vectors * vecs, struc + } + /* if update_multipath_strings orphaned the path, quit early */ + if (!pp->mpp) +- return; ++ return 0; + + if ((newstate == PATH_UP || newstate == PATH_GHOST) && + pp->io_err_disable_reinstate && need_io_err_check(pp)) { +@@ -1456,7 +1472,7 @@ check_path (struct vectors * vecs, struc + * be recoverd in time + */ + pp->tick = 1; +- return; ++ return 0; + } + + if ((newstate == PATH_UP || newstate == PATH_GHOST) && +@@ -1464,7 +1480,7 @@ check_path (struct vectors * vecs, struc + if (pp->mpp && pp->mpp->nr_active > 0) { + pp->state = PATH_DELAYED; + pp->wait_checks--; +- return; ++ return 0; + } else + pp->wait_checks = 0; + } +@@ -1512,7 +1528,7 @@ check_path (struct vectors * vecs, struc + pp->mpp->failback_tick = 0; + + pp->mpp->stat_path_failures++; +- return; ++ return 0; + } + + if(newstate == PATH_UP || newstate == PATH_GHOST){ +@@ -1594,7 +1610,7 @@ check_path (struct vectors * vecs, struc + + + if (pp->mpp->wait_for_udev) +- return; ++ return 0; + /* + * path prio refreshing + */ +@@ -1613,6 +1629,7 @@ check_path (struct vectors * vecs, struc + (chkr_new_path_up && followover_should_failback(pp))) + switch_pathgroup(pp->mpp); + } ++ return 0; + } + + static void * +@@ -1642,7 +1659,12 @@ checkerloop (void *ap) + + if (vecs->pathvec) { + vector_foreach_slot (vecs->pathvec, pp, i) { +- check_path(vecs, pp); ++ int rc = check_path(vecs, pp); ++ if (rc < 0) { ++ vector_del_slot(vecs->pathvec, i); ++ free_path(pp); ++ i--; ++ } + } + } + if (vecs->mpvec) { +Index: multipath-tools-130222/libmultipath/structs_vec.c +=================================================================== +--- 
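[Editor's note: the checkerloop change above removes a path from the pathvec when check_path() returns a negative value, stepping the index back so the element that slides into the freed slot is not skipped. The sketch below shows that delete-while-iterating pattern with a plain array standing in for the multipath 'vector' type; the values are made up.]

	#include <stdio.h>

	int main(void)
	{
		int paths[] = { 1, -1, 2, -1, 3 };  /* -1 marks "check_path returned < 0" */
		int n = 5;

		for (int i = 0; i < n; i++) {
			if (paths[i] < 0) {
				for (int j = i; j < n - 1; j++) /* vector_del_slot() equivalent */
					paths[j] = paths[j + 1];
				n--;
				i--;                            /* re-examine the slot we just filled */
				continue;
			}
			printf("checked path %d\n", paths[i]);
		}
		return 0;
	}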
multipath-tools-130222.orig/libmultipath/structs_vec.c ++++ multipath-tools-130222/libmultipath/structs_vec.c +@@ -274,9 +274,11 @@ void sync_paths(struct multipath *mpp, v + } + } + if (!found) { +- condlog(3, "%s dropped path %s", mpp->alias, pp->dev); ++ condlog(2, "%s dropped path %s", mpp->alias, pp->dev); + vector_del_slot(mpp->paths, i--); + orphan_path(pp); ++ memset(pp->wwid, 0, WWID_SIZE); ++ pp->missing_udev_info = INFO_REINIT; + } + } + update_mpp_paths(mpp, pathvec); diff --git a/SOURCES/0257-RHBZ-1679556-dont-check-dm-devices.patch b/SOURCES/0257-RHBZ-1679556-dont-check-dm-devices.patch new file mode 100644 index 0000000..bb1168b --- /dev/null +++ b/SOURCES/0257-RHBZ-1679556-dont-check-dm-devices.patch @@ -0,0 +1,16 @@ +--- + multipath/multipath.rules | 1 + + 1 file changed, 1 insertion(+) + +Index: multipath-tools-130222/multipath/multipath.rules +=================================================================== +--- multipath-tools-130222.orig/multipath/multipath.rules ++++ multipath-tools-130222/multipath/multipath.rules +@@ -15,6 +15,7 @@ LABEL="test_dev" + ENV{MPATH_SBIN_PATH}="/sbin" + TEST!="$env{MPATH_SBIN_PATH}/multipath", ENV{MPATH_SBIN_PATH}="/usr/sbin" + TEST!="/etc/multipath.conf", GOTO="check_kpartx" ++KERNEL=="dm-*", GOTO="check_kpartx" + + ACTION=="add", ENV{DM_MULTIPATH_DEVICE_PATH}!="1", \ + PROGRAM=="$env{MPATH_SBIN_PATH}/multipath -c $tempnode", \ diff --git a/SOURCES/0258-RHBZ-1634183-ANA-prioritizer.patch b/SOURCES/0258-RHBZ-1634183-ANA-prioritizer.patch new file mode 100644 index 0000000..5c87985 --- /dev/null +++ b/SOURCES/0258-RHBZ-1634183-ANA-prioritizer.patch @@ -0,0 +1,3458 @@ +--- + Makefile.inc | 1 + libmultipath/Makefile | 7 + libmultipath/hwtable.c | 1 + libmultipath/nvme-ioctl.c | 869 ++++++++++++++++++++ + libmultipath/nvme-ioctl.h | 139 +++ + libmultipath/nvme-lib.c | 49 + + libmultipath/nvme-lib.h | 39 + libmultipath/nvme/argconfig.h | 99 ++ + libmultipath/nvme/json.h | 87 ++ + libmultipath/nvme/linux/nvme.h | 1450 +++++++++++++++++++++++++++++++++++ + libmultipath/nvme/linux/nvme_ioctl.h | 67 + + libmultipath/nvme/nvme.h | 163 +++ + libmultipath/nvme/plugin.h | 36 + libmultipath/prio.h | 1 + libmultipath/prioritizers/Makefile | 4 + libmultipath/prioritizers/ana.c | 236 +++++ + libmultipath/propsel.c | 10 + libmultipath/util.h | 2 + multipath/multipath.conf.5 | 3 + 19 files changed, 3258 insertions(+), 5 deletions(-) + +Index: multipath-tools-130222/libmultipath/nvme/argconfig.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme/argconfig.h +@@ -0,0 +1,99 @@ ++//////////////////////////////////////////////////////////////////////// ++// ++// Copyright 2014 PMC-Sierra, Inc. ++// ++// This program is free software; you can redistribute it and/or ++// modify it under the terms of the GNU General Public License ++// as published by the Free Software Foundation; either version 2 ++// of the License, or (at your option) any later version. ++// ++// This program is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++// ++// You should have received a copy of the GNU General Public License ++// along with this program; if not, write to the Free Software ++// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
++// ++//////////////////////////////////////////////////////////////////////// ++ ++//////////////////////////////////////////////////////////////////////// ++// ++// Author: Logan Gunthorpe ++// Logan Gunthorpe ++// ++// Date: Oct 23 2014 ++// ++// Description: ++// Header file for argconfig.c ++// ++//////////////////////////////////////////////////////////////////////// ++ ++#ifndef argconfig_H ++#define argconfig_H ++ ++#include ++#include ++#include ++ ++enum argconfig_types { ++ CFG_NONE, ++ CFG_STRING, ++ CFG_INT, ++ CFG_SIZE, ++ CFG_LONG, ++ CFG_LONG_SUFFIX, ++ CFG_DOUBLE, ++ CFG_BOOL, ++ CFG_BYTE, ++ CFG_SHORT, ++ CFG_POSITIVE, ++ CFG_INCREMENT, ++ CFG_SUBOPTS, ++ CFG_FILE_A, ++ CFG_FILE_W, ++ CFG_FILE_R, ++ CFG_FILE_AP, ++ CFG_FILE_WP, ++ CFG_FILE_RP, ++}; ++ ++struct argconfig_commandline_options { ++ const char *option; ++ const char short_option; ++ const char *meta; ++ enum argconfig_types config_type; ++ void *default_value; ++ int argument_type; ++ const char *help; ++}; ++ ++#define CFG_MAX_SUBOPTS 500 ++#define MAX_HELP_FUNC 20 ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef void argconfig_help_func(void); ++void argconfig_append_usage(const char *str); ++void argconfig_print_help(const char *program_desc, ++ const struct argconfig_commandline_options *options); ++int argconfig_parse(int argc, char *argv[], const char *program_desc, ++ const struct argconfig_commandline_options *options, ++ void *config_out, size_t config_size); ++int argconfig_parse_subopt_string(char *string, char **options, ++ size_t max_options); ++unsigned argconfig_parse_comma_sep_array(char *string, int *ret, ++ unsigned max_length); ++unsigned argconfig_parse_comma_sep_array_long(char *string, ++ unsigned long long *ret, ++ unsigned max_length); ++void argconfig_register_help_func(argconfig_help_func * f); ++ ++void print_word_wrapped(const char *s, int indent, int start); ++#ifdef __cplusplus ++} ++#endif ++#endif +Index: multipath-tools-130222/libmultipath/nvme/json.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme/json.h +@@ -0,0 +1,87 @@ ++#ifndef __JSON__H ++#define __JSON__H ++ ++struct json_object; ++struct json_array; ++struct json_pair; ++ ++#define JSON_TYPE_STRING 0 ++#define JSON_TYPE_INTEGER 1 ++#define JSON_TYPE_FLOAT 2 ++#define JSON_TYPE_OBJECT 3 ++#define JSON_TYPE_ARRAY 4 ++#define JSON_TYPE_UINT 5 ++#define JSON_PARENT_TYPE_PAIR 0 ++#define JSON_PARENT_TYPE_ARRAY 1 ++struct json_value { ++ int type; ++ union { ++ long long integer_number; ++ unsigned long long uint_number; ++ long double float_number; ++ char *string; ++ struct json_object *object; ++ struct json_array *array; ++ }; ++ int parent_type; ++ union { ++ struct json_pair *parent_pair; ++ struct json_array *parent_array; ++ }; ++}; ++ ++struct json_array { ++ struct json_value **values; ++ int value_cnt; ++ struct json_value *parent; ++}; ++ ++struct json_object { ++ struct json_pair **pairs; ++ int pair_cnt; ++ struct json_value *parent; ++}; ++ ++struct json_pair { ++ char *name; ++ struct json_value *value; ++ struct json_object *parent; ++}; ++ ++struct json_object *json_create_object(void); ++struct json_array *json_create_array(void); ++ ++void json_free_object(struct json_object *obj); ++ ++int json_object_add_value_type(struct json_object *obj, const char *name, int type, ...); ++#define json_object_add_value_int(obj, name, val) \ ++ json_object_add_value_type((obj), name, JSON_TYPE_INTEGER, (long long) (val)) 
++#define json_object_add_value_uint(obj, name, val) \ ++ json_object_add_value_type((obj), name, JSON_TYPE_UINT, (unsigned long long) (val)) ++#define json_object_add_value_float(obj, name, val) \ ++ json_object_add_value_type((obj), name, JSON_TYPE_FLOAT, (val)) ++#define json_object_add_value_string(obj, name, val) \ ++ json_object_add_value_type((obj), name, JSON_TYPE_STRING, (val)) ++#define json_object_add_value_object(obj, name, val) \ ++ json_object_add_value_type((obj), name, JSON_TYPE_OBJECT, (val)) ++#define json_object_add_value_array(obj, name, val) \ ++ json_object_add_value_type((obj), name, JSON_TYPE_ARRAY, (val)) ++int json_array_add_value_type(struct json_array *array, int type, ...); ++#define json_array_add_value_int(obj, val) \ ++ json_array_add_value_type((obj), JSON_TYPE_INTEGER, (val)) ++#define json_array_add_value_uint(obj, val) \ ++ json_array_add_value_type((obj), JSON_TYPE_UINT, (val)) ++#define json_array_add_value_float(obj, val) \ ++ json_array_add_value_type((obj), JSON_TYPE_FLOAT, (val)) ++#define json_array_add_value_string(obj, val) \ ++ json_array_add_value_type((obj), JSON_TYPE_STRING, (val)) ++#define json_array_add_value_object(obj, val) \ ++ json_array_add_value_type((obj), JSON_TYPE_OBJECT, (val)) ++#define json_array_add_value_array(obj, val) \ ++ json_array_add_value_type((obj), JSON_TYPE_ARRAY, (val)) ++ ++#define json_array_last_value_object(obj) \ ++ (obj->values[obj->value_cnt - 1]->object) ++ ++void json_print_object(struct json_object *obj, void *); ++#endif +Index: multipath-tools-130222/libmultipath/nvme/nvme.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme/nvme.h +@@ -0,0 +1,163 @@ ++/* ++ * Definitions for the NVM Express interface ++ * Copyright (c) 2011-2014, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ */ ++ ++#ifndef _NVME_H ++#define _NVME_H ++ ++#include ++#include ++#include ++#include "plugin.h" ++#include "json.h" ++ ++#define unlikely(x) x ++ ++#ifdef LIBUUID ++#include ++#else ++typedef struct { ++ uint8_t b[16]; ++} uuid_t; ++#endif ++ ++#include "linux/nvme.h" ++ ++struct nvme_effects_log_page { ++ __le32 acs[256]; ++ __le32 iocs[256]; ++ __u8 resv[2048]; ++}; ++ ++struct nvme_error_log_page { ++ __u64 error_count; ++ __u16 sqid; ++ __u16 cmdid; ++ __u16 status_field; ++ __u16 parm_error_location; ++ __u64 lba; ++ __u32 nsid; ++ __u8 vs; ++ __u8 resv[3]; ++ __u64 cs; ++ __u8 resv2[24]; ++}; ++ ++struct nvme_firmware_log_page { ++ __u8 afi; ++ __u8 resv[7]; ++ __u64 frs[7]; ++ __u8 resv2[448]; ++}; ++ ++/* idle and active power scales occupy the last 2 bits of the field */ ++#define POWER_SCALE(s) ((s) >> 6) ++ ++struct nvme_host_mem_buffer { ++ __u32 hsize; ++ __u32 hmdlal; ++ __u32 hmdlau; ++ __u32 hmdlec; ++ __u8 rsvd16[4080]; ++}; ++ ++struct nvme_auto_pst { ++ __u32 data; ++ __u32 rsvd32; ++}; ++ ++struct nvme_timestamp { ++ __u8 timestamp[6]; ++ __u8 attr; ++ __u8 rsvd; ++}; ++ ++struct nvme_controller_list { ++ __le16 num; ++ __le16 identifier[]; ++}; ++ ++struct nvme_bar_cap { ++ __u16 mqes; ++ __u8 ams_cqr; ++ __u8 to; ++ __u16 bps_css_nssrs_dstrd; ++ __u8 mpsmax_mpsmin; ++ __u8 reserved; ++}; ++ ++#ifdef __CHECKER__ ++#define __force __attribute__((force)) ++#else ++#define __force ++#endif ++ ++#define cpu_to_le16(x) \ ++ ((__force __le16)htole16(x)) ++#define cpu_to_le32(x) \ ++ ((__force __le32)htole32(x)) ++#define cpu_to_le64(x) \ ++ ((__force __le64)htole64(x)) ++ ++#define le16_to_cpu(x) \ ++ le16toh((__force __u16)(x)) ++#define le32_to_cpu(x) \ ++ le32toh((__force __u32)(x)) ++#define le64_to_cpu(x) \ ++ le64toh((__force __u64)(x)) ++ ++#define MAX_LIST_ITEMS 256 ++struct list_item { ++ char node[1024]; ++ struct nvme_id_ctrl ctrl; ++ int nsid; ++ struct nvme_id_ns ns; ++ unsigned block; ++}; ++ ++struct ctrl_list_item { ++ char *name; ++ char *address; ++ char *transport; ++ char *state; ++ char *ana_state; ++}; ++ ++struct subsys_list_item { ++ char *name; ++ char *subsysnqn; ++ int nctrls; ++ struct ctrl_list_item *ctrls; ++}; ++ ++enum { ++ NORMAL, ++ JSON, ++ BINARY, ++}; ++ ++void register_extension(struct plugin *plugin); ++ ++#include "argconfig.h" ++int parse_and_open(int argc, char **argv, const char *desc, ++ const struct argconfig_commandline_options *clo, void *cfg, size_t size); ++ ++extern const char *devicename; ++ ++int __id_ctrl(int argc, char **argv, struct command *cmd, struct plugin *plugin, void (*vs)(__u8 *vs, struct json_object *root)); ++int validate_output_format(char *format); ++ ++struct subsys_list_item *get_subsys_list(int *subcnt, char *subsysnqn, __u32 nsid); ++void free_subsys_list(struct subsys_list_item *slist, int n); ++char *nvme_char_from_block(char *block); ++#endif /* _NVME_H */ +Index: multipath-tools-130222/libmultipath/nvme/plugin.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme/plugin.h +@@ -0,0 +1,36 @@ ++#ifndef PLUGIN_H ++#define PLUGIN_H ++ ++#include ++ ++struct program { ++ const char *name; ++ const char *version; ++ const char *usage; ++ const char *desc; ++ const char *more; ++ struct command **commands; ++ struct plugin *extensions; ++}; ++ ++struct plugin { ++ const char *name; ++ const char *desc; ++ struct command **commands; ++ struct program *parent; ++ struct plugin *next; ++ struct plugin *tail; ++}; ++ ++struct 
command { ++ char *name; ++ char *help; ++ int (*fn)(int argc, char **argv, struct command *command, struct plugin *plugin); ++ char *alias; ++}; ++ ++void usage(struct plugin *plugin); ++void general_help(struct plugin *plugin); ++int handle_plugin(int argc, char **argv, struct plugin *plugin); ++ ++#endif +Index: multipath-tools-130222/libmultipath/nvme/linux/nvme.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme/linux/nvme.h +@@ -0,0 +1,1450 @@ ++/* ++ * Definitions for the NVM Express interface ++ * Copyright (c) 2011-2014, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ */ ++ ++#ifndef _LINUX_NVME_H ++#define _LINUX_NVME_H ++ ++#include ++#include ++ ++/* NQN names in commands fields specified one size */ ++#define NVMF_NQN_FIELD_LEN 256 ++ ++/* However the max length of a qualified name is another size */ ++#define NVMF_NQN_SIZE 223 ++ ++#define NVMF_TRSVCID_SIZE 32 ++#define NVMF_TRADDR_SIZE 256 ++#define NVMF_TSAS_SIZE 256 ++ ++#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" ++ ++#define NVME_RDMA_IP_PORT 4420 ++ ++#define NVME_NSID_ALL 0xffffffff ++ ++enum nvme_subsys_type { ++ NVME_NQN_DISC = 1, /* Discovery type target subsystem */ ++ NVME_NQN_NVME = 2, /* NVME type target subsystem */ ++}; ++ ++/* Address Family codes for Discovery Log Page entry ADRFAM field */ ++enum { ++ NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ ++ NVMF_ADDR_FAMILY_IP4 = 1, /* IP4 */ ++ NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */ ++ NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */ ++ NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */ ++}; ++ ++/* Transport Type codes for Discovery Log Page entry TRTYPE field */ ++enum { ++ NVMF_TRTYPE_RDMA = 1, /* RDMA */ ++ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ ++ NVMF_TRTYPE_TCP = 3, /* TCP */ ++ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */ ++ NVMF_TRTYPE_MAX, ++}; ++ ++/* Transport Requirements codes for Discovery Log Page entry TREQ field */ ++enum { ++ NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ ++ NVMF_TREQ_REQUIRED = 1, /* Required */ ++ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ ++ NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* SQ flow control disable supported */ ++}; ++ ++/* RDMA QP Service Type codes for Discovery Log Page entry TSAS ++ * RDMA_QPTYPE field ++ */ ++enum { ++ NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ ++ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ ++}; ++ ++/* RDMA QP Service Type codes for Discovery Log Page entry TSAS ++ * RDMA_QPTYPE field ++ */ ++enum { ++ NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ ++ NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */ ++ NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */ ++ NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */ ++ NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */ ++}; ++ ++/* RDMA Connection Management Service Type codes for Discovery Log Page ++ * entry TSAS RDMA_CMS field ++ */ ++enum { ++ NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ ++}; ++ ++/* TCP port security type for Discovery Log Page entry TSAS ++ */ ++enum { ++ 
NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ ++ NVMF_TCP_SECTYPE_TLS = 1, /* Transport Layer Security */ ++}; ++ ++#define NVME_AQ_DEPTH 32 ++#define NVME_NR_AEN_COMMANDS 1 ++#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) ++ ++/* ++ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See ++ * NVM-Express 1.2 specification, section 4.1.2. ++ */ ++#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1) ++ ++enum { ++ NVME_REG_CAP = 0x0000, /* Controller Capabilities */ ++ NVME_REG_VS = 0x0008, /* Version */ ++ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */ ++ NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */ ++ NVME_REG_CC = 0x0014, /* Controller Configuration */ ++ NVME_REG_CSTS = 0x001c, /* Controller Status */ ++ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */ ++ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ ++ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ ++ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ ++ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ ++ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ ++ NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */ ++ NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */ ++ NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer Location */ ++ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ ++}; ++ ++#define NVME_CAP_MQES(cap) ((cap) & 0xffff) ++#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) ++#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) ++#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1) ++#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) ++#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) ++ ++#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) ++#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) ++#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff) ++#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf) ++ ++#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10) ++#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8) ++#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4) ++#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2) ++#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1) ++ ++/* ++ * Submission and Completion Queue Entry Sizes for the NVM command set. ++ * (In bytes and specified as a power of two (2^n)). 
++ */ ++#define NVME_NVM_IOSQES 6 ++#define NVME_NVM_IOCQES 4 ++ ++enum { ++ NVME_CC_ENABLE = 1 << 0, ++ NVME_CC_CSS_NVM = 0 << 4, ++ NVME_CC_EN_SHIFT = 0, ++ NVME_CC_CSS_SHIFT = 4, ++ NVME_CC_MPS_SHIFT = 7, ++ NVME_CC_AMS_SHIFT = 11, ++ NVME_CC_SHN_SHIFT = 14, ++ NVME_CC_IOSQES_SHIFT = 16, ++ NVME_CC_IOCQES_SHIFT = 20, ++ NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, ++ NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, ++ NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, ++ NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, ++ NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, ++ NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, ++ NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, ++ NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, ++ NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, ++ NVME_CSTS_RDY = 1 << 0, ++ NVME_CSTS_CFS = 1 << 1, ++ NVME_CSTS_NSSRO = 1 << 4, ++ NVME_CSTS_PP = 1 << 5, ++ NVME_CSTS_SHST_NORMAL = 0 << 2, ++ NVME_CSTS_SHST_OCCUR = 1 << 2, ++ NVME_CSTS_SHST_CMPLT = 2 << 2, ++ NVME_CSTS_SHST_MASK = 3 << 2, ++}; ++ ++struct nvme_id_power_state { ++ __le16 max_power; /* centiwatts */ ++ __u8 rsvd2; ++ __u8 flags; ++ __le32 entry_lat; /* microseconds */ ++ __le32 exit_lat; /* microseconds */ ++ __u8 read_tput; ++ __u8 read_lat; ++ __u8 write_tput; ++ __u8 write_lat; ++ __le16 idle_power; ++ __u8 idle_scale; ++ __u8 rsvd19; ++ __le16 active_power; ++ __u8 active_work_scale; ++ __u8 rsvd23[9]; ++}; ++ ++enum { ++ NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, ++ NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, ++}; ++ ++struct nvme_id_ctrl { ++ __le16 vid; ++ __le16 ssvid; ++ char sn[20]; ++ char mn[40]; ++ char fr[8]; ++ __u8 rab; ++ __u8 ieee[3]; ++ __u8 cmic; ++ __u8 mdts; ++ __le16 cntlid; ++ __le32 ver; ++ __le32 rtd3r; ++ __le32 rtd3e; ++ __le32 oaes; ++ __le32 ctratt; ++ __le16 rrls; ++ __u8 rsvd102[154]; ++ __le16 oacs; ++ __u8 acl; ++ __u8 aerl; ++ __u8 frmw; ++ __u8 lpa; ++ __u8 elpe; ++ __u8 npss; ++ __u8 avscc; ++ __u8 apsta; ++ __le16 wctemp; ++ __le16 cctemp; ++ __le16 mtfa; ++ __le32 hmpre; ++ __le32 hmmin; ++ __u8 tnvmcap[16]; ++ __u8 unvmcap[16]; ++ __le32 rpmbs; ++ __le16 edstt; ++ __u8 dsto; ++ __u8 fwug; ++ __le16 kas; ++ __le16 hctma; ++ __le16 mntmt; ++ __le16 mxtmt; ++ __le32 sanicap; ++ __le32 hmminds; ++ __le16 hmmaxd; ++ __le16 nsetidmax; ++ __u8 rsvd340[2]; ++ __u8 anatt; ++ __u8 anacap; ++ __le32 anagrpmax; ++ __le32 nanagrpid; ++ __u8 rsvd352[160]; ++ __u8 sqes; ++ __u8 cqes; ++ __le16 maxcmd; ++ __le32 nn; ++ __le16 oncs; ++ __le16 fuses; ++ __u8 fna; ++ __u8 vwc; ++ __le16 awun; ++ __le16 awupf; ++ __u8 nvscc; ++ __u8 nwpc; ++ __le16 acwu; ++ __u8 rsvd534[2]; ++ __le32 sgls; ++ __le32 mnan; ++ __u8 rsvd544[224]; ++ char subnqn[256]; ++ __u8 rsvd1024[768]; ++ __le32 ioccsz; ++ __le32 iorcsz; ++ __le16 icdoff; ++ __u8 ctrattr; ++ __u8 msdbd; ++ __u8 rsvd1804[244]; ++ struct nvme_id_power_state psd[32]; ++ __u8 vs[1024]; ++}; ++ ++enum { ++ NVME_CTRL_ONCS_COMPARE = 1 << 0, ++ NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, ++ NVME_CTRL_ONCS_DSM = 1 << 2, ++ NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, ++ NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, ++ NVME_CTRL_VWC_PRESENT = 1 << 0, ++ NVME_CTRL_OACS_SEC_SUPP = 1 << 0, ++ NVME_CTRL_OACS_DIRECTIVES = 1 << 5, ++ NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, ++ NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, ++ NVME_CTRL_CTRATT_128_ID = 1 << 0, ++ NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1, ++ NVME_CTRL_CTRATT_NVM_SETS = 1 << 2, ++ NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3, ++ NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4, ++ NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, ++}; ++ ++struct nvme_lbaf { 
++ __le16 ms; ++ __u8 ds; ++ __u8 rp; ++}; ++ ++struct nvme_id_ns { ++ __le64 nsze; ++ __le64 ncap; ++ __le64 nuse; ++ __u8 nsfeat; ++ __u8 nlbaf; ++ __u8 flbas; ++ __u8 mc; ++ __u8 dpc; ++ __u8 dps; ++ __u8 nmic; ++ __u8 rescap; ++ __u8 fpi; ++ __u8 dlfeat; ++ __le16 nawun; ++ __le16 nawupf; ++ __le16 nacwu; ++ __le16 nabsn; ++ __le16 nabo; ++ __le16 nabspf; ++ __le16 noiob; ++ __u8 nvmcap[16]; ++ __u8 rsvd64[28]; ++ __le32 anagrpid; ++ __u8 rsvd96[3]; ++ __u8 nsattr; ++ __le16 nvmsetid; ++ __le16 endgid; ++ __u8 nguid[16]; ++ __u8 eui64[8]; ++ struct nvme_lbaf lbaf[16]; ++ __u8 rsvd192[192]; ++ __u8 vs[3712]; ++}; ++ ++enum { ++ NVME_ID_CNS_NS = 0x00, ++ NVME_ID_CNS_CTRL = 0x01, ++ NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, ++ NVME_ID_CNS_NS_DESC_LIST = 0x03, ++ NVME_ID_CNS_NVMSET_LIST = 0x04, ++ NVME_ID_CNS_NS_PRESENT_LIST = 0x10, ++ NVME_ID_CNS_NS_PRESENT = 0x11, ++ NVME_ID_CNS_CTRL_NS_LIST = 0x12, ++ NVME_ID_CNS_CTRL_LIST = 0x13, ++}; ++ ++enum { ++ NVME_DIR_IDENTIFY = 0x00, ++ NVME_DIR_STREAMS = 0x01, ++ NVME_DIR_SND_ID_OP_ENABLE = 0x01, ++ NVME_DIR_SND_ST_OP_REL_ID = 0x01, ++ NVME_DIR_SND_ST_OP_REL_RSC = 0x02, ++ NVME_DIR_RCV_ID_OP_PARAM = 0x01, ++ NVME_DIR_RCV_ST_OP_PARAM = 0x01, ++ NVME_DIR_RCV_ST_OP_STATUS = 0x02, ++ NVME_DIR_RCV_ST_OP_RESOURCE = 0x03, ++ NVME_DIR_ENDIR = 0x01, ++}; ++ ++enum { ++ NVME_NS_FEAT_THIN = 1 << 0, ++ NVME_NS_FLBAS_LBA_MASK = 0xf, ++ NVME_NS_FLBAS_META_EXT = 0x10, ++ NVME_LBAF_RP_BEST = 0, ++ NVME_LBAF_RP_BETTER = 1, ++ NVME_LBAF_RP_GOOD = 2, ++ NVME_LBAF_RP_DEGRADED = 3, ++ NVME_NS_DPC_PI_LAST = 1 << 4, ++ NVME_NS_DPC_PI_FIRST = 1 << 3, ++ NVME_NS_DPC_PI_TYPE3 = 1 << 2, ++ NVME_NS_DPC_PI_TYPE2 = 1 << 1, ++ NVME_NS_DPC_PI_TYPE1 = 1 << 0, ++ NVME_NS_DPS_PI_FIRST = 1 << 3, ++ NVME_NS_DPS_PI_MASK = 0x7, ++ NVME_NS_DPS_PI_TYPE1 = 1, ++ NVME_NS_DPS_PI_TYPE2 = 2, ++ NVME_NS_DPS_PI_TYPE3 = 3, ++}; ++ ++struct nvme_ns_id_desc { ++ __u8 nidt; ++ __u8 nidl; ++ __le16 reserved; ++}; ++ ++#define NVME_NIDT_EUI64_LEN 8 ++#define NVME_NIDT_NGUID_LEN 16 ++#define NVME_NIDT_UUID_LEN 16 ++ ++enum { ++ NVME_NIDT_EUI64 = 0x01, ++ NVME_NIDT_NGUID = 0x02, ++ NVME_NIDT_UUID = 0x03, ++}; ++ ++#define NVME_MAX_NVMSET 31 ++ ++struct nvme_nvmset_attr_entry { ++ __le16 id; ++ __le16 endurance_group_id; ++ __u8 rsvd4[4]; ++ __le32 random_4k_read_typical; ++ __le32 opt_write_size; ++ __u8 total_nvmset_cap[16]; ++ __u8 unalloc_nvmset_cap[16]; ++ __u8 rsvd48[80]; ++}; ++ ++struct nvme_id_nvmset { ++ __u8 nid; ++ __u8 rsvd1[127]; ++ struct nvme_nvmset_attr_entry ent[NVME_MAX_NVMSET]; ++}; ++ ++/* Derived from 1.3a Figure 101: Get Log Page – Telemetry Host ++ * -Initiated Log (Log Identifier 07h) ++ */ ++struct nvme_telemetry_log_page_hdr { ++ __u8 lpi; /* Log page identifier */ ++ __u8 rsvd[4]; ++ __u8 iee_oui[3]; ++ __u16 dalb1; /* Data area 1 last block */ ++ __u16 dalb2; /* Data area 2 last block */ ++ __u16 dalb3; /* Data area 3 last block */ ++ __u8 rsvd1[368]; /* TODO verify */ ++ __u8 ctrlavail; /* Controller initiated data avail?*/ ++ __u8 ctrldgn; /* Controller initiated telemetry Data Gen # */ ++ __u8 rsnident[128]; ++ /* We'll have to double fetch so we can get the header, ++ * parse dalb1->3 determine how much size we need for the ++ * log then alloc below. Or just do a secondary non-struct ++ * allocation. 
++ */ ++ __u8 telemetry_dataarea[0]; ++}; ++ ++struct nvme_endurance_group_log { ++ __u32 rsvd0; ++ __u8 avl_spare_threshold; ++ __u8 percent_used; ++ __u8 rsvd6[26]; ++ __u8 endurance_estimate[16]; ++ __u8 data_units_read[16]; ++ __u8 data_units_written[16]; ++ __u8 media_units_written[16]; ++ __u8 rsvd96[416]; ++}; ++ ++struct nvme_smart_log { ++ __u8 critical_warning; ++ __u8 temperature[2]; ++ __u8 avail_spare; ++ __u8 spare_thresh; ++ __u8 percent_used; ++ __u8 rsvd6[26]; ++ __u8 data_units_read[16]; ++ __u8 data_units_written[16]; ++ __u8 host_reads[16]; ++ __u8 host_writes[16]; ++ __u8 ctrl_busy_time[16]; ++ __u8 power_cycles[16]; ++ __u8 power_on_hours[16]; ++ __u8 unsafe_shutdowns[16]; ++ __u8 media_errors[16]; ++ __u8 num_err_log_entries[16]; ++ __le32 warning_temp_time; ++ __le32 critical_comp_time; ++ __le16 temp_sensor[8]; ++ __le32 thm_temp1_trans_count; ++ __le32 thm_temp2_trans_count; ++ __le32 thm_temp1_total_time; ++ __le32 thm_temp2_total_time; ++ __u8 rsvd232[280]; ++}; ++ ++struct nvme_self_test_res { ++ __u8 device_self_test_status; ++ __u8 segment_num; ++ __u8 valid_diagnostic_info; ++ __u8 rsvd; ++ __le64 power_on_hours; ++ __le32 nsid; ++ __le64 failing_lba; ++ __u8 status_code_type; ++ __u8 status_code; ++ __u8 vendor_specific[2]; ++} __attribute__((packed)); ++ ++struct nvme_self_test_log { ++ __u8 crnt_dev_selftest_oprn; ++ __u8 crnt_dev_selftest_compln; ++ __u8 rsvd[2]; ++ struct nvme_self_test_res result[20]; ++} __attribute__((packed)); ++ ++struct nvme_fw_slot_info_log { ++ __u8 afi; ++ __u8 rsvd1[7]; ++ __le64 frs[7]; ++ __u8 rsvd64[448]; ++}; ++ ++/* NVMe Namespace Write Protect State */ ++enum { ++ NVME_NS_NO_WRITE_PROTECT = 0, ++ NVME_NS_WRITE_PROTECT, ++ NVME_NS_WRITE_PROTECT_POWER_CYCLE, ++ NVME_NS_WRITE_PROTECT_PERMANENT, ++}; ++ ++#define NVME_MAX_CHANGED_NAMESPACES 1024 ++ ++struct nvme_changed_ns_list_log { ++ __le32 log[NVME_MAX_CHANGED_NAMESPACES]; ++}; ++ ++enum { ++ NVME_CMD_EFFECTS_CSUPP = 1 << 0, ++ NVME_CMD_EFFECTS_LBCC = 1 << 1, ++ NVME_CMD_EFFECTS_NCC = 1 << 2, ++ NVME_CMD_EFFECTS_NIC = 1 << 3, ++ NVME_CMD_EFFECTS_CCC = 1 << 4, ++ NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, ++}; ++ ++struct nvme_effects_log { ++ __le32 acs[256]; ++ __le32 iocs[256]; ++ __u8 resv[2048]; ++}; ++ ++enum nvme_ana_state { ++ NVME_ANA_OPTIMIZED = 0x01, ++ NVME_ANA_NONOPTIMIZED = 0x02, ++ NVME_ANA_INACCESSIBLE = 0x03, ++ NVME_ANA_PERSISTENT_LOSS = 0x04, ++ NVME_ANA_CHANGE = 0x0f, ++}; ++ ++struct nvme_ana_group_desc { ++ __le32 grpid; ++ __le32 nnsids; ++ __le64 chgcnt; ++ __u8 state; ++ __u8 rsvd17[15]; ++ __le32 nsids[]; ++}; ++ ++/* flag for the log specific field of the ANA log */ ++#define NVME_ANA_LOG_RGO (1 << 0) ++ ++struct nvme_ana_rsp_hdr { ++ __le64 chgcnt; ++ __le16 ngrps; ++ __le16 rsvd10[3]; ++}; ++ ++enum { ++ NVME_SMART_CRIT_SPARE = 1 << 0, ++ NVME_SMART_CRIT_TEMPERATURE = 1 << 1, ++ NVME_SMART_CRIT_RELIABILITY = 1 << 2, ++ NVME_SMART_CRIT_MEDIA = 1 << 3, ++ NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, ++}; ++ ++enum { ++ NVME_AER_ERROR = 0, ++ NVME_AER_SMART = 1, ++ NVME_AER_CSS = 6, ++ NVME_AER_VS = 7, ++ NVME_AER_NOTICE_NS_CHANGED = 0x0002, ++ NVME_AER_NOTICE_ANA = 0x0003, ++ NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, ++}; ++ ++struct nvme_lba_range_type { ++ __u8 type; ++ __u8 attributes; ++ __u8 rsvd2[14]; ++ __u64 slba; ++ __u64 nlb; ++ __u8 guid[16]; ++ __u8 rsvd48[16]; ++}; ++ ++enum { ++ NVME_LBART_TYPE_FS = 0x01, ++ NVME_LBART_TYPE_RAID = 0x02, ++ NVME_LBART_TYPE_CACHE = 0x03, ++ NVME_LBART_TYPE_SWAP = 0x04, ++ ++ NVME_LBART_ATTRIB_TEMP = 1 << 0, 
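[Editor's note: the ANA structures above (enum nvme_ana_state, struct nvme_ana_group_desc, struct nvme_ana_rsp_hdr) are what the new ana prioritizer consumes: it reads the ANA log page and ranks a path by the state of the group its namespace belongs to. prioritizers/ana.c itself is not shown in this excerpt, so the priority values below are purely illustrative, not the ones the real prio routine returns; only the enum values are taken from linux/nvme.h above.]

	#include <stdio.h>

	enum nvme_ana_state {
		NVME_ANA_OPTIMIZED       = 0x01,
		NVME_ANA_NONOPTIMIZED    = 0x02,
		NVME_ANA_INACCESSIBLE    = 0x03,
		NVME_ANA_PERSISTENT_LOSS = 0x04,
		NVME_ANA_CHANGE          = 0x0f,
	};

	static int ana_state_to_prio(enum nvme_ana_state state)
	{
		switch (state) {
		case NVME_ANA_OPTIMIZED:       return 50; /* preferred pathgroup        */
		case NVME_ANA_NONOPTIMIZED:    return 10; /* usable, but not preferred  */
		case NVME_ANA_INACCESSIBLE:
		case NVME_ANA_PERSISTENT_LOSS: return 0;  /* do not use                 */
		case NVME_ANA_CHANGE:
		default:                       return 1;  /* transitioning / unknown    */
		}
	}

	int main(void)
	{
		printf("optimized -> %d, non-optimized -> %d\n",
		       ana_state_to_prio(NVME_ANA_OPTIMIZED),
		       ana_state_to_prio(NVME_ANA_NONOPTIMIZED));
		return 0;
	}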
++ NVME_LBART_ATTRIB_HIDE = 1 << 1, ++}; ++ ++struct nvme_plm_config { ++ __u16 enable_event; ++ __u8 rsvd2[30]; ++ __u64 dtwin_reads_thresh; ++ __u64 dtwin_writes_thresh; ++ __u64 dtwin_time_thresh; ++ __u8 rsvd56[456]; ++}; ++ ++struct nvme_reservation_status { ++ __le32 gen; ++ __u8 rtype; ++ __u8 regctl[2]; ++ __u8 resv5[2]; ++ __u8 ptpls; ++ __u8 resv10[13]; ++ struct { ++ __le16 cntlid; ++ __u8 rcsts; ++ __u8 resv3[5]; ++ __le64 hostid; ++ __le64 rkey; ++ } regctl_ds[]; ++}; ++ ++struct nvme_reservation_status_ext { ++ __le32 gen; ++ __u8 rtype; ++ __u8 regctl[2]; ++ __u8 resv5[2]; ++ __u8 ptpls; ++ __u8 resv10[14]; ++ __u8 resv24[40]; ++ struct { ++ __le16 cntlid; ++ __u8 rcsts; ++ __u8 resv3[5]; ++ __le64 rkey; ++ __u8 hostid[16]; ++ __u8 resv32[32]; ++ } regctl_eds[]; ++}; ++ ++enum nvme_async_event_type { ++ NVME_AER_TYPE_ERROR = 0, ++ NVME_AER_TYPE_SMART = 1, ++ NVME_AER_TYPE_NOTICE = 2, ++}; ++ ++/* I/O commands */ ++ ++enum nvme_opcode { ++ nvme_cmd_flush = 0x00, ++ nvme_cmd_write = 0x01, ++ nvme_cmd_read = 0x02, ++ nvme_cmd_write_uncor = 0x04, ++ nvme_cmd_compare = 0x05, ++ nvme_cmd_write_zeroes = 0x08, ++ nvme_cmd_dsm = 0x09, ++ nvme_cmd_resv_register = 0x0d, ++ nvme_cmd_resv_report = 0x0e, ++ nvme_cmd_resv_acquire = 0x11, ++ nvme_cmd_resv_release = 0x15, ++}; ++ ++/* ++ * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier ++ * ++ * @NVME_SGL_FMT_ADDRESS: absolute address of the data block ++ * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block ++ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA ++ * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation ++ * request subtype ++ */ ++enum { ++ NVME_SGL_FMT_ADDRESS = 0x00, ++ NVME_SGL_FMT_OFFSET = 0x01, ++ NVME_SGL_FMT_TRANSPORT_A = 0x0A, ++ NVME_SGL_FMT_INVALIDATE = 0x0f, ++}; ++ ++/* ++ * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier ++ * ++ * For struct nvme_sgl_desc: ++ * @NVME_SGL_FMT_DATA_DESC: data block descriptor ++ * @NVME_SGL_FMT_SEG_DESC: sgl segment descriptor ++ * @NVME_SGL_FMT_LAST_SEG_DESC: last sgl segment descriptor ++ * ++ * For struct nvme_keyed_sgl_desc: ++ * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor ++ * ++ * Transport-specific SGL types: ++ * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data dlock descriptor ++ */ ++enum { ++ NVME_SGL_FMT_DATA_DESC = 0x00, ++ NVME_SGL_FMT_SEG_DESC = 0x02, ++ NVME_SGL_FMT_LAST_SEG_DESC = 0x03, ++ NVME_KEY_SGL_FMT_DATA_DESC = 0x04, ++ NVME_TRANSPORT_SGL_DATA_DESC = 0x05, ++}; ++ ++struct nvme_sgl_desc { ++ __le64 addr; ++ __le32 length; ++ __u8 rsvd[3]; ++ __u8 type; ++}; ++ ++struct nvme_keyed_sgl_desc { ++ __le64 addr; ++ __u8 length[3]; ++ __u8 key[4]; ++ __u8 type; ++}; ++ ++union nvme_data_ptr { ++ struct { ++ __le64 prp1; ++ __le64 prp2; ++ }; ++ struct nvme_sgl_desc sgl; ++ struct nvme_keyed_sgl_desc ksgl; ++}; ++ ++/* ++ * Lowest two bits of our flags field (FUSE field in the spec): ++ * ++ * @NVME_CMD_FUSE_FIRST: Fused Operation, first command ++ * @NVME_CMD_FUSE_SECOND: Fused Operation, second command ++ * ++ * Highest two bits in our flags field (PSDT field in the spec): ++ * ++ * @NVME_CMD_PSDT_SGL_METABUF: Use SGLS for this transfer, ++ * If used, MPTR contains addr of single physical buffer (byte aligned). ++ * @NVME_CMD_PSDT_SGL_METASEG: Use SGLS for this transfer, ++ * If used, MPTR contains an address of an SGL segment containing ++ * exactly 1 SGL descriptor (qword aligned). 
++ */ ++enum { ++ NVME_CMD_FUSE_FIRST = (1 << 0), ++ NVME_CMD_FUSE_SECOND = (1 << 1), ++ ++ NVME_CMD_SGL_METABUF = (1 << 6), ++ NVME_CMD_SGL_METASEG = (1 << 7), ++ NVME_CMD_SGL_ALL = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG, ++}; ++ ++struct nvme_common_command { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __le32 cdw2[2]; ++ __le64 metadata; ++ union nvme_data_ptr dptr; ++ __le32 cdw10[6]; ++}; ++ ++struct nvme_rw_command { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2; ++ __le64 metadata; ++ union nvme_data_ptr dptr; ++ __le64 slba; ++ __le16 length; ++ __le16 control; ++ __le32 dsmgmt; ++ __le32 reftag; ++ __le16 apptag; ++ __le16 appmask; ++}; ++ ++enum { ++ NVME_RW_LR = 1 << 15, ++ NVME_RW_FUA = 1 << 14, ++ NVME_RW_DEAC = 1 << 9, ++ NVME_RW_DSM_FREQ_UNSPEC = 0, ++ NVME_RW_DSM_FREQ_TYPICAL = 1, ++ NVME_RW_DSM_FREQ_RARE = 2, ++ NVME_RW_DSM_FREQ_READS = 3, ++ NVME_RW_DSM_FREQ_WRITES = 4, ++ NVME_RW_DSM_FREQ_RW = 5, ++ NVME_RW_DSM_FREQ_ONCE = 6, ++ NVME_RW_DSM_FREQ_PREFETCH = 7, ++ NVME_RW_DSM_FREQ_TEMP = 8, ++ NVME_RW_DSM_LATENCY_NONE = 0 << 4, ++ NVME_RW_DSM_LATENCY_IDLE = 1 << 4, ++ NVME_RW_DSM_LATENCY_NORM = 2 << 4, ++ NVME_RW_DSM_LATENCY_LOW = 3 << 4, ++ NVME_RW_DSM_SEQ_REQ = 1 << 6, ++ NVME_RW_DSM_COMPRESSED = 1 << 7, ++ NVME_RW_PRINFO_PRCHK_REF = 1 << 10, ++ NVME_RW_PRINFO_PRCHK_APP = 1 << 11, ++ NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, ++ NVME_RW_PRINFO_PRACT = 1 << 13, ++ NVME_RW_DTYPE_STREAMS = 1 << 4, ++}; ++ ++struct nvme_dsm_cmd { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2[2]; ++ union nvme_data_ptr dptr; ++ __le32 nr; ++ __le32 attributes; ++ __u32 rsvd12[4]; ++}; ++ ++enum { ++ NVME_DSMGMT_IDR = 1 << 0, ++ NVME_DSMGMT_IDW = 1 << 1, ++ NVME_DSMGMT_AD = 1 << 2, ++}; ++ ++#define NVME_DSM_MAX_RANGES 256 ++ ++struct nvme_dsm_range { ++ __le32 cattr; ++ __le32 nlb; ++ __le64 slba; ++}; ++ ++struct nvme_write_zeroes_cmd { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2; ++ __le64 metadata; ++ union nvme_data_ptr dptr; ++ __le64 slba; ++ __le16 length; ++ __le16 control; ++ __le32 dsmgmt; ++ __le32 reftag; ++ __le16 apptag; ++ __le16 appmask; ++}; ++ ++/* Features */ ++ ++struct nvme_feat_auto_pst { ++ __le64 entries[32]; ++}; ++ ++enum { ++ NVME_HOST_MEM_ENABLE = (1 << 0), ++ NVME_HOST_MEM_RETURN = (1 << 1), ++}; ++ ++/* Admin commands */ ++ ++enum nvme_admin_opcode { ++ nvme_admin_delete_sq = 0x00, ++ nvme_admin_create_sq = 0x01, ++ nvme_admin_get_log_page = 0x02, ++ nvme_admin_delete_cq = 0x04, ++ nvme_admin_create_cq = 0x05, ++ nvme_admin_identify = 0x06, ++ nvme_admin_abort_cmd = 0x08, ++ nvme_admin_set_features = 0x09, ++ nvme_admin_get_features = 0x0a, ++ nvme_admin_async_event = 0x0c, ++ nvme_admin_ns_mgmt = 0x0d, ++ nvme_admin_activate_fw = 0x10, ++ nvme_admin_download_fw = 0x11, ++ nvme_admin_dev_self_test = 0x14, ++ nvme_admin_ns_attach = 0x15, ++ nvme_admin_keep_alive = 0x18, ++ nvme_admin_directive_send = 0x19, ++ nvme_admin_directive_recv = 0x1a, ++ nvme_admin_virtual_mgmt = 0x1c, ++ nvme_admin_nvme_mi_send = 0x1d, ++ nvme_admin_nvme_mi_recv = 0x1e, ++ nvme_admin_dbbuf = 0x7C, ++ nvme_admin_format_nvm = 0x80, ++ nvme_admin_security_send = 0x81, ++ nvme_admin_security_recv = 0x82, ++ nvme_admin_sanitize_nvm = 0x84, ++}; ++ ++enum { ++ NVME_QUEUE_PHYS_CONTIG = (1 << 0), ++ NVME_CQ_IRQ_ENABLED = (1 << 1), ++ NVME_SQ_PRIO_URGENT = (0 << 1), ++ NVME_SQ_PRIO_HIGH = (1 << 1), ++ NVME_SQ_PRIO_MEDIUM = (2 << 1), ++ NVME_SQ_PRIO_LOW = (3 << 
1), ++ NVME_FEAT_ARBITRATION = 0x01, ++ NVME_FEAT_POWER_MGMT = 0x02, ++ NVME_FEAT_LBA_RANGE = 0x03, ++ NVME_FEAT_TEMP_THRESH = 0x04, ++ NVME_FEAT_ERR_RECOVERY = 0x05, ++ NVME_FEAT_VOLATILE_WC = 0x06, ++ NVME_FEAT_NUM_QUEUES = 0x07, ++ NVME_FEAT_IRQ_COALESCE = 0x08, ++ NVME_FEAT_IRQ_CONFIG = 0x09, ++ NVME_FEAT_WRITE_ATOMIC = 0x0a, ++ NVME_FEAT_ASYNC_EVENT = 0x0b, ++ NVME_FEAT_AUTO_PST = 0x0c, ++ NVME_FEAT_HOST_MEM_BUF = 0x0d, ++ NVME_FEAT_TIMESTAMP = 0x0e, ++ NVME_FEAT_KATO = 0x0f, ++ NVME_FEAT_HCTM = 0X10, ++ NVME_FEAT_NOPSC = 0X11, ++ NVME_FEAT_RRL = 0x12, ++ NVME_FEAT_PLM_CONFIG = 0x13, ++ NVME_FEAT_PLM_WINDOW = 0x14, ++ NVME_FEAT_SW_PROGRESS = 0x80, ++ NVME_FEAT_HOST_ID = 0x81, ++ NVME_FEAT_RESV_MASK = 0x82, ++ NVME_FEAT_RESV_PERSIST = 0x83, ++ NVME_FEAT_WRITE_PROTECT = 0x84, ++ NVME_LOG_ERROR = 0x01, ++ NVME_LOG_SMART = 0x02, ++ NVME_LOG_FW_SLOT = 0x03, ++ NVME_LOG_CHANGED_NS = 0x04, ++ NVME_LOG_CMD_EFFECTS = 0x05, ++ NVME_LOG_DEVICE_SELF_TEST = 0x06, ++ NVME_LOG_TELEMETRY_HOST = 0x07, ++ NVME_LOG_TELEMETRY_CTRL = 0x08, ++ NVME_LOG_ENDURANCE_GROUP = 0x09, ++ NVME_LOG_ANA = 0x0c, ++ NVME_LOG_DISC = 0x70, ++ NVME_LOG_RESERVATION = 0x80, ++ NVME_LOG_SANITIZE = 0x81, ++ NVME_FWACT_REPL = (0 << 3), ++ NVME_FWACT_REPL_ACTV = (1 << 3), ++ NVME_FWACT_ACTV = (2 << 3), ++}; ++ ++enum { ++ NVME_NO_LOG_LSP = 0x0, ++ NVME_NO_LOG_LPO = 0x0, ++ NVME_LOG_ANA_LSP_RGO = 0x1, ++ NVME_TELEM_LSP_CREATE = 0x1, ++}; ++ ++/* Sanitize and Sanitize Monitor/Log */ ++enum { ++ /* Sanitize */ ++ NVME_SANITIZE_NO_DEALLOC = 0x00000200, ++ NVME_SANITIZE_OIPBP = 0x00000100, ++ NVME_SANITIZE_OWPASS_SHIFT = 0x00000004, ++ NVME_SANITIZE_AUSE = 0x00000008, ++ NVME_SANITIZE_ACT_CRYPTO_ERASE = 0x00000004, ++ NVME_SANITIZE_ACT_OVERWRITE = 0x00000003, ++ NVME_SANITIZE_ACT_BLOCK_ERASE = 0x00000002, ++ NVME_SANITIZE_ACT_EXIT = 0x00000001, ++ ++ /* Sanitize Monitor/Log */ ++ NVME_SANITIZE_LOG_DATA_LEN = 0x0014, ++ NVME_SANITIZE_LOG_GLOBAL_DATA_ERASED = 0x0100, ++ NVME_SANITIZE_LOG_NUM_CMPLTED_PASS_MASK = 0x00F8, ++ NVME_SANITIZE_LOG_STATUS_MASK = 0x0007, ++ NVME_SANITIZE_LOG_NEVER_SANITIZED = 0x0000, ++ NVME_SANITIZE_LOG_COMPLETED_SUCCESS = 0x0001, ++ NVME_SANITIZE_LOG_IN_PROGESS = 0x0002, ++ NVME_SANITIZE_LOG_COMPLETED_FAILED = 0x0003, ++}; ++ ++enum { ++ /* Self-test log Validation bits */ ++ NVME_SELF_TEST_VALID_NSID = 1 << 0, ++ NVME_SELF_TEST_VALID_FLBA = 1 << 1, ++ NVME_SELF_TEST_VALID_SCT = 1 << 2, ++ NVME_SELF_TEST_VALID_SC = 1 << 3, ++ NVME_SELF_TEST_REPORTS = 20, ++}; ++ ++struct nvme_identify { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2[2]; ++ union nvme_data_ptr dptr; ++ __u8 cns; ++ __u8 rsvd3; ++ __le16 ctrlid; ++ __u32 rsvd11[5]; ++}; ++ ++#define NVME_IDENTIFY_DATA_SIZE 4096 ++ ++struct nvme_features { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2[2]; ++ union nvme_data_ptr dptr; ++ __le32 fid; ++ __le32 dword11; ++ __le32 dword12; ++ __le32 dword13; ++ __le32 dword14; ++ __le32 dword15; ++}; ++ ++struct nvme_host_mem_buf_desc { ++ __le64 addr; ++ __le32 size; ++ __u32 rsvd; ++}; ++ ++struct nvme_create_cq { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __u32 rsvd1[5]; ++ __le64 prp1; ++ __u64 rsvd8; ++ __le16 cqid; ++ __le16 qsize; ++ __le16 cq_flags; ++ __le16 irq_vector; ++ __u32 rsvd12[4]; ++}; ++ ++struct nvme_create_sq { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __u32 rsvd1[5]; ++ __le64 prp1; ++ __u64 rsvd8; ++ __le16 sqid; ++ __le16 qsize; ++ __le16 sq_flags; ++ __le16 cqid; ++ __u32 rsvd12[4]; ++}; ++ 
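/*
 * Illustrative sketch only, not part of this patch: how the Identify
 * definitions above are typically consumed.  An Identify Controller
 * command is an admin passthru command with CNS = NVME_ID_CNS_CTRL in
 * cdw10 and a 4096-byte (NVME_IDENTIFY_DATA_SIZE) data buffer, issued
 * through the NVME_IOCTL_ADMIN_CMD ioctl from linux/nvme_ioctl.h (the
 * kernel uapi header; a local copy is added later in this patch).
 * identify_ctrl_sketch() is a hypothetical name and error handling is
 * reduced to the bare ioctl return value.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvme_ioctl.h>

int identify_ctrl_sketch(int fd, void *buf /* 4096 bytes */)
{
	struct nvme_admin_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode   = 0x06;	/* nvme_admin_identify */
	cmd.addr     = (uint64_t)(uintptr_t)buf;
	cmd.data_len = 4096;	/* NVME_IDENTIFY_DATA_SIZE */
	cmd.cdw10    = 0x01;	/* NVME_ID_CNS_CTRL */

	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
}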
++struct nvme_delete_queue { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __u32 rsvd1[9]; ++ __le16 qid; ++ __u16 rsvd10; ++ __u32 rsvd11[5]; ++}; ++ ++struct nvme_abort_cmd { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __u32 rsvd1[9]; ++ __le16 sqid; ++ __u16 cid; ++ __u32 rsvd11[5]; ++}; ++ ++struct nvme_download_firmware { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __u32 rsvd1[5]; ++ union nvme_data_ptr dptr; ++ __le32 numd; ++ __le32 offset; ++ __u32 rsvd12[4]; ++}; ++ ++struct nvme_format_cmd { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2[4]; ++ __le32 cdw10; ++ __u32 rsvd11[5]; ++}; ++ ++struct nvme_get_log_page_command { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2[2]; ++ union nvme_data_ptr dptr; ++ __u8 lid; ++ __u8 lsp; ++ __le16 numdl; ++ __le16 numdu; ++ __u16 rsvd11; ++ __le32 lpol; ++ __le32 lpou; ++ __u32 rsvd14[2]; ++}; ++ ++struct nvme_directive_cmd { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __le32 nsid; ++ __u64 rsvd2[2]; ++ union nvme_data_ptr dptr; ++ __le32 numd; ++ __u8 doper; ++ __u8 dtype; ++ __le16 dspec; ++ __u8 endir; ++ __u8 tdtype; ++ __u16 rsvd15; ++ ++ __u32 rsvd16[3]; ++}; ++ ++/* Sanitize Log Page */ ++struct nvme_sanitize_log_page { ++ __le16 progress; ++ __le16 status; ++ __le32 cdw10_info; ++ __le32 est_ovrwrt_time; ++ __le32 est_blk_erase_time; ++ __le32 est_crypto_erase_time; ++}; ++ ++/* ++ * Fabrics subcommands. ++ */ ++enum nvmf_fabrics_opcode { ++ nvme_fabrics_command = 0x7f, ++}; ++ ++enum nvmf_capsule_command { ++ nvme_fabrics_type_property_set = 0x00, ++ nvme_fabrics_type_connect = 0x01, ++ nvme_fabrics_type_property_get = 0x04, ++}; ++ ++struct nvmf_common_command { ++ __u8 opcode; ++ __u8 resv1; ++ __u16 command_id; ++ __u8 fctype; ++ __u8 resv2[35]; ++ __u8 ts[24]; ++}; ++ ++/* ++ * The legal cntlid range a NVMe Target will provide. ++ * Note that cntlid of value 0 is considered illegal in the fabrics world. ++ * Devices based on earlier specs did not have the subsystem concept; ++ * therefore, those devices had their cntlid value set to 0 as a result. 
++ */ ++#define NVME_CNTLID_MIN 1 ++#define NVME_CNTLID_MAX 0xffef ++#define NVME_CNTLID_DYNAMIC 0xffff ++ ++#define MAX_DISC_LOGS 255 ++ ++/* Discovery log page entry */ ++struct nvmf_disc_rsp_page_entry { ++ __u8 trtype; ++ __u8 adrfam; ++ __u8 subtype; ++ __u8 treq; ++ __le16 portid; ++ __le16 cntlid; ++ __le16 asqsz; ++ __u8 resv8[22]; ++ char trsvcid[NVMF_TRSVCID_SIZE]; ++ __u8 resv64[192]; ++ char subnqn[NVMF_NQN_FIELD_LEN]; ++ char traddr[NVMF_TRADDR_SIZE]; ++ union tsas { ++ char common[NVMF_TSAS_SIZE]; ++ struct rdma { ++ __u8 qptype; ++ __u8 prtype; ++ __u8 cms; ++ __u8 resv3[5]; ++ __u16 pkey; ++ __u8 resv10[246]; ++ } rdma; ++ struct tcp { ++ __u8 sectype; ++ } tcp; ++ } tsas; ++}; ++ ++/* Discovery log page header */ ++struct nvmf_disc_rsp_page_hdr { ++ __le64 genctr; ++ __le64 numrec; ++ __le16 recfmt; ++ __u8 resv14[1006]; ++ struct nvmf_disc_rsp_page_entry entries[0]; ++}; ++ ++struct nvmf_connect_command { ++ __u8 opcode; ++ __u8 resv1; ++ __u16 command_id; ++ __u8 fctype; ++ __u8 resv2[19]; ++ union nvme_data_ptr dptr; ++ __le16 recfmt; ++ __le16 qid; ++ __le16 sqsize; ++ __u8 cattr; ++ __u8 resv3; ++ __le32 kato; ++ __u8 resv4[12]; ++}; ++ ++struct nvmf_connect_data { ++ uuid_t hostid; ++ __le16 cntlid; ++ char resv4[238]; ++ char subsysnqn[NVMF_NQN_FIELD_LEN]; ++ char hostnqn[NVMF_NQN_FIELD_LEN]; ++ char resv5[256]; ++}; ++ ++struct nvmf_property_set_command { ++ __u8 opcode; ++ __u8 resv1; ++ __u16 command_id; ++ __u8 fctype; ++ __u8 resv2[35]; ++ __u8 attrib; ++ __u8 resv3[3]; ++ __le32 offset; ++ __le64 value; ++ __u8 resv4[8]; ++}; ++ ++struct nvmf_property_get_command { ++ __u8 opcode; ++ __u8 resv1; ++ __u16 command_id; ++ __u8 fctype; ++ __u8 resv2[35]; ++ __u8 attrib; ++ __u8 resv3[3]; ++ __le32 offset; ++ __u8 resv4[16]; ++}; ++ ++struct nvme_dbbuf { ++ __u8 opcode; ++ __u8 flags; ++ __u16 command_id; ++ __u32 rsvd1[5]; ++ __le64 prp1; ++ __le64 prp2; ++ __u32 rsvd12[6]; ++}; ++ ++struct streams_directive_params { ++ __le16 msl; ++ __le16 nssa; ++ __le16 nsso; ++ __u8 rsvd[10]; ++ __le32 sws; ++ __le16 sgs; ++ __le16 nsa; ++ __le16 nso; ++ __u8 rsvd2[6]; ++}; ++ ++struct nvme_command { ++ union { ++ struct nvme_common_command common; ++ struct nvme_rw_command rw; ++ struct nvme_identify identify; ++ struct nvme_features features; ++ struct nvme_create_cq create_cq; ++ struct nvme_create_sq create_sq; ++ struct nvme_delete_queue delete_queue; ++ struct nvme_download_firmware dlfw; ++ struct nvme_format_cmd format; ++ struct nvme_dsm_cmd dsm; ++ struct nvme_write_zeroes_cmd write_zeroes; ++ struct nvme_abort_cmd abort; ++ struct nvme_get_log_page_command get_log_page; ++ struct nvmf_common_command fabrics; ++ struct nvmf_connect_command connect; ++ struct nvmf_property_set_command prop_set; ++ struct nvmf_property_get_command prop_get; ++ struct nvme_dbbuf dbbuf; ++ struct nvme_directive_cmd directive; ++ }; ++}; ++ ++static inline bool nvme_is_write(struct nvme_command *cmd) ++{ ++ /* ++ * What a mess... ++ * ++ * Why can't we simply have a Fabrics In and Fabrics out command? 
++ */ ++ if (unlikely(cmd->common.opcode == nvme_fabrics_command)) ++ return cmd->fabrics.fctype & 1; ++ return cmd->common.opcode & 1; ++} ++ ++enum { ++ /* ++ * Generic Command Status: ++ */ ++ NVME_SC_SUCCESS = 0x0, ++ NVME_SC_INVALID_OPCODE = 0x1, ++ NVME_SC_INVALID_FIELD = 0x2, ++ NVME_SC_CMDID_CONFLICT = 0x3, ++ NVME_SC_DATA_XFER_ERROR = 0x4, ++ NVME_SC_POWER_LOSS = 0x5, ++ NVME_SC_INTERNAL = 0x6, ++ NVME_SC_ABORT_REQ = 0x7, ++ NVME_SC_ABORT_QUEUE = 0x8, ++ NVME_SC_FUSED_FAIL = 0x9, ++ NVME_SC_FUSED_MISSING = 0xa, ++ NVME_SC_INVALID_NS = 0xb, ++ NVME_SC_CMD_SEQ_ERROR = 0xc, ++ NVME_SC_SGL_INVALID_LAST = 0xd, ++ NVME_SC_SGL_INVALID_COUNT = 0xe, ++ NVME_SC_SGL_INVALID_DATA = 0xf, ++ NVME_SC_SGL_INVALID_METADATA = 0x10, ++ NVME_SC_SGL_INVALID_TYPE = 0x11, ++ ++ NVME_SC_SGL_INVALID_OFFSET = 0x16, ++ NVME_SC_SGL_INVALID_SUBTYPE = 0x17, ++ ++ NVME_SC_SANITIZE_FAILED = 0x1C, ++ NVME_SC_SANITIZE_IN_PROGRESS = 0x1D, ++ ++ NVME_SC_NS_WRITE_PROTECTED = 0x20, ++ ++ NVME_SC_LBA_RANGE = 0x80, ++ NVME_SC_CAP_EXCEEDED = 0x81, ++ NVME_SC_NS_NOT_READY = 0x82, ++ NVME_SC_RESERVATION_CONFLICT = 0x83, ++ ++ /* ++ * Command Specific Status: ++ */ ++ NVME_SC_CQ_INVALID = 0x100, ++ NVME_SC_QID_INVALID = 0x101, ++ NVME_SC_QUEUE_SIZE = 0x102, ++ NVME_SC_ABORT_LIMIT = 0x103, ++ NVME_SC_ABORT_MISSING = 0x104, ++ NVME_SC_ASYNC_LIMIT = 0x105, ++ NVME_SC_FIRMWARE_SLOT = 0x106, ++ NVME_SC_FIRMWARE_IMAGE = 0x107, ++ NVME_SC_INVALID_VECTOR = 0x108, ++ NVME_SC_INVALID_LOG_PAGE = 0x109, ++ NVME_SC_INVALID_FORMAT = 0x10a, ++ NVME_SC_FW_NEEDS_CONV_RESET = 0x10b, ++ NVME_SC_INVALID_QUEUE = 0x10c, ++ NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, ++ NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, ++ NVME_SC_FEATURE_NOT_PER_NS = 0x10f, ++ NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, ++ NVME_SC_FW_NEEDS_RESET = 0x111, ++ NVME_SC_FW_NEEDS_MAX_TIME = 0x112, ++ NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, ++ NVME_SC_OVERLAPPING_RANGE = 0x114, ++ NVME_SC_NS_INSUFFICENT_CAP = 0x115, ++ NVME_SC_NS_ID_UNAVAILABLE = 0x116, ++ NVME_SC_NS_ALREADY_ATTACHED = 0x118, ++ NVME_SC_NS_IS_PRIVATE = 0x119, ++ NVME_SC_NS_NOT_ATTACHED = 0x11a, ++ NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, ++ NVME_SC_CTRL_LIST_INVALID = 0x11c, ++ NVME_SC_BP_WRITE_PROHIBITED = 0x11e, ++ ++ /* ++ * I/O Command Set Specific - NVM commands: ++ */ ++ NVME_SC_BAD_ATTRIBUTES = 0x180, ++ NVME_SC_INVALID_PI = 0x181, ++ NVME_SC_READ_ONLY = 0x182, ++ NVME_SC_ONCS_NOT_SUPPORTED = 0x183, ++ ++ /* ++ * I/O Command Set Specific - Fabrics commands: ++ */ ++ NVME_SC_CONNECT_FORMAT = 0x180, ++ NVME_SC_CONNECT_CTRL_BUSY = 0x181, ++ NVME_SC_CONNECT_INVALID_PARAM = 0x182, ++ NVME_SC_CONNECT_RESTART_DISC = 0x183, ++ NVME_SC_CONNECT_INVALID_HOST = 0x184, ++ ++ NVME_SC_DISCOVERY_RESTART = 0x190, ++ NVME_SC_AUTH_REQUIRED = 0x191, ++ ++ /* ++ * Media and Data Integrity Errors: ++ */ ++ NVME_SC_WRITE_FAULT = 0x280, ++ NVME_SC_READ_ERROR = 0x281, ++ NVME_SC_GUARD_CHECK = 0x282, ++ NVME_SC_APPTAG_CHECK = 0x283, ++ NVME_SC_REFTAG_CHECK = 0x284, ++ NVME_SC_COMPARE_FAILED = 0x285, ++ NVME_SC_ACCESS_DENIED = 0x286, ++ NVME_SC_UNWRITTEN_BLOCK = 0x287, ++ ++ /* ++ * Path-related Errors: ++ */ ++ NVME_SC_ANA_PERSISTENT_LOSS = 0x301, ++ NVME_SC_ANA_INACCESSIBLE = 0x302, ++ NVME_SC_ANA_TRANSITION = 0x303, ++ ++ NVME_SC_DNR = 0x4000, ++}; ++ ++struct nvme_completion { ++ /* ++ * Used by Admin and Fabrics commands to return data: ++ */ ++ union nvme_result { ++ __le16 u16; ++ __le32 u32; ++ __le64 u64; ++ } result; ++ __le16 sq_head; /* how much of this queue may be reclaimed */ ++ __le16 sq_id; /* submission queue that generated this 
entry */ ++ __u16 command_id; /* of the command which completed */ ++ __le16 status; /* did the command fail, and if so, why? */ ++}; ++ ++#define NVME_VS(major, minor, tertiary) \ ++ (((major) << 16) | ((minor) << 8) | (tertiary)) ++ ++#define NVME_MAJOR(ver) ((ver) >> 16) ++#define NVME_MINOR(ver) (((ver) >> 8) & 0xff) ++#define NVME_TERTIARY(ver) ((ver) & 0xff) ++ ++#endif /* _LINUX_NVME_H */ +Index: multipath-tools-130222/libmultipath/nvme/linux/nvme_ioctl.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme/linux/nvme_ioctl.h +@@ -0,0 +1,67 @@ ++/* ++ * Definitions for the NVM Express ioctl interface ++ * Copyright (c) 2011-2014, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ */ ++ ++#ifndef _UAPI_LINUX_NVME_IOCTL_H ++#define _UAPI_LINUX_NVME_IOCTL_H ++ ++#include ++#include ++ ++struct nvme_user_io { ++ __u8 opcode; ++ __u8 flags; ++ __u16 control; ++ __u16 nblocks; ++ __u16 rsvd; ++ __u64 metadata; ++ __u64 addr; ++ __u64 slba; ++ __u32 dsmgmt; ++ __u32 reftag; ++ __u16 apptag; ++ __u16 appmask; ++}; ++ ++struct nvme_passthru_cmd { ++ __u8 opcode; ++ __u8 flags; ++ __u16 rsvd1; ++ __u32 nsid; ++ __u32 cdw2; ++ __u32 cdw3; ++ __u64 metadata; ++ __u64 addr; ++ __u32 metadata_len; ++ __u32 data_len; ++ __u32 cdw10; ++ __u32 cdw11; ++ __u32 cdw12; ++ __u32 cdw13; ++ __u32 cdw14; ++ __u32 cdw15; ++ __u32 timeout_ms; ++ __u32 result; ++}; ++ ++#define nvme_admin_cmd nvme_passthru_cmd ++ ++#define NVME_IOCTL_ID _IO('N', 0x40) ++#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) ++#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) ++#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) ++#define NVME_IOCTL_RESET _IO('N', 0x44) ++#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) ++#define NVME_IOCTL_RESCAN _IO('N', 0x46) ++ ++#endif /* _UAPI_LINUX_NVME_IOCTL_H */ +Index: multipath-tools-130222/Makefile.inc +=================================================================== +--- multipath-tools-130222.orig/Makefile.inc ++++ multipath-tools-130222/Makefile.inc +@@ -37,6 +37,7 @@ mpathpersistdir = $(TOPDIR)/libmpathpers + includedir = $(prefix)/usr/include + mpathcmddir = $(TOPDIR)/libmpathcmd + libdmmpdir = $(TOPDIR)/libdmmp ++nvmedir = $(TOPDIR)/libmultipath/nvme + pkgconfdir = $(prefix)/usr/$(LIB)/pkgconfig + + GZIP = /bin/gzip -9 -c +Index: multipath-tools-130222/libmultipath/Makefile +=================================================================== +--- multipath-tools-130222.orig/libmultipath/Makefile ++++ multipath-tools-130222/libmultipath/Makefile +@@ -8,7 +8,7 @@ SONAME=0 + DEVLIB = libmultipath.so + LIBS = $(DEVLIB).$(SONAME) + LIBDEPS = -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -laio +-CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir) ++CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir) -I$(nvmedir) + + OBJS = memory.o parser.o vector.o devmapper.o \ + hwtable.o blacklist.o util.o dmparser.o config.o \ +@@ -17,7 +17,7 @@ OBJS = memory.o parser.o vector.o devmap + switchgroup.o uxsock.o print.o 
alias.o log_pthread.o \ + log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \ + lock.o waiter.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \ +- io_err_stat.o ++ io_err_stat.o nvme-lib.o + + LIBDM_API_FLUSH = $(shell grep -Ecs '^[a-z]*[[:space:]]+dm_task_no_flush' /usr/include/libdevmapper.h) + +@@ -46,6 +46,9 @@ endif + + all: $(LIBS) + ++nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h ++ $(CC) $(CFLAGS) -Wno-unused-function -c -o $@ $< ++ + $(LIBS): $(OBJS) + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ $(CFLAGS) -o $@ $(OBJS) $(LIBDEPS) + ln -sf $@ $(DEVLIB) +Index: multipath-tools-130222/libmultipath/nvme-ioctl.c +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme-ioctl.c +@@ -0,0 +1,869 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "nvme-ioctl.h" ++ ++static int nvme_verify_chr(int fd) ++{ ++ static struct stat nvme_stat; ++ int err = fstat(fd, &nvme_stat); ++ ++ if (err < 0) { ++ perror("fstat"); ++ return errno; ++ } ++ if (!S_ISCHR(nvme_stat.st_mode)) { ++ fprintf(stderr, ++ "Error: requesting reset on non-controller handle\n"); ++ return ENOTBLK; ++ } ++ return 0; ++} ++ ++static int nvme_subsystem_reset(int fd) ++{ ++ int ret; ++ ++ ret = nvme_verify_chr(fd); ++ if (ret) ++ return ret; ++ return ioctl(fd, NVME_IOCTL_SUBSYS_RESET); ++} ++ ++static int nvme_reset_controller(int fd) ++{ ++ int ret; ++ ++ ret = nvme_verify_chr(fd); ++ if (ret) ++ return ret; ++ return ioctl(fd, NVME_IOCTL_RESET); ++} ++ ++static int nvme_ns_rescan(int fd) ++{ ++ int ret; ++ ++ ret = nvme_verify_chr(fd); ++ if (ret) ++ return ret; ++ return ioctl(fd, NVME_IOCTL_RESCAN); ++} ++ ++static int nvme_get_nsid(int fd) ++{ ++ static struct stat nvme_stat; ++ int err = fstat(fd, &nvme_stat); ++ ++ if (err < 0) ++ return -errno; ++ ++ if (!S_ISBLK(nvme_stat.st_mode)) { ++ fprintf(stderr, ++ "Error: requesting namespace-id from non-block device\n"); ++ errno = ENOTBLK; ++ return -errno; ++ } ++ return ioctl(fd, NVME_IOCTL_ID); ++} ++ ++static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd, ++ struct nvme_passthru_cmd *cmd) ++{ ++ return ioctl(fd, ioctl_cmd, cmd); ++} ++ ++static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd) ++{ ++ return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd); ++} ++ ++static int nvme_submit_io_passthru(int fd, struct nvme_passthru_cmd *cmd) ++{ ++ return ioctl(fd, NVME_IOCTL_IO_CMD, cmd); ++} ++ ++static int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, ++ __u8 flags, __u16 rsvd, ++ __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11, ++ __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15, ++ __u32 data_len, void *data, __u32 metadata_len, ++ void *metadata, __u32 timeout_ms, __u32 *result) ++{ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = opcode, ++ .flags = flags, ++ .rsvd1 = rsvd, ++ .nsid = nsid, ++ .cdw2 = cdw2, ++ .cdw3 = cdw3, ++ .metadata = (__u64)(uintptr_t) metadata, ++ .addr = (__u64)(uintptr_t) data, ++ .metadata_len = metadata_len, ++ .data_len = data_len, ++ .cdw10 = cdw10, ++ .cdw11 = cdw11, ++ .cdw12 = cdw12, ++ .cdw13 = cdw13, ++ .cdw14 = cdw14, ++ .cdw15 = cdw15, ++ .timeout_ms = timeout_ms, ++ .result = 0, ++ }; ++ int err; ++ ++ err = nvme_submit_passthru(fd, ioctl_cmd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_io(int fd, __u8 opcode, 
__u64 slba, __u16 nblocks, __u16 control, ++ __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data, ++ void *metadata) ++{ ++ struct nvme_user_io io = { ++ .opcode = opcode, ++ .flags = 0, ++ .control = control, ++ .nblocks = nblocks, ++ .rsvd = 0, ++ .metadata = (__u64)(uintptr_t) metadata, ++ .addr = (__u64)(uintptr_t) data, ++ .slba = slba, ++ .dsmgmt = dsmgmt, ++ .reftag = reftag, ++ .appmask = appmask, ++ .apptag = apptag, ++ }; ++ return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io); ++} ++ ++static int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt, ++ __u32 reftag, __u16 apptag, __u16 appmask, void *data, ++ void *metadata) ++{ ++ return nvme_io(fd, nvme_cmd_read, slba, nblocks, control, dsmgmt, ++ reftag, apptag, appmask, data, metadata); ++} ++ ++static int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt, ++ __u32 reftag, __u16 apptag, __u16 appmask, void *data, ++ void *metadata) ++{ ++ return nvme_io(fd, nvme_cmd_write, slba, nblocks, control, dsmgmt, ++ reftag, apptag, appmask, data, metadata); ++} ++ ++static int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt, ++ __u32 reftag, __u16 apptag, __u16 appmask, void *data, ++ void *metadata) ++{ ++ return nvme_io(fd, nvme_cmd_compare, slba, nblocks, control, dsmgmt, ++ reftag, apptag, appmask, data, metadata); ++} ++ ++static int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd, ++ __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, ++ __u32 cdw11, __u32 cdw12, __u32 cdw13, __u32 cdw14, ++ __u32 cdw15, __u32 data_len, void *data, ++ __u32 metadata_len, void *metadata, __u32 timeout_ms) ++{ ++ return nvme_passthru(fd, NVME_IOCTL_IO_CMD, opcode, flags, rsvd, nsid, ++ cdw2, cdw3, cdw10, cdw11, cdw12, cdw13, cdw14, ++ cdw15, data_len, data, metadata_len, metadata, ++ timeout_ms, NULL); ++} ++ ++static int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb, ++ __u16 control, __u32 reftag, __u16 apptag, __u16 appmask) ++{ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_write_zeroes, ++ .nsid = nsid, ++ .cdw10 = slba & 0xffffffff, ++ .cdw11 = slba >> 32, ++ .cdw12 = nlb | (control << 16), ++ .cdw14 = reftag, ++ .cdw15 = apptag | (appmask << 16), ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb) ++{ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_write_uncor, ++ .nsid = nsid, ++ .cdw10 = slba & 0xffffffff, ++ .cdw11 = slba >> 32, ++ .cdw12 = nlb, ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_flush(int fd, __u32 nsid) ++{ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_flush, ++ .nsid = nsid, ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm, ++ __u16 nr_ranges) ++{ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_dsm, ++ .nsid = nsid, ++ .addr = (__u64)(uintptr_t) dsm, ++ .data_len = nr_ranges * sizeof(*dsm), ++ .cdw10 = nr_ranges - 1, ++ .cdw11 = cdw11, ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, __u32 *llbas, ++ __u64 *slbas, __u16 nr_ranges) ++{ ++ int i; ++ struct nvme_dsm_range *dsm = malloc(nr_ranges * sizeof(*dsm)); ++ ++ if (!dsm) { ++ fprintf(stderr, "malloc: %s\n", strerror(errno)); ++ return NULL; ++ } ++ for (i = 0; i < nr_ranges; i++) { ++ dsm[i].cattr = cpu_to_le32(ctx_attrs[i]); ++ 
dsm[i].nlb = cpu_to_le32(llbas[i]); ++ dsm[i].slba = cpu_to_le64(slbas[i]); ++ } ++ return dsm; ++} ++ ++static int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa, ++ bool iekey, __u64 crkey, __u64 nrkey) ++{ ++ __le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) }; ++ __u32 cdw10 = (racqa & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8; ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_resv_acquire, ++ .nsid = nsid, ++ .cdw10 = cdw10, ++ .addr = (__u64)(uintptr_t) (payload), ++ .data_len = sizeof(payload), ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl, ++ bool iekey, __u64 crkey, __u64 nrkey) ++{ ++ __le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) }; ++ __u32 cdw10 = (rrega & 0x7) | (iekey ? 1 << 3 : 0) | cptpl << 30; ++ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_resv_register, ++ .nsid = nsid, ++ .cdw10 = cdw10, ++ .addr = (__u64)(uintptr_t) (payload), ++ .data_len = sizeof(payload), ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela, ++ bool iekey, __u64 crkey) ++{ ++ __le64 payload[1] = { cpu_to_le64(crkey) }; ++ __u32 cdw10 = (rrela & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8; ++ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_resv_release, ++ .nsid = nsid, ++ .cdw10 = cdw10, ++ .addr = (__u64)(uintptr_t) (payload), ++ .data_len = sizeof(payload), ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data) ++{ ++ struct nvme_passthru_cmd cmd = { ++ .opcode = nvme_cmd_resv_report, ++ .nsid = nsid, ++ .cdw10 = numd, ++ .cdw11 = cdw11, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = (numd + 1) << 2, ++ }; ++ ++ return nvme_submit_io_passthru(fd, &cmd); ++} ++ ++static int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_identify, ++ .nsid = nsid, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = NVME_IDENTIFY_DATA_SIZE, ++ .cdw10 = cdw10, ++ .cdw11 = cdw11, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data) ++{ ++ return nvme_identify13(fd, nsid, cdw10, 0, data); ++} ++ ++static int nvme_identify_ctrl(int fd, void *data) ++{ ++ return nvme_identify(fd, 0, 1, data); ++} ++ ++static int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data) ++{ ++ int cns = present ? NVME_ID_CNS_NS_PRESENT : NVME_ID_CNS_NS; ++ ++ return nvme_identify(fd, nsid, cns, data); ++} ++ ++static int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data) ++{ ++ int cns = all ? NVME_ID_CNS_NS_PRESENT_LIST : NVME_ID_CNS_NS_ACTIVE_LIST; ++ ++ return nvme_identify(fd, nsid, cns, data); ++} ++ ++static int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data) ++{ ++ int cns = nsid ? 
NVME_ID_CNS_CTRL_NS_LIST : NVME_ID_CNS_CTRL_LIST; ++ ++ return nvme_identify(fd, nsid, (cntid << 16) | cns, data); ++} ++ ++static int nvme_identify_ns_descs(int fd, __u32 nsid, void *data) ++{ ++ ++ return nvme_identify(fd, nsid, NVME_ID_CNS_NS_DESC_LIST, data); ++} ++ ++static int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data) ++{ ++ return nvme_identify13(fd, 0, NVME_ID_CNS_NVMSET_LIST, nvmset_id, data); ++} ++ ++static int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo, ++ __u16 lsi, bool rae, __u32 data_len, void *data) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_get_log_page, ++ .nsid = nsid, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ }; ++ __u32 numd = (data_len >> 2) - 1; ++ __u16 numdu = numd >> 16, numdl = numd & 0xffff; ++ ++ cmd.cdw10 = log_id | (numdl << 16) | (rae ? 1 << 15 : 0); ++ if (lsp) ++ cmd.cdw10 |= lsp << 8; ++ ++ cmd.cdw11 = numdu | (lsi << 16); ++ cmd.cdw12 = lpo; ++ cmd.cdw13 = (lpo >> 32); ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++ ++} ++ ++static int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae, ++ __u32 data_len, void *data) ++{ ++ void *ptr = data; ++ __u32 offset = 0, xfer_len = data_len; ++ int ret; ++ ++ /* ++ * 4k is the smallest possible transfer unit, so by ++ * restricting ourselves for 4k transfers we avoid having ++ * to check the MDTS value of the controller. ++ */ ++ do { ++ xfer_len = data_len - offset; ++ if (xfer_len > 4096) ++ xfer_len = 4096; ++ ++ ret = nvme_get_log13(fd, nsid, log_id, NVME_NO_LOG_LSP, ++ offset, 0, rae, xfer_len, ptr); ++ if (ret) ++ return ret; ++ ++ offset += xfer_len; ++ ptr += xfer_len; ++ } while (offset < data_len); ++ ++ return 0; ++} ++ ++static int nvme_get_telemetry_log(int fd, void *lp, int generate_report, ++ int ctrl_init, size_t log_page_size, __u64 offset) ++{ ++ if (ctrl_init) ++ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_CTRL, ++ NVME_NO_LOG_LSP, offset, ++ 0, 1, log_page_size, lp); ++ if (generate_report) ++ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_HOST, ++ NVME_TELEM_LSP_CREATE, offset, ++ 0, 1, log_page_size, lp); ++ else ++ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_HOST, ++ NVME_NO_LOG_LSP, offset, ++ 0, 1, log_page_size, lp); ++} ++ ++static int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log) ++{ ++ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_FW_SLOT, true, ++ sizeof(*fw_log), fw_log); ++} ++ ++static int nvme_changed_ns_list_log(int fd, struct nvme_changed_ns_list_log *changed_ns_list_log) ++{ ++ return nvme_get_log(fd, 0, NVME_LOG_CHANGED_NS, true, ++ sizeof(changed_ns_list_log->log), ++ changed_ns_list_log->log); ++} ++ ++static int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log) ++{ ++ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_ERROR, false, ++ entries * sizeof(*err_log), err_log); ++} ++ ++static int nvme_endurance_log(int fd, __u16 group_id, struct nvme_endurance_group_log *endurance_log) ++{ ++ return nvme_get_log13(fd, 0, NVME_LOG_ENDURANCE_GROUP, 0, 0, group_id, 0, ++ sizeof(*endurance_log), endurance_log); ++} ++ ++static int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log) ++{ ++ return nvme_get_log(fd, nsid, NVME_LOG_SMART, false, ++ sizeof(*smart_log), smart_log); ++} ++ ++static int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo) ++{ ++ __u64 lpo = 0; ++ ++ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, lpo, 0, ++ true, ana_log_len, ana_log); ++} ++ 
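/*
 * Illustrative sketch only, not part of this patch: nvme_get_log()
 * above caps every Get Log Page transfer at 4 KiB so the controller's
 * MDTS limit never has to be queried.  For a hypothetical 9216-byte
 * log that means three reads: 4096 bytes at offset 0, 4096 at offset
 * 4096, and 1024 at offset 8192.  show_log_chunks() is a hypothetical
 * helper that reproduces only that offset/length arithmetic, with the
 * nvme_get_log13() call replaced by a printf().
 */
#include <stdio.h>

void show_log_chunks(unsigned int data_len)
{
	unsigned int offset = 0, xfer_len;

	do {
		xfer_len = data_len - offset;
		if (xfer_len > 4096)
			xfer_len = 4096;
		printf("read %u bytes at offset %u\n", xfer_len, offset);
		offset += xfer_len;
	} while (offset < data_len);
}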
++static int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log) ++{ ++ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_DEVICE_SELF_TEST, false, ++ sizeof(*self_test_log), self_test_log); ++} ++ ++static int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log) ++{ ++ return nvme_get_log(fd, 0, NVME_LOG_CMD_EFFECTS, false, ++ sizeof(*effects_log), effects_log); ++} ++ ++static int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size) ++{ ++ return nvme_get_log(fd, 0, NVME_LOG_DISC, false, size, log); ++} ++ ++static int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log) ++{ ++ return nvme_get_log(fd, 0, NVME_LOG_SANITIZE, false, ++ sizeof(*sanitize_log), sanitize_log); ++} ++ ++static int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, __u32 cdw11, ++ __u32 cdw12, __u32 data_len, void *data, __u32 *result) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = opcode, ++ .nsid = nsid, ++ .cdw10 = cdw10, ++ .cdw11 = cdw11, ++ .cdw12 = cdw12, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ }; ++ int err; ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12, ++ bool save, __u32 data_len, void *data, __u32 *result) ++{ ++ __u32 cdw10 = fid | (save ? 1 << 31 : 0); ++ ++ return nvme_feature(fd, nvme_admin_set_features, nsid, cdw10, value, ++ cdw12, data_len, data, result); ++} ++ ++static int nvme_property(int fd, __u8 fctype, __le32 off, __le64 *value, __u8 attrib) ++{ ++ int err; ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_fabrics_command, ++ .cdw10 = attrib, ++ .cdw11 = off, ++ }; ++ ++ if (!value) { ++ errno = EINVAL; ++ return -errno; ++ } ++ ++ if (fctype == nvme_fabrics_type_property_get){ ++ cmd.nsid = nvme_fabrics_type_property_get; ++ } else if(fctype == nvme_fabrics_type_property_set) { ++ cmd.nsid = nvme_fabrics_type_property_set; ++ cmd.cdw12 = *value; ++ } else { ++ errno = EINVAL; ++ return -errno; ++ } ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && fctype == nvme_fabrics_type_property_get) ++ *value = cpu_to_le64(cmd.result); ++ return err; ++} ++ ++static int get_property_helper(int fd, int offset, void *value, int *advance) ++{ ++ __le64 value64; ++ int err = -EINVAL; ++ ++ switch (offset) { ++ case NVME_REG_CAP: ++ case NVME_REG_ASQ: ++ case NVME_REG_ACQ: ++ *advance = 8; ++ break; ++ default: ++ *advance = 4; ++ } ++ ++ if (!value) ++ return err; ++ ++ err = nvme_property(fd, nvme_fabrics_type_property_get, ++ cpu_to_le32(offset), &value64, (*advance == 8)); ++ ++ if (!err) { ++ if (*advance == 8) ++ *((uint64_t *)value) = le64_to_cpu(value64); ++ else ++ *((uint32_t *)value) = le32_to_cpu(value64); ++ } ++ ++ return err; ++} ++ ++static int nvme_get_property(int fd, int offset, uint64_t *value) ++{ ++ int advance; ++ return get_property_helper(fd, offset, value, &advance); ++} ++ ++static int nvme_get_properties(int fd, void **pbar) ++{ ++ int offset, advance; ++ int err, ret = -EINVAL; ++ int size = getpagesize(); ++ ++ *pbar = malloc(size); ++ if (!*pbar) { ++ fprintf(stderr, "malloc: %s\n", strerror(errno)); ++ return -ENOMEM; ++ } ++ ++ memset(*pbar, 0xff, size); ++ for (offset = NVME_REG_CAP; offset <= NVME_REG_CMBSZ; offset += advance) { ++ err = get_property_helper(fd, offset, *pbar + offset, &advance); ++ if (!err) ++ ret = 0; ++ } ++ ++ return ret; ++} ++ ++static int nvme_set_property(int fd, int 
offset, int value) ++{ ++ __le64 val = cpu_to_le64(value); ++ __le32 off = cpu_to_le32(offset); ++ bool is64bit; ++ ++ switch (off) { ++ case NVME_REG_CAP: ++ case NVME_REG_ASQ: ++ case NVME_REG_ACQ: ++ is64bit = true; ++ break; ++ default: ++ is64bit = false; ++ } ++ ++ return nvme_property(fd, nvme_fabrics_type_property_set, ++ off, &val, is64bit ? 1: 0); ++} ++ ++static int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, __u32 cdw11, ++ __u32 data_len, void *data, __u32 *result) ++{ ++ __u32 cdw10 = fid | sel << 8; ++ ++ return nvme_feature(fd, nvme_admin_get_features, nsid, cdw10, cdw11, ++ 0, data_len, data, result); ++} ++ ++static int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi, ++ __u8 pil, __u8 ms, __u32 timeout) ++{ ++ __u32 cdw10 = lbaf | ms << 4 | pi << 5 | pil << 8 | ses << 9; ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_format_nvm, ++ .nsid = nsid, ++ .cdw10 = cdw10, ++ .timeout_ms = timeout, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas, ++ __u8 dps, __u8 nmic, __u32 *result) ++{ ++ struct nvme_id_ns ns = { ++ .nsze = cpu_to_le64(nsze), ++ .ncap = cpu_to_le64(ncap), ++ .flbas = flbas, ++ .dps = dps, ++ .nmic = nmic, ++ }; ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_ns_mgmt, ++ .addr = (__u64)(uintptr_t) ((void *)&ns), ++ .cdw10 = 0, ++ .data_len = 0x1000, ++ }; ++ int err; ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_ns_delete(int fd, __u32 nsid) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_ns_mgmt, ++ .nsid = nsid, ++ .cdw10 = 1, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist, ++ bool attach) ++{ ++ int i; ++ __u8 buf[0x1000]; ++ struct nvme_controller_list *cntlist = ++ (struct nvme_controller_list *)buf; ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_ns_attach, ++ .nsid = nsid, ++ .addr = (__u64)(uintptr_t) cntlist, ++ .cdw10 = attach ? 
0 : 1, ++ .data_len = 0x1000, ++ }; ++ ++ memset(buf, 0, sizeof(buf)); ++ cntlist->num = cpu_to_le16(num_ctrls); ++ for (i = 0; i < num_ctrls; i++) ++ cntlist->identifier[i] = cpu_to_le16(ctrlist[i]); ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist) ++{ ++ return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, true); ++} ++ ++static int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist) ++{ ++ return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, false); ++} ++ ++static int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_download_fw, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ .cdw10 = (data_len >> 2) - 1, ++ .cdw11 = offset >> 2, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_activate_fw, ++ .cdw10 = (bpid << 31) | (action << 3) | slot, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp, ++ __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_security_send, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ .nsid = nsid, ++ .cdw10 = secp << 24 | spsp << 8 | nssf, ++ .cdw11 = tl, ++ }; ++ int err; ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp, ++ __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_security_recv, ++ .nsid = nsid, ++ .cdw10 = secp << 24 | spsp << 8 | nssf, ++ .cdw11 = al, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ }; ++ int err; ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper, ++ __u32 data_len, __u32 dw12, void *data, __u32 *result) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_directive_send, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ .nsid = nsid, ++ .cdw10 = data_len? (data_len >> 2) - 1 : 0, ++ .cdw11 = dspec << 16 | dtype << 8 | doper, ++ .cdw12 = dw12, ++ }; ++ int err; ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper, ++ __u32 data_len, __u32 dw12, void *data, __u32 *result) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_directive_recv, ++ .addr = (__u64)(uintptr_t) data, ++ .data_len = data_len, ++ .nsid = nsid, ++ .cdw10 = data_len? 
(data_len >> 2) - 1 : 0, ++ .cdw11 = dspec << 16 | dtype << 8 | doper, ++ .cdw12 = dw12, ++ }; ++ int err; ++ ++ err = nvme_submit_admin_passthru(fd, &cmd); ++ if (!err && result) ++ *result = cmd.result; ++ return err; ++} ++ ++static int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp, ++ __u8 no_dealloc, __u32 ovrpat) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_sanitize_nvm, ++ .cdw10 = no_dealloc << 9 | oipbp << 8 | ++ owpass << NVME_SANITIZE_OWPASS_SHIFT | ++ ause << 3 | sanact, ++ .cdw11 = ovrpat, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} ++ ++static int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10) ++{ ++ struct nvme_admin_cmd cmd = { ++ .opcode = nvme_admin_dev_self_test, ++ .nsid = nsid, ++ .cdw10 = cdw10, ++ }; ++ ++ return nvme_submit_admin_passthru(fd, &cmd); ++} +Index: multipath-tools-130222/libmultipath/nvme-ioctl.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme-ioctl.h +@@ -0,0 +1,139 @@ ++#ifndef _NVME_LIB_H ++#define _NVME_LIB_H ++ ++#include ++#include ++#include "linux/nvme_ioctl.h" ++#include "nvme.h" ++ ++static int nvme_get_nsid(int fd); ++ ++/* Generic passthrough */ ++static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd, ++ struct nvme_passthru_cmd *cmd); ++ ++static int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, __u8 flags, ++ __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3, ++ __u32 cdw10, __u32 cdw11, __u32 cdw12, ++ __u32 cdw13, __u32 cdw14, __u32 cdw15, ++ __u32 data_len, void *data, __u32 metadata_len, ++ void *metadata, __u32 timeout_ms, __u32 *result); ++ ++/* NVME_SUBMIT_IO */ ++static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control, ++ __u32 dsmgmt, __u32 reftag, __u16 apptag, ++ __u16 appmask, void *data, void *metadata); ++ ++static int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, ++ __u32 dsmgmt, __u32 reftag, __u16 apptag, ++ __u16 appmask, void *data, void *metadata); ++ ++static int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, ++ __u32 dsmgmt, __u32 reftag, __u16 apptag, ++ __u16 appmask, void *data, void *metadata); ++ ++static int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, ++ __u32 dsmgmt, __u32 reftag, __u16 apptag, ++ __u16 appmask, void *data, void *metadata); ++ ++/* NVME_IO_CMD */ ++static int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd, ++ __u32 nsid, __u32 cdw2, __u32 cdw3, ++ __u32 cdw10, __u32 cdw11, __u32 cdw12, ++ __u32 cdw13, __u32 cdw14, __u32 cdw15, ++ __u32 data_len, void *data, __u32 metadata_len, ++ void *metadata, __u32 timeout); ++ ++static int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb, ++ __u16 control, __u32 reftag, __u16 apptag, __u16 appmask); ++ ++static int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb); ++ ++static int nvme_flush(int fd, __u32 nsid); ++ ++static int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm, ++ __u16 nr_ranges); ++static struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, ++ __u32 *llbas, __u64 *slbas, ++ __u16 nr_ranges); ++ ++static int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa, ++ bool iekey, __u64 crkey, __u64 nrkey); ++static int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl, ++ bool iekey, __u64 crkey, __u64 nrkey); ++static int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela, ++ bool iekey, __u64 crkey); ++static 
int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data); ++ ++static int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data); ++static int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data); ++static int nvme_identify_ctrl(int fd, void *data); ++static int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data); ++static int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data); ++static int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data); ++static int nvme_identify_ns_descs(int fd, __u32 nsid, void *data); ++static int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data); ++static int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo, ++ __u16 group_id, bool rae, __u32 data_len, void *data); ++static int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae, ++ __u32 data_len, void *data); ++ ++ ++static int nvme_get_telemetry_log(int fd, void *lp, int generate_report, ++ int ctrl_gen, size_t log_page_size, __u64 offset); ++static int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log); ++static int nvme_changed_ns_list_log(int fd, ++ struct nvme_changed_ns_list_log *changed_ns_list_log); ++static int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log); ++static int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log); ++static int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo); ++static int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log); ++static int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size); ++static int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log); ++static int nvme_endurance_log(int fd, __u16 group_id, ++ struct nvme_endurance_group_log *endurance_log); ++ ++static int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, ++ __u32 cdw11, __u32 cdw12, __u32 data_len, void *data, ++ __u32 *result); ++static int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12, ++ bool save, __u32 data_len, void *data, __u32 *result); ++static int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, ++ __u32 cdw11, __u32 data_len, void *data, __u32 *result); ++ ++static int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi, ++ __u8 pil, __u8 ms, __u32 timeout); ++ ++static int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas, ++ __u8 dps, __u8 nmic, __u32 *result); ++static int nvme_ns_delete(int fd, __u32 nsid); ++ ++static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, ++ __u16 *ctrlist, bool attach); ++static int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist); ++static int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist); ++ ++static int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data); ++static int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid); ++ ++static int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp, ++ __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result); ++static int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp, ++ __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result); ++ ++static int nvme_subsystem_reset(int fd); ++static int nvme_reset_controller(int fd); ++static int nvme_ns_rescan(int fd); ++ ++static int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper, ++ __u32 data_len, __u32 dw12, void *data, __u32 *result); ++static int 
nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper, ++ __u32 data_len, __u32 dw12, void *data, __u32 *result); ++static int nvme_get_properties(int fd, void **pbar); ++static int nvme_set_property(int fd, int offset, int value); ++static int nvme_get_property(int fd, int offset, uint64_t *value); ++static int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp, ++ __u8 no_dealloc, __u32 ovrpat); ++static int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10); ++static int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log); ++#endif /* _NVME_LIB_H */ +Index: multipath-tools-130222/libmultipath/nvme-lib.c +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme-lib.c +@@ -0,0 +1,49 @@ ++#include ++/* avoid inclusion of standard API */ ++#define _NVME_LIB_C 1 ++#include "nvme-lib.h" ++#include "nvme-ioctl.c" ++#include "debug.h" ++ ++int log_nvme_errcode(int err, const char *dev, const char *msg) ++{ ++ if (err > 0) ++ condlog(3, "%s: %s: NVMe status %d", dev, msg, err); ++ else if (err < 0) ++ condlog(3, "%s: %s: %s", dev, msg, strerror(errno)); ++ return err; ++} ++ ++int libmp_nvme_get_nsid(int fd) ++{ ++ return nvme_get_nsid(fd); ++} ++ ++int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl) ++{ ++ return nvme_identify_ctrl(fd, ctrl); ++} ++ ++int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present, ++ struct nvme_id_ns *ns) ++{ ++ return nvme_identify_ns(fd, nsid, present, ns); ++} ++ ++int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo) ++{ ++ return nvme_ana_log(fd, ana_log, ana_log_len, rgo); ++} ++ ++int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl) ++{ ++ int rc; ++ struct nvme_id_ctrl c; ++ ++ rc = nvme_identify_ctrl(fd, &c); ++ if (rc < 0) ++ return rc; ++ if (ctrl) ++ *ctrl = c; ++ return c.cmic & (1 << 3) ? 1 : 0; ++} +Index: multipath-tools-130222/libmultipath/nvme-lib.h +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/nvme-lib.h +@@ -0,0 +1,39 @@ ++#ifndef NVME_LIB_H ++#define NVME_LIB_H ++ ++#include "nvme.h" ++ ++int log_nvme_errcode(int err, const char *dev, const char *msg); ++int libmp_nvme_get_nsid(int fd); ++int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl); ++int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present, ++ struct nvme_id_ns *ns); ++int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo); ++/* ++ * Identify controller, and return true if ANA is supported ++ * ctrl will be filled in if controller is identified, even w/o ANA ++ * ctrl may be NULL ++ */ ++int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl); ++ ++#ifndef _NVME_LIB_C ++/* ++ * In all files except nvme-lib.c, the nvme functions can be called ++ * by their usual name. 
++ */ ++#define nvme_get_nsid libmp_nvme_get_nsid ++#define nvme_identify_ctrl libmp_nvme_identify_ctrl ++#define nvme_identify_ns libmp_nvme_identify_ns ++#define nvme_ana_log libmp_nvme_ana_log ++/* ++ * Undefine these to avoid clashes with libmultipath's byteorder.h ++ */ ++#undef cpu_to_le16 ++#undef cpu_to_le32 ++#undef cpu_to_le64 ++#undef le16_to_cpu ++#undef le32_to_cpu ++#undef le64_to_cpu ++#endif ++ ++#endif /* NVME_LIB_H */ +Index: multipath-tools-130222/libmultipath/prio.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/prio.h ++++ multipath-tools-130222/libmultipath/prio.h +@@ -29,6 +29,7 @@ struct path; + #define PRIO_RDAC "rdac" + #define PRIO_DATACORE "datacore" + #define PRIO_WEIGHTED_PATH "weightedpath" ++#define PRIO_ANA "ana" + + /* + * Value used to mark the fact prio was not defined +Index: multipath-tools-130222/libmultipath/prioritizers/Makefile +=================================================================== +--- multipath-tools-130222.orig/libmultipath/prioritizers/Makefile ++++ multipath-tools-130222/libmultipath/prioritizers/Makefile +@@ -2,6 +2,7 @@ + # + # Copyright (C) 2007 Christophe Varoqui, + # ++TOPDIR = ../.. + include ../../Makefile.inc + + LIBS = \ +@@ -15,9 +16,10 @@ LIBS = \ + libpriodatacore.so \ + libpriohds.so \ + libprioweightedpath.so \ ++ libprioana.so \ + libprioiet.so + +-CFLAGS += -fPIC -I.. ++CFLAGS += -fPIC -I.. -I$(nvmedir) + + all: $(LIBS) + +Index: multipath-tools-130222/libmultipath/prioritizers/ana.c +=================================================================== +--- /dev/null ++++ multipath-tools-130222/libmultipath/prioritizers/ana.c +@@ -0,0 +1,236 @@ ++/* ++ * (C) Copyright HUAWEI Technology Corp. 2017 All Rights Reserved. ++ * ++ * ana.c ++ * Version 1.00 ++ * ++ * Tool to make use of a NVMe-feature called Asymmetric Namespace Access. ++ * It determines the ANA state of a device and prints a priority value to stdout. ++ * ++ * Author(s): Cheng Jike ++ * Li Jie ++ * ++ * This file is released under the GPL version 2, or any later version. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "debug.h" ++#include "nvme-lib.h" ++#include "prio.h" ++#include "util.h" ++#include "structs.h" ++#include "def_func.h" ++ ++enum { ++ ANA_ERR_GETCTRL_FAILED = 1, ++ ANA_ERR_NOT_NVME, ++ ANA_ERR_NOT_SUPPORTED, ++ ANA_ERR_GETANAS_OVERFLOW, ++ ANA_ERR_GETANAS_NOTFOUND, ++ ANA_ERR_GETANALOG_FAILED, ++ ANA_ERR_GETNSID_FAILED, ++ ANA_ERR_GETNS_FAILED, ++ ANA_ERR_NO_MEMORY, ++ ANA_ERR_NO_INFORMATION, ++}; ++ ++static const char *ana_errmsg[] = { ++ [ANA_ERR_GETCTRL_FAILED] = "couldn't get ctrl info", ++ [ANA_ERR_NOT_NVME] = "not an NVMe device", ++ [ANA_ERR_NOT_SUPPORTED] = "ANA not supported", ++ [ANA_ERR_GETANAS_OVERFLOW] = "buffer overflow in ANA log", ++ [ANA_ERR_GETANAS_NOTFOUND] = "NSID or ANAGRPID not found", ++ [ANA_ERR_GETANALOG_FAILED] = "couldn't get ana log", ++ [ANA_ERR_GETNSID_FAILED] = "couldn't get NSID", ++ [ANA_ERR_GETNS_FAILED] = "couldn't get namespace info", ++ [ANA_ERR_NO_MEMORY] = "out of memory", ++ [ANA_ERR_NO_INFORMATION] = "invalid fd", ++}; ++ ++static const char *anas_string[] = { ++ [NVME_ANA_OPTIMIZED] = "ANA Optimized State", ++ [NVME_ANA_NONOPTIMIZED] = "ANA Non-Optimized State", ++ [NVME_ANA_INACCESSIBLE] = "ANA Inaccessible State", ++ [NVME_ANA_PERSISTENT_LOSS] = "ANA Persistent Loss State", ++ [NVME_ANA_CHANGE] = "ANA Change state", ++}; ++ ++static const char *aas_print_string(int rc) ++{ ++ rc &= 0xff; ++ if (rc >= 0 && rc < ARRAY_SIZE(anas_string) && ++ anas_string[rc] != NULL) ++ return anas_string[rc]; ++ ++ return "invalid ANA state"; ++} ++ ++static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log, ++ size_t ana_log_len) ++{ ++ void *base = ana_log; ++ struct nvme_ana_rsp_hdr *hdr = base; ++ struct nvme_ana_group_desc *ana_desc; ++ size_t offset = sizeof(struct nvme_ana_rsp_hdr); ++ __u32 nr_nsids; ++ size_t nsid_buf_size; ++ int i, j; ++ ++ for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) { ++ ana_desc = base + offset; ++ ++ offset += sizeof(*ana_desc); ++ if (offset > ana_log_len) ++ return -ANA_ERR_GETANAS_OVERFLOW; ++ ++ nr_nsids = le32_to_cpu(ana_desc->nnsids); ++ nsid_buf_size = nr_nsids * sizeof(__le32); ++ ++ offset += nsid_buf_size; ++ if (offset > ana_log_len) ++ return -ANA_ERR_GETANAS_OVERFLOW; ++ ++ for (j = 0; j < nr_nsids; j++) { ++ if (nsid == le32_to_cpu(ana_desc->nsids[j])) ++ return ana_desc->state; ++ } ++ ++ if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid)) ++ return ana_desc->state; ++ ++ } ++ return -ANA_ERR_GETANAS_NOTFOUND; ++} ++ ++int get_ana_info(struct path * pp, unsigned int timeout) ++{ ++ int rc; ++ __u32 nsid; ++ struct nvme_id_ctrl ctrl; ++ struct nvme_id_ns ns; ++ void *ana_log; ++ size_t ana_log_len; ++ bool is_anagrpid_const; ++ ++ rc = nvme_id_ctrl_ana(pp->fd, &ctrl); ++ if (rc < 0) { ++ log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl"); ++ return -ANA_ERR_GETCTRL_FAILED; ++ } else if (rc == 0) ++ return -ANA_ERR_NOT_SUPPORTED; ++ ++ nsid = nvme_get_nsid(pp->fd); ++ if (nsid <= 0) { ++ log_nvme_errcode(rc, pp->dev, "nvme_get_nsid"); ++ return -ANA_ERR_GETNSID_FAILED; ++ } ++ is_anagrpid_const = ctrl.anacap & (1 << 6); ++ ++ /* ++ * Code copied from nvme-cli/nvme.c. We don't need to allocate an ++ * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most ++ * in one ANA group. 
++ */ ++ ana_log_len = sizeof(struct nvme_ana_rsp_hdr) + ++ le32_to_cpu(ctrl.nanagrpid) ++ * sizeof(struct nvme_ana_group_desc); ++ ++ if (is_anagrpid_const) { ++ rc = nvme_identify_ns(pp->fd, nsid, 0, &ns); ++ if (rc) { ++ log_nvme_errcode(rc, pp->dev, "nvme_identify_ns"); ++ return -ANA_ERR_GETNS_FAILED; ++ } ++ } else ++ ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32); ++ ++ ana_log = malloc(ana_log_len); ++ if (!ana_log) ++ return -ANA_ERR_NO_MEMORY; ++ pthread_cleanup_push(free, ana_log); ++ rc = nvme_ana_log(pp->fd, ana_log, ana_log_len, ++ is_anagrpid_const ? NVME_ANA_LOG_RGO : 0); ++ if (rc) { ++ log_nvme_errcode(rc, pp->dev, "nvme_ana_log"); ++ rc = -ANA_ERR_GETANALOG_FAILED; ++ } else ++ rc = get_ana_state(nsid, ++ is_anagrpid_const ? ++ le32_to_cpu(ns.anagrpid) : 0, ++ ana_log, ana_log_len); ++ pthread_cleanup_pop(1); ++ if (rc >= 0) ++ condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc, ++ aas_print_string(rc)); ++ return rc; ++} ++ ++/* ++ * Priorities modeled roughly after the ALUA model (alua.c/sysfs.c) ++ * Reference: ANA Base Protocol (NVMe TP 4004a, 11/13/2018). ++ * ++ * Differences: ++ * ++ * - The ANA base spec defines no implicit or explicit (STPG) state management. ++ * If a state is encountered that doesn't allow normal I/O (all except ++ * OPTIMIZED and NON_OPTIMIZED), we can't do anything but either wait for a ++ * Access State Change Notice (can't do that in multipathd as we don't receive ++ * those), or retry commands in regular time intervals until ANATT is expired ++ * (not implemented). Mapping UNAVAILABLE state to ALUA STANDBY is the best we ++ * can currently do. ++ * ++ * FIXME: Waiting for ANATT could be implemented with a "delayed failback" ++ * mechanism. The current "failback" method can't be used, as it would ++ * affect failback to every state, and here only failback to UNAVAILABLE ++ * should be delayed. ++ * ++ * - PERSISTENT_LOSS state is even below ALUA's UNAVAILABLE state. ++ * FIXME: According to the ANA TP, accessing paths in PERSISTENT_LOSS state ++ * in any way makes no sense (e.g. §8.19.6 - paths in this state shouldn't ++ * even be checked under "all paths down" conditions). Device mapper can, ++ * and will, select a PG for IO if it has non-failed paths, even if the ++ * PG has priority 0. We could avoid that only with an "ANA path checker". ++ * ++ * - ALUA has no CHANGE state. The ANA TP §8.18.3 / §8.19.4 suggests ++ * that CHANGE state should be treated in roughly the same way as ++ * INACCESSIBLE. Therefore we assign the same prio to it. ++ * ++ * - ALUA's LBA-dependent state has no ANA equivalent. 
++ */ ++ ++int getprio(struct path *pp, char *args) ++{ ++ int rc; ++ ++ if (pp->fd < 0) ++ rc = -ANA_ERR_NO_INFORMATION; ++ else ++ rc = get_ana_info(pp, get_prio_timeout(60000)); ++ ++ switch (rc) { ++ case NVME_ANA_OPTIMIZED: ++ return 50; ++ case NVME_ANA_NONOPTIMIZED: ++ return 10; ++ case NVME_ANA_INACCESSIBLE: ++ case NVME_ANA_CHANGE: ++ return 1; ++ case NVME_ANA_PERSISTENT_LOSS: ++ return 0; ++ default: ++ break; ++ } ++ if (rc < 0 && -rc < ARRAY_SIZE(ana_errmsg)) ++ condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-rc]); ++ else ++ condlog(1, "%s: invalid ANA rc code %d", pp->dev, rc); ++ return -1; ++} ++ ++declare_nop_prio(initprio) ++declare_nop_prio(freeprio) +Index: multipath-tools-130222/libmultipath/util.h +=================================================================== +--- multipath-tools-130222.orig/libmultipath/util.h ++++ multipath-tools-130222/libmultipath/util.h +@@ -18,6 +18,8 @@ int parse_prkey(char *ptr, uint64_t *prk + int parse_prkey_flags(char *ptr, uint64_t *prkey, uint8_t *flags); + int safe_write(int fd, const void *buf, size_t count); + ++#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) ++ + #define safe_sprintf(var, format, args...) \ + snprintf(var, sizeof(var), format, ##args) >= sizeof(var) + #define safe_snprintf(var, size, format, args...) \ +Index: multipath-tools-130222/multipath/multipath.conf.5 +=================================================================== +--- multipath-tools-130222.orig/multipath/multipath.conf.5 ++++ multipath-tools-130222/multipath/multipath.conf.5 +@@ -196,6 +196,9 @@ Generate the path priority for LSI/Engen + Generate the path priority for Compaq/HP controller in + active/standby mode. + .TP ++.B ana ++Generate the path priority based on the NVMe ANA settings. ++.TP + .B hds + Generate the path priority for Hitachi HDS Modular storage arrays. 
+ .TP +Index: multipath-tools-130222/libmultipath/propsel.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/propsel.c ++++ multipath-tools-130222/libmultipath/propsel.c +@@ -5,6 +5,7 @@ + */ + #include + ++#include "nvme-lib.h" + #include "checkers.h" + #include "memory.h" + #include "vector.h" +@@ -489,8 +490,13 @@ select_getuid (struct path * pp) + void + detect_prio(struct path * pp) + { +- if (detect_alua(pp)) +- prio_get(&pp->prio, PRIO_ALUA, DEFAULT_PRIO_ARGS); ++ if (pp->bus == SYSFS_BUS_NVME) { ++ if (nvme_id_ctrl_ana(pp->fd, NULL) == 1) ++ prio_get(&pp->prio, PRIO_ANA, DEFAULT_PRIO_ARGS); ++ } else if (pp->bus == SYSFS_BUS_SCSI) { ++ if (detect_alua(pp)) ++ prio_get(&pp->prio, PRIO_ALUA, DEFAULT_PRIO_ARGS); ++ } + } + + extern int +Index: multipath-tools-130222/libmultipath/hwtable.c +=================================================================== +--- multipath-tools-130222.orig/libmultipath/hwtable.c ++++ multipath-tools-130222/libmultipath/hwtable.c +@@ -1178,6 +1178,7 @@ static struct hwentry default_hw[] = { + .vendor = "NVME", + .product = ".*", + .uid_attribute = "ID_WWN", ++ .detect_prio = DETECT_PRIO_ON, + .checker_name = NONE, + }, + /* diff --git a/SPECS/device-mapper-multipath.spec b/SPECS/device-mapper-multipath.spec index a2e3f50..918bddb 100644 --- a/SPECS/device-mapper-multipath.spec +++ b/SPECS/device-mapper-multipath.spec @@ -1,7 +1,7 @@ Summary: Tools to manage multipath devices using device-mapper Name: device-mapper-multipath Version: 0.4.9 -Release: 123%{?dist} +Release: 127%{?dist} License: GPL+ Group: System Environment/Base URL: http://christophe.varoqui.free.fr/ @@ -256,6 +256,15 @@ Patch0246: 0246-RHBZ-1593459-add-transport-blacklist.patch Patch0247: 0247-RHBZ-1585824-mpathconf-allow-doc.patch Patch0248: 0248-RHBZ-1594360-fix-param-rk-doc.patch Patch0249: 0249-RHBZ-1610263-mpathpersist-max-fds.patch +Patch0250: 0250-RHBZ-1610867-rescan-change.patch +Patch0251: 0251-RHBZ-1614011-discovery-timeout.patch +Patch0252: 0252-RHBZ-1623595-cmd-error-status.patch +Patch0253: 0253-RHBZ-1618549-mix-hw-handler.patch +Patch0254: 0254-RHBZ-1635819-fix-mpathpersist-crash.patch +Patch0255: 0255-RHBZ-1638651-marginal-path.patch +Patch0256: 0256-RHBZ-1672175-retry-no-fd-paths.patch +Patch0257: 0257-RHBZ-1679556-dont-check-dm-devices.patch +Patch0258: 0258-RHBZ-1634183-ANA-prioritizer.patch # runtime Requires: %{name}-libs = %{version}-%{release} @@ -592,6 +601,15 @@ device-mapper-multipath's libdmmp C API library %patch0247 -p1 %patch0248 -p1 %patch0249 -p1 +%patch0250 -p1 +%patch0251 -p1 +%patch0252 -p1 +%patch0253 -p1 +%patch0254 -p1 +%patch0255 -p1 +%patch0256 -p1 +%patch0257 -p1 +%patch0258 -p1 cp %{SOURCE1} . 
 %build
@@ -709,6 +727,41 @@ fi
 %{_pkgconfdir}/libdmmp.pc
 
 %changelog
+* Thu Mar 14 2019 Benjamin Marzinski 0.4.9-127
+- Add 0256-RHBZ-1672175-retry-no-fd-paths.patch
+  * retry adding paths if they couldn't be opened initially
+- Add 0257-RHBZ-1679556-dont-check-dm-devices.patch
+  * don't check if dm devices are multipath paths
+- Add 0258-RHBZ-1634183-ANA-prioritizer.patch
+  * Add NVMe ANA path prioritizer
+- Resolves: bz #1634183, #1672175, #1679556
+
+* Wed Feb 13 2019 Benjamin Marzinski 0.4.9-126
+- Modify 0255-RHBZ-1638651-marginal-path.patch
+  * Fix memory leak
+- Resolves: bz #1638651
+
+* Wed Feb 13 2019 Benjamin Marzinski 0.4.9-125
+- Modify 0250-RHBZ-1610867-rescan-change.patch
+  * Fix memory leak
+- Modify 0255-RHBZ-1638651-marginal-path.patch
+  * Fix NULL dereference
+- Refresh 0252-RHBZ-1623595-cmd-error-status.patch
+- Resolves: bz #1610867, #1638651
+
+* Fri Feb 1 2019 Benjamin Marzinski 0.4.9-124
+- Add 0250-RHBZ-1610867-rescan-change.patch
+  * Update multipath devices on change events.
+- Add 0251-RHBZ-1614011-discovery-timeout.patch
+- Add 0252-RHBZ-1623595-cmd-error-status.patch
+- Add 0253-RHBZ-1618549-mix-hw-handler.patch
+  * Don't retain attached hw handler when different hw handlers are
+    attached to different paths
+- Add 0254-RHBZ-1635819-fix-mpathpersist-crash.patch
+- Add 0255-RHBZ-1638651-marginal-path.patch
+  * backport marginal_path options from upstream
+- Resolves: bz #1610867, #1614011, #1618549, #1623595, #1635819, #1638651
+
 * Fri Aug 10 2018 Benjamin Marzinski 0.4.9-123
 - Add 0249-RHBZ-1610263-mpathpersist-max-fds.patch
   * make mpathpersist honor max_fds multipath.conf parameter
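
Editor's note: the following is a minimal standalone sketch, not part of the patch series above, illustrating the ANA-state-to-priority mapping that the new "ana" prioritizer (0258-RHBZ-1634183-ANA-prioritizer.patch) applies in getprio(). The enum values follow the ANA state codes defined by the NVMe specification, and the priority constants mirror those returned in ana.c; the ANA_* names here are local to the sketch (the patch itself uses the NVME_ANA_* symbols from nvme.h).

/*
 * Standalone illustration of the ANA state -> multipath priority mapping.
 * Higher priority means the path group is preferred by the path selector.
 */
#include <stdio.h>

enum ana_state {
	ANA_OPTIMIZED       = 0x1,
	ANA_NONOPTIMIZED    = 0x2,
	ANA_INACCESSIBLE    = 0x3,
	ANA_PERSISTENT_LOSS = 0x4,
	ANA_CHANGE          = 0xf,
};

static int ana_state_to_prio(enum ana_state state)
{
	switch (state) {
	case ANA_OPTIMIZED:
		return 50;	/* preferred paths */
	case ANA_NONOPTIMIZED:
		return 10;	/* usable, but not preferred */
	case ANA_INACCESSIBLE:
	case ANA_CHANGE:
		return 1;	/* roughly ALUA standby; avoid unless nothing better */
	case ANA_PERSISTENT_LOSS:
		return 0;	/* do not use */
	default:
		return -1;	/* unknown state: the prioritizer logs an error */
	}
}

int main(void)
{
	printf("optimized       -> %d\n", ana_state_to_prio(ANA_OPTIMIZED));
	printf("non-optimized   -> %d\n", ana_state_to_prio(ANA_NONOPTIMIZED));
	printf("persistent loss -> %d\n", ana_state_to_prio(ANA_PERSISTENT_LOSS));
	return 0;
}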