From 131573e8b1544a2cf771c0efa340566d4c640d74 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 16 2022 08:09:55 +0000 Subject: import fence-agents-4.2.1-102.el8 --- diff --git a/SOURCES/bz2033671-fence_sbd-improve-error-handling.patch b/SOURCES/bz2033671-fence_sbd-improve-error-handling.patch deleted file mode 100644 index c9ab5ad..0000000 --- a/SOURCES/bz2033671-fence_sbd-improve-error-handling.patch +++ /dev/null @@ -1,296 +0,0 @@ -From ab1de07902d9f380c10405d6ddac3aeb43838c86 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 28 Jul 2022 15:33:12 +0200 -Subject: [PATCH] fence_sbd: improve error handling - -basically when using 3 disks be happy with 2 answers -but give it 5s at least to collect all answers -increase default power-timeout to 30s so that waiting -those 5s still allows us to get done sending the -reboot - -RHBZ#2033671 ---- - agents/sbd/fence_sbd.py | 77 +++++++++++++-------- - lib/fencing.py.py | 109 ++++++++++++++++++++++++++++++ - tests/data/metadata/fence_sbd.xml | 2 +- - 3 files changed, 158 insertions(+), 30 deletions(-) - -diff --git a/agents/sbd/fence_sbd.py b/agents/sbd/fence_sbd.py -index 0c876b16e..2b0127d55 100644 ---- a/agents/sbd/fence_sbd.py -+++ b/agents/sbd/fence_sbd.py -@@ -5,7 +5,7 @@ - import os - import atexit - sys.path.append("@FENCEAGENTSLIBDIR@") --from fencing import fail_usage, run_command, fence_action, all_opt -+from fencing import fail_usage, run_commands, fence_action, all_opt - from fencing import atexit_handler, check_input, process_input, show_docs - from fencing import run_delay - import itertools -@@ -81,7 +81,7 @@ def check_sbd_device(options, device_path): - - cmd = "%s -d %s dump" % (options["--sbd-path"], device_path) - -- (return_code, out, err) = run_command(options, cmd) -+ (return_code, out, err) = run_commands(options, [ cmd ]) - - for line in itertools.chain(out.split("\n"), err.split("\n")): - if len(line) == 0: -@@ -94,21 +94,35 @@ def check_sbd_device(options, device_path): - - return DEVICE_INIT - -+ - def generate_sbd_command(options, command, arguments=None): - """Generates a sbd command based on given arguments. - - Return Value: -- generated sbd command (string) -+ generated list of sbd commands (strings) depending -+ on command multiple commands with a device each -+ or a single command with multiple devices - """ -- cmd = options["--sbd-path"] -+ cmds = [] -+ -+ if not command in ["list", "dump"]: -+ cmd = options["--sbd-path"] - -- # add "-d" for each sbd device -- for device in parse_sbd_devices(options): -- cmd += " -d %s" % device -+ # add "-d" for each sbd device -+ for device in parse_sbd_devices(options): -+ cmd += " -d %s" % device - -- cmd += " %s %s" % (command, arguments) -+ cmd += " %s %s" % (command, arguments) -+ cmds.append(cmd) -+ -+ else: -+ for device in parse_sbd_devices(options): -+ cmd = options["--sbd-path"] -+ cmd += " -d %s" % device -+ cmd += " %s %s" % (command, arguments) -+ cmds.append(cmd) - -- return cmd -+ return cmds - - def send_sbd_message(conn, options, plug, message): - """Sends a message to all sbd devices. -@@ -128,7 +142,7 @@ def send_sbd_message(conn, options, plug, message): - arguments = "%s %s" % (plug, message) - cmd = generate_sbd_command(options, "message", arguments) - -- (return_code, out, err) = run_command(options, cmd) -+ (return_code, out, err) = run_commands(options, cmd) - - return (return_code, out, err) - -@@ -147,7 +161,7 @@ def get_msg_timeout(options): - - cmd = generate_sbd_command(options, "dump") - -- (return_code, out, err) = run_command(options, cmd) -+ (return_code, out, err) = run_commands(options, cmd) - - for line in itertools.chain(out.split("\n"), err.split("\n")): - if len(line) == 0: -@@ -288,7 +302,7 @@ def get_node_list(conn, options): - - cmd = generate_sbd_command(options, "list") - -- (return_code, out, err) = run_command(options, cmd) -+ (return_code, out, err) = run_commands(options, cmd) - - for line in out.split("\n"): - if len(line) == 0: -@@ -356,6 +370,7 @@ def main(): - - all_opt["method"]["default"] = "cycle" - all_opt["method"]["help"] = "-m, --method=[method] Method to fence (onoff|cycle) (Default: cycle)" -+ all_opt["power_timeout"]["default"] = "30" - - options = check_input(device_opt, process_input(device_opt)) - -@@ -376,23 +391,27 @@ def main(): - - # We need to check if the provided sbd_devices exists. We need to do - # that for every given device. -- for device_path in parse_sbd_devices(options): -- logging.debug("check device \"%s\"", device_path) -- -- return_code = check_sbd_device(options, device_path) -- if PATH_NOT_EXISTS == return_code: -- logging.error("\"%s\" does not exist", device_path) -- elif PATH_NOT_BLOCK == return_code: -- logging.error("\"%s\" is not a valid block device", device_path) -- elif DEVICE_NOT_INIT == return_code: -- logging.error("\"%s\" is not initialized", device_path) -- elif DEVICE_INIT != return_code: -- logging.error("UNKNOWN error while checking \"%s\"", device_path) -- -- # If we get any error while checking the device we need to exit at this -- # point. -- if DEVICE_INIT != return_code: -- exit(return_code) -+ # Just for the case we are really rebooting / powering off a device -+ # (pacemaker as well uses the list command to generate a dynamic list) -+ # we leave it to sbd to try and decide if it was successful -+ if not options["--action"] in ["reboot", "off", "list"]: -+ for device_path in parse_sbd_devices(options): -+ logging.debug("check device \"%s\"", device_path) -+ -+ return_code = check_sbd_device(options, device_path) -+ if PATH_NOT_EXISTS == return_code: -+ logging.error("\"%s\" does not exist", device_path) -+ elif PATH_NOT_BLOCK == return_code: -+ logging.error("\"%s\" is not a valid block device", device_path) -+ elif DEVICE_NOT_INIT == return_code: -+ logging.error("\"%s\" is not initialized", device_path) -+ elif DEVICE_INIT != return_code: -+ logging.error("UNKNOWN error while checking \"%s\"", device_path) -+ -+ # If we get any error while checking the device we need to exit at this -+ # point. -+ if DEVICE_INIT != return_code: -+ exit(return_code) - - # we check against the defined timeouts. If the pacemaker timeout is smaller - # then that defined within sbd we should report this. -diff --git a/lib/fencing.py.py b/lib/fencing.py.py -index b746ede8b..fc3679e33 100644 ---- a/lib/fencing.py.py -+++ b/lib/fencing.py.py -@@ -1088,6 +1088,115 @@ def is_executable(path): - return True - return False - -+def run_commands(options, commands, timeout=None, env=None, log_command=None): -+ # inspired by psutils.wait_procs (BSD License) -+ def check_gone(proc, timeout): -+ try: -+ returncode = proc.wait(timeout=timeout) -+ except subprocess.TimeoutExpired: -+ pass -+ else: -+ if returncode is not None or not proc.is_running(): -+ proc.returncode = returncode -+ gone.add(proc) -+ -+ if timeout is None and "--power-timeout" in options: -+ timeout = options["--power-timeout"] -+ if timeout == 0: -+ timeout = None -+ if timeout is not None: -+ timeout = float(timeout) -+ -+ time_start = time.time() -+ procs = [] -+ status = None -+ pipe_stdout = "" -+ pipe_stderr = "" -+ -+ for command in commands: -+ logging.info("Executing: %s\n", log_command or command) -+ -+ try: -+ process = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, -+ # decodes newlines and in python3 also converts bytes to str -+ universal_newlines=(sys.version_info[0] > 2)) -+ except OSError: -+ fail_usage("Unable to run %s\n" % command) -+ -+ procs.append(process) -+ -+ gone = set() -+ alive = set(procs) -+ -+ while True: -+ if alive: -+ max_timeout = 2.0 / len(alive) -+ for proc in alive: -+ if timeout is not None: -+ if time.time()-time_start >= timeout: -+ # quickly go over the rest -+ max_timeout = 0 -+ check_gone(proc, max_timeout) -+ alive = alive - gone -+ -+ if not alive: -+ break -+ -+ if time.time()-time_start < 5.0: -+ # give it at least 5s to get a complete answer -+ # afterwards we're OK with a quorate answer -+ continue -+ -+ if len(gone) > len(alive): -+ good_cnt = 0 -+ for proc in gone: -+ if proc.returncode == 0: -+ good_cnt += 1 -+ # a positive result from more than half is fine -+ if good_cnt > len(procs)/2: -+ break -+ -+ if timeout is not None: -+ if time.time() - time_start >= timeout: -+ logging.debug("Stop waiting after %s\n", str(timeout)) -+ break -+ -+ logging.debug("Done: %d gone, %d alive\n", len(gone), len(alive)) -+ -+ for proc in gone: -+ if (status != 0): -+ status = proc.returncode -+ # hand over the best status we have -+ # but still collect as much stdout/stderr feedback -+ # avoid communicate as we know already process -+ # is gone and it seems to block when there -+ # are D state children we don't get rid off -+ os.set_blocking(proc.stdout.fileno(), False) -+ os.set_blocking(proc.stderr.fileno(), False) -+ try: -+ pipe_stdout += proc.stdout.read() -+ except: -+ pass -+ try: -+ pipe_stderr += proc.stderr.read() -+ except: -+ pass -+ proc.stdout.close() -+ proc.stderr.close() -+ -+ for proc in alive: -+ proc.kill() -+ -+ if status is None: -+ fail(EC_TIMED_OUT, stop=(int(options.get("retry", 0)) < 1)) -+ status = EC_TIMED_OUT -+ pipe_stdout = "" -+ pipe_stderr = "timed out" -+ -+ logging.debug("%s %s %s\n", str(status), str(pipe_stdout), str(pipe_stderr)) -+ -+ return (status, pipe_stdout, pipe_stderr) -+ - def run_command(options, command, timeout=None, env=None, log_command=None): - if timeout is None and "--power-timeout" in options: - timeout = options["--power-timeout"] -diff --git a/tests/data/metadata/fence_sbd.xml b/tests/data/metadata/fence_sbd.xml -index 516370c40..7248b864a 100644 ---- a/tests/data/metadata/fence_sbd.xml -+++ b/tests/data/metadata/fence_sbd.xml -@@ -87,7 +87,7 @@ - - - -- -+ - Test X seconds for status change after ON/OFF - - diff --git a/SPECS/fence-agents.spec b/SPECS/fence-agents.spec index c8be7fb..be3ece8 100644 --- a/SPECS/fence-agents.spec +++ b/SPECS/fence-agents.spec @@ -87,7 +87,7 @@ Name: fence-agents Summary: Set of unified programs capable of host isolation ("fencing") Version: 4.2.1 -Release: 101%{?alphatag:.%{alphatag}}%{?dist} +Release: 102%{?alphatag:.%{alphatag}}%{?dist} License: GPLv2+ and LGPLv2+ Group: System Environment/Base URL: https://github.com/ClusterLabs/fence-agents @@ -291,8 +291,7 @@ Patch116: bz2080729-2-fence_lpar-fix-import-fail_usage.patch Patch117: bz2072421-2-fence_zvmip-connect-error.patch Patch118: bz2091826-fence_ibm_vpc-add-proxy-support.patch Patch119: bz2092921-fence_ibm_powervs-proxy-private-api-servers.patch -Patch120: bz2033671-fence_sbd-improve-error-handling.patch -Patch121: bz1886074-4-fencing-source_env-dont-process-empty-lines.patch +Patch120: bz1886074-4-fencing-source_env-dont-process-empty-lines.patch %if 0%{?fedora} || 0%{?rhel} > 7 %global supportedagents amt_ws apc apc_snmp bladecenter brocade cisco_mds cisco_ucs compute drac5 eaton_snmp emerson eps evacuate hds_cb hpblade ibmblade ibm_powervs ibm_vpc ifmib ilo ilo_moonshot ilo_mp ilo_ssh intelmodular ipdu ipmilan kdump kubevirt lpar mpath redfish rhevm rsa rsb sbd scsi vmware_rest vmware_soap wti @@ -491,7 +490,6 @@ BuildRequires: python3-google-api-client python3-pip python3-wheel python3-jinja %patch118 -p1 %patch119 -p1 %patch120 -p1 -%patch121 -p1 # prevent compilation of something that won't get used anyway sed -i.orig 's|FENCE_ZVM=1|FENCE_ZVM=0|' configure.ac @@ -1516,14 +1514,10 @@ Fence agent for IBM z/VM over IP. %endif %changelog -* Mon Aug 15 2022 Oyvind Albrigtsen - 4.2.1-101 +* Mon Aug 15 2022 Oyvind Albrigtsen - 4.2.1-102 - fence_openstack: new fence agent Resolves: rhbz#1886074 -* Mon Aug 1 2022 Oyvind Albrigtsen - 4.2.1-100 -- fence_sbd: improve error handling - Resolves: rhbz#2033671 - * Wed Jun 22 2022 Oyvind Albrigtsen - 4.2.1-99 - fence_ibm_powervs: add support for proxy, private API servers and get token via API key