Blame SOURCES/bz1299577-fence_compute.patch

d5f3d8
From e80e142092c53102a46886e9748b8e25465ce4f6 Mon Sep 17 00:00:00 2001
d5f3d8
From: Marek 'marx' Grac <mgrac@redhat.com>
d5f3d8
Date: Wed, 20 Jan 2016 11:32:21 +0100
d5f3d8
Subject: [PATCH] fence_compute: Sync with master branch
d5f3d8
d5f3d8
---
d5f3d8
 fence/agents/compute/fence_compute.py | 180 ++++++++++++++++++++++++++--------
d5f3d8
 tests/data/metadata/fence_compute.xml |  16 +--
d5f3d8
 2 files changed, 150 insertions(+), 46 deletions(-)
d5f3d8
d5f3d8
diff --git a/fence/agents/compute/fence_compute.py b/fence/agents/compute/fence_compute.py
d5f3d8
index 82d9c46..d9fe54a 100644
d5f3d8
--- a/fence/agents/compute/fence_compute.py
d5f3d8
+++ b/fence/agents/compute/fence_compute.py
d5f3d8
@@ -19,6 +19,9 @@ REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved."
d5f3d8
 override_status = ""
d5f3d8
 nova = None
d5f3d8
 
d5f3d8
+EVACUABLE_TAG = "evacuable"
d5f3d8
+TRUE_TAGS = ['true']
d5f3d8
+
d5f3d8
 def get_power_status(_, options):
d5f3d8
 	global override_status
d5f3d8
 
d5f3d8
@@ -32,8 +35,8 @@ def get_power_status(_, options):
d5f3d8
 	if nova:
d5f3d8
 		try:
d5f3d8
 			services = nova.services.list(host=options["--plug"])
d5f3d8
-
d5f3d8
 			for service in services:
d5f3d8
+				logging.debug("Status of %s is %s" % (service.binary, service.state))
d5f3d8
 				if service.binary == "nova-compute":
d5f3d8
 					if service.state == "up":
d5f3d8
 						status = "on"
d5f3d8
@@ -49,31 +52,91 @@ def get_power_status(_, options):
d5f3d8
 # NOTE(sbauza); We mimic the host-evacuate module since it's only a contrib
d5f3d8
 # module which is not stable
d5f3d8
 def _server_evacuate(server, on_shared_storage):
d5f3d8
-	success = True
d5f3d8
+	success = False
d5f3d8
 	error_message = ""
d5f3d8
 	try:
d5f3d8
-		nova.servers.evacuate(server=server['uuid'], on_shared_storage=on_shared_storage)
d5f3d8
+		logging.debug("Resurrecting instance: %s" % server)
d5f3d8
+		(response, dictionary) = nova.servers.evacuate(server=server, on_shared_storage=on_shared_storage)
d5f3d8
+
d5f3d8
+		if response == None:
d5f3d8
+			error_message = "No response while evacuating instance"
d5f3d8
+		elif response.status_code == 200:
d5f3d8
+			success = True
d5f3d8
+			error_message = response.reason
d5f3d8
+		else:
d5f3d8
+			error_message = response.reason
d5f3d8
+
d5f3d8
 	except Exception as e:
d5f3d8
-		success = False
d5f3d8
 		error_message = "Error while evacuating instance: %s" % e
d5f3d8
 
d5f3d8
 	return {
d5f3d8
-		"server_uuid": server['uuid'],
d5f3d8
-		"evacuate_accepted": success,
d5f3d8
-		"error_message": error_message,
d5f3d8
+		"uuid": server,
d5f3d8
+		"accepted": success,
d5f3d8
+		"reason": error_message,
d5f3d8
 		}
d5f3d8
 
d5f3d8
-def _host_evacuate(host, on_shared_storage):
d5f3d8
-	hypervisors = nova.hypervisors.search(host, servers=True)
d5f3d8
-	response = []
d5f3d8
-	for hyper in hypervisors:
d5f3d8
-		if hasattr(hyper, 'servers'):
d5f3d8
-			for server in hyper.servers:
d5f3d8
-				response.append(_server_evacuate(server, on_shared_storage))
d5f3d8
+def _is_server_evacuable(server, evac_flavors, evac_images):
d5f3d8
+	if server.flavor.get('id') in evac_flavors:
d5f3d8
+		return True
d5f3d8
+	if server.image.get('id') in evac_images:
d5f3d8
+		return True
d5f3d8
+	return False
d5f3d8
+
d5f3d8
+def _get_evacuable_flavors():
d5f3d8
+	result = []
d5f3d8
+	flavors = nova.flavors.list()
d5f3d8
+	# Since the detailed view for all flavors doesn't provide the extra specs,
d5f3d8
+	# we need to call each of the flavor to get them.
d5f3d8
+	for flavor in flavors:
d5f3d8
+		if flavor.get_keys().get(EVACUABLE_TAG).strip().lower() in TRUE_TAGS:
d5f3d8
+			result.append(flavor.id)
d5f3d8
+	return result
d5f3d8
+
d5f3d8
+def _get_evacuable_images():
d5f3d8
+	result = []
d5f3d8
+	images = nova.images.list(detailed=True)
d5f3d8
+	for image in images:
d5f3d8
+		if hasattr(image, 'metadata'):
d5f3d8
+			if image.metadata.get(EVACUABLE_TAG).strip.lower() in TRUE_TAGS:
d5f3d8
+				result.append(image.id)
d5f3d8
+	return result
d5f3d8
+
d5f3d8
+def _host_evacuate(options):
d5f3d8
+	result = True
d5f3d8
+	servers = nova.servers.list(search_opts={'host': options["--plug"]})
d5f3d8
+	if options["--instance-filtering"] == "False":
d5f3d8
+		evacuables = servers
d5f3d8
+	else:
d5f3d8
+		flavors = _get_evacuable_flavors()
d5f3d8
+		images = _get_evacuable_images()
d5f3d8
+		# Identify all evacuable servers
d5f3d8
+		evacuables = [server for server in servers
d5f3d8
+				if _is_server_evacuable(server, flavors, images)]
d5f3d8
+
d5f3d8
+	if options["--no-shared-storage"] != "False":
d5f3d8
+		on_shared_storage = False
d5f3d8
+	else:
d5f3d8
+		on_shared_storage = True
d5f3d8
+
d5f3d8
+	for server in evacuables:
d5f3d8
+		if hasattr(server, 'id'):
d5f3d8
+			response = _server_evacuate(server.id, on_shared_storage)
d5f3d8
+			if response["accepted"]:
d5f3d8
+				logging.debug("Evacuated %s from %s: %s" %
d5f3d8
+					      (response["uuid"], options["--plug"], response["reason"]))
d5f3d8
+			else:
d5f3d8
+				logging.error("Evacuation of %s on %s failed: %s" %
d5f3d8
+					      (response["uuid"], options["--plug"], response["reason"]))
d5f3d8
+				result = False
d5f3d8
+		else:
d5f3d8
+			logging.error("Could not evacuate instance: %s" % server.to_dict())
d5f3d8
+			# Should a malformed instance result in a failed evacuation?
d5f3d8
+			# result = False
d5f3d8
+	return result
d5f3d8
 
d5f3d8
 def set_attrd_status(host, status, options):
d5f3d8
 	logging.debug("Setting fencing status for %s to %s" % (host, status))
d5f3d8
-	run_command(options, "attrd_updater -p -n evacute -Q -N %s -v %s" % (host, status))
d5f3d8
+	run_command(options, "attrd_updater -p -n evacuate -Q -N %s -U %s" % (host, status))
d5f3d8
 
d5f3d8
 def set_power_status(_, options):
d5f3d8
 	global override_status
d5f3d8
@@ -86,28 +149,53 @@ def set_power_status(_, options):
d5f3d8
 
d5f3d8
 	if options["--action"] == "on":
d5f3d8
 		if get_power_status(_, options) == "on":
d5f3d8
+			# Forcing the service back up in case it was disabled
d5f3d8
 			nova.services.enable(options["--plug"], 'nova-compute')
d5f3d8
+			try:
d5f3d8
+				# Forcing the host back up
d5f3d8
+				nova.services.force_down(
d5f3d8
+					options["--plug"], "nova-compute", force_down=False)
d5f3d8
+			except Exception as e:
d5f3d8
+				# In theory, if force_down=False fails, that's for the exact
d5f3d8
+				# same possible reasons as below with force_down=True
d5f3d8
+				# eg. either an incompatible version or an old client.
d5f3d8
+				# Since it's about forcing back to a default value, there is
d5f3d8
+				# no real harm in considering it still okay even if the
d5f3d8
+				# command failed
d5f3d8
+				logging.info("Exception from attempt to force "
d5f3d8
+					      "host back up via nova API: "
d5f3d8
+					      "%s: %s" % (e.__class__.__name__, e))
d5f3d8
 		else:
d5f3d8
 			# Pretend we're 'on' so that the fencing library doesn't loop forever waiting for the node to boot
d5f3d8
 			override_status = "on"
d5f3d8
 		return
d5f3d8
 
d5f3d8
-	# need to wait for nova to update its internal status or we
d5f3d8
-	# cannot call host-evacuate
d5f3d8
-	while get_power_status(_, options) != "off":
d5f3d8
-		# Loop forever if need be.
d5f3d8
-		#
d5f3d8
-		# Some callers (such as Pacemaker) will have a timer
d5f3d8
-		# running and kill us if necessary
d5f3d8
-		logging.debug("Waiting for nova to update it's internal state")
d5f3d8
-		time.sleep(1)
d5f3d8
-
d5f3d8
-	if options["--no-shared-storage"] != "False":
d5f3d8
-		on_shared_storage = False
d5f3d8
-	else:
d5f3d8
-		on_shared_storage = True
d5f3d8
+	try:
d5f3d8
+		nova.services.force_down(
d5f3d8
+			options["--plug"], "nova-compute", force_down=True)
d5f3d8
+	except Exception as e:
d5f3d8
+		# Something went wrong when we tried to force the host down.
d5f3d8
+		# That could come from either an incompatible API version
d5f3d8
+		# eg. UnsupportedVersion or VersionNotFoundForAPIMethod
d5f3d8
+		# or because novaclient is old and doesn't include force_down yet
d5f3d8
+		# eg. AttributeError
d5f3d8
+		# In that case, fall back to waiting for Nova to catch the right state.
d5f3d8
+
d5f3d8
+		logging.error("Exception from attempt to force host down via nova API: "
d5f3d8
+			      "%s: %s" % (e.__class__.__name__, e))
d5f3d8
+		# need to wait for nova to update its internal status or we
d5f3d8
+		# cannot call host-evacuate
d5f3d8
+		while get_power_status(_, options) != "off":
d5f3d8
+			# Loop forever if need be.
d5f3d8
+			#
d5f3d8
+			# Some callers (such as Pacemaker) will have a timer
d5f3d8
+			# running and kill us if necessary
d5f3d8
+			logging.debug("Waiting for nova to update it's internal state for %s" % options["--plug"])
d5f3d8
+			time.sleep(1)
d5f3d8
+
d5f3d8
+	if not _host_evacuate(options):
d5f3d8
+		sys.exit(1)
d5f3d8
 
d5f3d8
-	_host_evacuate(options["--plug"], on_shared_storage)
d5f3d8
 	return
d5f3d8
 
d5f3d8
 def get_plugs_list(_, options):
d5f3d8
@@ -117,9 +205,9 @@ def get_plugs_list(_, options):
d5f3d8
 		hypervisors = nova.hypervisors.list()
d5f3d8
 		for hypervisor in hypervisors:
d5f3d8
 			longhost = hypervisor.hypervisor_hostname
d5f3d8
-			if options["--action"] == "list" and options["--domain"] != "":
d5f3d8
-				shorthost = longhost.replace("." + options["--domain"],
d5f3d8
-                                                 "")
d5f3d8
+			if options["--domain"] != "":
d5f3d8
+				shorthost = longhost.replace("." + options["--domain"], "")
d5f3d8
+				result[longhost] = ("", None)
d5f3d8
 				result[shorthost] = ("", None)
d5f3d8
 			else:
d5f3d8
 				result[longhost] = ("", None)
d5f3d8
@@ -164,7 +252,7 @@ def define_new_opts():
d5f3d8
 		"order": 5,
d5f3d8
 	}
d5f3d8
 	all_opt["record-only"] = {
d5f3d8
-		"getopt" : "",
d5f3d8
+		"getopt" : "r:",
d5f3d8
 		"longopt" : "record-only",
d5f3d8
 		"help" : "--record-only                  Record the target as needing evacuation but as yet do not intiate it",
d5f3d8
 		"required" : "0",
d5f3d8
@@ -172,6 +260,15 @@ def define_new_opts():
d5f3d8
 		"default" : "False",
d5f3d8
 		"order": 5,
d5f3d8
 	}
d5f3d8
+	all_opt["instance-filtering"] = {
d5f3d8
+		"getopt" : "",
d5f3d8
+		"longopt" : "instance-filtering",
d5f3d8
+		"help" : "--instance-filtering           Only evacuate instances create from images and flavors with evacuable=true",
d5f3d8
+		"required" : "0",
d5f3d8
+		"shortdesc" : "Only evacuate flagged instances",
d5f3d8
+		"default" : "False",
d5f3d8
+		"order": 5,
d5f3d8
+	}
d5f3d8
 	all_opt["no-shared-storage"] = {
d5f3d8
 		"getopt" : "",
d5f3d8
 		"longopt" : "no-shared-storage",
d5f3d8
@@ -187,17 +284,17 @@ def main():
d5f3d8
 	global nova
d5f3d8
 	atexit.register(atexit_handler)
d5f3d8
 
d5f3d8
-	device_opt = ["login", "passwd", "tenant-name", "auth-url",
d5f3d8
+	device_opt = ["login", "passwd", "tenant-name", "auth-url", "fabric_fencing", "on_target",
d5f3d8
 		"no_login", "no_password", "port", "domain", "no-shared-storage", "endpoint-type",
d5f3d8
-		"record-only"]
d5f3d8
+		"record-only", "instance-filtering"]
d5f3d8
 	define_new_opts()
d5f3d8
 	all_opt["shell_timeout"]["default"] = "180"
d5f3d8
 
d5f3d8
 	options = check_input(device_opt, process_input(device_opt))
d5f3d8
 
d5f3d8
 	docs = {}
d5f3d8
-	docs["shortdesc"] = "Fence agent for nova compute nodes"
d5f3d8
-	docs["longdesc"] = "fence_nova_host is a Nova fencing notification agent"
d5f3d8
+	docs["shortdesc"] = "Fence agent for the automatic resurrection of OpenStack compute instances"
d5f3d8
+	docs["longdesc"] = "Used to tell Nova that compute nodes are down and to reschedule flagged instances"
d5f3d8
 	docs["vendorurl"] = ""
d5f3d8
 
d5f3d8
 	show_docs(options, docs)
d5f3d8
@@ -213,7 +310,10 @@ def main():
d5f3d8
 	if options["--action"] != "list" and options["--domain"] != "" and options.has_key("--plug"):
d5f3d8
 		options["--plug"] = options["--plug"] + "." + options["--domain"]
d5f3d8
 
d5f3d8
-	if options["--record-only"] != "False":
d5f3d8
+	if options["--record-only"] in [ "2", "Disabled", "disabled" ]:
d5f3d8
+		sys.exit(0)
d5f3d8
+
d5f3d8
+	elif options["--record-only"] in [ "1", "True", "true", "Yes", "yes"]:
d5f3d8
 		if options["--action"] == "on":
d5f3d8
 			set_attrd_status(options["--plug"], "no", options)
d5f3d8
 			sys.exit(0)
d5f3d8
@@ -222,7 +322,7 @@ def main():
d5f3d8
 			set_attrd_status(options["--plug"], "yes", options)
d5f3d8
 			sys.exit(0)
d5f3d8
 
d5f3d8
-		elif options["--action"] in ["status", "monitor"]:
d5f3d8
+		elif options["--action"] in ["monitor", "status"]:
d5f3d8
 			sys.exit(0)
d5f3d8
 
d5f3d8
 	# The first argument is the Nova client version
d5f3d8
diff --git a/tests/data/metadata/fence_compute.xml b/tests/data/metadata/fence_compute.xml
d5f3d8
index 846a861..98bed4e 100644
d5f3d8
--- a/tests/data/metadata/fence_compute.xml
d5f3d8
+++ b/tests/data/metadata/fence_compute.xml
d5f3d8
@@ -1,6 +1,6 @@
d5f3d8
 
d5f3d8
-<resource-agent name="fence_compute" shortdesc="Fence agent for nova compute nodes" >
d5f3d8
-<longdesc>fence_nova_host is a Nova fencing notification agent</longdesc>
d5f3d8
+<resource-agent name="fence_compute" shortdesc="Fence agent for the automatic resurrection of OpenStack compute instances" >
d5f3d8
+<longdesc>Used to tell Nova that compute nodes are down and to reschedule flagged instances</longdesc>
d5f3d8
 <vendor-url></vendor-url>
d5f3d8
 <parameters>
d5f3d8
 	<parameter name="port" unique="0" required="1">
d5f3d8
@@ -35,7 +35,7 @@
d5f3d8
 	</parameter>
d5f3d8
 	<parameter name="action" unique="0" required="1">
d5f3d8
 		<getopt mixed="-o, --action=[action]" />
d5f3d8
-		<content type="string" default="reboot"  />
d5f3d8
+		<content type="string" default="off"  />
d5f3d8
 		<shortdesc lang="en">Fencing Action</shortdesc>
d5f3d8
 	</parameter>
d5f3d8
 	<parameter name="login" unique="0" required="0">
d5f3d8
@@ -48,6 +48,11 @@
d5f3d8
 		<content type="string"  />
d5f3d8
 		<shortdesc lang="en">DNS domain in which hosts live</shortdesc>
d5f3d8
 	</parameter>
d5f3d8
+	<parameter name="instance-filtering" unique="0" required="0">
d5f3d8
+		<getopt mixed="--instance-filtering" />
d5f3d8
+		<content type="boolean" default="False"  />
d5f3d8
+		<shortdesc lang="en">Only evacuate flagged instances</shortdesc>
d5f3d8
+	</parameter>
d5f3d8
 	<parameter name="no-shared-storage" unique="0" required="0">
d5f3d8
 		<getopt mixed="--no-shared-storage" />
d5f3d8
 		<content type="boolean" default="False"  />
d5f3d8
@@ -55,7 +60,7 @@
d5f3d8
 	</parameter>
d5f3d8
 	<parameter name="record-only" unique="0" required="0">
d5f3d8
 		<getopt mixed="--record-only" />
d5f3d8
-		<content type="boolean" default="False"  />
d5f3d8
+		<content type="string" default="False"  />
d5f3d8
 		<shortdesc lang="en">Only record the target as needing evacuation</shortdesc>
d5f3d8
 	</parameter>
d5f3d8
 	<parameter name="verbose" unique="0" required="0">
d5f3d8
@@ -115,9 +120,8 @@
d5f3d8
 	</parameter>
d5f3d8
 </parameters>
d5f3d8
 <actions>
d5f3d8
-	<action name="on" automatic="0"/>
d5f3d8
+	<action name="on" on_target="1" automatic="1"/>
d5f3d8
 	<action name="off" />
d5f3d8
-	<action name="reboot" />
d5f3d8
 	<action name="status" />
d5f3d8
 	<action name="list" />
d5f3d8
 	<action name="list-status" />
d5f3d8
-- 
d5f3d8
2.4.3
d5f3d8