Blame SOURCES/0100-kpatch-wait-for-module-ref-counts-on-unload.patch

f85c19
From cdee6bd650a35075515d4fe2bb67657811c9640c Mon Sep 17 00:00:00 2001
f85c19
From: Joe Lawrence <joe.lawrence@redhat.com>
f85c19
Date: Mon, 16 Nov 2020 15:21:59 -0500
f85c19
Subject: [PATCH] kpatch: wait for module ref counts on unload
f85c19
f85c19
There exists a very small timing window in which "kpatch unload" gets to
f85c19
its "rmmod" step before the kpatch-patch module's reference count has
f85c19
cleared and the "rmmod" fails.
f85c19
f85c19
This is only a transient problem, but we can adopt code from upstream
f85c19
livepatch kselftests which wait for the module refcounts to settle
f85c19
before moving on to "rmmod".
f85c19
f85c19
A small wrinkle is that this is not supported by the older kpatch.ko
f85c19
core.  The price for circumventing the activeness safety check via
f85c19
KPATCH_FORCE_UNSAFE is that it must leave the kpatch patch modules in
f85c19
place (see e1890e627a9b ("prevent rmmod of forced modules")).
f85c19
f85c19
Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
f85c19
---
f85c19
 kpatch/kpatch | 40 ++++++++++++++++++++++++++++++++++++++--
f85c19
 1 file changed, 38 insertions(+), 2 deletions(-)
f85c19
f85c19
diff --git a/kpatch/kpatch b/kpatch/kpatch
f85c19
index bca8f41..b35b742 100755
f85c19
--- a/kpatch/kpatch
f85c19
+++ b/kpatch/kpatch
f85c19
@@ -28,6 +28,7 @@ SCRIPTDIR="$(readlink -f "$(dirname "$(type -p "$0")")")"
f85c19
 VERSION="0.9.2"
f85c19
 POST_ENABLE_WAIT=15	# seconds
f85c19
 POST_SIGNAL_WAIT=60	# seconds
f85c19
+MODULE_REF_WAIT=15	# seconds
f85c19
 
f85c19
 # How many times to try loading the patch if activeness safety check fails.
f85c19
 MAX_LOAD_ATTEMPTS=5
f85c19
@@ -125,6 +126,10 @@ find_core_module() {
f85c19
 	return 1
f85c19
 }
f85c19
 
f85c19
+kpatch_core_loaded() {
f85c19
+	grep -q -e "T kpatch_register" /proc/kallsyms
f85c19
+}
f85c19
+
f85c19
 core_loaded () {
f85c19
 	grep -q -e "T klp_enable_patch" -e "T kpatch_register" /proc/kallsyms
f85c19
 }
f85c19
@@ -265,6 +270,31 @@ wait_for_patch_transition() {
f85c19
 	return 1
f85c19
 }
f85c19
 
f85c19
+module_ref_count() {
f85c19
+	local modname="$1"
f85c19
+	[[ $(cat "/sys/module/$modname/refcnt" 2>/dev/null) != "0" ]]
f85c19
+}
f85c19
+
f85c19
+wait_for_zero_module_ref_count() {
f85c19
+	local modname="$1"
f85c19
+	local i=0
f85c19
+
f85c19
+	# We can't rely on a zero refcount with kpatch.ko as it
f85c19
+	# implements KPATCH_FORCE_UNSAFE with an additional reference on
f85c19
+	# kpatch-patch modules to avoid potential crashes.
f85c19
+	kpatch_core_loaded && return 0
f85c19
+
f85c19
+	module_ref_count "$modname" || return 0
f85c19
+
f85c19
+	echo "waiting (up to $MODULE_REF_WAIT seconds) for module refcount..."
f85c19
+	for (( i=0; i<MODULE_REF_WAIT; i++ )); do
f85c19
+		module_ref_count "$modname" || return 0
f85c19
+		sleep 1s
f85c19
+	done
f85c19
+
f85c19
+	return 1
f85c19
+}
f85c19
+
f85c19
 load_module () {
f85c19
 	local module="$1"
f85c19
 
f85c19
@@ -381,10 +411,16 @@ disable_patch_strict () {
f85c19
 }
f85c19
 
f85c19
 remove_module () {
f85c19
-	echo "unloading patch module: $1"
f85c19
+	local modname="$1"
f85c19
+
f85c19
+	if ! wait_for_zero_module_ref_count "$modname"; then
f85c19
+		die "failed to unload module $modname (refcnt)"
f85c19
+	fi
f85c19
+
f85c19
+	echo "unloading patch module: $modname"
f85c19
 	# ignore any error here because rmmod can fail if the module used
f85c19
 	# KPATCH_FORCE_UNSAFE.
f85c19
-	rmmod "$1" 2> /dev/null || return 0
f85c19
+	rmmod "$modname" 2> /dev/null || return 0
f85c19
 }
f85c19
 
f85c19
 unload_module () {
f85c19
-- 
f85c19
2.25.4
f85c19