de47d7
From 2e88d7ecfdd096ec3cd0b2fe7be0dacef74fe0c5 Mon Sep 17 00:00:00 2001
26b369
From: Mark Reynolds <mreynolds@redhat.com>
26b369
Date: Tue, 19 May 2020 11:25:13 -0400
de47d7
Subject: [PATCH 2/2] Issue 50745: ns-slapd hangs during CleanAllRUV tests
26b369
26b369
Bug Description:
26b369
	The hang condition:
26b369
		- is not systematic
26b369
		- occurs in rare cases, for example here during the deletion of a replica.
26b369
		- a thread is waiting for a dblock that another thread "forgot" to
26b369
		  release.
26b369
		- has always existed, at least since 1.4.0 but likely since 1.2.x
26b369
26b369
	When deleting a replica, the replica is retrieved from
26b369
	the mapping tree structure (mtnode).
26b369
	The replica is also retrieved through the mapping tree
26b369
	when writing updates to the changelog.
26b369
26b369
	When deleting the replica, the mapping tree structure is cleared
26b369
	after the changelog is deleted (that can take some cycles).
26b369
	There is a window where an update can retrieve the replica,
26b369
	from the not-yet-cleared mapping tree, while the changelog is being removed.
26b369
26b369
	At the end, the update will update the changelog that is
26b369
	currently being removed and keep an unreleased lock in the DB.
26b369
26b369
Fix description:
26b369
	Ideally the mapping tree should be protected by a lock, but it
26b369
	is not done systematically (e.g.  slapi_get_mapping_tree_node).
26b369
	Using a lock looks like overkill and can probably introduce
26b369
	deadlocks and a performance hit.
26b369
	The idea of the fix is to reduce the window by moving the
26b369
	mapping tree clear before the changelog removal.
26b369
26b369
https://pagure.io/389-ds-base/issue/50745
26b369
26b369
Reviewed by: Mark Reynolds, Ludwig Krispenz
26b369
---
26b369
 ldap/servers/plugins/replication/repl5_replica_config.c | 6 +++---
26b369
 1 file changed, 3 insertions(+), 3 deletions(-)
26b369
26b369
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
26b369
index 80a079784..95b7fa50e 100644
26b369
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
26b369
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
26b369
@@ -735,18 +735,18 @@ replica_config_delete(Slapi_PBlock *pb __attribute__((unused)),
26b369
     PR_ASSERT(mtnode_ext);
26b369
 
26b369
     if (mtnode_ext->replica) {
26b369
+        Object *repl_obj = mtnode_ext->replica;
26b369
         /* remove object from the hash */
26b369
         r = (Replica *)object_get_data(mtnode_ext->replica);
26b369
+        mtnode_ext->replica = NULL;
26b369
         PR_ASSERT(r);
26b369
         /* The changelog for this replica is no longer valid, so we should remove it. */
26b369
         slapi_log_err(SLAPI_LOG_WARNING, repl_plugin_name, "replica_config_delete - "
26b369
                                                            "The changelog for replica %s is no longer valid since "
26b369
                                                            "the replica config is being deleted.  Removing the changelog.\n",
26b369
                       slapi_sdn_get_dn(replica_get_root(r)));
26b369
-        cl5DeleteDBSync(mtnode_ext->replica);
26b369
+        cl5DeleteDBSync(repl_obj);
26b369
         replica_delete_by_name(replica_get_name(r));
26b369
-        object_release(mtnode_ext->replica);
26b369
-        mtnode_ext->replica = NULL;
26b369
     }
26b369
 
26b369
     PR_Unlock(s_configLock);
26b369
-- 
de47d7
2.25.4
26b369