|
|
8394b4 |
From 06e1fe32e47b98efaa3598629fb59e5f7791e28d Mon Sep 17 00:00:00 2001
|
|
|
8394b4 |
From: Thierry Bordaz <tbordaz@redhat.com>
|
|
|
8394b4 |
Date: Wed, 27 Nov 2019 14:04:14 +0100
|
|
|
8394b4 |
Subject: [PATCH] Ticket 50745: ns-slapd hangs during CleanAllRUV tests
|
|
|
8394b4 |
|
|
|
8394b4 |
Bug Description:
|
|
|
8394b4 |
The hang condition:
|
|
|
8394b4 |
- is not systematic
|
|
|
8394b4 |
- occurs in rare cases, for example here during the deletion of a replica.
|
|
|
8394b4 |
- a thread is waiting for a dblock that another thread "forgot" to
|
|
|
8394b4 |
release.
|
|
|
8394b4 |
- has always existed, at least since 1.4.0 but likely since 1.2.x
|
|
|
8394b4 |
|
|
|
8394b4 |
When deleting a replica, the replica is retrieved from
|
|
|
8394b4 |
the mapping tree structure (mtnode).
|
|
|
8394b4 |
The replica is also retrieved through the mapping tree
|
|
|
8394b4 |
when writing updates to the changelog.
|
|
|
8394b4 |
|
|
|
8394b4 |
When deleting the replica, the mapping tree structure is cleared
|
|
|
8394b4 |
after the changelog is deleted (that can take some cycles).
|
|
|
8394b4 |
There is a window where an update can retrieve the replica,
|
|
|
8394b4 |
from the not-yet-cleared MT, while the changelog is being removed.
|
|
|
8394b4 |
|
|
|
8394b4 |
At the end, the update will update the changelog that is
|
|
|
8394b4 |
currently being removed and keeps an unreleased lock in the DB.
|
|
|
8394b4 |
|
|
|
8394b4 |
Fix description:
|
|
|
8394b4 |
Ideally mapping tree should be protected by a lock but it
|
|
|
8394b4 |
is not done systematically (e.g. slapi_get_mapping_tree_node).
|
|
|
8394b4 |
Using a lock looks like overkill and can probably introduce
|
|
|
8394b4 |
deadlocks and a performance hit.
|
|
|
8394b4 |
The idea of the fix is to reduce the window, moving the
|
|
|
8394b4 |
mapping tree clear before the changelog removal.
|
|
|
8394b4 |
|
|
|
8394b4 |
https://pagure.io/389-ds-base/issue/50745
|
|
|
8394b4 |
|
|
|
8394b4 |
Reviewed by: Mark Reynolds, Ludwig Krispenz
|
|
|
8394b4 |
---
|
|
|
8394b4 |
ldap/servers/plugins/replication/repl5_replica_config.c | 5 ++++-
|
|
|
8394b4 |
1 file changed, 4 insertions(+), 1 deletion(-)
|
|
|
8394b4 |
|
|
|
8394b4 |
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
8394b4 |
index 79b257564..02b36f6ad 100644
|
|
|
8394b4 |
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
8394b4 |
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
8394b4 |
@@ -757,6 +757,10 @@ replica_config_delete(Slapi_PBlock *pb __attribute__((unused)),
|
|
|
8394b4 |
if (mtnode_ext->replica) {
|
|
|
8394b4 |
/* remove object from the hash */
|
|
|
8394b4 |
r = (Replica *)object_get_data(mtnode_ext->replica);
|
|
|
8394b4 |
+ mtnode_ext->replica = NULL; /* moving it before deleting the CL because
|
|
|
8394b4 |
+ * deletion can take some time giving the opportunity
|
|
|
8394b4 |
+ * to an operation to start while CL is deleted
|
|
|
8394b4 |
+ */
|
|
|
8394b4 |
PR_ASSERT(r);
|
|
|
8394b4 |
/* The changelog for this replica is no longer valid, so we should remove it. */
|
|
|
8394b4 |
slapi_log_err(SLAPI_LOG_WARNING, repl_plugin_name, "replica_config_delete - "
|
|
|
8394b4 |
@@ -765,7 +769,6 @@ replica_config_delete(Slapi_PBlock *pb __attribute__((unused)),
|
|
|
8394b4 |
slapi_sdn_get_dn(replica_get_root(r)));
|
|
|
8394b4 |
cl5DeleteDBSync(r);
|
|
|
8394b4 |
replica_delete_by_name(replica_get_name(r));
|
|
|
8394b4 |
- mtnode_ext->replica = NULL;
|
|
|
8394b4 |
}
|
|
|
8394b4 |
|
|
|
8394b4 |
PR_Unlock(s_configLock);
|
|
|
8394b4 |
--
|
|
|
8394b4 |
2.21.1
|
|
|
8394b4 |
|