|
|
96373c |
From 0ac68e15a9a4048d3c1ad4519000996cd65fdefb Mon Sep 17 00:00:00 2001
|
|
|
96373c |
From: Thierry Bordaz <tbordaz@redhat.com>
|
|
|
96373c |
Date: Fri, 1 Dec 2017 16:23:11 +0100
|
|
|
96373c |
Subject: [PATCH] Ticket 49463 - After cleanALLruv, there is a flow of keep
|
|
|
96373c |
alive DEL
|
|
|
96373c |
|
|
|
96373c |
Bug Description:
|
|
|
96373c |
When cleanAllRuv is launched, it spawns cleanAllRuv on all replicas.
|
|
|
96373c |
Each replica will clean its changelog and database RUV AND in addition
|
|
|
96373c |
will DEL the keep alive entry of the target ReplicaID.
|
|
|
96373c |
So for the same entry (keep alive) there will be as many DELs as there are replicas.
|
|
|
96373c |
|
|
|
96373c |
This flow of DEL is useless as only one DEL is enough.
|
|
|
96373c |
In addition because of https://pagure.io/389-ds-base/issue/49466, replication may
|
|
|
96373c |
loop on each of those DELs.
|
|
|
96373c |
|
|
|
96373c |
Fix Description:
|
|
|
96373c |
The fix is only to prevent the flow of DEL.
|
|
|
96373c |
It adds a flag ('original_task') in the task payload.
|
|
|
96373c |
The server receiving the task (replica_execute_cleanall_ruv_task) flags the
|
|
|
96373c |
task as 'original_task'.
|
|
|
96373c |
Conversely, the propagated cleanAllRuv (multimaster_extop_cleanruv) does
|
|
|
96373c |
not flag the task as 'original_task'
|
|
|
96373c |
Only the original task does the DEL of the keep alive entry.
|
|
|
96373c |
Note the propagated payload (extop) is not changed. In a mixed version
|
|
|
96373c |
environment, "old" servers will still DEL the keep alive entry, so the flow can still happen.
|
|
|
96373c |
|
|
|
96373c |
https://pagure.io/389-ds-base/issue/49466
|
|
|
96373c |
|
|
|
96373c |
Reviewed by: Ludwig Krispenz
|
|
|
96373c |
|
|
|
96373c |
Platforms tested: F23
|
|
|
96373c |
|
|
|
96373c |
Flag Day: no
|
|
|
96373c |
|
|
|
96373c |
Doc impact: no
|
|
|
96373c |
---
|
|
|
96373c |
ldap/servers/plugins/replication/repl5.h | 49 ++++++++++++----------
|
|
|
96373c |
ldap/servers/plugins/replication/repl5_replica.c | 21 ++++++++++
|
|
|
96373c |
.../plugins/replication/repl5_replica_config.c | 32 +++++++++++---
|
|
|
96373c |
ldap/servers/plugins/replication/repl_extop.c | 2 +
|
|
|
96373c |
4 files changed, 76 insertions(+), 28 deletions(-)
|
|
|
96373c |
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
|
|
|
96373c |
index 4e206a0fc..e08fec752 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl5.h
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl5.h
|
|
|
96373c |
@@ -783,12 +783,37 @@ void multimaster_mtnode_construct_replicas(void);
|
|
|
96373c |
|
|
|
96373c |
void multimaster_be_state_change(void *handle, char *be_name, int old_be_state, int new_be_state);
|
|
|
96373c |
|
|
|
96373c |
+#define CLEANRIDSIZ 64 /* maximum number for concurrent CLEANALLRUV tasks */
|
|
|
96373c |
+
|
|
|
96373c |
+typedef struct _cleanruv_data
|
|
|
96373c |
+{
|
|
|
96373c |
+ Object *repl_obj;
|
|
|
96373c |
+ Replica *replica;
|
|
|
96373c |
+ ReplicaId rid;
|
|
|
96373c |
+ Slapi_Task *task;
|
|
|
96373c |
+ struct berval *payload;
|
|
|
96373c |
+ CSN *maxcsn;
|
|
|
96373c |
+ char *repl_root;
|
|
|
96373c |
+ Slapi_DN *sdn;
|
|
|
96373c |
+ char *certify;
|
|
|
96373c |
+ char *force;
|
|
|
96373c |
+ PRBool original_task;
|
|
|
96373c |
+} cleanruv_data;
|
|
|
96373c |
+
|
|
|
96373c |
+typedef struct _cleanruv_purge_data
|
|
|
96373c |
+{
|
|
|
96373c |
+ int cleaned_rid;
|
|
|
96373c |
+ const Slapi_DN *suffix_sdn;
|
|
|
96373c |
+ char *replName;
|
|
|
96373c |
+ char *replGen;
|
|
|
96373c |
+} cleanruv_purge_data;
|
|
|
96373c |
+
|
|
|
96373c |
/* In repl5_replica_config.c */
|
|
|
96373c |
int replica_config_init(void);
|
|
|
96373c |
void replica_config_destroy(void);
|
|
|
96373c |
int get_replica_type(Replica *r);
|
|
|
96373c |
int replica_execute_cleanruv_task_ext(Object *r, ReplicaId rid);
|
|
|
96373c |
-void add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing);
|
|
|
96373c |
+void add_cleaned_rid(cleanruv_data *data, char *maxcsn);
|
|
|
96373c |
int is_cleaned_rid(ReplicaId rid);
|
|
|
96373c |
int replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, int *returncode, char *returntext, void *arg);
|
|
|
96373c |
void replica_cleanallruv_thread_ext(void *arg);
|
|
|
96373c |
@@ -808,29 +833,7 @@ void set_cleaned_rid(ReplicaId rid);
|
|
|
96373c |
void cleanruv_log(Slapi_Task *task, int rid, char *task_type, int sev_level, char *fmt, ...);
|
|
|
96373c |
char *replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn);
|
|
|
96373c |
|
|
|
96373c |
-#define CLEANRIDSIZ 64 /* maximum number for concurrent CLEANALLRUV tasks */
|
|
|
96373c |
|
|
|
96373c |
-typedef struct _cleanruv_data
|
|
|
96373c |
-{
|
|
|
96373c |
- Object *repl_obj;
|
|
|
96373c |
- Replica *replica;
|
|
|
96373c |
- ReplicaId rid;
|
|
|
96373c |
- Slapi_Task *task;
|
|
|
96373c |
- struct berval *payload;
|
|
|
96373c |
- CSN *maxcsn;
|
|
|
96373c |
- char *repl_root;
|
|
|
96373c |
- Slapi_DN *sdn;
|
|
|
96373c |
- char *certify;
|
|
|
96373c |
- char *force;
|
|
|
96373c |
-} cleanruv_data;
|
|
|
96373c |
-
|
|
|
96373c |
-typedef struct _cleanruv_purge_data
|
|
|
96373c |
-{
|
|
|
96373c |
- int cleaned_rid;
|
|
|
96373c |
- const Slapi_DN *suffix_sdn;
|
|
|
96373c |
- char *replName;
|
|
|
96373c |
- char *replGen;
|
|
|
96373c |
-} cleanruv_purge_data;
|
|
|
96373c |
|
|
|
96373c |
/* replutil.c */
|
|
|
96373c |
LDAPControl *create_managedsait_control(void);
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
96373c |
index 77f4f18e4..e75807a62 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
96373c |
@@ -2120,6 +2120,7 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e)
|
|
|
96373c |
char csnstr[CSN_STRSIZE];
|
|
|
96373c |
char *token = NULL;
|
|
|
96373c |
char *forcing;
|
|
|
96373c |
+ PRBool original_task;
|
|
|
96373c |
char *csnpart;
|
|
|
96373c |
char *ridstr;
|
|
|
96373c |
char *iter = NULL;
|
|
|
96373c |
@@ -2151,8 +2152,15 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e)
|
|
|
96373c |
csn_init_by_string(maxcsn, csnpart);
|
|
|
96373c |
csn_as_string(maxcsn, PR_FALSE, csnstr);
|
|
|
96373c |
forcing = ldap_utf8strtok_r(iter, ":", &iter);
|
|
|
96373c |
+ original_task = PR_TRUE;
|
|
|
96373c |
if (forcing == NULL) {
|
|
|
96373c |
forcing = "no";
|
|
|
96373c |
+ } else if (!strcasecmp(forcing, "yes") || !strcasecmp(forcing, "no")) {
|
|
|
96373c |
+ /* forcing was correctly set, lets try to read the original task flag */
|
|
|
96373c |
+ token = ldap_utf8strtok_r(iter, ":", &iter);
|
|
|
96373c |
+ if (token && !atoi(token)) {
|
|
|
96373c |
+ original_task = PR_FALSE;
|
|
|
96373c |
+ }
|
|
|
96373c |
}
|
|
|
96373c |
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_NOTICE, repl_plugin_name, "CleanAllRUV Task - cleanAllRUV task found, "
|
|
|
96373c |
@@ -2190,6 +2198,13 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e)
|
|
|
96373c |
data->force = slapi_ch_strdup(forcing);
|
|
|
96373c |
data->repl_root = NULL;
|
|
|
96373c |
|
|
|
96373c |
+ /* This is a corner case, a cleanAllRuv task was interrupted by a shutdown or a crash
|
|
|
96373c |
+ * We retrieved from type_replicaCleanRUV if the cleanAllRuv request
|
|
|
96373c |
+ * was received from a direct task ADD or if it was received via
|
|
|
96373c |
+ * the cleanAllRuv extop.
|
|
|
96373c |
+ */
|
|
|
96373c |
+ data->original_task = original_task;
|
|
|
96373c |
+
|
|
|
96373c |
thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread_ext,
|
|
|
96373c |
(void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
|
|
|
96373c |
PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE);
|
|
|
96373c |
@@ -2284,6 +2299,12 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e)
|
|
|
96373c |
data->sdn = slapi_sdn_dup(r->repl_root);
|
|
|
96373c |
data->certify = slapi_ch_strdup(certify);
|
|
|
96373c |
|
|
|
96373c |
+ /* This is a corner case, a cleanAllRuv task was interrupted by a shutdown or a crash
|
|
|
96373c |
+ * Let's assume this replica was the original receiver of the task.
|
|
|
96373c |
+ * This flag has no impact on Abort cleanAllRuv
|
|
|
96373c |
+ */
|
|
|
96373c |
+ data->original_task = PR_TRUE;
|
|
|
96373c |
+
|
|
|
96373c |
thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread,
|
|
|
96373c |
(void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
|
|
|
96373c |
PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE);
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
96373c |
index 005528a41..95b933bb8 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
96373c |
@@ -1573,6 +1573,11 @@ replica_execute_cleanall_ruv_task(Object *r, ReplicaId rid, Slapi_Task *task, co
|
|
|
96373c |
data->repl_root = slapi_ch_strdup(basedn);
|
|
|
96373c |
data->force = slapi_ch_strdup(force_cleaning);
|
|
|
96373c |
|
|
|
96373c |
+ /* It is either a consequence of a direct ADD cleanAllRuv task
|
|
|
96373c |
+ * or modify of the replica to add nsds5task: cleanAllRuv
|
|
|
96373c |
+ */
|
|
|
96373c |
+ data->original_task = PR_TRUE;
|
|
|
96373c |
+
|
|
|
96373c |
thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread,
|
|
|
96373c |
(void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
|
|
|
96373c |
PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE);
|
|
|
96373c |
@@ -1702,7 +1707,7 @@ replica_cleanallruv_thread(void *arg)
|
|
|
96373c |
/*
|
|
|
96373c |
* Add the cleanallruv task to the repl config - so we can handle restarts
|
|
|
96373c |
*/
|
|
|
96373c |
- add_cleaned_rid(data->rid, data->replica, csnstr, data->force); /* marks config that we started cleaning a rid */
|
|
|
96373c |
+ add_cleaned_rid(data, csnstr); /* marks config that we started cleaning a rid */
|
|
|
96373c |
cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Cleaning rid (%d)...", data->rid);
|
|
|
96373c |
/*
|
|
|
96373c |
* First, wait for the maxcsn to be covered
|
|
|
96373c |
@@ -1878,7 +1883,13 @@ done:
|
|
|
96373c |
*/
|
|
|
96373c |
delete_cleaned_rid_config(data);
|
|
|
96373c |
check_replicas_are_done_cleaning(data);
|
|
|
96373c |
- remove_keep_alive_entry(data->task, data->rid, data->repl_root);
|
|
|
96373c |
+ if (data->original_task) {
|
|
|
96373c |
+ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Original task deletes Keep alive entry (%d).", data->rid);
|
|
|
96373c |
+ remove_keep_alive_entry(data->task, data->rid, data->repl_root);
|
|
|
96373c |
+ } else {
|
|
|
96373c |
+ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Propagated task does not delete Keep alive entry (%d).", data->rid);
|
|
|
96373c |
+ }
|
|
|
96373c |
+
|
|
|
96373c |
clean_agmts(data);
|
|
|
96373c |
remove_cleaned_rid(data->rid);
|
|
|
96373c |
cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Successfully cleaned rid(%d).", data->rid);
|
|
|
96373c |
@@ -2029,7 +2040,7 @@ check_replicas_are_done_cleaning(cleanruv_data *data)
|
|
|
96373c |
"Waiting for all the replicas to finish cleaning...");
|
|
|
96373c |
|
|
|
96373c |
csn_as_string(data->maxcsn, PR_FALSE, csnstr);
|
|
|
96373c |
- filter = PR_smprintf("(%s=%d:%s:%s)", type_replicaCleanRUV, (int)data->rid, csnstr, data->force);
|
|
|
96373c |
+ filter = PR_smprintf("(%s=%d:%s:%s:%d)", type_replicaCleanRUV, (int)data->rid, csnstr, data->force, data->original_task ? 1 : 0);
|
|
|
96373c |
while (not_all_cleaned && !is_task_aborted(data->rid) && !slapi_is_shutting_down()) {
|
|
|
96373c |
agmt_obj = agmtlist_get_first_agreement_for_replica(data->replica);
|
|
|
96373c |
if (agmt_obj == NULL) {
|
|
|
96373c |
@@ -2502,7 +2513,7 @@ set_cleaned_rid(ReplicaId rid)
|
|
|
96373c |
* Add the rid and maxcsn to the repl config (so we can resume after a server restart)
|
|
|
96373c |
*/
|
|
|
96373c |
void
|
|
|
96373c |
-add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing)
|
|
|
96373c |
+add_cleaned_rid(cleanruv_data *cleanruv_data, char *maxcsn)
|
|
|
96373c |
{
|
|
|
96373c |
Slapi_PBlock *pb;
|
|
|
96373c |
struct berval *vals[2];
|
|
|
96373c |
@@ -2512,6 +2523,16 @@ add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing)
|
|
|
96373c |
char data[CSN_STRSIZE + 10];
|
|
|
96373c |
char *dn;
|
|
|
96373c |
int rc;
|
|
|
96373c |
+ ReplicaId rid;
|
|
|
96373c |
+ Replica *r;
|
|
|
96373c |
+ char *forcing;
|
|
|
96373c |
+
|
|
|
96373c |
+ if (data == NULL) {
|
|
|
96373c |
+ return;
|
|
|
96373c |
+ }
|
|
|
96373c |
+ rid = cleanruv_data->rid;
|
|
|
96373c |
+ r = cleanruv_data->replica;
|
|
|
96373c |
+ forcing = cleanruv_data->force;
|
|
|
96373c |
|
|
|
96373c |
if (r == NULL || maxcsn == NULL) {
|
|
|
96373c |
return;
|
|
|
96373c |
@@ -2519,7 +2540,7 @@ add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing)
|
|
|
96373c |
/*
|
|
|
96373c |
* Write the rid & maxcsn to the config entry
|
|
|
96373c |
*/
|
|
|
96373c |
- val.bv_len = PR_snprintf(data, sizeof(data), "%d:%s:%s", rid, maxcsn, forcing);
|
|
|
96373c |
+ val.bv_len = PR_snprintf(data, sizeof(data), "%d:%s:%s:%d", rid, maxcsn, forcing, cleanruv_data->original_task ? 1 : 0);
|
|
|
96373c |
dn = replica_get_dn(r);
|
|
|
96373c |
pb = slapi_pblock_new();
|
|
|
96373c |
mod.mod_op = LDAP_MOD_ADD | LDAP_MOD_BVALUES;
|
|
|
96373c |
@@ -2961,6 +2982,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb __attribute__((unused)),
|
|
|
96373c |
data->repl_root = slapi_ch_strdup(base_dn);
|
|
|
96373c |
data->sdn = NULL;
|
|
|
96373c |
data->certify = slapi_ch_strdup(certify_all);
|
|
|
96373c |
+ data->original_task = PR_TRUE;
|
|
|
96373c |
|
|
|
96373c |
thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread,
|
|
|
96373c |
(void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
96373c |
index c49c6bd8d..68e2544b4 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl_extop.c
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
96373c |
@@ -1412,6 +1412,7 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb)
|
|
|
96373c |
data->rid = rid;
|
|
|
96373c |
data->repl_root = slapi_ch_strdup(repl_root);
|
|
|
96373c |
data->certify = slapi_ch_strdup(certify_all);
|
|
|
96373c |
+ data->original_task = PR_FALSE;
|
|
|
96373c |
/*
|
|
|
96373c |
* Set the aborted rid and stop the cleaning
|
|
|
96373c |
*/
|
|
|
96373c |
@@ -1555,6 +1556,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
|
|
|
96373c |
data->payload = slapi_ch_bvdup(extop_payload);
|
|
|
96373c |
data->force = slapi_ch_strdup(force);
|
|
|
96373c |
data->repl_root = slapi_ch_strdup(repl_root);
|
|
|
96373c |
+ data->original_task = PR_FALSE;
|
|
|
96373c |
|
|
|
96373c |
thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread_ext,
|
|
|
96373c |
(void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
|
|
|
96373c |
--
|
|
|
96373c |
2.13.6
|
|
|
96373c |
|