|
|
4c04d8 |
From 59f03e332061b2c68bb53eed5949ddcfdc563300 Mon Sep 17 00:00:00 2001
|
|
|
e79480 |
From: Mark Reynolds <mreynolds@redhat.com>
|
|
|
e79480 |
Date: Wed, 7 Aug 2019 20:36:53 -0400
|
|
|
e79480 |
Subject: [PATCH] Issue 50538 - cleanAllRUV task limit is not enforced for
|
|
|
e79480 |
replicated tasks
|
|
|
e79480 |
|
|
|
e79480 |
Bug Description:
|
|
|
e79480 |
|
|
|
e79480 |
There is a hard limit of 64 concurrent cleanAllRUV tasks, but this limit is
|
|
|
e79480 |
only enforced when creating "new" tasks. It was not enforced when a task was
|
|
|
e79480 |
received via an extended operation. There were also race conditions in the
|
|
|
e79480 |
existing logic that allowed the array of cleaned rids to get corrupted. This
|
|
|
e79480 |
allowed for a very large number of task threads to be created.
|
|
|
e79480 |
|
|
|
e79480 |
Fix Description:
|
|
|
e79480 |
|
|
|
e79480 |
Maintain a new counter to keep track of the number of clean and abort threads
|
|
|
e79480 |
to make sure it never overruns the rid array buffers.
|
|
|
e79480 |
|
|
|
e79480 |
relates: https://pagure.io/389-ds-base/issue/50538
|
|
|
e79480 |
|
|
|
e79480 |
Reviewed by: lkrispenz(Thanks!)
|
|
|
e79480 |
---
|
|
|
e79480 |
.../suites/replication/cleanallruv_test.py | 47 +++-
|
|
|
e79480 |
ldap/servers/plugins/replication/repl5.h | 7 +-
|
|
|
e79480 |
.../replication/repl5_replica_config.c | 247 ++++++++++--------
|
|
|
e79480 |
ldap/servers/plugins/replication/repl_extop.c | 19 +-
|
|
|
e79480 |
4 files changed, 202 insertions(+), 118 deletions(-)
|
|
|
e79480 |
|
|
|
e79480 |
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
|
|
|
e79480 |
index 620a53e1a..43801dd52 100644
|
|
|
e79480 |
--- a/dirsrvtests/tests/suites/replication/cleanallruv_test.py
|
|
|
e79480 |
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
|
|
|
e79480 |
@@ -1,5 +1,5 @@
|
|
|
e79480 |
# --- BEGIN COPYRIGHT BLOCK ---
|
|
|
e79480 |
-# Copyright (C) 2016 Red Hat, Inc.
|
|
|
e79480 |
+# Copyright (C) 2019 Red Hat, Inc.
|
|
|
e79480 |
# All rights reserved.
|
|
|
e79480 |
#
|
|
|
e79480 |
# License: GPL (version 3 or any later version).
|
|
|
e79480 |
@@ -7,7 +7,6 @@
|
|
|
e79480 |
# --- END COPYRIGHT BLOCK ---
|
|
|
e79480 |
#
|
|
|
e79480 |
import threading
|
|
|
e79480 |
-
|
|
|
e79480 |
import pytest
|
|
|
e79480 |
from lib389.tasks import *
|
|
|
e79480 |
from lib389.utils import *
|
|
|
e79480 |
@@ -859,6 +858,50 @@ def test_multiple_tasks_with_force(topology_m4):
|
|
|
e79480 |
restore_master4(topology_m4)
|
|
|
e79480 |
|
|
|
e79480 |
|
|
|
e79480 |
+def test_max_tasks(topology_m4):
|
|
|
e79480 |
+ """Test we can not create more than 64 cleaning tasks
|
|
|
e79480 |
+
|
|
|
e79480 |
+ :id: c34d0b40-3c3e-4f53-8656-5e4c2a310a1f
|
|
|
e79480 |
+ :setup: Replication setup with four masters
|
|
|
e79480 |
+ :steps:
|
|
|
e79480 |
+ 1. Stop masters 3 & 4
|
|
|
e79480 |
+ 2. Create over 64 tasks between m1 and m2
|
|
|
e79480 |
+ 3. Check logs to see if (>64) tasks were rejected
|
|
|
e79480 |
+
|
|
|
e79480 |
+ :expectedresults:
|
|
|
e79480 |
+ 1. Success
|
|
|
e79480 |
+ 2. Success
|
|
|
e79480 |
+ 3. Success
|
|
|
e79480 |
+ """
|
|
|
e79480 |
+
|
|
|
e79480 |
+ # Stop masters 3 & 4
|
|
|
e79480 |
+ m1 = topology_m4.ms["master1"]
|
|
|
e79480 |
+ m2 = topology_m4.ms["master2"]
|
|
|
e79480 |
+ m3 = topology_m4.ms["master3"]
|
|
|
e79480 |
+ m4 = topology_m4.ms["master4"]
|
|
|
e79480 |
+ m3.stop()
|
|
|
e79480 |
+ m4.stop()
|
|
|
e79480 |
+
|
|
|
e79480 |
+ # Add over 64 tasks between master1 & 2 to try to exceed the 64 task limit
|
|
|
e79480 |
+ for i in range(1, 64):
|
|
|
e79480 |
+ cruv_task = CleanAllRUVTask(m1)
|
|
|
e79480 |
+ cruv_task.create(properties={
|
|
|
e79480 |
+ 'replica-id': str(i),
|
|
|
e79480 |
+ 'replica-base-dn': DEFAULT_SUFFIX,
|
|
|
e79480 |
+ 'replica-force-cleaning': 'no', # This forces these tasks to stick around
|
|
|
e79480 |
+ })
|
|
|
e79480 |
+ cruv_task = CleanAllRUVTask(m2)
|
|
|
e79480 |
+ cruv_task.create(properties={
|
|
|
e79480 |
+ 'replica-id': "10" + str(i),
|
|
|
e79480 |
+ 'replica-base-dn': DEFAULT_SUFFIX,
|
|
|
e79480 |
+ 'replica-force-cleaning': 'yes', # This allows the tasks to propagate
|
|
|
e79480 |
+ })
|
|
|
e79480 |
+
|
|
|
e79480 |
+ # Check the errors log for our error message in master 1
|
|
|
e79480 |
+ assert m1.searchErrorsLog('Exceeded maximum number of active CLEANALLRUV tasks')
|
|
|
e79480 |
+
|
|
|
e79480 |
+
|
|
|
e79480 |
+
|
|
|
e79480 |
if __name__ == '__main__':
|
|
|
e79480 |
# Run isolated
|
|
|
e79480 |
# -s for DEBUG mode
|
|
|
e79480 |
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
|
|
|
4c04d8 |
index 3c7f06f36..b06c6fbf4 100644
|
|
|
e79480 |
--- a/ldap/servers/plugins/replication/repl5.h
|
|
|
e79480 |
+++ b/ldap/servers/plugins/replication/repl5.h
|
|
|
e79480 |
@@ -80,6 +80,8 @@
|
|
|
e79480 |
#define CLEANRUV_FINISHED "finished"
|
|
|
e79480 |
#define CLEANRUV_CLEANING "cleaning"
|
|
|
e79480 |
#define CLEANRUV_NO_MAXCSN "no maxcsn"
|
|
|
e79480 |
+#define CLEANALLRUV_ID "CleanAllRUV Task"
|
|
|
e79480 |
+#define ABORT_CLEANALLRUV_ID "Abort CleanAllRUV Task"
|
|
|
e79480 |
|
|
|
e79480 |
/* DS 5.0 replication protocol error codes */
|
|
|
e79480 |
#define NSDS50_REPL_REPLICA_READY 0x00 /* Replica ready, go ahead */
|
|
|
e79480 |
@@ -784,6 +786,7 @@ void multimaster_mtnode_construct_replicas(void);
|
|
|
e79480 |
void multimaster_be_state_change(void *handle, char *be_name, int old_be_state, int new_be_state);
|
|
|
e79480 |
|
|
|
e79480 |
#define CLEANRIDSIZ 64 /* maximum number for concurrent CLEANALLRUV tasks */
|
|
|
e79480 |
+#define CLEANRID_BUFSIZ 128
|
|
|
e79480 |
|
|
|
e79480 |
typedef struct _cleanruv_data
|
|
|
e79480 |
{
|
|
|
e79480 |
@@ -815,6 +818,8 @@ int get_replica_type(Replica *r);
|
|
|
e79480 |
int replica_execute_cleanruv_task_ext(Object *r, ReplicaId rid);
|
|
|
e79480 |
void add_cleaned_rid(cleanruv_data *data, char *maxcsn);
|
|
|
e79480 |
int is_cleaned_rid(ReplicaId rid);
|
|
|
e79480 |
+int32_t check_and_set_cleanruv_task_count(ReplicaId rid);
|
|
|
e79480 |
+int32_t check_and_set_abort_cleanruv_task_count(void);
|
|
|
e79480 |
int replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, int *returncode, char *returntext, void *arg);
|
|
|
e79480 |
void replica_cleanallruv_thread_ext(void *arg);
|
|
|
e79480 |
void stop_ruv_cleaning(void);
|
|
|
e79480 |
@@ -833,8 +838,6 @@ void set_cleaned_rid(ReplicaId rid);
|
|
|
e79480 |
void cleanruv_log(Slapi_Task *task, int rid, char *task_type, int sev_level, char *fmt, ...);
|
|
|
e79480 |
char *replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn);
|
|
|
e79480 |
|
|
|
e79480 |
-
|
|
|
e79480 |
-
|
|
|
e79480 |
/* replutil.c */
|
|
|
e79480 |
LDAPControl *create_managedsait_control(void);
|
|
|
e79480 |
LDAPControl *create_backend_control(Slapi_DN *sdn);
|
|
|
e79480 |
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
4c04d8 |
index 62bfcf6ce..80a079784 100644
|
|
|
e79480 |
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
e79480 |
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
e79480 |
@@ -30,17 +30,18 @@
|
|
|
e79480 |
#define CLEANALLRUV "CLEANALLRUV"
|
|
|
e79480 |
#define CLEANALLRUVLEN 11
|
|
|
e79480 |
#define REPLICA_RDN "cn=replica"
|
|
|
e79480 |
-#define CLEANALLRUV_ID "CleanAllRUV Task"
|
|
|
e79480 |
-#define ABORT_CLEANALLRUV_ID "Abort CleanAllRUV Task"
|
|
|
e79480 |
|
|
|
e79480 |
int slapi_log_urp = SLAPI_LOG_REPL;
|
|
|
e79480 |
-static ReplicaId cleaned_rids[CLEANRIDSIZ + 1] = {0};
|
|
|
e79480 |
-static ReplicaId pre_cleaned_rids[CLEANRIDSIZ + 1] = {0};
|
|
|
e79480 |
-static ReplicaId aborted_rids[CLEANRIDSIZ + 1] = {0};
|
|
|
e79480 |
-static Slapi_RWLock *rid_lock = NULL;
|
|
|
e79480 |
-static Slapi_RWLock *abort_rid_lock = NULL;
|
|
|
e79480 |
+static ReplicaId cleaned_rids[CLEANRID_BUFSIZ] = {0};
|
|
|
e79480 |
+static ReplicaId pre_cleaned_rids[CLEANRID_BUFSIZ] = {0};
|
|
|
e79480 |
+static ReplicaId aborted_rids[CLEANRID_BUFSIZ] = {0};
|
|
|
e79480 |
+static PRLock *rid_lock = NULL;
|
|
|
e79480 |
+static PRLock *abort_rid_lock = NULL;
|
|
|
e79480 |
static PRLock *notify_lock = NULL;
|
|
|
e79480 |
static PRCondVar *notify_cvar = NULL;
|
|
|
e79480 |
+static PRLock *task_count_lock = NULL;
|
|
|
e79480 |
+static int32_t clean_task_count = 0;
|
|
|
e79480 |
+static int32_t abort_task_count = 0;
|
|
|
e79480 |
|
|
|
e79480 |
/* Forward Declartions */
|
|
|
e79480 |
static int replica_config_add(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *entryAfter, int *returncode, char *returntext, void *arg);
|
|
|
e79480 |
@@ -67,8 +68,6 @@ static int replica_cleanallruv_send_abort_extop(Repl_Agmt *ra, Slapi_Task *task,
|
|
|
e79480 |
static int replica_cleanallruv_check_maxcsn(Repl_Agmt *agmt, char *basedn, char *rid_text, char *maxcsn, Slapi_Task *task);
|
|
|
e79480 |
static int replica_cleanallruv_replica_alive(Repl_Agmt *agmt);
|
|
|
e79480 |
static int replica_cleanallruv_check_ruv(char *repl_root, Repl_Agmt *ra, char *rid_text, Slapi_Task *task, char *force);
|
|
|
e79480 |
-static int get_cleanruv_task_count(void);
|
|
|
e79480 |
-static int get_abort_cleanruv_task_count(void);
|
|
|
e79480 |
static int replica_cleanup_task(Object *r, const char *task_name, char *returntext, int apply_mods);
|
|
|
e79480 |
static int replica_task_done(Replica *replica);
|
|
|
e79480 |
static void delete_cleaned_rid_config(cleanruv_data *data);
|
|
|
e79480 |
@@ -114,20 +113,27 @@ replica_config_init()
|
|
|
e79480 |
PR_GetError());
|
|
|
e79480 |
return -1;
|
|
|
e79480 |
}
|
|
|
e79480 |
- rid_lock = slapi_new_rwlock();
|
|
|
e79480 |
+ rid_lock = PR_NewLock();
|
|
|
e79480 |
if (rid_lock == NULL) {
|
|
|
e79480 |
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_config_init - "
|
|
|
e79480 |
"Failed to create rid_lock; NSPR error - %d\n",
|
|
|
e79480 |
PR_GetError());
|
|
|
e79480 |
return -1;
|
|
|
e79480 |
}
|
|
|
e79480 |
- abort_rid_lock = slapi_new_rwlock();
|
|
|
e79480 |
+ abort_rid_lock = PR_NewLock();
|
|
|
e79480 |
if (abort_rid_lock == NULL) {
|
|
|
e79480 |
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_config_init - "
|
|
|
e79480 |
"Failed to create abort_rid_lock; NSPR error - %d\n",
|
|
|
e79480 |
PR_GetError());
|
|
|
e79480 |
return -1;
|
|
|
e79480 |
}
|
|
|
e79480 |
+ task_count_lock = PR_NewLock();
|
|
|
e79480 |
+ if (task_count_lock == NULL) {
|
|
|
e79480 |
+ slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_config_init - "
|
|
|
e79480 |
+ "Failed to create task_count_lock; NSPR error - %d\n",
|
|
|
e79480 |
+ PR_GetError());
|
|
|
e79480 |
+ return -1;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
if ((notify_lock = PR_NewLock()) == NULL) {
|
|
|
e79480 |
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_config_init - "
|
|
|
e79480 |
"Failed to create notify lock; NSPR error - %d\n",
|
|
|
4c04d8 |
@@ -1483,12 +1489,6 @@ replica_execute_cleanall_ruv_task(Object *r, ReplicaId rid, Slapi_Task *task, co
|
|
|
e79480 |
|
|
|
e79480 |
cleanruv_log(pre_task, rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Initiating CleanAllRUV Task...");
|
|
|
e79480 |
|
|
|
e79480 |
- if (get_cleanruv_task_count() >= CLEANRIDSIZ) {
|
|
|
e79480 |
- /* we are already running the maximum number of tasks */
|
|
|
e79480 |
- cleanruv_log(pre_task, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR,
|
|
|
e79480 |
- "Exceeded maximum number of active CLEANALLRUV tasks(%d)", CLEANRIDSIZ);
|
|
|
e79480 |
- return LDAP_UNWILLING_TO_PERFORM;
|
|
|
e79480 |
- }
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Grab the replica
|
|
|
e79480 |
*/
|
|
|
4c04d8 |
@@ -1540,6 +1540,13 @@ replica_execute_cleanall_ruv_task(Object *r, ReplicaId rid, Slapi_Task *task, co
|
|
|
e79480 |
goto fail;
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
+ if (check_and_set_cleanruv_task_count(rid) != LDAP_SUCCESS) {
|
|
|
e79480 |
+ cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR,
|
|
|
e79480 |
+ "Exceeded maximum number of active CLEANALLRUV tasks(%d)", CLEANRIDSIZ);
|
|
|
e79480 |
+ rc = LDAP_UNWILLING_TO_PERFORM;
|
|
|
e79480 |
+ goto fail;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
+
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Launch the cleanallruv thread. Once all the replicas are cleaned it will release the rid
|
|
|
e79480 |
*/
|
|
|
4c04d8 |
@@ -1547,6 +1554,9 @@ replica_execute_cleanall_ruv_task(Object *r, ReplicaId rid, Slapi_Task *task, co
|
|
|
e79480 |
if (data == NULL) {
|
|
|
e79480 |
cleanruv_log(pre_task, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR, "Failed to allocate cleanruv_data. Aborting task.");
|
|
|
e79480 |
rc = -1;
|
|
|
e79480 |
+ PR_Lock(task_count_lock);
|
|
|
e79480 |
+ clean_task_count--;
|
|
|
e79480 |
+ PR_Unlock(task_count_lock);
|
|
|
e79480 |
goto fail;
|
|
|
e79480 |
}
|
|
|
e79480 |
data->repl_obj = r;
|
|
|
4c04d8 |
@@ -1629,13 +1639,13 @@ replica_cleanallruv_thread(void *arg)
|
|
|
e79480 |
int aborted = 0;
|
|
|
e79480 |
int rc = 0;
|
|
|
e79480 |
|
|
|
e79480 |
- if (!data || slapi_is_shutting_down()) {
|
|
|
e79480 |
- return; /* no data */
|
|
|
e79480 |
- }
|
|
|
e79480 |
-
|
|
|
e79480 |
/* Increase active thread count to prevent a race condition at server shutdown */
|
|
|
e79480 |
g_incr_active_threadcnt();
|
|
|
e79480 |
|
|
|
e79480 |
+ if (!data || slapi_is_shutting_down()) {
|
|
|
e79480 |
+ goto done;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
+
|
|
|
e79480 |
if (data->task) {
|
|
|
e79480 |
slapi_task_inc_refcount(data->task);
|
|
|
e79480 |
slapi_log_err(SLAPI_LOG_PLUGIN, repl_plugin_name,
|
|
|
4c04d8 |
@@ -1682,16 +1692,13 @@ replica_cleanallruv_thread(void *arg)
|
|
|
e79480 |
slapi_task_begin(data->task, 1);
|
|
|
e79480 |
}
|
|
|
e79480 |
/*
|
|
|
e79480 |
- * Presetting the rid prevents duplicate thread creation, but allows the db and changelog to still
|
|
|
e79480 |
- * process updates from the rid.
|
|
|
e79480 |
- * set_cleaned_rid() blocks updates, so we don't want to do that... yet unless we are in force mode.
|
|
|
e79480 |
- * If we are forcing a clean independent of state of other servers for this RID we can set_cleaned_rid()
|
|
|
e79480 |
+ * We have already preset this rid, but if we are forcing a clean independent of state
|
|
|
e79480 |
+ * of other servers for this RID we can set_cleaned_rid()
|
|
|
e79480 |
*/
|
|
|
e79480 |
if (data->force) {
|
|
|
e79480 |
set_cleaned_rid(data->rid);
|
|
|
e79480 |
- } else {
|
|
|
e79480 |
- preset_cleaned_rid(data->rid);
|
|
|
e79480 |
}
|
|
|
e79480 |
+
|
|
|
e79480 |
rid_text = slapi_ch_smprintf("%d", data->rid);
|
|
|
e79480 |
csn_as_string(data->maxcsn, PR_FALSE, csnstr);
|
|
|
e79480 |
/*
|
|
|
4c04d8 |
@@ -1861,6 +1868,9 @@ done:
|
|
|
e79480 |
/*
|
|
|
e79480 |
* If the replicas are cleaned, release the rid
|
|
|
e79480 |
*/
|
|
|
e79480 |
+ if (slapi_is_shutting_down()) {
|
|
|
e79480 |
+ stop_ruv_cleaning();
|
|
|
e79480 |
+ }
|
|
|
e79480 |
if (!aborted && !slapi_is_shutting_down()) {
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Success - the rid has been cleaned!
|
|
|
4c04d8 |
@@ -1879,10 +1889,9 @@ done:
|
|
|
e79480 |
} else {
|
|
|
e79480 |
cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Propagated task does not delete Keep alive entry (%d).", data->rid);
|
|
|
e79480 |
}
|
|
|
e79480 |
-
|
|
|
e79480 |
clean_agmts(data);
|
|
|
e79480 |
remove_cleaned_rid(data->rid);
|
|
|
e79480 |
- cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Successfully cleaned rid(%d).", data->rid);
|
|
|
e79480 |
+ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Successfully cleaned rid(%d)", data->rid);
|
|
|
e79480 |
} else {
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Shutdown or abort
|
|
|
4c04d8 |
@@ -1915,6 +1924,10 @@ done:
|
|
|
e79480 |
slapi_ch_free_string(&data->force);
|
|
|
e79480 |
slapi_ch_free_string(&rid_text);
|
|
|
e79480 |
slapi_ch_free((void **)&data);
|
|
|
e79480 |
+ /* decrement task count */
|
|
|
e79480 |
+ PR_Lock(task_count_lock);
|
|
|
e79480 |
+ clean_task_count--;
|
|
|
e79480 |
+ PR_Unlock(task_count_lock);
|
|
|
e79480 |
g_decr_active_threadcnt();
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
4c04d8 |
@@ -2414,16 +2427,14 @@ replica_send_cleanruv_task(Repl_Agmt *agmt, cleanruv_data *clean_data)
|
|
|
e79480 |
int
|
|
|
e79480 |
is_cleaned_rid(ReplicaId rid)
|
|
|
e79480 |
{
|
|
|
e79480 |
- int i;
|
|
|
e79480 |
-
|
|
|
e79480 |
- slapi_rwlock_rdlock(rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ && cleaned_rids[i] != 0; i++) {
|
|
|
e79480 |
+ PR_Lock(rid_lock);
|
|
|
e79480 |
+ for (size_t i = 0; i < CLEANRID_BUFSIZ; i++) {
|
|
|
e79480 |
if (rid == cleaned_rids[i]) {
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
return 1;
|
|
|
e79480 |
}
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
|
|
|
e79480 |
return 0;
|
|
|
e79480 |
}
|
|
|
4c04d8 |
@@ -2431,16 +2442,14 @@ is_cleaned_rid(ReplicaId rid)
|
|
|
e79480 |
int
|
|
|
e79480 |
is_pre_cleaned_rid(ReplicaId rid)
|
|
|
e79480 |
{
|
|
|
e79480 |
- int i;
|
|
|
e79480 |
-
|
|
|
e79480 |
- slapi_rwlock_rdlock(rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ && pre_cleaned_rids[i] != 0; i++) {
|
|
|
e79480 |
+ PR_Lock(rid_lock);
|
|
|
e79480 |
+ for (size_t i = 0; i < CLEANRID_BUFSIZ; i++) {
|
|
|
e79480 |
if (rid == pre_cleaned_rids[i]) {
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
return 1;
|
|
|
e79480 |
}
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
|
|
|
e79480 |
return 0;
|
|
|
e79480 |
}
|
|
|
4c04d8 |
@@ -2453,14 +2462,14 @@ is_task_aborted(ReplicaId rid)
|
|
|
e79480 |
if (rid == 0) {
|
|
|
e79480 |
return 0;
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_rdlock(abort_rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ && aborted_rids[i] != 0; i++) {
|
|
|
e79480 |
+ PR_Lock(abort_rid_lock);
|
|
|
e79480 |
+ for (i = 0; i < CLEANRID_BUFSIZ && aborted_rids[i] != 0; i++) {
|
|
|
e79480 |
if (rid == aborted_rids[i]) {
|
|
|
e79480 |
- slapi_rwlock_unlock(abort_rid_lock);
|
|
|
e79480 |
+ PR_Unlock(abort_rid_lock);
|
|
|
e79480 |
return 1;
|
|
|
e79480 |
}
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(abort_rid_lock);
|
|
|
e79480 |
+ PR_Unlock(abort_rid_lock);
|
|
|
e79480 |
return 0;
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
4c04d8 |
@@ -2469,15 +2478,14 @@ preset_cleaned_rid(ReplicaId rid)
|
|
|
e79480 |
{
|
|
|
e79480 |
int i;
|
|
|
e79480 |
|
|
|
e79480 |
- slapi_rwlock_wrlock(rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
+ PR_Lock(rid_lock);
|
|
|
e79480 |
+ for (i = 0; i < CLEANRID_BUFSIZ && pre_cleaned_rids[i] != rid; i++) {
|
|
|
e79480 |
if (pre_cleaned_rids[i] == 0) {
|
|
|
e79480 |
pre_cleaned_rids[i] = rid;
|
|
|
e79480 |
- pre_cleaned_rids[i + 1] = 0;
|
|
|
e79480 |
break;
|
|
|
e79480 |
}
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
/*
|
|
|
4c04d8 |
@@ -2490,14 +2498,13 @@ set_cleaned_rid(ReplicaId rid)
|
|
|
e79480 |
{
|
|
|
e79480 |
int i;
|
|
|
e79480 |
|
|
|
e79480 |
- slapi_rwlock_wrlock(rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
+ PR_Lock(rid_lock);
|
|
|
e79480 |
+ for (i = 0; i < CLEANRID_BUFSIZ && cleaned_rids[i] != rid; i++) {
|
|
|
e79480 |
if (cleaned_rids[i] == 0) {
|
|
|
e79480 |
cleaned_rids[i] = rid;
|
|
|
e79480 |
- cleaned_rids[i + 1] = 0;
|
|
|
e79480 |
}
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
/*
|
|
|
4c04d8 |
@@ -2569,15 +2576,14 @@ add_aborted_rid(ReplicaId rid, Replica *r, char *repl_root)
|
|
|
e79480 |
int rc;
|
|
|
e79480 |
int i;
|
|
|
e79480 |
|
|
|
e79480 |
- slapi_rwlock_wrlock(abort_rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
+ PR_Lock(abort_rid_lock);
|
|
|
e79480 |
+ for (i = 0; i < CLEANRID_BUFSIZ; i++) {
|
|
|
e79480 |
if (aborted_rids[i] == 0) {
|
|
|
e79480 |
aborted_rids[i] = rid;
|
|
|
e79480 |
- aborted_rids[i + 1] = 0;
|
|
|
e79480 |
break;
|
|
|
e79480 |
}
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(abort_rid_lock);
|
|
|
e79480 |
+ PR_Unlock(abort_rid_lock);
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Write the rid to the config entry
|
|
|
e79480 |
*/
|
|
|
4c04d8 |
@@ -2620,21 +2626,24 @@ delete_aborted_rid(Replica *r, ReplicaId rid, char *repl_root, int skip)
|
|
|
e79480 |
char *data;
|
|
|
e79480 |
char *dn;
|
|
|
e79480 |
int rc;
|
|
|
e79480 |
- int i;
|
|
|
e79480 |
|
|
|
e79480 |
if (r == NULL)
|
|
|
e79480 |
return;
|
|
|
e79480 |
|
|
|
e79480 |
if (skip) {
|
|
|
e79480 |
/* skip the deleting of the config, and just remove the in memory rid */
|
|
|
e79480 |
- slapi_rwlock_wrlock(abort_rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ && aborted_rids[i] != rid; i++)
|
|
|
e79480 |
- ; /* found rid, stop */
|
|
|
e79480 |
- for (; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
- /* rewrite entire array */
|
|
|
e79480 |
- aborted_rids[i] = aborted_rids[i + 1];
|
|
|
e79480 |
- }
|
|
|
e79480 |
- slapi_rwlock_unlock(abort_rid_lock);
|
|
|
e79480 |
+ ReplicaId new_abort_rids[CLEANRID_BUFSIZ] = {0};
|
|
|
e79480 |
+ int32_t idx = 0;
|
|
|
e79480 |
+
|
|
|
e79480 |
+ PR_Lock(abort_rid_lock);
|
|
|
e79480 |
+ for (size_t i = 0; i < CLEANRID_BUFSIZ; i++) {
|
|
|
e79480 |
+ if (aborted_rids[i] != rid) {
|
|
|
e79480 |
+ new_abort_rids[idx] = aborted_rids[i];
|
|
|
e79480 |
+ idx++;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
+ }
|
|
|
e79480 |
+ memcpy(aborted_rids, new_abort_rids, sizeof(new_abort_rids));
|
|
|
e79480 |
+ PR_Unlock(abort_rid_lock);
|
|
|
e79480 |
} else {
|
|
|
e79480 |
/* only remove the config, leave the in-memory rid */
|
|
|
e79480 |
dn = replica_get_dn(r);
|
|
|
4c04d8 |
@@ -2792,27 +2801,31 @@ bail:
|
|
|
e79480 |
void
|
|
|
e79480 |
remove_cleaned_rid(ReplicaId rid)
|
|
|
e79480 |
{
|
|
|
e79480 |
- int i;
|
|
|
e79480 |
- /*
|
|
|
e79480 |
- * Remove this rid, and optimize the array
|
|
|
e79480 |
- */
|
|
|
e79480 |
- slapi_rwlock_wrlock(rid_lock);
|
|
|
e79480 |
+ ReplicaId new_cleaned_rids[CLEANRID_BUFSIZ] = {0};
|
|
|
e79480 |
+ ReplicaId new_pre_cleaned_rids[CLEANRID_BUFSIZ] = {0};
|
|
|
e79480 |
+ size_t idx = 0;
|
|
|
e79480 |
+
|
|
|
e79480 |
+ PR_Lock(rid_lock);
|
|
|
e79480 |
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ && cleaned_rids[i] != rid; i++)
|
|
|
e79480 |
- ; /* found rid, stop */
|
|
|
e79480 |
- for (; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
- /* rewrite entire array */
|
|
|
e79480 |
- cleaned_rids[i] = cleaned_rids[i + 1];
|
|
|
e79480 |
+ for (size_t i = 0; i < CLEANRID_BUFSIZ; i++) {
|
|
|
e79480 |
+ if (cleaned_rids[i] != rid) {
|
|
|
e79480 |
+ new_cleaned_rids[idx] = cleaned_rids[i];
|
|
|
e79480 |
+ idx++;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
}
|
|
|
e79480 |
+ memcpy(cleaned_rids, new_cleaned_rids, sizeof(new_cleaned_rids));
|
|
|
e79480 |
+
|
|
|
e79480 |
/* now do the preset cleaned rids */
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ && pre_cleaned_rids[i] != rid; i++)
|
|
|
e79480 |
- ; /* found rid, stop */
|
|
|
e79480 |
- for (; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
- /* rewrite entire array */
|
|
|
e79480 |
- pre_cleaned_rids[i] = pre_cleaned_rids[i + 1];
|
|
|
e79480 |
+ idx = 0;
|
|
|
e79480 |
+ for (size_t i = 0; i < CLEANRID_BUFSIZ; i++) {
|
|
|
e79480 |
+ if (pre_cleaned_rids[i] != rid) {
|
|
|
e79480 |
+ new_pre_cleaned_rids[idx] = pre_cleaned_rids[i];
|
|
|
e79480 |
+ idx++;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
}
|
|
|
e79480 |
+ memcpy(pre_cleaned_rids, new_pre_cleaned_rids, sizeof(new_pre_cleaned_rids));
|
|
|
e79480 |
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(rid_lock);
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
/*
|
|
|
4c04d8 |
@@ -2840,16 +2853,6 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb __attribute__((unused)),
|
|
|
e79480 |
char *ridstr = NULL;
|
|
|
e79480 |
int rc = SLAPI_DSE_CALLBACK_OK;
|
|
|
e79480 |
|
|
|
e79480 |
- if (get_abort_cleanruv_task_count() >= CLEANRIDSIZ) {
|
|
|
e79480 |
- /* we are already running the maximum number of tasks */
|
|
|
e79480 |
- PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE,
|
|
|
e79480 |
- "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",
|
|
|
e79480 |
- CLEANRIDSIZ);
|
|
|
e79480 |
- cleanruv_log(task, -1, ABORT_CLEANALLRUV_ID, SLAPI_LOG_ERR, "%s", returntext);
|
|
|
e79480 |
- *returncode = LDAP_OPERATIONS_ERROR;
|
|
|
e79480 |
- return SLAPI_DSE_CALLBACK_ERROR;
|
|
|
e79480 |
- }
|
|
|
e79480 |
-
|
|
|
e79480 |
/* allocate new task now */
|
|
|
e79480 |
task = slapi_new_task(slapi_entry_get_ndn(e));
|
|
|
e79480 |
|
|
|
4c04d8 |
@@ -2934,6 +2937,16 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb __attribute__((unused)),
|
|
|
e79480 |
*/
|
|
|
e79480 |
certify_all = "no";
|
|
|
e79480 |
}
|
|
|
e79480 |
+
|
|
|
e79480 |
+ if (check_and_set_abort_cleanruv_task_count() != LDAP_SUCCESS) {
|
|
|
e79480 |
+ /* we are already running the maximum number of tasks */
|
|
|
e79480 |
+ PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE,
|
|
|
e79480 |
+ "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",
|
|
|
e79480 |
+ CLEANRIDSIZ);
|
|
|
e79480 |
+ cleanruv_log(task, -1, ABORT_CLEANALLRUV_ID, SLAPI_LOG_ERR, "%s", returntext);
|
|
|
e79480 |
+ *returncode = LDAP_UNWILLING_TO_PERFORM;
|
|
|
e79480 |
+ goto out;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Create payload
|
|
|
e79480 |
*/
|
|
|
4c04d8 |
@@ -3142,6 +3155,9 @@ done:
|
|
|
e79480 |
slapi_ch_free_string(&data->certify);
|
|
|
e79480 |
slapi_sdn_free(&data->sdn);
|
|
|
e79480 |
slapi_ch_free((void **)&data);
|
|
|
e79480 |
+ PR_Lock(task_count_lock);
|
|
|
e79480 |
+ abort_task_count--;
|
|
|
e79480 |
+ PR_Unlock(task_count_lock);
|
|
|
e79480 |
g_decr_active_threadcnt();
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
4c04d8 |
@@ -3493,36 +3509,43 @@ replica_cleanallruv_check_ruv(char *repl_root, Repl_Agmt *agmt, char *rid_text,
|
|
|
e79480 |
return rc;
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
-static int
|
|
|
e79480 |
-get_cleanruv_task_count(void)
|
|
|
e79480 |
+/*
|
|
|
e79480 |
+ * Before starting a cleanAllRUV task make sure there are not
|
|
|
e79480 |
+ * too many task threads already running. If everything is okay
|
|
|
e79480 |
+ * also pre-set the RID now so rebounding extended ops do not
|
|
|
e79480 |
+ * try to clean it over and over.
|
|
|
e79480 |
+ */
|
|
|
e79480 |
+int32_t
|
|
|
e79480 |
+check_and_set_cleanruv_task_count(ReplicaId rid)
|
|
|
e79480 |
{
|
|
|
e79480 |
- int i, count = 0;
|
|
|
e79480 |
+ int32_t rc = 0;
|
|
|
e79480 |
|
|
|
e79480 |
- slapi_rwlock_wrlock(rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
- if (pre_cleaned_rids[i] != 0) {
|
|
|
e79480 |
- count++;
|
|
|
e79480 |
- }
|
|
|
e79480 |
+ PR_Lock(task_count_lock);
|
|
|
e79480 |
+ if (clean_task_count >= CLEANRIDSIZ) {
|
|
|
e79480 |
+ rc = -1;
|
|
|
e79480 |
+ } else {
|
|
|
e79480 |
+ clean_task_count++;
|
|
|
e79480 |
+ preset_cleaned_rid(rid);
|
|
|
e79480 |
}
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(task_count_lock);
|
|
|
e79480 |
|
|
|
e79480 |
- return count;
|
|
|
e79480 |
+ return rc;
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
-static int
|
|
|
e79480 |
-get_abort_cleanruv_task_count(void)
|
|
|
e79480 |
+int32_t
|
|
|
e79480 |
+check_and_set_abort_cleanruv_task_count(void)
|
|
|
e79480 |
{
|
|
|
e79480 |
- int i, count = 0;
|
|
|
e79480 |
+ int32_t rc = 0;
|
|
|
e79480 |
|
|
|
e79480 |
- slapi_rwlock_wrlock(rid_lock);
|
|
|
e79480 |
- for (i = 0; i < CLEANRIDSIZ; i++) {
|
|
|
e79480 |
- if (aborted_rids[i] != 0) {
|
|
|
e79480 |
- count++;
|
|
|
e79480 |
+ PR_Lock(task_count_lock);
|
|
|
e79480 |
+ if (abort_task_count > CLEANRIDSIZ) {
|
|
|
e79480 |
+ rc = -1;
|
|
|
e79480 |
+ } else {
|
|
|
e79480 |
+ abort_task_count++;
|
|
|
e79480 |
}
|
|
|
e79480 |
- }
|
|
|
e79480 |
- slapi_rwlock_unlock(rid_lock);
|
|
|
e79480 |
+ PR_Unlock(task_count_lock);
|
|
|
e79480 |
|
|
|
e79480 |
- return count;
|
|
|
e79480 |
+ return rc;
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
/*
|
|
|
e79480 |
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
e79480 |
index 68e2544b4..0c2abb6d5 100644
|
|
|
e79480 |
--- a/ldap/servers/plugins/replication/repl_extop.c
|
|
|
e79480 |
+++ b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
e79480 |
@@ -1393,6 +1393,12 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb)
|
|
|
e79480 |
rc = LDAP_OPERATIONS_ERROR;
|
|
|
e79480 |
goto out;
|
|
|
e79480 |
}
|
|
|
e79480 |
+ if (check_and_set_abort_cleanruv_task_count() != LDAP_SUCCESS) {
|
|
|
e79480 |
+ cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR,
|
|
|
e79480 |
+ "Exceeded maximum number of active abort CLEANALLRUV tasks(%d)", CLEANRIDSIZ);
|
|
|
e79480 |
+ rc = LDAP_UNWILLING_TO_PERFORM;
|
|
|
e79480 |
+ goto out;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Prepare the abort data
|
|
|
e79480 |
*/
|
|
|
e79480 |
@@ -1499,6 +1505,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
|
|
|
e79480 |
if (force == NULL) {
|
|
|
e79480 |
force = "no";
|
|
|
e79480 |
}
|
|
|
e79480 |
+
|
|
|
e79480 |
maxcsn = csn_new();
|
|
|
e79480 |
csn_init_by_string(maxcsn, csnstr);
|
|
|
e79480 |
/*
|
|
|
e79480 |
@@ -1535,13 +1542,21 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
|
|
|
e79480 |
goto free_and_return;
|
|
|
e79480 |
}
|
|
|
e79480 |
|
|
|
e79480 |
+ if (check_and_set_cleanruv_task_count((ReplicaId)rid) != LDAP_SUCCESS) {
|
|
|
e79480 |
+ cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR,
|
|
|
e79480 |
+ "Exceeded maximum number of active CLEANALLRUV tasks(%d)", CLEANRIDSIZ);
|
|
|
e79480 |
+ rc = LDAP_UNWILLING_TO_PERFORM;
|
|
|
e79480 |
+ goto free_and_return;
|
|
|
e79480 |
+ }
|
|
|
e79480 |
+
|
|
|
e79480 |
if (replica_get_type(r) != REPLICA_TYPE_READONLY) {
|
|
|
e79480 |
/*
|
|
|
e79480 |
* Launch the cleanruv monitoring thread. Once all the replicas are cleaned it will release the rid
|
|
|
e79480 |
*
|
|
|
e79480 |
* This will also release mtnode_ext->replica
|
|
|
e79480 |
*/
|
|
|
e79480 |
- slapi_log_err(SLAPI_LOG_INFO, repl_plugin_name, "multimaster_extop_cleanruv - CleanAllRUV Task - Launching cleanAllRUV thread...\n");
|
|
|
e79480 |
+
|
|
|
e79480 |
+ cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR, "Launching cleanAllRUV thread...\n");
|
|
|
e79480 |
data = (cleanruv_data *)slapi_ch_calloc(1, sizeof(cleanruv_data));
|
|
|
e79480 |
if (data == NULL) {
|
|
|
e79480 |
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "multimaster_extop_cleanruv - CleanAllRUV Task - Failed to allocate "
|
|
|
e79480 |
@@ -1635,7 +1650,7 @@ free_and_return:
|
|
|
e79480 |
ber_printf(resp_bere, "{s}", CLEANRUV_ACCEPTED);
|
|
|
e79480 |
ber_flatten(resp_bere, &resp_bval);
|
|
|
e79480 |
slapi_pblock_set(pb, SLAPI_EXT_OP_RET_VALUE, resp_bval);
|
|
|
e79480 |
- slapi_send_ldap_result(pb, LDAP_SUCCESS, NULL, NULL, 0, NULL);
|
|
|
e79480 |
+ slapi_send_ldap_result(pb, rc, NULL, NULL, 0, NULL);
|
|
|
e79480 |
/* resp_bere */
|
|
|
e79480 |
if (NULL != resp_bere) {
|
|
|
e79480 |
ber_free(resp_bere, 1);
|
|
|
e79480 |
--
|
|
|
e79480 |
2.21.0
|
|
|
e79480 |
|