From 01e941e3eadd7a208982d20c0ca9c104142f2b91 Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Wed, 10 Aug 2022 08:58:28 -0400
Subject: [PATCH 4/4] Issue 3903 - fix repl keep alive event interval
Description: Previously the keep-alive update interval was passed to
the event queue in seconds, but the event queue expects
milliseconds.
Fixed a crash in replication logging when decoding an
extended operation payload (referrals).
Also reworked a lot of the replication CI tests that
were flaky.
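
Illustration (a sketch only, not the patched server code; the
nsds5ReplicaKeepAliveUpdateInterval attribute name is an
assumption here): the interval is configured in seconds and must
be multiplied by 1000 when handed to the millisecond-based
event queue:

    from lib389._constants import DEFAULT_SUFFIX
    from lib389.replica import Replicas

    def set_keepalive_interval(inst, seconds):
        """Configure the keep-alive update interval (in seconds)."""
        replica = Replicas(inst).get(DEFAULT_SUFFIX)
        replica.set('nsds5ReplicaKeepAliveUpdateInterval', str(seconds))
        # Internally the server must schedule the repeating event at
        # seconds * 1000 ms; before this fix a 60 second interval was
        # queued as 60 ms, firing the keep-alive update far too often.
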
relates: https://github.com/389ds/389-ds-base/issues/3903
Reviewed by: tbordaz & spichugi (Thanks!)
---
.../suites/replication/acceptance_test.py | 52 +-
.../cleanallruv_abort_certify_test.py | 146 ++++
.../cleanallruv_abort_restart_test.py | 147 ++++
.../replication/cleanallruv_abort_test.py | 124 +++
.../replication/cleanallruv_force_test.py | 190 +++++
.../cleanallruv_multiple_force_test.py | 214 +++++
.../replication/cleanallruv_restart_test.py | 162 ++++
.../cleanallruv_shutdown_crash_test.py | 127 +++
.../replication/cleanallruv_stress_test.py | 218 +++++
.../suites/replication/cleanallruv_test.py | 742 +-----------------
.../suites/replication/regression_m2_test.py | 13 +-
.../replication/regression_m2c2_test.py | 1 +
.../plugins/replication/repl5_replica.c | 12 +-
ldap/servers/plugins/replication/repl_extop.c | 4 +-
ldap/servers/slapd/task.c | 8 +-
src/lib389/lib389/instance/remove.py | 6 +
16 files changed, 1407 insertions(+), 759 deletions(-)
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_force_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py
create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py
diff --git a/dirsrvtests/tests/suites/replication/acceptance_test.py b/dirsrvtests/tests/suites/replication/acceptance_test.py
index a5f0c4c6b..863ee2553 100644
--- a/dirsrvtests/tests/suites/replication/acceptance_test.py
+++ b/dirsrvtests/tests/suites/replication/acceptance_test.py
@@ -8,6 +8,7 @@
#
import pytest
import logging
+import time
from lib389.replica import Replicas
from lib389.tasks import *
from lib389.utils import *
@@ -124,12 +125,16 @@ def test_modify_entry(topo_m4, create_entry):
8. Some time should pass
9. The change should be present on all suppliers
"""
+ if DEBUGGING:
+ sleep_time = 8
+ else:
+ sleep_time = 2
log.info('Modifying entry {} - add operation'.format(TEST_ENTRY_DN))
test_user = UserAccount(topo_m4.ms["supplier1"], TEST_ENTRY_DN)
test_user.add('mail', '{}@redhat.com'.format(TEST_ENTRY_NAME))
- time.sleep(1)
+ time.sleep(sleep_time)
all_user = topo_m4.all_get_dsldapobject(TEST_ENTRY_DN, UserAccount)
for u in all_user:
@@ -137,7 +142,7 @@ def test_modify_entry(topo_m4, create_entry):
log.info('Modifying entry {} - replace operation'.format(TEST_ENTRY_DN))
test_user.replace('mail', '{}@greenhat.com'.format(TEST_ENTRY_NAME))
- time.sleep(1)
+ time.sleep(sleep_time)
all_user = topo_m4.all_get_dsldapobject(TEST_ENTRY_DN, UserAccount)
for u in all_user:
@@ -145,7 +150,7 @@ def test_modify_entry(topo_m4, create_entry):
log.info('Modifying entry {} - delete operation'.format(TEST_ENTRY_DN))
test_user.remove('mail', '{}@greenhat.com'.format(TEST_ENTRY_NAME))
- time.sleep(1)
+ time.sleep(sleep_time)
all_user = topo_m4.all_get_dsldapobject(TEST_ENTRY_DN, UserAccount)
for u in all_user:
@@ -167,7 +172,10 @@ def test_delete_entry(topo_m4, create_entry):
log.info('Deleting entry {} during the test'.format(TEST_ENTRY_DN))
topo_m4.ms["supplier1"].delete_s(TEST_ENTRY_DN)
-
+ if DEBUGGING:
+ time.sleep(8)
+ else:
+ time.sleep(1)
entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["uid"])
assert not entries, "Entry deletion {} wasn't replicated successfully".format(TEST_ENTRY_DN)
@@ -231,6 +239,11 @@ def test_modrdn_after_pause(topo_m4):
5. The change should be present on all suppliers
"""
+ if DEBUGGING:
+ sleep_time = 8
+ else:
+ sleep_time = 3
+
newrdn_name = 'newrdn'
newrdn_dn = 'uid={},{}'.format(newrdn_name, DEFAULT_SUFFIX)
@@ -264,7 +277,7 @@ def test_modrdn_after_pause(topo_m4):
topo_m4.resume_all_replicas()
log.info('Wait for replication to happen')
- time.sleep(3)
+ time.sleep(sleep_time)
try:
entries_new = get_repl_entries(topo_m4, newrdn_name, ["uid"])
@@ -354,6 +367,11 @@ def test_many_attrs(topo_m4, create_entry):
for add_name in add_list:
test_user.add('description', add_name)
+ if DEBUGGING:
+ time.sleep(10)
+ else:
+ time.sleep(1)
+
log.info('Check that everything was properly replicated after an add operation')
entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["description"])
for entry in entries:
@@ -363,6 +381,11 @@ def test_many_attrs(topo_m4, create_entry):
for delete_name in delete_list:
test_user.remove('description', delete_name)
+ if DEBUGGING:
+ time.sleep(10)
+ else:
+ time.sleep(1)
+
log.info('Check that everything was properly replicated after a delete operation')
entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["description"])
for entry in entries:
@@ -386,12 +409,22 @@ def test_double_delete(topo_m4, create_entry):
log.info('Deleting entry {} from supplier1'.format(TEST_ENTRY_DN))
topo_m4.ms["supplier1"].delete_s(TEST_ENTRY_DN)
+ if DEBUGGING:
+ time.sleep(5)
+ else:
+ time.sleep(1)
+
log.info('Deleting entry {} from supplier2'.format(TEST_ENTRY_DN))
try:
topo_m4.ms["supplier2"].delete_s(TEST_ENTRY_DN)
except ldap.NO_SUCH_OBJECT:
log.info("Entry {} wasn't found supplier2. It is expected.".format(TEST_ENTRY_DN))
+ if DEBUGGING:
+ time.sleep(5)
+ else:
+ time.sleep(1)
+
log.info('Make searches to check if server is alive')
entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["uid"])
assert not entries, "Entry deletion {} wasn't replicated successfully".format(TEST_ENTRY_DN)
@@ -436,6 +469,11 @@ def test_password_repl_error(topo_m4, create_entry):
m3_conn = test_user_m3.bind(TEST_ENTRY_NEW_PASS)
m4_conn = test_user_m4.bind(TEST_ENTRY_NEW_PASS)
+ if DEBUGGING:
+ time.sleep(5)
+ else:
+ time.sleep(1)
+
log.info('Check the error log for the error with {}'.format(TEST_ENTRY_DN))
assert not m2.ds_error_log.match('.*can.t add a change for uid={}.*'.format(TEST_ENTRY_NAME))
@@ -552,7 +590,7 @@ def test_csnpurge_large_valueset(topo_m2):
replica = replicas.list()[0]
log.info('nsds5ReplicaPurgeDelay to 5')
replica.set('nsds5ReplicaPurgeDelay', '5')
- time.sleep(6)
+ time.sleep(10)
# add some new values to the valueset containing entries that should be purged
for i in range(21,25):
@@ -612,7 +650,7 @@ def test_urp_trigger_substring_search(topo_m2):
break
else:
log.info('Entry not yet replicated on M2, wait a bit')
- time.sleep(2)
+ time.sleep(3)
# check that M2 access logs does not "(&(objectclass=nstombstone)(nscpentrydn=uid=asterisk_*_in_value,dc=example,dc=com))"
log.info('Check that on M2, URP as not triggered such internal search')
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py
new file mode 100644
index 000000000..603693b9e
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py
@@ -0,0 +1,146 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager, Replicas
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4. """
+
+ log.info('%s: remove all the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
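+# Poll a task entry until it records an nsTaskExitCode (or the entry
+# disappears), or until the timeout in seconds elapses. A sketch of
+# typical use, matching the checks below:
+#
+#     if not task_done(topology_m4, cruv_task.dn):
+#         log.fatal('CleanAllRUV task did not finish')
+#         assert False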
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_abort_certify(topology_m4):
+ """Test the abort task with a replica-certify-all option
+
+ :id: 78959966-d644-44a8-b98c-1fcf21b45eb0
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Disable replication on supplier 4
+ 2. Remove agreements to supplier 4 from other suppliers
+ 3. Stop supplier 2
+ 4. Run a cleanallruv task on supplier 1
+ 5. Run a cleanallruv abort task on supplier 1 with a replica-certify-all option
+ :expectedresults: No hanging tasks left
+ 1. Replication on supplier 4 should be disabled
+ 2. Agreements to supplier 4 should be removed
+ 3. Supplier 2 should be stopped
+ 4. Operation should be successful
+ 5. Operation should be successful
+ """
+
+ log.info('Running test_abort_certify...')
+
+ # Remove the agreements from the other suppliers that point to supplier 4
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+ remove_supplier4_agmts("test_abort_certify", topology_m4)
+
+ # Stop supplier 2
+ log.info('test_abort_certify: stop supplier 2 to freeze the cleanAllRUV task...')
+ topology_m4.ms["supplier2"].stop()
+
+ # Run the task
+ log.info('test_abort_certify: add the cleanAllRUV task...')
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no',
+ 'replica-certify-all': 'yes'
+ })
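+ # With replica-certify-all=yes the task only completes once every
+ # replica is certified clean, so it stays frozen while supplier 2 is down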
+ # Wait a bit
+ time.sleep(2)
+
+ # Abort the task
+ log.info('test_abort_certify: abort the cleanAllRUV task...')
+ abort_task = cruv_task.abort(certify=True)
+
+ # Wait a while and make sure the abort task is still running
+ log.info('test_abort_certify: ensure the abort task is still running...')
+
+ if task_done(topology_m4, abort_task.dn, 10):
+ log.fatal('test_abort_certify: abort task incorrectly finished')
+ assert False
+
+ # Now start supplier 2 so the abort task can finish
+ log.info('test_abort_certify: start supplier 2 to allow the abort task to finish...')
+ topology_m4.ms["supplier2"].start()
+
+ # Wait for the abort task to stop
+ if not task_done(topology_m4, abort_task.dn, 90):
+ log.fatal('test_abort_certify: The CleanAllRUV abort task did not finish')
+ assert False
+
+ # Check supplier 1 does not have the clean task running
+ log.info('test_abort_certify: check supplier 1 no longer has a cleanAllRUV task...')
+ if not task_done(topology_m4, cruv_task.dn):
+ log.fatal('test_abort_certify: CleanAllRUV task was not aborted')
+ assert False
+
+ log.info('test_abort_certify PASSED')
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py
new file mode 100644
index 000000000..1406c6553
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py
@@ -0,0 +1,147 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4. """
+
+ log.info('%s: remove all the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_abort_restart(topology_m4):
+ """Test the abort task can handle a restart, and then resume
+
+ :id: b66e33d4-fe85-4e1c-b882-75da80f70ab3
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Disable replication on supplier 4
+ 2. Remove agreements to supplier 4 from other suppliers
+ 3. Stop supplier 3
+ 4. Run a cleanallruv task on supplier 1
+ 5. Run a cleanallruv abort task on supplier 1
+ 6. Restart supplier 1
+ 7. Make sure that no crash happened
+ 8. Start supplier 3
+ 9. Check supplier 1 does not have the clean task running
+ 10. Check that the errors log contains the 'Aborting abort task' message
+ :expectedresults:
+ 1. Replication on supplier 4 should be disabled
+ 2. Agreements to supplier 4 should be removed
+ 3. Supplier 3 should be stopped
+ 4. Operation should be successful
+ 5. Operation should be successful
+ 6. Supplier 1 should be restarted
+ 7. No crash should have happened
+ 8. Supplier 3 should be started
+ 9. Supplier 1 shouldn't have the clean task running
+ 10. Errors log should contain the 'Aborting abort task' message
+ """
+
+ log.info('Running test_abort_restart...')
+ # Remove the agreements from the other suppliers that point to supplier 4
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+ remove_supplier4_agmts("test_abort_restart", topology_m4)
+
+ # Stop supplier 3
+ log.info('test_abort_restart: stop supplier 3 to freeze the cleanAllRUV task...')
+ topology_m4.ms["supplier3"].stop()
+
+ # Run the task
+ log.info('test_abort_restart: add the cleanAllRUV task...')
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no',
+ 'replica-certify-all': 'yes'
+ })
+ # Wait a bit
+ time.sleep(2)
+
+ # Abort the task
+ cruv_task.abort(certify=True)
+
+ # Check supplier 1 does not have the clean task running
+ log.info('test_abort_restart: check supplier 1 no longer has a cleanAllRUV task...')
+ if not task_done(topology_m4, cruv_task.dn):
+ log.fatal('test_abort_restart: CleanAllRUV task was not aborted')
+ assert False
+
+ # Now restart supplier 1, and make sure the abort process completes
+ topology_m4.ms["supplier1"].restart()
+ if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
+ log.fatal('test_abort_restart: Supplier 1 previously crashed!')
+ assert False
+
+ # Start supplier 3
+ topology_m4.ms["supplier3"].start()
+
+ # Need to wait at least 5 seconds before the server processes any leftover tasks
+ time.sleep(6)
+
+ # Check that supplier 1 resumed the leftover abort task on startup and aborted it
+ if not topology_m4.ms["supplier1"].searchErrorsLog('Aborting abort task'):
+ log.fatal('test_abort_restart: Abort task did not restart')
+ assert False
+
+ log.info('test_abort_restart PASSED')
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py
new file mode 100644
index 000000000..f89188165
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py
@@ -0,0 +1,124 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4. """
+
+ log.info('%s: remove all the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_abort(topology_m4):
+ """Test the abort task basic functionality
+
+ :id: b09a6887-8de0-4fac-8e41-73ccbaaf7a08
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Disable replication on supplier 4
+ 2. Remove agreements to supplier 4 from other suppliers
+ 3. Stop supplier 2
+ 4. Run a cleanallruv task on supplier 1
+ 5. Run a cleanallruv abort task on supplier 1
+ :expectedresults: No hanging tasks left
+ 1. Replication on supplier 4 should be disabled
+ 2. Agreements to supplier 4 should be removed
+ 3. Supplier 2 should be stopped
+ 4. Operation should be successful
+ 5. Operation should be successful
+ """
+
+ log.info('Running test_abort...')
+ # Remove the agreements from the other suppliers that point to supplier 4
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+ remove_supplier4_agmts("test_abort", topology_m4)
+
+ # Stop supplier 2
+ log.info('test_abort: stop supplier 2 to freeze the cleanAllRUV task...')
+ topology_m4.ms["supplier2"].stop()
+
+ # Run the task
+ log.info('test_abort: add the cleanAllRUV task...')
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no',
+ 'replica-certify-all': 'yes'
+ })
+ # Wait a bit
+ time.sleep(2)
+
+ # Abort the task
+ cruv_task.abort()
+
+ # Check supplier 1 does not have the clean task running
+ log.info('test_abort: check supplier 1 no longer has a cleanAllRUV task...')
+ if not task_done(topology_m4, cruv_task.dn):
+ log.fatal('test_abort: CleanAllRUV task was not aborted')
+ assert False
+
+ # Start supplier 2
+ log.info('test_abort: start supplier 2 again...')
+ topology_m4.ms["supplier2"].start()
+
+ log.info('test_abort PASSED')
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_force_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_force_test.py
new file mode 100644
index 000000000..d5b930584
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_force_test.py
@@ -0,0 +1,190 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+import random
+import threading
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import Replicas, ReplicationManager
+from lib389.idm.directorymanager import DirectoryManager
+from lib389.idm.user import UserAccounts
+
+log = logging.getLogger(__name__)
+
+
+class AddUsers(threading.Thread):
+ def __init__(self, inst, num_users):
+ threading.Thread.__init__(self)
+ self.daemon = True
+ self.inst = inst
+ self.num_users = num_users
+
+ def run(self):
+ """Start adding users"""
+
+ dm = DirectoryManager(self.inst)
+ conn = dm.bind()
+
+ users = UserAccounts(conn, DEFAULT_SUFFIX)
+
+ u_range = list(range(self.num_users))
+ random.shuffle(u_range)
+
+ for idx in u_range:
+ try:
+ users.create(properties={
+ 'uid': 'testuser%s' % idx,
+ 'cn' : 'testuser%s' % idx,
+ 'sn' : 'user%s' % idx,
+ 'uidNumber' : '%s' % (1000 + idx),
+ 'gidNumber' : '%s' % (1000 + idx),
+ 'homeDirectory' : '/home/testuser%s' % idx
+ })
+ # One of the suppliers was probably put into read only mode - just break out
+ except ldap.UNWILLING_TO_PERFORM:
+ break
+ except ldap.ALREADY_EXISTS:
+ pass
+ conn.close()
+
+def remove_some_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4 except from supplier3. Used by
+ the force tests."""
+
+ log.info('%s: remove the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+def check_ruvs(msg, topology_m4, m4rid):
+ """Check suppliers 1-3 for supplier 4's rid."""
+ for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+ clean = False
+ replicas = Replicas(inst)
+ replica = replicas.get(DEFAULT_SUFFIX)
+ log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+ count = 0
+ while not clean and count < 20:
+ ruv = replica.get_ruv()
+ if m4rid in ruv._rids:
+ time.sleep(5)
+ count = count + 1
+ else:
+ clean = True
+ if not clean:
+ raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+ return True
+
+def test_clean_force(topology_m4):
+ """Check that multiple tasks with a 'force' option work properly
+
+ :id: f8810dfe-d2d2-4dd9-ba03-5fc14896fabe
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Stop supplier 3
+ 2. Add a bunch of updates to supplier 4
+ 3. Disable replication on supplier 4
+ 4. Start supplier 3
+ 5. Remove agreements to supplier 4 from other suppliers
+ 6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
+ 7. Check that everything was cleaned
+ :expectedresults:
+ 1. Supplier 3 should be stopped
+ 2. Operation should be successful
+ 3. Replication on supplier 4 should be disabled
+ 4. Supplier 3 should be started
+ 5. Agreements to supplier 4 should be removed
+ 6. Operation should be successful
+ 7. Everything should be cleaned
+ """
+
+ log.info('Running test_clean_force...')
+
+ # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
+ topology_m4.ms["supplier3"].stop()
+
+ # Add a bunch of updates to supplier 4
+ m4_add_users = AddUsers(topology_m4.ms["supplier4"], 10)
+ m4_add_users.start()
+ m4_add_users.join()
+
+ # Remove the agreements from the other suppliers that point to supplier 4
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+ remove_some_supplier4_agmts("test_clean_force", topology_m4)
+
+ # Start supplier 3, it should be out of sync with the other replicas...
+ topology_m4.ms["supplier3"].start()
+
+ # Remove the agreement to replica 4
+ replica = Replicas(topology_m4.ms["supplier3"]).get(DEFAULT_SUFFIX)
+ replica.get_agreements().get("004").delete()
+
+ # Run the task, use "force" because supplier 3 is not in sync with the other replicas
+ # in regards to the replica 4 RUV
+ log.info('test_clean_force: run the cleanAllRUV task...')
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'yes'
+ })
+ cruv_task.wait()
+
+ # Check the other supplier's RUV for 'replica 4'
+ log.info('test_clean_force: check all the suppliers have been cleaned...')
+ clean = check_ruvs("test_clean_force", topology_m4, m4rid)
+ assert clean
+
+ log.info('test_clean_force PASSED')
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py
new file mode 100644
index 000000000..0a0848bda
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py
@@ -0,0 +1,214 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import os
+import pytest
+import random
+import time
+import threading
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.idm.directorymanager import DirectoryManager
+from lib389.idm.user import UserAccounts
+from lib389.replica import ReplicationManager, Replicas
+
+log = logging.getLogger(__name__)
+
+
+class AddUsers(threading.Thread):
+ def __init__(self, inst, num_users):
+ threading.Thread.__init__(self)
+ self.daemon = True
+ self.inst = inst
+ self.num_users = num_users
+
+ def run(self):
+ """Start adding users"""
+
+ dm = DirectoryManager(self.inst)
+ conn = dm.bind()
+
+ users = UserAccounts(conn, DEFAULT_SUFFIX)
+
+ u_range = list(range(self.num_users))
+ random.shuffle(u_range)
+
+ for idx in u_range:
+ try:
+ users.create(properties={
+ 'uid': 'testuser%s' % idx,
+ 'cn' : 'testuser%s' % idx,
+ 'sn' : 'user%s' % idx,
+ 'uidNumber' : '%s' % (1000 + idx),
+ 'gidNumber' : '%s' % (1000 + idx),
+ 'homeDirectory' : '/home/testuser%s' % idx
+ })
+ # One of the suppliers was probably put into read only mode - just break out
+ except ldap.UNWILLING_TO_PERFORM:
+ break
+ except ldap.ALREADY_EXISTS:
+ pass
+ conn.close()
+
+def remove_some_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4 except from supplier3. Used by
+ the force tests."""
+
+ log.info('%s: remove the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+def check_ruvs(msg, topology_m4, m4rid):
+ """Check suppliers 1-3 for supplier 4's rid."""
+ for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+ clean = False
+ replicas = Replicas(inst)
+ replica = replicas.get(DEFAULT_SUFFIX)
+ log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+ count = 0
+ while not clean and count < 20:
+ ruv = replica.get_ruv()
+ if m4rid in ruv._rids:
+ time.sleep(5)
+ count = count + 1
+ else:
+ clean = True
+ if not clean:
+ raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+ return True
+
+
+def test_multiple_tasks_with_force(topology_m4):
+ """Check that multiple tasks with a 'force' option work properly
+
+ :id: eb76a93d-8d1c-405e-9f25-6e8d5a781098
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Stop supplier 3
+ 2. Add a bunch of updates to supplier 4
+ 3. Disable replication on supplier 4
+ 4. Start supplier 3
+ 5. Remove agreements to supplier 4 from other suppliers
+ 6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
+ 7. Run one more cleanallruv task on supplier 1 with a 'force' option 'off'
+ 8. Check that everything was cleaned
+ :expectedresults:
+ 1. Supplier 3 should be stopped
+ 2. Operation should be successful
+ 3. Replication on supplier 4 should be disabled
+ 4. Supplier 3 should be started
+ 5. Agreements to supplier 4 should be removed
+ 6. Operation should be successful
+ 7. Operation should be successful
+ 8. Everything should be cleaned
+ """
+
+ log.info('Running test_multiple_tasks_with_force...')
+
+ # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
+ topology_m4.ms["supplier3"].stop()
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+
+ # Add a bunch of updates to supplier 4
+ m4_add_users = AddUsers(topology_m4.ms["supplier4"], 10)
+ m4_add_users.start()
+ m4_add_users.join()
+
+ # Disable supplier 4
+ # Remove the agreements from the other suppliers that point to supplier 4
+ remove_some_supplier4_agmts("test_multiple_tasks_with_force", topology_m4)
+
+ # Start supplier 3, it should be out of sync with the other replicas...
+ topology_m4.ms["supplier3"].start()
+
+ # Remove the agreement to replica 4
+ replica = Replicas(topology_m4.ms["supplier3"]).get(DEFAULT_SUFFIX)
+ replica.get_agreements().get("004").delete()
+
+ # Run the task, use "force" because supplier 3 is not in sync with the other replicas
+ # in regards to the replica 4 RUV
+ log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" on...')
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'yes',
+ 'replica-certify-all': 'no'
+ })
+
+ log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" off...')
+
+ # NOTE: This must be a plain try/except rather than pytest.raises,
+ # because the task above may or may not have completed yet
+ try:
+ cruv_task_fail = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task_fail.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no',
+ 'replica-certify-all': 'no'
+ })
+ cruv_task_fail.wait()
+ except ldap.UNWILLING_TO_PERFORM:
+ pass
+ # Wait for the force task to complete
+ cruv_task.wait()
+
+ # Check the other supplier's RUV for 'replica 4'
+ log.info('test_multiple_tasks_with_force: check all the suppliers have been cleaned...')
+ clean = check_ruvs("test_multiple_tasks_with_force", topology_m4, m4rid)
+ assert clean
+ # Check supplier 1 does not have the clean task running
+ log.info('test_multiple_tasks_with_force: check supplier 1 no longer has a cleanAllRUV task...')
+ if not task_done(topology_m4, cruv_task.dn):
+ log.fatal('test_multiple_tasks_with_force: CleanAllRUV task did not finish')
+ assert False
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py
new file mode 100644
index 000000000..2e8d7e4a6
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py
@@ -0,0 +1,162 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager, Replicas
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4. """
+
+ log.info('%s: remove all the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+
+def check_ruvs(msg, topology_m4, m4rid):
+ """Check suppliers 1-3 for supplier 4's rid."""
+ for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+ clean = False
+ replicas = Replicas(inst)
+ replica = replicas.get(DEFAULT_SUFFIX)
+ log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+ count = 0
+ while not clean and count < 20:
+ ruv = replica.get_ruv()
+ if m4rid in ruv._rids:
+ time.sleep(5)
+ count = count + 1
+ else:
+ clean = True
+ if not clean:
+ raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+ return True
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_clean_restart(topology_m4):
+ """Check that cleanallruv task works properly after a restart
+
+ :id: c6233bb3-092c-4919-9ac9-80dd02cc6e02
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Disable replication on supplier 4
+ 2. Remove agreements to supplier 4 from other suppliers
+ 3. Stop supplier 3
+ 4. Run a cleanallruv task on supplier 1
+ 5. Stop supplier 1
+ 6. Start supplier 3
+ 7. Make sure that no crash happened
+ 8. Start supplier 1
+ 9. Make sure that no crash happened
+ 10. Check that everything was cleaned
+ :expectedresults:
+ 1. Operation should be successful
+ 2. Agreements to supplier 4 should be removed
+ 3. Supplier 3 should be stopped
+ 4. Cleanallruv task should be successfully executed
+ 5. Supplier 1 should be stopped
+ 6. Supplier 3 should be started
+ 7. No crash should have happened
+ 8. Supplier 1 should be started
+ 9. No crash should have happened
+ 10. Everything should be cleaned
+ """
+ log.info('Running test_clean_restart...')
+
+ # Disable supplier 4
+ log.info('test_clean_restart: disable supplier 4...')
+
+ # Remove the agreements from the other suppliers that point to supplier 4
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+ remove_supplier4_agmts("test_clean_restart", topology_m4)
+
+ # Stop supplier 3 to keep the task running, so we can stop supplier 1...
+ topology_m4.ms["supplier3"].stop()
+
+ # Run the task
+ log.info('test_clean_restart: run the cleanAllRUV task...')
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no',
+ 'replica-certify-all': 'yes'
+ })
+
+ # Sleep a bit, then stop supplier 1
+ time.sleep(5)
+ topology_m4.ms["supplier1"].stop()
+
+ # Now start supplier 3 & 1, and make sure we didn't crash
+ topology_m4.ms["supplier3"].start()
+ if topology_m4.ms["supplier3"].detectDisorderlyShutdown():
+ log.fatal('test_clean_restart: Supplier 3 previously crashed!')
+ assert False
+
+ topology_m4.ms["supplier1"].start(timeout=30)
+ if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
+ log.fatal('test_clean_restart: Supplier 1 previously crashed!')
+ assert False
+
+ # Check the other supplier's RUV for 'replica 4'
+ log.info('test_clean_restart: check all the suppliers have been cleaned...')
+ clean = check_ruvs("test_clean_restart", topology_m4, m4rid)
+ assert clean
+
+ log.info('test_clean_restart PASSED')
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py
new file mode 100644
index 000000000..b4b74e339
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py
@@ -0,0 +1,127 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m2
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager, Replicas
+from lib389.config import CertmapLegacy
+from lib389.idm.services import ServiceAccounts
+
+log = logging.getLogger(__name__)
+
+
+def test_clean_shutdown_crash(topology_m2):
+ """Check that server didn't crash after shutdown when running CleanAllRUV task
+
+ :id: c34d0b40-3c3e-4f53-8656-5e4c2a310aaf
+ :setup: Replication setup with two suppliers
+ :steps:
+ 1. Enable TLS on both suppliers
+ 2. Reconfigure both agreements to use TLS Client auth
+ 3. Stop supplier2
+ 4. Run the CleanAllRUV task
+ 5. Restart supplier1
+ 6. Check if supplier1 didn't crash
+ 7. Restart supplier1 again
+ 8. Check if supplier1 didn't crash
+
+ :expectedresults:
+ 1. Success
+ 2. Success
+ 3. Success
+ 4. Success
+ 5. Success
+ 6. Success
+ 7. Success
+ 8. Success
+ """
+
+ m1 = topology_m2.ms["supplier1"]
+ m2 = topology_m2.ms["supplier2"]
+
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+
+ cm_m1 = CertmapLegacy(m1)
+ cm_m2 = CertmapLegacy(m2)
+
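+ # Map a client cert to its entry by searching the nsCertSubjectDN
+ # attribute rather than deriving a DN from the cert subject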
+ certmaps = cm_m1.list()
+ certmaps['default']['DNComps'] = None
+ certmaps['default']['CmapLdapAttr'] = 'nsCertSubjectDN'
+
+ cm_m1.set(certmaps)
+ cm_m2.set(certmaps)
+
+ log.info('Enabling TLS')
+ [i.enable_tls() for i in topology_m2]
+
+ log.info('Creating replication dns')
+ services = ServiceAccounts(m1, DEFAULT_SUFFIX)
+ repl_m1 = services.get('%s:%s' % (m1.host, m1.sslport))
+ repl_m1.set('nsCertSubjectDN', m1.get_server_tls_subject())
+
+ repl_m2 = services.get('%s:%s' % (m2.host, m2.sslport))
+ repl_m2.set('nsCertSubjectDN', m2.get_server_tls_subject())
+
+ log.info('Changing auth type')
+ replica_m1 = Replicas(m1).get(DEFAULT_SUFFIX)
+ agmt_m1 = replica_m1.get_agreements().list()[0]
+ agmt_m1.replace_many(
+ ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
+ ('nsDS5ReplicaTransportInfo', 'SSL'),
+ ('nsDS5ReplicaPort', '%s' % m2.sslport),
+ )
+
+ agmt_m1.remove_all('nsDS5ReplicaBindDN')
+
+ replica_m2 = Replicas(m2).get(DEFAULT_SUFFIX)
+ agmt_m2 = replica_m2.get_agreements().list()[0]
+
+ agmt_m2.replace_many(
+ ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
+ ('nsDS5ReplicaTransportInfo', 'SSL'),
+ ('nsDS5ReplicaPort', '%s' % m1.sslport),
+ )
+ agmt_m2.remove_all('nsDS5ReplicaBindDN')
+
+ log.info('Stopping supplier2')
+ m2.stop()
+
+ log.info('Run the cleanAllRUV task')
+ cruv_task = CleanAllRUVTask(m1)
+ cruv_task.create(properties={
+ 'replica-id': repl.get_rid(m1),
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no',
+ 'replica-certify-all': 'yes'
+ })
+
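+ # With supplier2 down and replica-certify-all=yes the task cannot
+ # complete, so it is still active across the restarts below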
+ m1.restart()
+
+ log.info('Check if supplier1 crashed')
+ assert not m1.detectDisorderlyShutdown()
+
+ log.info('Repeat')
+ m1.restart()
+ assert not m1.detectDisorderlyShutdown()
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py
new file mode 100644
index 000000000..0d43dd7d4
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py
@@ -0,0 +1,218 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import random
+import time
+import threading
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.idm.directorymanager import DirectoryManager
+from lib389.idm.user import UserAccounts
+from lib389.replica import ReplicationManager, Replicas
+from lib389.config import LDBMConfig
+
+log = logging.getLogger(__name__)
+
+
+class AddUsers(threading.Thread):
+ def __init__(self, inst, num_users):
+ threading.Thread.__init__(self)
+ self.daemon = True
+ self.inst = inst
+ self.num_users = num_users
+
+ def run(self):
+ """Start adding users"""
+
+ dm = DirectoryManager(self.inst)
+ conn = dm.bind()
+
+ users = UserAccounts(conn, DEFAULT_SUFFIX)
+
+ u_range = list(range(self.num_users))
+ random.shuffle(u_range)
+
+ for idx in u_range:
+ try:
+ users.create(properties={
+ 'uid': 'testuser%s' % idx,
+ 'cn' : 'testuser%s' % idx,
+ 'sn' : 'user%s' % idx,
+ 'uidNumber' : '%s' % (1000 + idx),
+ 'gidNumber' : '%s' % (1000 + idx),
+ 'homeDirectory' : '/home/testuser%s' % idx
+ })
+ # One of the suppliers was probably put into read only mode - just break out
+ except ldap.UNWILLING_TO_PERFORM:
+ break
+ except ldap.ALREADY_EXISTS:
+ pass
+ conn.close()
+
+def remove_supplier4_agmts(msg, topology_m4):
+ """Remove all the repl agmts to supplier4. """
+
+ log.info('%s: remove all the agreements to supplier 4...' % msg)
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ # This will delete m4 from the topo *and* remove all incoming agreements
+ # to m4.
+ repl.remove_supplier(topology_m4.ms["supplier4"],
+ [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+ """Check if the task is complete"""
+
+ attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+ 'nsTaskCurrentItem', 'nsTaskTotalItems']
+ done = False
+ count = 0
+
+ while not done and count < timeout:
+ try:
+ entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+ if entry is not None:
+ if entry.hasAttr('nsTaskExitCode'):
+ done = True
+ break
+ else:
+ done = True
+ break
+ except ldap.NO_SUCH_OBJECT:
+ done = True
+ break
+ except ldap.LDAPError:
+ break
+ time.sleep(1)
+ count += 1
+
+ return done
+
+def check_ruvs(msg, topology_m4, m4rid):
+ """Check suppliers 1-3 for supplier 4's rid."""
+ for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+ clean = False
+ replicas = Replicas(inst)
+ replica = replicas.get(DEFAULT_SUFFIX)
+ log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+ count = 0
+ while not clean and count < 20:
+ ruv = replica.get_ruv()
+ if m4rid in ruv._rids:
+ time.sleep(5)
+ count = count + 1
+ else:
+ clean = True
+ if not clean:
+ raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+ return True
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_stress_clean(topology_m4):
+ """Put each server(m1 - m4) under a stress, and perform the entire clean process
+
+ :id: a8263cd6-f068-4357-86e0-e7c34504c8c5
+ :setup: Replication setup with four suppliers
+ :steps:
+ 1. Add a bunch of updates to all suppliers
+ 2. Put supplier 4 to read-only mode
+ 3. Disable replication on supplier 4
+ 4. Remove agreements to supplier 4 from other suppliers
+ 5. Run a cleanallruv task on supplier 1
+ 6. Check that everything was cleaned
+ :expectedresults:
+ 1. Operation should be successful
+ 2. Supplier 4 should be put to read-only mode
+ 3. Replication on supplier 4 should be disabled
+ 4. Agreements to supplier 4 should be removed
+ 5. Operation should be successful
+ 6. Everything should be cleaned
+ """
+
+ log.info('Running test_stress_clean...')
+ log.info('test_stress_clean: put all the suppliers under load...')
+
+ ldbm_config = LDBMConfig(topology_m4.ms["supplier4"])
+
+ # Put all the suppliers under load
+ # (not too much load, else it takes a long time to converge and
+ # the test result becomes unstable)
+ m1_add_users = AddUsers(topology_m4.ms["supplier1"], 200)
+ m1_add_users.start()
+ m2_add_users = AddUsers(topology_m4.ms["supplier2"], 200)
+ m2_add_users.start()
+ m3_add_users = AddUsers(topology_m4.ms["supplier3"], 200)
+ m3_add_users.start()
+ m4_add_users = AddUsers(topology_m4.ms["supplier4"], 200)
+ m4_add_users.start()
+
+ # Allow some time for replication to get flowing in all directions
+ log.info('test_stress_clean: allow some time for replication to get flowing...')
+ time.sleep(5)
+
+ # Put supplier 4 into read only mode
+ ldbm_config.set('nsslapd-readonly', 'on')
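+ # ADD operations against supplier 4 will now fail with
+ # UNWILLING_TO_PERFORM, which the AddUsers threads treat as a stop signal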
+ # We need to wait for supplier 4 to push its changes out
+ log.info('test_stress_clean: allow some time for supplier 4 to push changes out (60 seconds)...')
+ time.sleep(60)
+
+ # Remove the agreements from the other suppliers that point to supplier 4
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+ remove_supplier4_agmts("test_stress_clean", topology_m4)
+
+ # Run the task
+ cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+ cruv_task.create(properties={
+ 'replica-id': m4rid,
+ 'replica-base-dn': DEFAULT_SUFFIX,
+ 'replica-force-cleaning': 'no'
+ })
+ cruv_task.wait()
+
+ # Wait for the update to finish
+ log.info('test_stress_clean: wait for all the updates to finish...')
+ m1_add_users.join()
+ m2_add_users.join()
+ m3_add_users.join()
+ m4_add_users.join()
+
+ # Check the other supplier's RUV for 'replica 4'
+ log.info('test_stress_clean: check if all the replicas have been cleaned...')
+ clean = check_ruvs("test_stress_clean", topology_m4, m4rid)
+ assert clean
+
+ log.info('test_stress_clean PASSED, restoring supplier 4 to read-write mode...')
+
+ # Wait for replication to converge before restoring read-write mode
+ log.info("Waiting up to 120 seconds for replication to complete...")
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ repl.test_replication_topology([
+ topology_m4.ms["supplier1"],
+ topology_m4.ms["supplier2"],
+ topology_m4.ms["supplier3"],
+ ], timeout=120)
+
+ # Turn off readonly mode
+ ldbm_config.set('nsslapd-readonly', 'off')
+
+
+if __name__ == '__main__':
+ # Run isolated
+ # -s for DEBUG mode
+ CURRENT_FILE = os.path.realpath(__file__)
+ pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
index 1e9cd7c28..6d7141ada 100644
--- a/dirsrvtests/tests/suites/replication/cleanallruv_test.py
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
@@ -1,27 +1,20 @@
# --- BEGIN COPYRIGHT BLOCK ---
-# Copyright (C) 2019 Red Hat, Inc.
+# Copyright (C) 2022 Red Hat, Inc.
# All rights reserved.
#
# License: GPL (version 3 or any later version).
# See LICENSE for details.
# --- END COPYRIGHT BLOCK ---
#
-import threading
import pytest
-import random
from lib389 import DirSrv
from lib389.tasks import *
from lib389.utils import *
from lib389.topologies import topology_m4, topology_m2
-from lib389._constants import *
-
-from lib389.idm.directorymanager import DirectoryManager
+from lib389._constants import DEFAULT_SUFFIX
from lib389.replica import ReplicationManager, Replicas
from lib389.tasks import CleanAllRUVTask
-from lib389.idm.user import UserAccounts
-from lib389.config import LDBMConfig
-from lib389.config import CertmapLegacy
-from lib389.idm.services import ServiceAccounts
+
pytestmark = pytest.mark.tier1
@@ -29,42 +22,6 @@ logging.getLogger(__name__).setLevel(logging.DEBUG)
log = logging.getLogger(__name__)
-class AddUsers(threading.Thread):
- def __init__(self, inst, num_users):
- threading.Thread.__init__(self)
- self.daemon = True
- self.inst = inst
- self.num_users = num_users
-
- def run(self):
- """Start adding users"""
-
- dm = DirectoryManager(self.inst)
- conn = dm.bind()
-
- users = UserAccounts(conn, DEFAULT_SUFFIX)
-
- u_range = list(range(self.num_users))
- random.shuffle(u_range)
-
- for idx in u_range:
- try:
- users.create(properties={
- 'uid': 'testuser%s' % idx,
- 'cn' : 'testuser%s' % idx,
- 'sn' : 'user%s' % idx,
- 'uidNumber' : '%s' % (1000 + idx),
- 'gidNumber' : '%s' % (1000 + idx),
- 'homeDirectory' : '/home/testuser%s' % idx
- })
- # One of the suppliers was probably put into read only mode - just break out
- except ldap.UNWILLING_TO_PERFORM:
- break
- except ldap.ALREADY_EXISTS:
- pass
- conn.close()
-
-
def remove_supplier4_agmts(msg, topology_m4):
"""Remove all the repl agmts to supplier4. """
@@ -96,92 +53,7 @@ def check_ruvs(msg, topology_m4, m4rid):
return True
-def task_done(topology_m4, task_dn, timeout=60):
- """Check if the task is complete"""
-
- attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
- 'nsTaskCurrentItem', 'nsTaskTotalItems']
- done = False
- count = 0
-
- while not done and count < timeout:
- try:
- entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
- if entry is not None:
- if entry.hasAttr('nsTaskExitCode'):
- done = True
- break
- else:
- done = True
- break
- except ldap.NO_SUCH_OBJECT:
- done = True
- break
- except ldap.LDAPError:
- break
- time.sleep(1)
- count += 1
-
- return done
-
-
-def restore_supplier4(topology_m4):
- """In our tests will always be removing supplier 4, so we need a common
- way to restore it for another test
- """
-
- # Restart the remaining suppliers to allow rid 4 to be reused.
- for inst in topology_m4.ms.values():
- inst.restart()
-
- repl = ReplicationManager(DEFAULT_SUFFIX)
- repl.join_supplier(topology_m4.ms["supplier1"], topology_m4.ms["supplier4"])
-
- # Add the 2,3 -> 4 agmt.
- repl.ensure_agreement(topology_m4.ms["supplier2"], topology_m4.ms["supplier4"])
- repl.ensure_agreement(topology_m4.ms["supplier3"], topology_m4.ms["supplier4"])
- # And in reverse ...
- repl.ensure_agreement(topology_m4.ms["supplier4"], topology_m4.ms["supplier2"])
- repl.ensure_agreement(topology_m4.ms["supplier4"], topology_m4.ms["supplier3"])
-
- log.info('Supplier 4 has been successfully restored.')
-
-
-@pytest.fixture()
-def m4rid(request, topology_m4):
- log.debug("Wait a bit before the reset - it is required for the slow machines")
- time.sleep(5)
- log.debug("-------------- BEGIN RESET of m4 -----------------")
- repl = ReplicationManager(DEFAULT_SUFFIX)
- repl.test_replication_topology(topology_m4.ms.values())
- # What is supplier4's rid?
- m4rid = repl.get_rid(topology_m4.ms["supplier4"])
-
- def fin():
- try:
- # Restart the suppliers and rerun cleanallruv
- for inst in topology_m4.ms.values():
- inst.restart()
-
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- })
- cruv_task.wait()
- except ldap.UNWILLING_TO_PERFORM:
- # In some casse we already cleaned rid4, so if we fail, it's okay
- pass
- restore_supplier4(topology_m4)
- # Make sure everything works.
- repl.test_replication_topology(topology_m4.ms.values())
- request.addfinalizer(fin)
- log.debug("-------------- FINISH RESET of m4 -----------------")
- return m4rid
-
-
-def test_clean(topology_m4, m4rid):
+def test_clean(topology_m4):
"""Check that cleanallruv task works properly
:id: e9b3ce5c-e17c-409e-aafc-e97d630f2878
@@ -204,6 +76,8 @@ def test_clean(topology_m4, m4rid):
# Disable supplier 4
# Remove the agreements from the other suppliers that point to supplier 4
log.info('test_clean: disable supplier 4...')
+ repl = ReplicationManager(DEFAULT_SUFFIX)
+ m4rid = repl.get_rid(topology_m4.ms["supplier4"])
remove_supplier4_agmts("test_clean", topology_m4)
# Run the task
@@ -221,610 +95,6 @@ def test_clean(topology_m4, m4rid):
clean = check_ruvs("test_clean", topology_m4, m4rid)
assert clean
- log.info('test_clean PASSED, restoring supplier 4...')
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_clean_restart(topology_m4, m4rid):
- """Check that cleanallruv task works properly after a restart
-
- :id: c6233bb3-092c-4919-9ac9-80dd02cc6e02
- :setup: Replication setup with four suppliers
- :steps:
- 1. Disable replication on supplier 4
- 2. Remove agreements to supplier 4 from other suppliers
- 3. Stop supplier 3
- 4. Run a cleanallruv task on supplier 1
- 5. Stop supplier 1
- 6. Start supplier 3
- 7. Make sure that no crash happened
- 8. Start supplier 1
- 9. Make sure that no crash happened
- 10. Check that everything was cleaned
- :expectedresults:
- 1. Operation should be successful
- 2. Agreements to supplier 4 should be removed
- 3. Supplier 3 should be stopped
- 4. Cleanallruv task should be successfully executed
- 5. Supplier 1 should be stopped
- 6. Supplier 3 should be started
- 7. No crash should happened
- 8. Supplier 1 should be started
- 9. No crash should happened
- 10. Everything should be cleaned
- """
- log.info('Running test_clean_restart...')
-
- # Disable supplier 4
- log.info('test_clean: disable supplier 4...')
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_clean", topology_m4)
-
- # Stop supplier 3 to keep the task running, so we can stop supplier 1...
- topology_m4.ms["supplier3"].stop()
-
- # Run the task
- log.info('test_clean: run the cleanAllRUV task...')
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- 'replica-certify-all': 'yes'
- })
-
- # Sleep a bit, then stop supplier 1
- time.sleep(5)
- topology_m4.ms["supplier1"].stop()
-
- # Now start supplier 3 & 1, and make sure we didn't crash
- topology_m4.ms["supplier3"].start()
- if topology_m4.ms["supplier3"].detectDisorderlyShutdown():
- log.fatal('test_clean_restart: Supplier 3 previously crashed!')
- assert False
-
- topology_m4.ms["supplier1"].start(timeout=30)
- if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
- log.fatal('test_clean_restart: Supplier 1 previously crashed!')
- assert False
-
- # Check the other supplier's RUV for 'replica 4'
- log.info('test_clean_restart: check all the suppliers have been cleaned...')
- clean = check_ruvs("test_clean_restart", topology_m4, m4rid)
- assert clean
-
- log.info('test_clean_restart PASSED, restoring supplier 4...')
-
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_clean_force(topology_m4, m4rid):
- """Check that multiple tasks with a 'force' option work properly
-
- :id: f8810dfe-d2d2-4dd9-ba03-5fc14896fabe
- :setup: Replication setup with four suppliers
- :steps:
- 1. Stop supplier 3
- 2. Add a bunch of updates to supplier 4
- 3. Disable replication on supplier 4
- 4. Start supplier 3
- 5. Remove agreements to supplier 4 from other suppliers
- 6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
- 7. Check that everything was cleaned
- :expectedresults:
- 1. Supplier 3 should be stopped
- 2. Operation should be successful
- 3. Replication on supplier 4 should be disabled
- 4. Supplier 3 should be started
- 5. Agreements to supplier 4 should be removed
- 6. Operation should be successful
- 7. Everything should be cleaned
- """
-
- log.info('Running test_clean_force...')
-
- # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
- topology_m4.ms["supplier3"].stop()
-
- # Add a bunch of updates to supplier 4
- m4_add_users = AddUsers(topology_m4.ms["supplier4"], 1500)
- m4_add_users.start()
- m4_add_users.join()
-
- # Start supplier 3, it should be out of sync with the other replicas...
- topology_m4.ms["supplier3"].start()
-
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_clean_force", topology_m4)
-
- # Run the task, use "force" because supplier 3 is not in sync with the other replicas
- # in regards to the replica 4 RUV
- log.info('test_clean: run the cleanAllRUV task...')
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'yes'
- })
- cruv_task.wait()
-
- # Check the other supplier's RUV for 'replica 4'
- log.info('test_clean_force: check all the suppliers have been cleaned...')
- clean = check_ruvs("test_clean_force", topology_m4, m4rid)
- assert clean
-
- log.info('test_clean_force PASSED, restoring supplier 4...')
-
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_abort(topology_m4, m4rid):
- """Test the abort task basic functionality
-
- :id: b09a6887-8de0-4fac-8e41-73ccbaaf7a08
- :setup: Replication setup with four suppliers
- :steps:
- 1. Disable replication on supplier 4
- 2. Remove agreements to supplier 4 from other suppliers
- 3. Stop supplier 2
- 4. Run a cleanallruv task on supplier 1
- 5. Run a cleanallruv abort task on supplier 1
- :expectedresults: No hanging tasks left
- 1. Replication on supplier 4 should be disabled
- 2. Agreements to supplier 4 should be removed
- 3. Supplier 2 should be stopped
- 4. Operation should be successful
- 5. Operation should be successful
- """
-
- log.info('Running test_abort...')
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_abort", topology_m4)
-
- # Stop supplier 2
- log.info('test_abort: stop supplier 2 to freeze the cleanAllRUV task...')
- topology_m4.ms["supplier2"].stop()
-
- # Run the task
- log.info('test_abort: add the cleanAllRUV task...')
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- 'replica-certify-all': 'yes'
- })
- # Wait a bit
- time.sleep(2)
-
- # Abort the task
- cruv_task.abort()
-
- # Check supplier 1 does not have the clean task running
- log.info('test_abort: check supplier 1 no longer has a cleanAllRUV task...')
- if not task_done(topology_m4, cruv_task.dn):
- log.fatal('test_abort: CleanAllRUV task was not aborted')
- assert False
-
- # Start supplier 2
- log.info('test_abort: start supplier 2 to begin the restore process...')
- topology_m4.ms["supplier2"].start()
-
- log.info('test_abort PASSED, restoring supplier 4...')
-
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_abort_restart(topology_m4, m4rid):
- """Test the abort task can handle a restart, and then resume
-
- :id: b66e33d4-fe85-4e1c-b882-75da80f70ab3
- :setup: Replication setup with four suppliers
- :steps:
- 1. Disable replication on supplier 4
- 2. Remove agreements to supplier 4 from other suppliers
- 3. Stop supplier 3
- 4. Run a cleanallruv task on supplier 1
- 5. Run a cleanallruv abort task on supplier 1
- 6. Restart supplier 1
- 7. Make sure that no crash happened
- 8. Start supplier 3
- 9. Check supplier 1 does not have the clean task running
- 10. Check that errors log doesn't have 'Aborting abort task' message
- :expectedresults:
- 1. Replication on supplier 4 should be disabled
- 2. Agreements to supplier 4 should be removed
- 3. Supplier 3 should be stopped
- 4. Operation should be successful
- 5. Operation should be successful
- 6. Supplier 1 should be restarted
- 7. No crash should happened
- 8. Supplier 3 should be started
- 9. Check supplier 1 shouldn't have the clean task running
- 10. Errors log shouldn't have 'Aborting abort task' message
- """
-
- log.info('Running test_abort_restart...')
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_abort", topology_m4)
-
- # Stop supplier 3
- log.info('test_abort_restart: stop supplier 3 to freeze the cleanAllRUV task...')
- topology_m4.ms["supplier3"].stop()
-
- # Run the task
- log.info('test_abort_restart: add the cleanAllRUV task...')
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- 'replica-certify-all': 'yes'
- })
- # Wait a bit
- time.sleep(2)
-
- # Abort the task
- cruv_task.abort(certify=True)
-
- # Check supplier 1 does not have the clean task running
- log.info('test_abort_abort: check supplier 1 no longer has a cleanAllRUV task...')
- if not task_done(topology_m4, cruv_task.dn):
- log.fatal('test_abort_restart: CleanAllRUV task was not aborted')
- assert False
-
- # Now restart supplier 1, and make sure the abort process completes
- topology_m4.ms["supplier1"].restart()
- if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
- log.fatal('test_abort_restart: Supplier 1 previously crashed!')
- assert False
-
- # Start supplier 3
- topology_m4.ms["supplier3"].start()
-
- # Need to wait 5 seconds before server processes any leftover tasks
- time.sleep(6)
-
- # Check supplier 1 tried to run abort task. We expect the abort task to be aborted.
- if not topology_m4.ms["supplier1"].searchErrorsLog('Aborting abort task'):
- log.fatal('test_abort_restart: Abort task did not restart')
- assert False
-
- log.info('test_abort_restart PASSED, restoring supplier 4...')
-
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_abort_certify(topology_m4, m4rid):
- """Test the abort task with a replica-certify-all option
-
- :id: 78959966-d644-44a8-b98c-1fcf21b45eb0
- :setup: Replication setup with four suppliers
- :steps:
- 1. Disable replication on supplier 4
- 2. Remove agreements to supplier 4 from other suppliers
- 3. Stop supplier 2
- 4. Run a cleanallruv task on supplier 1
- 5. Run a cleanallruv abort task on supplier 1 with a replica-certify-all option
- :expectedresults: No hanging tasks left
- 1. Replication on supplier 4 should be disabled
- 2. Agreements to supplier 4 should be removed
- 3. Supplier 2 should be stopped
- 4. Operation should be successful
- 5. Operation should be successful
- """
-
- log.info('Running test_abort_certify...')
-
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_abort_certify", topology_m4)
-
- # Stop supplier 2
- log.info('test_abort_certify: stop supplier 2 to freeze the cleanAllRUV task...')
- topology_m4.ms["supplier2"].stop()
-
- # Run the task
- log.info('test_abort_certify: add the cleanAllRUV task...')
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- 'replica-certify-all': 'yes'
- })
- # Wait a bit
- time.sleep(2)
-
- # Abort the task
- log.info('test_abort_certify: abort the cleanAllRUV task...')
- abort_task = cruv_task.abort(certify=True)
-
- # Wait a while and make sure the abort task is still running
- log.info('test_abort_certify...')
-
- if task_done(topology_m4, abort_task.dn, 10):
- log.fatal('test_abort_certify: abort task incorrectly finished')
- assert False
-
- # Now start supplier 2 so it can be aborted
- log.info('test_abort_certify: start supplier 2 to allow the abort task to finish...')
- topology_m4.ms["supplier2"].start()
-
- # Wait for the abort task to stop
- if not task_done(topology_m4, abort_task.dn, 90):
- log.fatal('test_abort_certify: The abort CleanAllRUV task was not aborted')
- assert False
-
- # Check supplier 1 does not have the clean task running
- log.info('test_abort_certify: check supplier 1 no longer has a cleanAllRUV task...')
- if not task_done(topology_m4, cruv_task.dn):
- log.fatal('test_abort_certify: CleanAllRUV task was not aborted')
- assert False
-
- log.info('test_abort_certify PASSED, restoring supplier 4...')
-
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_stress_clean(topology_m4, m4rid):
- """Put each server(m1 - m4) under a stress, and perform the entire clean process
-
- :id: a8263cd6-f068-4357-86e0-e7c34504c8c5
- :setup: Replication setup with four suppliers
- :steps:
- 1. Add a bunch of updates to all suppliers
- 2. Put supplier 4 to read-only mode
- 3. Disable replication on supplier 4
- 4. Remove agreements to supplier 4 from other suppliers
- 5. Run a cleanallruv task on supplier 1
- 6. Check that everything was cleaned
- :expectedresults:
- 1. Operation should be successful
- 2. Supplier 4 should be put to read-only mode
- 3. Replication on supplier 4 should be disabled
- 4. Agreements to supplier 4 should be removed
- 5. Operation should be successful
- 6. Everything should be cleaned
- """
-
- log.info('Running test_stress_clean...')
- log.info('test_stress_clean: put all the suppliers under load...')
-
- ldbm_config = LDBMConfig(topology_m4.ms["supplier4"])
-
- # not too high load else it takes a long time to converge and
- # the test result becomes instable
- m1_add_users = AddUsers(topology_m4.ms["supplier1"], 500)
- m1_add_users.start()
- m2_add_users = AddUsers(topology_m4.ms["supplier2"], 500)
- m2_add_users.start()
- m3_add_users = AddUsers(topology_m4.ms["supplier3"], 500)
- m3_add_users.start()
- m4_add_users = AddUsers(topology_m4.ms["supplier4"], 500)
- m4_add_users.start()
-
- # Allow sometime to get replication flowing in all directions
- log.info('test_stress_clean: allow some time for replication to get flowing...')
- time.sleep(5)
-
- # Put supplier 4 into read only mode
- ldbm_config.set('nsslapd-readonly', 'on')
- # We need to wait for supplier 4 to push its changes out
- log.info('test_stress_clean: allow some time for supplier 4 to push changes out (60 seconds)...')
- time.sleep(30)
-
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_stress_clean", topology_m4)
-
- # Run the task
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no'
- })
- cruv_task.wait()
-
- # Wait for the update to finish
- log.info('test_stress_clean: wait for all the updates to finish...')
- m1_add_users.join()
- m2_add_users.join()
- m3_add_users.join()
- m4_add_users.join()
-
- # Check the other supplier's RUV for 'replica 4'
- log.info('test_stress_clean: check if all the replicas have been cleaned...')
- clean = check_ruvs("test_stress_clean", topology_m4, m4rid)
- assert clean
-
- log.info('test_stress_clean: PASSED, restoring supplier 4...')
-
- # Sleep for a bit to replication complete
- log.info("Sleep for 120 seconds to allow replication to complete...")
- repl = ReplicationManager(DEFAULT_SUFFIX)
- repl.test_replication_topology([
- topology_m4.ms["supplier1"],
- topology_m4.ms["supplier2"],
- topology_m4.ms["supplier3"],
- ], timeout=120)
-
- # Turn off readonly mode
- ldbm_config.set('nsslapd-readonly', 'off')
-
-
-@pytest.mark.flaky(max_runs=2, min_passes=1)
-def test_multiple_tasks_with_force(topology_m4, m4rid):
- """Check that multiple tasks with a 'force' option work properly
-
- :id: eb76a93d-8d1c-405e-9f25-6e8d5a781098
- :setup: Replication setup with four suppliers
- :steps:
- 1. Stop supplier 3
- 2. Add a bunch of updates to supplier 4
- 3. Disable replication on supplier 4
- 4. Start supplier 3
- 5. Remove agreements to supplier 4 from other suppliers
- 6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
- 7. Run one more cleanallruv task on supplier 1 with a 'force' option 'off'
- 8. Check that everything was cleaned
- :expectedresults:
- 1. Supplier 3 should be stopped
- 2. Operation should be successful
- 3. Replication on supplier 4 should be disabled
- 4. Supplier 3 should be started
- 5. Agreements to supplier 4 should be removed
- 6. Operation should be successful
- 7. Operation should be successful
- 8. Everything should be cleaned
- """
-
- log.info('Running test_multiple_tasks_with_force...')
-
- # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
- topology_m4.ms["supplier3"].stop()
-
- # Add a bunch of updates to supplier 4
- m4_add_users = AddUsers(topology_m4.ms["supplier4"], 1500)
- m4_add_users.start()
- m4_add_users.join()
-
- # Start supplier 3, it should be out of sync with the other replicas...
- topology_m4.ms["supplier3"].start()
-
- # Disable supplier 4
- # Remove the agreements from the other suppliers that point to supplier 4
- remove_supplier4_agmts("test_multiple_tasks_with_force", topology_m4)
-
- # Run the task, use "force" because supplier 3 is not in sync with the other replicas
- # in regards to the replica 4 RUV
- log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" on...')
- cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'yes',
- 'replica-certify-all': 'no'
- })
-
- log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" off...')
-
- # NOTE: This must be try not py.test raises, because the above may or may
- # not have completed yet ....
- try:
- cruv_task_fail = CleanAllRUVTask(topology_m4.ms["supplier1"])
- cruv_task_fail.create(properties={
- 'replica-id': m4rid,
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- 'replica-certify-all': 'no'
- })
- cruv_task_fail.wait()
- except ldap.UNWILLING_TO_PERFORM:
- pass
- # Wait for the force task ....
- cruv_task.wait()
-
- # Check the other supplier's RUV for 'replica 4'
- log.info('test_multiple_tasks_with_force: check all the suppliers have been cleaned...')
- clean = check_ruvs("test_clean_force", topology_m4, m4rid)
- assert clean
- # Check supplier 1 does not have the clean task running
- log.info('test_abort: check supplier 1 no longer has a cleanAllRUV task...')
- if not task_done(topology_m4, cruv_task.dn):
- log.fatal('test_abort: CleanAllRUV task was not aborted')
- assert False
-
-
-@pytest.mark.bz1466441
-@pytest.mark.ds50370
-def test_clean_shutdown_crash(topology_m2):
- """Check that server didn't crash after shutdown when running CleanAllRUV task
-
- :id: c34d0b40-3c3e-4f53-8656-5e4c2a310aaf
- :setup: Replication setup with two suppliers
- :steps:
- 1. Enable TLS on both suppliers
- 2. Reconfigure both agreements to use TLS Client auth
- 3. Stop supplier2
- 4. Run the CleanAllRUV task
- 5. Restart supplier1
- 6. Check if supplier1 didn't crash
- 7. Restart supplier1 again
- 8. Check if supplier1 didn't crash
-
- :expectedresults:
- 1. Success
- 2. Success
- 3. Success
- 4. Success
- 5. Success
- 6. Success
- 7. Success
- 8. Success
- """
-
- m1 = topology_m2.ms["supplier1"]
- m2 = topology_m2.ms["supplier2"]
-
- repl = ReplicationManager(DEFAULT_SUFFIX)
-
- cm_m1 = CertmapLegacy(m1)
- cm_m2 = CertmapLegacy(m2)
-
- certmaps = cm_m1.list()
- certmaps['default']['DNComps'] = None
- certmaps['default']['CmapLdapAttr'] = 'nsCertSubjectDN'
-
- cm_m1.set(certmaps)
- cm_m2.set(certmaps)
-
- log.info('Enabling TLS')
- [i.enable_tls() for i in topology_m2]
-
- log.info('Creating replication dns')
- services = ServiceAccounts(m1, DEFAULT_SUFFIX)
- repl_m1 = services.get('%s:%s' % (m1.host, m1.sslport))
- repl_m1.set('nsCertSubjectDN', m1.get_server_tls_subject())
-
- repl_m2 = services.get('%s:%s' % (m2.host, m2.sslport))
- repl_m2.set('nsCertSubjectDN', m2.get_server_tls_subject())
-
- log.info('Changing auth type')
- replica_m1 = Replicas(m1).get(DEFAULT_SUFFIX)
- agmt_m1 = replica_m1.get_agreements().list()[0]
- agmt_m1.replace_many(
- ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
- ('nsDS5ReplicaTransportInfo', 'SSL'),
- ('nsDS5ReplicaPort', '%s' % m2.sslport),
- )
-
- agmt_m1.remove_all('nsDS5ReplicaBindDN')
-
- replica_m2 = Replicas(m2).get(DEFAULT_SUFFIX)
- agmt_m2 = replica_m2.get_agreements().list()[0]
-
- agmt_m2.replace_many(
- ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
- ('nsDS5ReplicaTransportInfo', 'SSL'),
- ('nsDS5ReplicaPort', '%s' % m1.sslport),
- )
- agmt_m2.remove_all('nsDS5ReplicaBindDN')
-
- log.info('Stopping supplier2')
- m2.stop()
-
- log.info('Run the cleanAllRUV task')
- cruv_task = CleanAllRUVTask(m1)
- cruv_task.create(properties={
- 'replica-id': repl.get_rid(m1),
- 'replica-base-dn': DEFAULT_SUFFIX,
- 'replica-force-cleaning': 'no',
- 'replica-certify-all': 'yes'
- })
-
- m1.restart()
-
- log.info('Check if supplier1 crashed')
- assert not m1.detectDisorderlyShutdown()
-
- log.info('Repeat')
- m1.restart()
- assert not m1.detectDisorderlyShutdown()
-
if __name__ == '__main__':
# Run isolated
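
Note on the removed tests above: they were split out into the new
cleanallruv_*_test.py modules listed in the diffstat. Those modules no longer
share the old m4rid fixture, so each test resolves the replica id inline, as
test_clean now does. A minimal sketch of that shared pattern (illustrative
only; it assumes the usual topology_m4 fixture and reuses the lib389 calls
shown in the hunks above):

    from lib389.replica import ReplicationManager
    from lib389.tasks import CleanAllRUVTask
    from lib389._constants import DEFAULT_SUFFIX

    def run_cleanallruv(topology_m4):
        # Resolve supplier 4's rid inline instead of via a shared fixture
        repl = ReplicationManager(DEFAULT_SUFFIX)
        m4rid = repl.get_rid(topology_m4.ms["supplier4"])

        # Launch the task on supplier 1 and wait for it to complete
        cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
        cruv_task.create(properties={
            'replica-id': m4rid,
            'replica-base-dn': DEFAULT_SUFFIX,
            'replica-force-cleaning': 'no',
        })
        cruv_task.wait()
        return m4rid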
diff --git a/dirsrvtests/tests/suites/replication/regression_m2_test.py b/dirsrvtests/tests/suites/replication/regression_m2_test.py
index bbf9c8486..65c299a0c 100644
--- a/dirsrvtests/tests/suites/replication/regression_m2_test.py
+++ b/dirsrvtests/tests/suites/replication/regression_m2_test.py
@@ -240,8 +240,12 @@ def test_double_delete(topo_m2, create_entry):
log.info('Deleting entry {} from supplier1'.format(create_entry.dn))
topo_m2.ms["supplier1"].delete_s(create_entry.dn)
- log.info('Deleting entry {} from supplier2'.format(create_entry.dn))
- topo_m2.ms["supplier2"].delete_s(create_entry.dn)
+ try:
+ log.info('Deleting entry {} from supplier2'.format(create_entry.dn))
+ topo_m2.ms["supplier2"].delete_s(create_entry.dn)
+ except ldap.NO_SUCH_OBJECT:
+ # replication was too fast (DEBUGGING is probably set)
+ pass
repl.enable_to_supplier(m2, [m1])
repl.enable_to_supplier(m1, [m2])
@@ -813,8 +817,9 @@ def test_keepalive_entries(topo_m2):
keep_alive_s1 = str(entries[0].data['keepalivetimestamp'])
keep_alive_s2 = str(entries[1].data['keepalivetimestamp'])
- # Wait for event interval (60 secs) to pass
- time.sleep(61)
+ # Wait for the event interval (60 secs) to pass, but the first update
+ # doesn't happen until 30 seconds after startup
+ time.sleep(91)
# Check keep alives entries have been updated
entries = verify_keepalive_entries(topo_m2, True);
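
The 91-second sleep follows from the scheduling in repl5_replica.c: the
keep-alive update event is registered 30 seconds after startup
(START_UPDATE_DELAY) and then repeats at the event interval, 60 seconds here.
A quick worst-case sketch of the arithmetic:

    START_UPDATE_DELAY = 30  # seconds before the first keep-alive update
    EVENT_INTERVAL = 60      # keep-alive update interval, in seconds

    # If the timestamps were captured right after startup, the next update
    # can land a full delay plus one interval later; add a second of slack.
    wait = START_UPDATE_DELAY + EVENT_INTERVAL + 1
    assert wait == 91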
diff --git a/dirsrvtests/tests/suites/replication/regression_m2c2_test.py b/dirsrvtests/tests/suites/replication/regression_m2c2_test.py
index 97b35c7ab..f9de7383c 100644
--- a/dirsrvtests/tests/suites/replication/regression_m2c2_test.py
+++ b/dirsrvtests/tests/suites/replication/regression_m2c2_test.py
@@ -289,6 +289,7 @@ def test_csngen_state_not_updated_if_different_uuid(topo_m2c2):
log.error(f"c1 csngen state has unexpectedly been synchronized with m2: time skew {c1_timeSkew}")
assert False
c1.start()
+ time.sleep(5)
# Step 8: Check that c2 has time skew
# Stop server to insure that dse.ldif is uptodate
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
index 5dab57de4..d67f1bc71 100644
--- a/ldap/servers/plugins/replication/repl5_replica.c
+++ b/ldap/servers/plugins/replication/repl5_replica.c
@@ -239,8 +239,8 @@ replica_new_from_entry(Slapi_Entry *e, char *errortext, PRBool is_add_operation,
/* create supplier update event */
if (r->repl_eqcxt_ka_update == NULL && replica_get_type(r) == REPLICA_TYPE_UPDATABLE) {
r->repl_eqcxt_ka_update = slapi_eq_repeat_rel(replica_subentry_update, r,
- slapi_current_rel_time_t() + 30,
- replica_get_keepalive_update_interval(r));
+ slapi_current_rel_time_t() + 30,
+ 1000 * replica_get_keepalive_update_interval(r));
}
if (r->tombstone_reap_interval > 0) {
@@ -518,7 +518,7 @@ replica_subentry_update(time_t when __attribute__((unused)), void *arg)
replica_subentry_check(repl_root, rid);
slapi_timestamp_utc_hr(buf, SLAPI_TIMESTAMP_BUFSIZE);
- slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "replica_subentry_update called at %s\n", buf);
+ slapi_log_err(SLAPI_LOG_REPL, "NSMMReplicationPlugin", "replica_subentry_update called at %s\n", buf);
val.bv_val = buf;
val.bv_len = strlen(val.bv_val);
vals[0] = &val;
@@ -542,7 +542,7 @@ replica_subentry_update(time_t when __attribute__((unused)), void *arg)
"Failure (%d) to update replication keep alive entry \"%s: %s\"\n",
ldrc, KEEP_ALIVE_ATTR, buf);
} else {
- slapi_log_err(SLAPI_LOG_PLUGIN, repl_plugin_name,
+ slapi_log_err(SLAPI_LOG_REPL, "NSMMReplicationPlugin",
"replica_subentry_update - "
"Successful update of replication keep alive entry \"%s: %s\"\n",
KEEP_ALIVE_ATTR, buf);
@@ -1536,7 +1536,7 @@ replica_set_enabled(Replica *r, PRBool enable)
if (r->repl_eqcxt_ka_update == NULL && replica_get_type(r) == REPLICA_TYPE_UPDATABLE) {
r->repl_eqcxt_ka_update = slapi_eq_repeat_rel(replica_subentry_update, r,
slapi_current_rel_time_t() + START_UPDATE_DELAY,
- replica_get_keepalive_update_interval(r));
+ 1000 * replica_get_keepalive_update_interval(r));
}
} else /* disable */
{
@@ -1546,7 +1546,7 @@ replica_set_enabled(Replica *r, PRBool enable)
r->repl_eqcxt_rs = NULL;
}
/* Remove supplier update event */
- if (replica_get_type(r) == REPLICA_TYPE_PRIMARY) {
+ if (replica_get_type(r) == REPLICA_TYPE_UPDATABLE) {
slapi_eq_cancel_rel(r->repl_eqcxt_ka_update);
r->repl_eqcxt_ka_update = NULL;
}
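
This is the core interval fix from the commit message: slapi_eq_repeat_rel()
takes its repeat interval in milliseconds, while
replica_get_keepalive_update_interval() returns seconds, so the value is now
scaled by 1000 at both scheduling sites. An illustrative conversion (Python
sketch, not the server code):

    def keepalive_interval_ms(interval_s):
        # The event queue expects milliseconds; the replica configuration
        # stores the keep-alive update interval in seconds.
        return 1000 * interval_s

    assert keepalive_interval_ms(60) == 60000  # 60 s, not a 60 ms storm

The REPLICA_TYPE_PRIMARY -> REPLICA_TYPE_UPDATABLE change in the disable path
matches the creation checks above, so the keep-alive event registered for an
updatable replica is the same one that gets cancelled.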
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
index 70c45ec50..b32d00941 100644
--- a/ldap/servers/plugins/replication/repl_extop.c
+++ b/ldap/servers/plugins/replication/repl_extop.c
@@ -493,7 +493,7 @@ free_and_return:
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
"decode_startrepl_extop - decoded csn: %s\n", *csnstr);
ruv_dump_to_log(*supplier_ruv, "decode_startrepl_extop");
- for (size_t i = 0; *extra_referrals && *extra_referrals[i]; i++) {
+ for (size_t i = 0; *extra_referrals && extra_referrals[i]; i++) {
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "decode_startrepl_extop - "
"decoded referral: %s\n", *extra_referrals[i]);
}
@@ -1661,7 +1661,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
* Launch the cleanruv monitoring thread. Once all the replicas are cleaned it will release the rid
*/
- cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR, "Launching cleanAllRUV thread...");
+ cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Launching cleanAllRUV thread...");
data = (cleanruv_data *)slapi_ch_calloc(1, sizeof(cleanruv_data));
if (data == NULL) {
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "multimaster_extop_cleanruv - CleanAllRUV Task - Failed to allocate "
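
Two small changes here: the loop guard for dumping decoded referrals no
longer dereferences extra_referrals[i] before checking it (the old condition
could read past the decoded payload and crash, per the commit message), and
the thread-launch message drops from SLAPI_LOG_ERR to SLAPI_LOG_INFO.
Assuming default error-log settings still record INFO messages, a test could
assert on it roughly like this (sketch; searchErrorsLog is the same lib389
helper the cleanallruv tests use above):

    assert supplier1.searchErrorsLog('Launching cleanAllRUV thread')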
diff --git a/ldap/servers/slapd/task.c b/ldap/servers/slapd/task.c
index 4c7262ab3..71d5a2fb5 100644
--- a/ldap/servers/slapd/task.c
+++ b/ldap/servers/slapd/task.c
@@ -742,7 +742,7 @@ get_internal_entry(Slapi_PBlock *pb, char *dn)
slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &ret);
if (ret != LDAP_SUCCESS) {
slapi_log_err(SLAPI_LOG_WARNING, "get_internal_entry",
- "Can't find task entry '%s'\n", dn);
+ "Failed to search for task entry '%s' error: %d\n", dn, ret);
return NULL;
}
@@ -786,9 +786,9 @@ modify_internal_entry(char *dn, LDAPMod **mods)
* entry -- try at least 3 times before giving up.
*/
tries++;
- if (tries == 3) {
- slapi_log_err(SLAPI_LOG_WARNING, "modify_internal_entry", "Can't modify task "
- "entry '%s'; %s (%d)\n",
+ if (tries == 5) {
+ slapi_log_err(SLAPI_LOG_WARNING, "modify_internal_entry",
+ "Can't modify task entry '%s'; %s (%d)\n",
dn, ldap_err2string(ret), ret);
slapi_pblock_destroy(pb);
return;
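
modify_internal_entry() now allows 5 attempts instead of 3 before the failure
is logged and the modify abandoned. The shape of the logic, as a hedged
Python sketch (apply_mod stands in for the internal modify and is
hypothetical):

    import logging
    log = logging.getLogger(__name__)

    MAX_TRIES = 5  # raised from 3 by this patch

    def modify_with_retry(apply_mod, dn):
        for _ in range(MAX_TRIES):
            if apply_mod():
                return True  # modify succeeded
        log.warning("Can't modify task entry '%s' after %d tries", dn, MAX_TRIES)
        return False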
diff --git a/src/lib389/lib389/instance/remove.py b/src/lib389/lib389/instance/remove.py
index e96db3896..5668f375b 100644
--- a/src/lib389/lib389/instance/remove.py
+++ b/src/lib389/lib389/instance/remove.py
@@ -90,6 +90,12 @@ def remove_ds_instance(dirsrv, force=False):
# Remove parent (/var/lib/dirsrv/slapd-INST)
shutil.rmtree(remove_paths['db_dir'].replace('db', ''), ignore_errors=True)
+ # Remove the instance's socket file (/run/slapd-<serverid>.socket)
+ try:
+ os.remove(f'/run/slapd-{dirsrv.serverid}.socket')
+ except OSError as e:
+ _log.debug("Failed to remove socket file: " + str(e))
+
# We can not assume we have systemd ...
if dirsrv.ds_paths.with_systemd:
# Remove the systemd symlink
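
remove_ds_instance() now also unlinks the instance's socket under /run,
tolerating a file that is already gone. Hedged usage sketch (assumes an
existing DirSrv object inst for the instance being removed):

    from lib389.instance.remove import remove_ds_instance

    # Tears down the instance; /run/slapd-<serverid>.socket is removed as
    # well, and a missing file is only logged at debug level.
    remove_ds_instance(inst)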
--
2.37.1