Blame SOURCES/0016-Issue-3903-fix-repl-keep-alive-event-interval.patch (all lines last touched by commit a77461)

From 01e941e3eadd7a208982d20c0ca9c104142f2b91 Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Wed, 10 Aug 2022 08:58:28 -0400
Subject: [PATCH 4/4] Issue 3903 - fix repl keep alive event interval

Description:  Previously we passed the interval as seconds to the
              event queue, but it is supposed to be milliseconds.

              Fixed a crash with repl logging and decoding extended
              op payload (referrals).

              Also reworked a lot of the replication CI tests that
              were flaky.

relates: https://github.com/389ds/389-ds-base/issues/3903

Reviewed by: tbordaz & spichugi (Thanks!)
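
The unit mix-up described above is worth seeing in miniature. The sketch below is
illustrative only: the real fix lives in the C event-queue calls touched in
repl5_replica.c, and schedule_repeating plus its parameter names are hypothetical
stand-ins for a queue whose interval argument is in milliseconds.

    def schedule_keepalive(schedule_repeating, update_fn, replica, interval_seconds):
        # Hypothetical sketch of the bug class: the replica config stores the
        # keep-alive interval in seconds, but the event queue expects milliseconds.
        #
        # Buggy form -- passing seconds straight through, so a 60-second
        # interval fires every 60 milliseconds:
        #   schedule_repeating(update_fn, replica, interval_seconds)
        #
        # Fixed form -- convert to the milliseconds the queue expects:
        schedule_repeating(update_fn, replica, interval_seconds * 1000)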
---
 .../suites/replication/acceptance_test.py     |  52 +-
 .../cleanallruv_abort_certify_test.py         | 137 ++++
 .../cleanallruv_abort_restart_test.py         | 147 ++++
 .../replication/cleanallruv_abort_test.py     | 124 +++
 .../replication/cleanallruv_force_test.py     | 188 +++++
 .../cleanallruv_multiple_force_test.py        | 214 +++++
 .../replication/cleanallruv_restart_test.py   | 162 ++++
 .../cleanallruv_shutdown_crash_test.py        | 123 +++
 .../replication/cleanallruv_stress_test.py    | 216 +++++
 .../suites/replication/cleanallruv_test.py    | 742 +-----------------
 .../suites/replication/regression_m2_test.py  |  13 +-
 .../replication/regression_m2c2_test.py       |   1 +
 .../plugins/replication/repl5_replica.c       |  12 +-
 ldap/servers/plugins/replication/repl_extop.c |   4 +-
 ldap/servers/slapd/task.c                     |   8 +-
 src/lib389/lib389/instance/remove.py          |   6 +
 16 files changed, 1390 insertions(+), 759 deletions(-)
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_force_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py
 create mode 100644 dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py

diff --git a/dirsrvtests/tests/suites/replication/acceptance_test.py b/dirsrvtests/tests/suites/replication/acceptance_test.py
index a5f0c4c6b..863ee2553 100644
--- a/dirsrvtests/tests/suites/replication/acceptance_test.py
+++ b/dirsrvtests/tests/suites/replication/acceptance_test.py
@@ -8,6 +8,7 @@
 #
 import pytest
 import logging
+import time
 from lib389.replica import Replicas
 from lib389.tasks import *
 from lib389.utils import *
@@ -124,12 +125,16 @@ def test_modify_entry(topo_m4, create_entry):
         8. Some time should pass
         9. The change should be present on all suppliers
     """
+    if DEBUGGING:
+        sleep_time = 8
+    else:
+        sleep_time = 2
 
     log.info('Modifying entry {} - add operation'.format(TEST_ENTRY_DN))
 
     test_user = UserAccount(topo_m4.ms["supplier1"], TEST_ENTRY_DN)
     test_user.add('mail', '{}@redhat.com'.format(TEST_ENTRY_NAME))
-    time.sleep(1)
+    time.sleep(sleep_time)
 
     all_user = topo_m4.all_get_dsldapobject(TEST_ENTRY_DN, UserAccount)
     for u in all_user:
@@ -137,7 +142,7 @@ def test_modify_entry(topo_m4, create_entry):
 
     log.info('Modifying entry {} - replace operation'.format(TEST_ENTRY_DN))
     test_user.replace('mail', '{}@greenhat.com'.format(TEST_ENTRY_NAME))
-    time.sleep(1)
+    time.sleep(sleep_time)
 
     all_user = topo_m4.all_get_dsldapobject(TEST_ENTRY_DN, UserAccount)
     for u in all_user:
@@ -145,7 +150,7 @@ def test_modify_entry(topo_m4, create_entry):
 
     log.info('Modifying entry {} - delete operation'.format(TEST_ENTRY_DN))
     test_user.remove('mail', '{}@greenhat.com'.format(TEST_ENTRY_NAME))
-    time.sleep(1)
+    time.sleep(sleep_time)
 
     all_user = topo_m4.all_get_dsldapobject(TEST_ENTRY_DN, UserAccount)
     for u in all_user:
@@ -167,7 +172,10 @@ def test_delete_entry(topo_m4, create_entry):
 
     log.info('Deleting entry {} during the test'.format(TEST_ENTRY_DN))
     topo_m4.ms["supplier1"].delete_s(TEST_ENTRY_DN)
-
+    if DEBUGGING:
+        time.sleep(8)
+    else:
+        time.sleep(1)
     entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["uid"])
     assert not entries, "Entry deletion {} wasn't replicated successfully".format(TEST_ENTRY_DN)
 
@@ -231,6 +239,11 @@ def test_modrdn_after_pause(topo_m4):
         5. The change should be present on all suppliers
     """
 
+    if DEBUGGING:
+        sleep_time = 8
+    else:
+        sleep_time = 3
+
     newrdn_name = 'newrdn'
     newrdn_dn = 'uid={},{}'.format(newrdn_name, DEFAULT_SUFFIX)
 
@@ -264,7 +277,7 @@ def test_modrdn_after_pause(topo_m4):
     topo_m4.resume_all_replicas()
 
     log.info('Wait for replication to happen')
-    time.sleep(3)
+    time.sleep(sleep_time)
 
     try:
         entries_new = get_repl_entries(topo_m4, newrdn_name, ["uid"])
@@ -354,6 +367,11 @@ def test_many_attrs(topo_m4, create_entry):
     for add_name in add_list:
         test_user.add('description', add_name)
 
+    if DEBUGGING:
+        time.sleep(10)
+    else:
+        time.sleep(1)
+
     log.info('Check that everything was properly replicated after an add operation')
     entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["description"])
     for entry in entries:
@@ -363,6 +381,11 @@ def test_many_attrs(topo_m4, create_entry):
     for delete_name in delete_list:
         test_user.remove('description', delete_name)
 
+    if DEBUGGING:
+        time.sleep(10)
+    else:
+        time.sleep(1)
+
     log.info('Check that everything was properly replicated after a delete operation')
     entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["description"])
     for entry in entries:
@@ -386,12 +409,22 @@ def test_double_delete(topo_m4, create_entry):
     log.info('Deleting entry {} from supplier1'.format(TEST_ENTRY_DN))
     topo_m4.ms["supplier1"].delete_s(TEST_ENTRY_DN)
 
+    if DEBUGGING:
+        time.sleep(5)
+    else:
+        time.sleep(1)
+
     log.info('Deleting entry {} from supplier2'.format(TEST_ENTRY_DN))
     try:
         topo_m4.ms["supplier2"].delete_s(TEST_ENTRY_DN)
     except ldap.NO_SUCH_OBJECT:
         log.info("Entry {} wasn't found supplier2. It is expected.".format(TEST_ENTRY_DN))
 
+    if DEBUGGING:
+        time.sleep(5)
+    else:
+        time.sleep(1)
+
     log.info('Make searches to check if server is alive')
     entries = get_repl_entries(topo_m4, TEST_ENTRY_NAME, ["uid"])
     assert not entries, "Entry deletion {} wasn't replicated successfully".format(TEST_ENTRY_DN)
@@ -436,6 +469,11 @@ def test_password_repl_error(topo_m4, create_entry):
     m3_conn = test_user_m3.bind(TEST_ENTRY_NEW_PASS)
     m4_conn = test_user_m4.bind(TEST_ENTRY_NEW_PASS)
 
+    if DEBUGGING:
+        time.sleep(5)
+    else:
+        time.sleep(1)
+
     log.info('Check the error log for the error with {}'.format(TEST_ENTRY_DN))
     assert not m2.ds_error_log.match('.*can.t add a change for uid={}.*'.format(TEST_ENTRY_NAME))
 
@@ -552,7 +590,7 @@ def test_csnpurge_large_valueset(topo_m2):
     replica = replicas.list()[0]
     log.info('nsds5ReplicaPurgeDelay to 5')
     replica.set('nsds5ReplicaPurgeDelay', '5')
-    time.sleep(6)
+    time.sleep(10)
 
     # add some new values to the valueset containing entries that should be purged
     for i in range(21,25):
@@ -612,7 +650,7 @@ def test_urp_trigger_substring_search(topo_m2):
             break
         else:
             log.info('Entry not yet replicated on M2, wait a bit')
-            time.sleep(2)
+            time.sleep(3)
 
     # check that M2 access logs does not "(&(objectclass=nstombstone)(nscpentrydn=uid=asterisk_*_in_value,dc=example,dc=com))"
     log.info('Check that on M2, URP as not triggered such internal search')
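
A note on the pattern above: the acceptance-test hunks repeatedly replace fixed
one-second waits with a DEBUGGING-aware delay, so replication has time to settle
when the suite runs instrumented. A small helper could express the idea once;
this is a hypothetical sketch (it assumes the module-level DEBUGGING flag that
the test file gets via its lib389 wildcard imports):

    import time

    def repl_settle(debug_secs=8, normal_secs=1):
        # Wait longer when DEBUGGING is set, as the hunks above do inline.
        time.sleep(debug_secs if DEBUGGING else normal_secs)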
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py
new file mode 100644
index 000000000..603693b9e
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_abort_certify_test.py
@@ -0,0 +1,137 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager, Replicas
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4. """
+
+    log.info('%s: remove all the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
+                    break
+            else:
+                done = True
+                break
+        except ldap.NO_SUCH_OBJECT:
+            done = True
+            break
+        except ldap.LDAPError:
+            break
+        time.sleep(1)
+        count += 1
+
+    return done
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_abort_certify(topology_m4):
+    """Test the abort task with a replica-certify-all option
+
+    :id: 78959966-d644-44a8-b98c-1fcf21b45eb0
+    :setup: Replication setup with four suppliers
+    :steps:
+        1. Disable replication on supplier 4
+        2. Remove agreements to supplier 4 from other suppliers
+        3. Stop supplier 2
+        4. Run a cleanallruv task on supplier 1
+        5. Run a cleanallruv abort task on supplier 1 with a replica-certify-all option
+    :expectedresults: No hanging tasks left
+        1. Replication on supplier 4 should be disabled
+        2. Agreements to supplier 4 should be removed
+        3. Supplier 2 should be stopped
+        4. Operation should be successful
+        5. Operation should be successful
+    """
+
+    log.info('Running test_abort_certify...')
+
+    # Remove the agreements from the other suppliers that point to supplier 4
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+    remove_supplier4_agmts("test_abort_certify", topology_m4)
+
+    # Stop supplier 2
+    log.info('test_abort_certify: stop supplier 2 to freeze the cleanAllRUV task...')
+    topology_m4.ms["supplier2"].stop()
+
+    # Run the task
+    log.info('test_abort_certify: add the cleanAllRUV task...')
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+    cruv_task.create(properties={
+        'replica-id': m4rid,
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'no',
+        'replica-certify-all': 'yes'
+        })
+    # Wait a bit
+    time.sleep(2)
+
+    # Abort the task
+    log.info('test_abort_certify: abort the cleanAllRUV task...')
+    abort_task = cruv_task.abort(certify=True)
+
+    # Wait a while and make sure the abort task is still running
+    log.info('test_abort_certify: ensure the abort task is still running...')
+
+    if task_done(topology_m4, abort_task.dn, 10):
+        log.fatal('test_abort_certify: abort task incorrectly finished')
+        assert False
+
+    # Now start supplier 2 so it can be aborted
+    log.info('test_abort_certify: start supplier 2 to allow the abort task to finish...')
+    topology_m4.ms["supplier2"].start()
+
+    # Wait for the abort task to stop
+    if not task_done(topology_m4, abort_task.dn, 90):
+        log.fatal('test_abort_certify: The abort CleanAllRUV task was not aborted')
+        assert False
+
+    # Check supplier 1 does not have the clean task running
+    log.info('test_abort_certify: check supplier 1 no longer has a cleanAllRUV task...')
+    if not task_done(topology_m4, cruv_task.dn):
+        log.fatal('test_abort_certify: CleanAllRUV task was not aborted')
+        assert False
+
+    log.info('test_abort_certify PASSED')
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
+
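The essence of the certify-all abort flow exercised above, condensed into a
hypothetical standalone sketch (fixtures and the task_done helper as defined in
this file): with replica-certify-all, the abort must wait for every replica to
acknowledge, so it keeps running while supplier 2 is down and only completes
once supplier 2 returns.

    abort_task = cruv_task.abort(certify=True)
    # Still waiting on the stopped supplier -- must NOT finish yet:
    assert not task_done(topology_m4, abort_task.dn, 10)
    topology_m4.ms["supplier2"].start()
    # Completes once every replica has acknowledged the abort:
    assert task_done(topology_m4, abort_task.dn, 90)
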
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py
new file mode 100644
index 000000000..1406c6553
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_abort_restart_test.py
@@ -0,0 +1,147 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4. """
+
+    log.info('%s: remove all the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
+                    break
+            else:
+                done = True
+                break
+        except ldap.NO_SUCH_OBJECT:
+            done = True
+            break
+        except ldap.LDAPError:
+            break
+        time.sleep(1)
+        count += 1
+
+    return done
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_abort_restart(topology_m4):
+    """Test the abort task can handle a restart, and then resume
+
+    :id: b66e33d4-fe85-4e1c-b882-75da80f70ab3
+    :setup: Replication setup with four suppliers
+    :steps:
+        1. Disable replication on supplier 4
+        2. Remove agreements to supplier 4 from other suppliers
+        3. Stop supplier 3
+        4. Run a cleanallruv task on supplier 1
+        5. Run a cleanallruv abort task on supplier 1
+        6. Restart supplier 1
+        7. Make sure that no crash happened
+        8. Start supplier 3
+        9. Check supplier 1 does not have the clean task running
+        10. Check that errors log doesn't have 'Aborting abort task' message
+    :expectedresults:
+        1. Replication on supplier 4 should be disabled
+        2. Agreements to supplier 4 should be removed
+        3. Supplier 3 should be stopped
+        4. Operation should be successful
+        5. Operation should be successful
+        6. Supplier 1 should be restarted
+        7. No crash should happen
+        8. Supplier 3 should be started
+        9. Supplier 1 shouldn't have the clean task running
+        10. Errors log shouldn't have 'Aborting abort task' message
+    """
+
+    log.info('Running test_abort_restart...')
+    # Remove the agreements from the other suppliers that point to supplier 4
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+    remove_supplier4_agmts("test_abort_restart", topology_m4)
+
+    # Stop supplier 3
+    log.info('test_abort_restart: stop supplier 3 to freeze the cleanAllRUV task...')
+    topology_m4.ms["supplier3"].stop()
+
+    # Run the task
+    log.info('test_abort_restart: add the cleanAllRUV task...')
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+    cruv_task.create(properties={
+        'replica-id': m4rid,
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'no',
+        'replica-certify-all': 'yes'
+        })
+    # Wait a bit
+    time.sleep(2)
+
+    # Abort the task
+    cruv_task.abort(certify=True)
+
+    # Check supplier 1 does not have the clean task running
+    log.info('test_abort_restart: check supplier 1 no longer has a cleanAllRUV task...')
+    if not task_done(topology_m4, cruv_task.dn):
+        log.fatal('test_abort_restart: CleanAllRUV task was not aborted')
+        assert False
+
+    # Now restart supplier 1, and make sure the abort process completes
+    topology_m4.ms["supplier1"].restart()
+    if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
+        log.fatal('test_abort_restart: Supplier 1 previously crashed!')
+        assert False
+
+    # Start supplier 3
+    topology_m4.ms["supplier3"].start()
+
+    # Wait out the 5-second delay before the server processes any leftover tasks
+    time.sleep(6)
+
+    # Check supplier 1 tried to run abort task.  We expect the abort task to be aborted.
+    if not topology_m4.ms["supplier1"].searchErrorsLog('Aborting abort task'):
+        log.fatal('test_abort_restart: Abort task did not restart')
+        assert False
+
+    log.info('test_abort_restart PASSED')
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py
new file mode 100644
index 000000000..f89188165
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_abort_test.py
@@ -0,0 +1,124 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4. """
+
+    log.info('%s: remove all the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
+                    break
+            else:
+                done = True
+                break
+        except ldap.NO_SUCH_OBJECT:
+            done = True
+            break
+        except ldap.LDAPError:
+            break
+        time.sleep(1)
+        count += 1
+
+    return done
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_abort(topology_m4):
+    """Test the abort task basic functionality
+
+    :id: b09a6887-8de0-4fac-8e41-73ccbaaf7a08
+    :setup: Replication setup with four suppliers
+    :steps:
+        1. Disable replication on supplier 4
+        2. Remove agreements to supplier 4 from other suppliers
+        3. Stop supplier 2
+        4. Run a cleanallruv task on supplier 1
+        5. Run a cleanallruv abort task on supplier 1
+    :expectedresults: No hanging tasks left
+        1. Replication on supplier 4 should be disabled
+        2. Agreements to supplier 4 should be removed
+        3. Supplier 2 should be stopped
+        4. Operation should be successful
+        5. Operation should be successful
+    """
+
+    log.info('Running test_abort...')
+    # Remove the agreements from the other suppliers that point to supplier 4
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+    remove_supplier4_agmts("test_abort", topology_m4)
+
+    # Stop supplier 2
+    log.info('test_abort: stop supplier 2 to freeze the cleanAllRUV task...')
+    topology_m4.ms["supplier2"].stop()
+
+    # Run the task
+    log.info('test_abort: add the cleanAllRUV task...')
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+    cruv_task.create(properties={
+        'replica-id': m4rid,
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'no',
+        'replica-certify-all': 'yes'
+        })
+    # Wait a bit
+    time.sleep(2)
+
+    # Abort the task
+    cruv_task.abort()
+
+    # Check supplier 1 does not have the clean task running
+    log.info('test_abort: check supplier 1 no longer has a cleanAllRUV task...')
+    if not task_done(topology_m4, cruv_task.dn):
+        log.fatal('test_abort: CleanAllRUV task was not aborted')
+        assert False
+
+    # Start supplier 2
+    log.info('test_abort: start supplier 2 to begin the restore process...')
+    topology_m4.ms["supplier2"].start()
+
+    log.info('test_abort PASSED')
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_force_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_force_test.py
new file mode 100644
index 000000000..d5b930584
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_force_test.py
@@ -0,0 +1,188 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+import random
+import threading
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import Replicas, ReplicationManager
+from lib389.idm.directorymanager import DirectoryManager
+from lib389.idm.user import UserAccounts
+
+log = logging.getLogger(__name__)
+
+
+class AddUsers(threading.Thread):
+    def __init__(self, inst, num_users):
+        threading.Thread.__init__(self)
+        self.daemon = True
+        self.inst = inst
+        self.num_users = num_users
+
+    def run(self):
+        """Start adding users"""
+
+        dm = DirectoryManager(self.inst)
+        conn = dm.bind()
+
+        users = UserAccounts(conn, DEFAULT_SUFFIX)
+
+        u_range = list(range(self.num_users))
+        random.shuffle(u_range)
+
+        for idx in u_range:
+            try:
+                users.create(properties={
+                    'uid': 'testuser%s' % idx,
+                    'cn' : 'testuser%s' % idx,
+                    'sn' : 'user%s' % idx,
+                    'uidNumber' : '%s' % (1000 + idx),
+                    'gidNumber' : '%s' % (1000 + idx),
+                    'homeDirectory' : '/home/testuser%s' % idx
+                })
+            # One of the suppliers was probably put into read only mode - just break out
+            except ldap.UNWILLING_TO_PERFORM:
+                break
+            except ldap.ALREADY_EXISTS:
+                pass
+        conn.close()
+
+def remove_some_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4 except from supplier3.  Used by
+    the force tests."""
+
+    log.info('%s: remove the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
+                    break
+            else:
+                done = True
+                break
+        except ldap.NO_SUCH_OBJECT:
+            done = True
+            break
+        except ldap.LDAPError:
+            break
+        time.sleep(1)
+        count += 1
+
+    return done
+
+def check_ruvs(msg, topology_m4, m4rid):
+    """Check suppliers 1-3 for supplier 4's rid."""
+    for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+        clean = False
+        replicas = Replicas(inst)
+        replica = replicas.get(DEFAULT_SUFFIX)
+        log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+        count = 0
+        while not clean and count < 20:
+            ruv = replica.get_ruv()
+            if m4rid in ruv._rids:
+                time.sleep(5)
+                count = count + 1
+            else:
+                clean = True
+        if not clean:
+            raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+    return True
+
+def test_clean_force(topology_m4):
+    """Check that a cleanallruv task with a 'force' option works properly
+
+    :id: f8810dfe-d2d2-4dd9-ba03-5fc14896fabe
+    :setup: Replication setup with four suppliers
+    :steps:
+        1. Stop supplier 3
+        2. Add a bunch of updates to supplier 4
+        3. Disable replication on supplier 4
+        4. Start supplier 3
+        5. Remove agreements to supplier 4 from other suppliers
+        6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
+        7. Check that everything was cleaned
+    :expectedresults:
+        1. Supplier 3 should be stopped
+        2. Operation should be successful
+        3. Replication on supplier 4 should be disabled
+        4. Supplier 3 should be started
+        5. Agreements to supplier 4 should be removed
+        6. Operation should be successful
+        7. Everything should be cleaned
+    """
+
+    log.info('Running test_clean_force...')
+
+    # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
+    topology_m4.ms["supplier3"].stop()
+
+    # Add a bunch of updates to supplier 4
+    m4_add_users = AddUsers(topology_m4.ms["supplier4"], 10)
+    m4_add_users.start()
+    m4_add_users.join()
+
+    # Remove the agreements from the other suppliers that point to supplier 4
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+    remove_some_supplier4_agmts("test_clean_force", topology_m4)
+
+    # Start supplier 3, it should be out of sync with the other replicas...
+    topology_m4.ms["supplier3"].start()
+
+    # Remove the agreement to replica 4
+    replica = Replicas(topology_m4.ms["supplier3"]).get(DEFAULT_SUFFIX)
+    replica.get_agreements().get("004").delete()
+
+    # Run the task, use "force" because supplier 3 is not in sync with the other replicas
+    # in regards to the replica 4 RUV
+    log.info('test_clean_force: run the cleanAllRUV task...')
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+    cruv_task.create(properties={
+        'replica-id': m4rid,
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'yes'
+        })
+    cruv_task.wait()
+
+    # Check the other supplier's RUV for 'replica 4'
+    log.info('test_clean_force: check all the suppliers have been cleaned...')
+    clean = check_ruvs("test_clean_force", topology_m4, m4rid)
+    assert clean
+
+    log.info('test_clean_force PASSED')
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
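For reference, the minimal forced-clean invocation the test above builds up to
(a condensed sketch using the names defined in this file, not a separate API):

    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
    cruv_task.create(properties={
        'replica-id': m4rid,                 # rid of the removed supplier
        'replica-base-dn': DEFAULT_SUFFIX,
        'replica-force-cleaning': 'yes',     # proceed without waiting for out-of-sync replicas
        })
    cruv_task.wait()
    assert check_ruvs("test_clean_force", topology_m4, m4rid)
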
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py
new file mode 100644
index 000000000..0a0848bda
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_multiple_force_test.py
@@ -0,0 +1,214 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import os
+import pytest
+import random
+import time
+import threading
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.idm.directorymanager import DirectoryManager
+from lib389.idm.user import UserAccounts
+from lib389.replica import ReplicationManager, Replicas
+
+log = logging.getLogger(__name__)
+
+
+class AddUsers(threading.Thread):
+    def __init__(self, inst, num_users):
+        threading.Thread.__init__(self)
+        self.daemon = True
+        self.inst = inst
+        self.num_users = num_users
+
+    def run(self):
+        """Start adding users"""
+
+        dm = DirectoryManager(self.inst)
+        conn = dm.bind()
+
+        users = UserAccounts(conn, DEFAULT_SUFFIX)
+
+        u_range = list(range(self.num_users))
+        random.shuffle(u_range)
+
+        for idx in u_range:
+            try:
+                users.create(properties={
+                    'uid': 'testuser%s' % idx,
+                    'cn' : 'testuser%s' % idx,
+                    'sn' : 'user%s' % idx,
+                    'uidNumber' : '%s' % (1000 + idx),
+                    'gidNumber' : '%s' % (1000 + idx),
+                    'homeDirectory' : '/home/testuser%s' % idx
+                })
+            # One of the suppliers was probably put into read only mode - just break out
+            except ldap.UNWILLING_TO_PERFORM:
+                break
+            except ldap.ALREADY_EXISTS:
+                pass
+        conn.close()
+
+def remove_some_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4 except from supplier3.  Used by
+    the force tests."""
+
+    log.info('%s: remove the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
+                    break
+            else:
+                done = True
+                break
+        except ldap.NO_SUCH_OBJECT:
+            done = True
+            break
+        except ldap.LDAPError:
+            break
+        time.sleep(1)
+        count += 1
+
+    return done
+
+def check_ruvs(msg, topology_m4, m4rid):
+    """Check suppliers 1-3 for supplier 4's rid."""
+    for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+        clean = False
+        replicas = Replicas(inst)
+        replica = replicas.get(DEFAULT_SUFFIX)
+        log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+        count = 0
+        while not clean and count < 20:
+            ruv = replica.get_ruv()
+            if m4rid in ruv._rids:
+                time.sleep(5)
+                count = count + 1
+            else:
+                clean = True
+        if not clean:
+            raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+    return True
+
+
+def test_multiple_tasks_with_force(topology_m4):
+    """Check that multiple tasks with a 'force' option work properly
+
+    :id: eb76a93d-8d1c-405e-9f25-6e8d5a781098
+    :setup: Replication setup with four suppliers
+    :steps:
+        1. Stop supplier 3
+        2. Add a bunch of updates to supplier 4
+        3. Disable replication on supplier 4
+        4. Start supplier 3
+        5. Remove agreements to supplier 4 from other suppliers
+        6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
+        7. Run one more cleanallruv task on supplier 1 with a 'force' option 'off'
+        8. Check that everything was cleaned
+    :expectedresults:
+        1. Supplier 3 should be stopped
+        2. Operation should be successful
+        3. Replication on supplier 4 should be disabled
+        4. Supplier 3 should be started
+        5. Agreements to supplier 4 should be removed
+        6. Operation should be successful
+        7. Operation should be successful
+        8. Everything should be cleaned
+    """
+
+    log.info('Running test_multiple_tasks_with_force...')
+
+    # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
+    topology_m4.ms["supplier3"].stop()
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+
+    # Add a bunch of updates to supplier 4
+    m4_add_users = AddUsers(topology_m4.ms["supplier4"], 10)
+    m4_add_users.start()
+    m4_add_users.join()
+
+    # Disable supplier 4
+    # Remove the agreements from the other suppliers that point to supplier 4
+    remove_some_supplier4_agmts("test_multiple_tasks_with_force", topology_m4)
+
+    # Start supplier 3, it should be out of sync with the other replicas...
+    topology_m4.ms["supplier3"].start()
+
+    # Remove the agreement to replica 4
+    replica = Replicas(topology_m4.ms["supplier3"]).get(DEFAULT_SUFFIX)
+    replica.get_agreements().get("004").delete()
+
+    # Run the task, use "force" because supplier 3 is not in sync with the other replicas
+    # in regards to the replica 4 RUV
+    log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" on...')
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+    cruv_task.create(properties={
+        'replica-id': m4rid,
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'yes',
+        'replica-certify-all': 'no'
+        })
+
+    log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" off...')
+
+    # NOTE: This must be a plain try/except rather than pytest.raises, because
+    # the task above may or may not have completed yet ....
+    try:
+        cruv_task_fail = CleanAllRUVTask(topology_m4.ms["supplier1"])
+        cruv_task_fail.create(properties={
+            'replica-id': m4rid,
+            'replica-base-dn': DEFAULT_SUFFIX,
+            'replica-force-cleaning': 'no',
+            'replica-certify-all': 'no'
+            })
+        cruv_task_fail.wait()
+    except ldap.UNWILLING_TO_PERFORM:
+        pass
+    # Wait for the force task ....
+    cruv_task.wait()
+
+    # Check the other supplier's RUV for 'replica 4'
+    log.info('test_multiple_tasks_with_force: check all the suppliers have been cleaned...')
+    clean = check_ruvs("test_multiple_tasks_with_force", topology_m4, m4rid)
+    assert clean
+    # Check supplier 1 does not have the clean task running
+    log.info('test_multiple_tasks_with_force: check supplier 1 no longer has a cleanAllRUV task...')
+    if not task_done(topology_m4, cruv_task.dn):
+        log.fatal('test_multiple_tasks_with_force: CleanAllRUV task did not finish')
+        assert False
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
+
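The try/except in the test above exists because the two tasks race: a second,
non-forced clean for the same rid may be refused outright while the forced one
is still running. Condensed hypothetical shape of that tolerance (names as in
the test):

    try:
        cruv_task_fail = CleanAllRUVTask(topology_m4.ms["supplier1"])
        cruv_task_fail.create(properties={
            'replica-id': m4rid,               # same rid as the forced task
            'replica-base-dn': DEFAULT_SUFFIX,
            'replica-force-cleaning': 'no',
            })
        cruv_task_fail.wait()
    except ldap.UNWILLING_TO_PERFORM:
        pass  # expected while the forced task still marks the clean in progress
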
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py
new file mode 100644
index 000000000..2e8d7e4a6
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_restart_test.py
@@ -0,0 +1,162 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager, Replicas
+
+log = logging.getLogger(__name__)
+
+
+def remove_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4. """
+
+    log.info('%s: remove all the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
+                    break
+            else:
+                done = True
+                break
+        except ldap.NO_SUCH_OBJECT:
+            done = True
+            break
+        except ldap.LDAPError:
+            break
+        time.sleep(1)
+        count += 1
+
+    return done
+
+
+def check_ruvs(msg, topology_m4, m4rid):
+    """Check suppliers 1-3 for supplier 4's rid."""
+    for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
+        clean = False
+        replicas = Replicas(inst)
+        replica = replicas.get(DEFAULT_SUFFIX)
+        log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
+
+        count = 0
+        while not clean and count < 20:
+            ruv = replica.get_ruv()
+            if m4rid in ruv._rids:
+                time.sleep(5)
+                count = count + 1
+            else:
+                clean = True
+        if not clean:
+            raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
+    return True
+
+
+@pytest.mark.flaky(max_runs=2, min_passes=1)
+def test_clean_restart(topology_m4):
+    """Check that cleanallruv task works properly after a restart
+
+    :id: c6233bb3-092c-4919-9ac9-80dd02cc6e02
+    :setup: Replication setup with four suppliers
+    :steps:
+        1. Disable replication on supplier 4
+        2. Remove agreements to supplier 4 from other suppliers
+        3. Stop supplier 3
+        4. Run a cleanallruv task on supplier 1
+        5. Stop supplier 1
+        6. Start supplier 3
+        7. Make sure that no crash happened
+        8. Start supplier 1
+        9. Make sure that no crash happened
+        10. Check that everything was cleaned
+    :expectedresults:
+        1. Operation should be successful
+        2. Agreements to supplier 4 should be removed
+        3. Supplier 3 should be stopped
+        4. Cleanallruv task should be successfully executed
+        5. Supplier 1 should be stopped
+        6. Supplier 3 should be started
+        7. No crash should happen
+        8. Supplier 1 should be started
+        9. No crash should happen
+        10. Everything should be cleaned
+    """
+    log.info('Running test_clean_restart...')
+
+    # Disable supplier 4
+    log.info('test_clean_restart: disable supplier 4...')
+
+    # Remove the agreements from the other suppliers that point to supplier 4
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
+    remove_supplier4_agmts("test_clean_restart", topology_m4)
+
+    # Stop supplier 3 to keep the task running, so we can stop supplier 1...
+    topology_m4.ms["supplier3"].stop()
+
+    # Run the task
+    log.info('test_clean_restart: run the cleanAllRUV task...')
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
+    cruv_task.create(properties={
+        'replica-id': m4rid,
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'no',
+        'replica-certify-all': 'yes'
+        })
+
+    # Sleep a bit, then stop supplier 1
+    time.sleep(5)
+    topology_m4.ms["supplier1"].stop()
+
+    # Now start supplier 3 & 1, and make sure we didn't crash
+    topology_m4.ms["supplier3"].start()
+    if topology_m4.ms["supplier3"].detectDisorderlyShutdown():
+        log.fatal('test_clean_restart: Supplier 3 previously crashed!')
+        assert False
+
+    topology_m4.ms["supplier1"].start(timeout=30)
+    if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
+        log.fatal('test_clean_restart: Supplier 1 previously crashed!')
+        assert False
+
+    # Check the other supplier's RUV for 'replica 4'
+    log.info('test_clean_restart: check all the suppliers have been cleaned...')
+    clean = check_ruvs("test_clean_restart", topology_m4, m4rid)
+    assert clean
+
+    log.info('test_clean_restart PASSED')
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py
new file mode 100644
index 000000000..b4b74e339
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_shutdown_crash_test.py
@@ -0,0 +1,123 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import logging
+import pytest
+import os
+import time
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m2
+from lib389.tasks import CleanAllRUVTask
+from lib389.replica import ReplicationManager, Replicas
+from lib389.config import CertmapLegacy
+from lib389.idm.services import ServiceAccounts
+
+log = logging.getLogger(__name__)
+
+
+def test_clean_shutdown_crash(topology_m2):
+    """Check that server didn't crash after shutdown when running CleanAllRUV task
+
+    :id: c34d0b40-3c3e-4f53-8656-5e4c2a310aaf
+    :setup: Replication setup with two suppliers
+    :steps:
+        1. Enable TLS on both suppliers
+        2. Reconfigure both agreements to use TLS Client auth
+        3. Stop supplier2
+        4. Run the CleanAllRUV task
+        5. Restart supplier1
+        6. Check if supplier1 didn't crash
+        7. Restart supplier1 again
+        8. Check if supplier1 didn't crash
+
+    :expectedresults:
+        1. Success
+        2. Success
+        3. Success
+        4. Success
+        5. Success
+        6. Success
+        7. Success
+        8. Success
+    """
+
+    m1 = topology_m2.ms["supplier1"]
+    m2 = topology_m2.ms["supplier2"]
+
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+
+    cm_m1 = CertmapLegacy(m1)
+    cm_m2 = CertmapLegacy(m2)
+
+    certmaps = cm_m1.list()
+    certmaps['default']['DNComps'] = None
+    certmaps['default']['CmapLdapAttr'] = 'nsCertSubjectDN'
+
+    cm_m1.set(certmaps)
+    cm_m2.set(certmaps)
+
+    log.info('Enabling TLS')
+    [i.enable_tls() for i in topology_m2]
+
+    log.info('Creating replication dns')
+    services = ServiceAccounts(m1, DEFAULT_SUFFIX)
+    repl_m1 = services.get('%s:%s' % (m1.host, m1.sslport))
+    repl_m1.set('nsCertSubjectDN', m1.get_server_tls_subject())
+
+    repl_m2 = services.get('%s:%s' % (m2.host, m2.sslport))
+    repl_m2.set('nsCertSubjectDN', m2.get_server_tls_subject())
+
+    log.info('Changing auth type')
+    replica_m1 = Replicas(m1).get(DEFAULT_SUFFIX)
+    agmt_m1 = replica_m1.get_agreements().list()[0]
+    agmt_m1.replace_many(
+        ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
+        ('nsDS5ReplicaTransportInfo', 'SSL'),
+        ('nsDS5ReplicaPort', '%s' % m2.sslport),
+    )
+
+    agmt_m1.remove_all('nsDS5ReplicaBindDN')
+
+    replica_m2 = Replicas(m2).get(DEFAULT_SUFFIX)
+    agmt_m2 = replica_m2.get_agreements().list()[0]
+
+    agmt_m2.replace_many(
+        ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
+        ('nsDS5ReplicaTransportInfo', 'SSL'),
+        ('nsDS5ReplicaPort', '%s' % m1.sslport),
+    )
+    agmt_m2.remove_all('nsDS5ReplicaBindDN')
+
+    log.info('Stopping supplier2')
+    m2.stop()
+
+    log.info('Run the cleanAllRUV task')
+    cruv_task = CleanAllRUVTask(m1)
+    cruv_task.create(properties={
+        'replica-id': repl.get_rid(m1),
+        'replica-base-dn': DEFAULT_SUFFIX,
+        'replica-force-cleaning': 'no',
+        'replica-certify-all': 'yes'
+    })
+
+    m1.restart()
+
+    log.info('Check if supplier1 crashed')
+    assert not m1.detectDisorderlyShutdown()
+
+    log.info('Repeat')
+    m1.restart()
+    assert not m1.detectDisorderlyShutdown()
+
+
+if __name__ == '__main__':
+    # Run isolated
+    # -s for DEBUG mode
+    CURRENT_FILE = os.path.realpath(__file__)
+    pytest.main(["-s", CURRENT_FILE])
+
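The crash scenario above, distilled into a hypothetical sketch (m1, m2, repl,
and the task as set up in this test): start a certify-all clean while the TLS
peer is down, then restart mid-task; shutdown must be orderly both times.

    m2.stop()                                  # peer offline, task keeps retrying
    CleanAllRUVTask(m1).create(properties={
        'replica-id': repl.get_rid(m1),
        'replica-base-dn': DEFAULT_SUFFIX,
        'replica-force-cleaning': 'no',
        'replica-certify-all': 'yes',
        })
    m1.restart()                               # shutdown with the task in flight
    assert not m1.detectDisorderlyShutdown()   # must not have crashed
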
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py
new file mode 100644
index 000000000..0d43dd7d4
--- /dev/null
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_stress_test.py
@@ -0,0 +1,216 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2022 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+import ldap
+import logging
+import pytest
+import os
+import random
+import time
+import threading
+from lib389._constants import DEFAULT_SUFFIX
+from lib389.topologies import topology_m4
+from lib389.tasks import CleanAllRUVTask
+from lib389.idm.directorymanager import DirectoryManager
+from lib389.idm.user import UserAccounts
+from lib389.replica import ReplicationManager, Replicas
+from lib389.config import LDBMConfig
+
+log = logging.getLogger(__name__)
+
+
+class AddUsers(threading.Thread):
+    def __init__(self, inst, num_users):
+        threading.Thread.__init__(self)
+        self.daemon = True
+        self.inst = inst
+        self.num_users = num_users
+
+    def run(self):
+        """Start adding users"""
+
+        dm = DirectoryManager(self.inst)
+        conn = dm.bind()
+
+        users = UserAccounts(conn, DEFAULT_SUFFIX)
+
+        u_range = list(range(self.num_users))
+        random.shuffle(u_range)
+
+        for idx in u_range:
+            try:
+                users.create(properties={
+                    'uid': 'testuser%s' % idx,
+                    'cn' : 'testuser%s' % idx,
+                    'sn' : 'user%s' % idx,
+                    'uidNumber' : '%s' % (1000 + idx),
+                    'gidNumber' : '%s' % (1000 + idx),
+                    'homeDirectory' : '/home/testuser%s' % idx
+                })
+            # One of the suppliers was probably put into read only mode - just break out
+            except ldap.UNWILLING_TO_PERFORM:
+                break
+            except ldap.ALREADY_EXISTS:
+                pass
+        conn.close()
+
+def remove_supplier4_agmts(msg, topology_m4):
+    """Remove all the repl agmts to supplier4. """
+
+    log.info('%s: remove all the agreements to supplier 4...' % msg)
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    # This will delete m4 from the topo *and* remove all incoming agreements
+    # to m4.
+    repl.remove_supplier(topology_m4.ms["supplier4"],
+        [topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]])
+
+def task_done(topology_m4, task_dn, timeout=60):
+    """Check if the task is complete"""
+
+    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
+                'nsTaskCurrentItem', 'nsTaskTotalItems']
+    done = False
+    count = 0
+
+    while not done and count < timeout:
+        try:
+            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
+            if entry is not None:
+                if entry.hasAttr('nsTaskExitCode'):
+                    done = True
a77461
+                    break
a77461
+            else:
a77461
+                done = True
a77461
+                break
a77461
+        except ldap.NO_SUCH_OBJECT:
a77461
+            done = True
a77461
+            break
a77461
+        except ldap.LDAPError:
a77461
+            break
a77461
+        time.sleep(1)
a77461
+        count += 1
a77461
+
a77461
+    return done
a77461
+
a77461
+def check_ruvs(msg, topology_m4, m4rid):
a77461
+    """Check suppliers 1-3 for supplier 4's rid."""
a77461
+    for inst in (topology_m4.ms["supplier1"], topology_m4.ms["supplier2"], topology_m4.ms["supplier3"]):
a77461
+        clean = False
a77461
+        replicas = Replicas(inst)
a77461
+        replica = replicas.get(DEFAULT_SUFFIX)
a77461
+        log.info('check_ruvs for replica %s:%s (suffix:rid)' % (replica.get_suffix(), replica.get_rid()))
a77461
+
a77461
+        count = 0
a77461
+        while not clean and count < 20:
a77461
+            ruv = replica.get_ruv()
a77461
+            if m4rid in ruv._rids:
a77461
+                time.sleep(5)
a77461
+                count = count + 1
a77461
+            else:
a77461
+                clean = True
a77461
+        if not clean:
a77461
+            raise Exception("Supplier %s was not cleaned in time." % inst.serverid)
a77461
+    return True
a77461
+
a77461
+
a77461
+@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
+def test_stress_clean(topology_m4):
a77461
+    """Put each server(m1 - m4) under a stress, and perform the entire clean process
a77461
+
a77461
+    :id: a8263cd6-f068-4357-86e0-e7c34504c8c5
a77461
+    :setup: Replication setup with four suppliers
a77461
+    :steps:
a77461
+        1. Add a bunch of updates to all suppliers
a77461
+        2. Put supplier 4 to read-only mode
a77461
+        3. Disable replication on supplier 4
a77461
+        4. Remove agreements to supplier 4 from other suppliers
a77461
+        5. Run a cleanallruv task on supplier 1
a77461
+        6. Check that everything was cleaned
a77461
+    :expectedresults:
a77461
+        1. Operation should be successful
a77461
+        2. Supplier 4 should be put to read-only mode
a77461
+        3. Replication on supplier 4 should be disabled
a77461
+        4. Agreements to supplier 4 should be removed
a77461
+        5. Operation should be successful
a77461
+        6. Everything should be cleaned
a77461
+    """
a77461
+
a77461
+    log.info('Running test_stress_clean...')
a77461
+    log.info('test_stress_clean: put all the suppliers under load...')
a77461
+
a77461
+    ldbm_config = LDBMConfig(topology_m4.ms["supplier4"])
a77461
+
a77461
+    # Put all the suppliers under load
a77461
+    # not too high a load, else it takes a long time to converge and
a77461
+    # the test result becomes unstable
a77461
+    m1_add_users = AddUsers(topology_m4.ms["supplier1"], 200)
a77461
+    m1_add_users.start()
a77461
+    m2_add_users = AddUsers(topology_m4.ms["supplier2"], 200)
a77461
+    m2_add_users.start()
a77461
+    m3_add_users = AddUsers(topology_m4.ms["supplier3"], 200)
a77461
+    m3_add_users.start()
a77461
+    m4_add_users = AddUsers(topology_m4.ms["supplier4"], 200)
a77461
+    m4_add_users.start()
a77461
+
a77461
+    # Allow some time for replication to get flowing in all directions
a77461
+    log.info('test_stress_clean: allow some time for replication to get flowing...')
a77461
+    time.sleep(5)
a77461
+
a77461
+    # Put supplier 4 into read only mode
a77461
+    ldbm_config.set('nsslapd-readonly', 'on')
a77461
+    # We need to wait for supplier 4 to push its changes out
a77461
+    log.info('test_stress_clean: allow some time for supplier 4 to push changes out (60 seconds)...')
a77461
+    time.sleep(60)
a77461
+
a77461
+    # Remove the agreements from the other suppliers that point to supplier 4
a77461
+    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
a77461
+    remove_supplier4_agmts("test_stress_clean", topology_m4)
a77461
+
a77461
+    # Run the task
a77461
+    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
+    cruv_task.create(properties={
a77461
+        'replica-id': m4rid,
a77461
+        'replica-base-dn': DEFAULT_SUFFIX,
a77461
+        'replica-force-cleaning': 'no'
a77461
+        })
a77461
+    cruv_task.wait()
a77461
+
a77461
+    # Wait for the update to finish
a77461
+    log.info('test_stress_clean: wait for all the updates to finish...')
a77461
+    m1_add_users.join()
a77461
+    m2_add_users.join()
a77461
+    m3_add_users.join()
a77461
+    m4_add_users.join()
a77461
+
a77461
+    # Check the other supplier's RUV for 'replica 4'
a77461
+    log.info('test_stress_clean: check if all the replicas have been cleaned...')
a77461
+    clean = check_ruvs("test_stress_clean", topology_m4, m4rid)
a77461
+    assert clean
a77461
+
a77461
+    log.info('test_stress_clean: PASSED, restoring supplier 4...')
a77461
+
a77461
+    # Wait a bit for replication to complete
a77461
+    log.info("Sleep for 120 seconds to allow replication to complete...")
a77461
+    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
+    repl.test_replication_topology([
a77461
+        topology_m4.ms["supplier1"],
a77461
+        topology_m4.ms["supplier2"],
a77461
+        topology_m4.ms["supplier3"],
a77461
+        ], timeout=120)
a77461
+
a77461
+    # Turn off readonly mode
a77461
+    ldbm_config.set('nsslapd-readonly', 'off')
a77461
+
a77461
+
a77461
+if __name__ == '__main__':
a77461
+    # Run isolated
a77461
+    # -s for DEBUG mode
a77461
+    CURRENT_FILE = os.path.realpath(__file__)
a77461
+    pytest.main(["-s", CURRENT_FILE])
a77461
+
a77461
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
a77461
index 1e9cd7c28..6d7141ada 100644
a77461
--- a/dirsrvtests/tests/suites/replication/cleanallruv_test.py
a77461
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
a77461
@@ -1,27 +1,20 @@
a77461
 # --- BEGIN COPYRIGHT BLOCK ---
a77461
-# Copyright (C) 2019 Red Hat, Inc.
a77461
+# Copyright (C) 2022 Red Hat, Inc.
a77461
 # All rights reserved.
a77461
 #
a77461
 # License: GPL (version 3 or any later version).
a77461
 # See LICENSE for details.
a77461
 # --- END COPYRIGHT BLOCK ---
a77461
 #
a77461
-import threading
a77461
 import pytest
a77461
-import random
a77461
 from lib389 import DirSrv
a77461
 from lib389.tasks import *
a77461
 from lib389.utils import *
a77461
 from lib389.topologies import topology_m4, topology_m2
a77461
-from lib389._constants import *
a77461
-
a77461
-from lib389.idm.directorymanager import DirectoryManager
a77461
+from lib389._constants import DEFAULT_SUFFIX
a77461
 from lib389.replica import ReplicationManager, Replicas
a77461
 from lib389.tasks import CleanAllRUVTask
a77461
-from lib389.idm.user import UserAccounts
a77461
-from lib389.config import LDBMConfig
a77461
-from lib389.config import CertmapLegacy
a77461
-from lib389.idm.services import ServiceAccounts
a77461
+
a77461
 
a77461
 pytestmark = pytest.mark.tier1
a77461
 
a77461
@@ -29,42 +22,6 @@ logging.getLogger(__name__).setLevel(logging.DEBUG)
a77461
 log = logging.getLogger(__name__)
a77461
 
a77461
 
a77461
-class AddUsers(threading.Thread):
a77461
-    def __init__(self, inst, num_users):
a77461
-        threading.Thread.__init__(self)
a77461
-        self.daemon = True
a77461
-        self.inst = inst
a77461
-        self.num_users = num_users
a77461
-
a77461
-    def run(self):
a77461
-        """Start adding users"""
a77461
-
a77461
-        dm = DirectoryManager(self.inst)
a77461
-        conn = dm.bind()
a77461
-
a77461
-        users = UserAccounts(conn, DEFAULT_SUFFIX)
a77461
-
a77461
-        u_range = list(range(self.num_users))
a77461
-        random.shuffle(u_range)
a77461
-
a77461
-        for idx in u_range:
a77461
-            try:
a77461
-                users.create(properties={
a77461
-                    'uid': 'testuser%s' % idx,
a77461
-                    'cn' : 'testuser%s' % idx,
a77461
-                    'sn' : 'user%s' % idx,
a77461
-                    'uidNumber' : '%s' % (1000 + idx),
a77461
-                    'gidNumber' : '%s' % (1000 + idx),
a77461
-                    'homeDirectory' : '/home/testuser%s' % idx
a77461
-                })
a77461
-            # One of the suppliers was probably put into read only mode - just break out
a77461
-            except ldap.UNWILLING_TO_PERFORM:
a77461
-                break
a77461
-            except ldap.ALREADY_EXISTS:
a77461
-                pass
a77461
-        conn.close()
a77461
-
a77461
-
a77461
 def remove_supplier4_agmts(msg, topology_m4):
a77461
     """Remove all the repl agmts to supplier4. """
a77461
 
a77461
@@ -96,92 +53,7 @@ def check_ruvs(msg, topology_m4, m4rid):
a77461
     return True
a77461
 
a77461
 
a77461
-def task_done(topology_m4, task_dn, timeout=60):
a77461
-    """Check if the task is complete"""
a77461
-
a77461
-    attrlist = ['nsTaskLog', 'nsTaskStatus', 'nsTaskExitCode',
a77461
-                'nsTaskCurrentItem', 'nsTaskTotalItems']
a77461
-    done = False
a77461
-    count = 0
a77461
-
a77461
-    while not done and count < timeout:
a77461
-        try:
a77461
-            entry = topology_m4.ms["supplier1"].getEntry(task_dn, attrlist=attrlist)
a77461
-            if entry is not None:
a77461
-                if entry.hasAttr('nsTaskExitCode'):
a77461
-                    done = True
a77461
-                    break
a77461
-            else:
a77461
-                done = True
a77461
-                break
a77461
-        except ldap.NO_SUCH_OBJECT:
a77461
-            done = True
a77461
-            break
a77461
-        except ldap.LDAPError:
a77461
-            break
a77461
-        time.sleep(1)
a77461
-        count += 1
a77461
-
a77461
-    return done
a77461
-
a77461
-
a77461
-def restore_supplier4(topology_m4):
a77461
-    """In our tests will always be removing supplier 4, so we need a common
a77461
-    way to restore it for another test
a77461
-    """
a77461
-
a77461
-    # Restart the remaining suppliers to allow rid 4 to be reused.
a77461
-    for inst in topology_m4.ms.values():
a77461
-        inst.restart()
a77461
-
a77461
-    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
-    repl.join_supplier(topology_m4.ms["supplier1"], topology_m4.ms["supplier4"])
a77461
-
a77461
-    # Add the 2,3 -> 4 agmt.
a77461
-    repl.ensure_agreement(topology_m4.ms["supplier2"], topology_m4.ms["supplier4"])
a77461
-    repl.ensure_agreement(topology_m4.ms["supplier3"], topology_m4.ms["supplier4"])
a77461
-    # And in reverse ...
a77461
-    repl.ensure_agreement(topology_m4.ms["supplier4"], topology_m4.ms["supplier2"])
a77461
-    repl.ensure_agreement(topology_m4.ms["supplier4"], topology_m4.ms["supplier3"])
a77461
-
a77461
-    log.info('Supplier 4 has been successfully restored.')
a77461
-
a77461
-
a77461
-@pytest.fixture()
a77461
-def m4rid(request, topology_m4):
a77461
-    log.debug("Wait a bit before the reset - it is required for the slow machines")
a77461
-    time.sleep(5)
a77461
-    log.debug("-------------- BEGIN RESET of m4 -----------------")
a77461
-    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
-    repl.test_replication_topology(topology_m4.ms.values())
a77461
-    # What is supplier4's rid?
a77461
-    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
a77461
-
a77461
-    def fin():
a77461
-        try:
a77461
-            # Restart the suppliers and rerun cleanallruv
a77461
-            for inst in topology_m4.ms.values():
a77461
-                inst.restart()
a77461
-
a77461
-            cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-            cruv_task.create(properties={
a77461
-                'replica-id': m4rid,
a77461
-                'replica-base-dn': DEFAULT_SUFFIX,
a77461
-                'replica-force-cleaning': 'no',
a77461
-                })
a77461
-            cruv_task.wait()
a77461
-        except ldap.UNWILLING_TO_PERFORM:
a77461
-            # In some casse we already cleaned rid4, so if we fail, it's okay
a77461
-            pass
a77461
-        restore_supplier4(topology_m4)
a77461
-        # Make sure everything works.
a77461
-        repl.test_replication_topology(topology_m4.ms.values())
a77461
-    request.addfinalizer(fin)
a77461
-    log.debug("-------------- FINISH RESET of m4 -----------------")
a77461
-    return m4rid
a77461
-
a77461
-
a77461
-def test_clean(topology_m4, m4rid):
a77461
+def test_clean(topology_m4):
a77461
     """Check that cleanallruv task works properly
a77461
 
a77461
     :id: e9b3ce5c-e17c-409e-aafc-e97d630f2878
a77461
@@ -204,6 +76,8 @@ def test_clean(topology_m4, m4rid):
a77461
     # Disable supplier 4
a77461
     # Remove the agreements from the other suppliers that point to supplier 4
a77461
     log.info('test_clean: disable supplier 4...')
a77461
+    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
+    m4rid = repl.get_rid(topology_m4.ms["supplier4"])
a77461
     remove_supplier4_agmts("test_clean", topology_m4)
a77461
 
a77461
     # Run the task
a77461
@@ -221,610 +95,6 @@ def test_clean(topology_m4, m4rid):
a77461
     clean = check_ruvs("test_clean", topology_m4, m4rid)
a77461
     assert clean
a77461
 
a77461
-    log.info('test_clean PASSED, restoring supplier 4...')
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_clean_restart(topology_m4, m4rid):
a77461
-    """Check that cleanallruv task works properly after a restart
a77461
-
a77461
-    :id: c6233bb3-092c-4919-9ac9-80dd02cc6e02
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Disable replication on supplier 4
a77461
-        2. Remove agreements to supplier 4 from other suppliers
a77461
-        3. Stop supplier 3
a77461
-        4. Run a cleanallruv task on supplier 1
a77461
-        5. Stop supplier 1
a77461
-        6. Start supplier 3
a77461
-        7. Make sure that no crash happened
a77461
-        8. Start supplier 1
a77461
-        9. Make sure that no crash happened
a77461
-        10. Check that everything was cleaned
a77461
-    :expectedresults:
a77461
-        1. Operation should be successful
a77461
-        2. Agreements to supplier 4 should be removed
a77461
-        3. Supplier 3 should be stopped
a77461
-        4. Cleanallruv task should be successfully executed
a77461
-        5. Supplier 1 should be stopped
a77461
-        6. Supplier 3 should be started
a77461
-        7. No crash should happened
a77461
-        8. Supplier 1 should be started
a77461
-        9. No crash should happened
a77461
-        10. Everything should be cleaned
a77461
-    """
a77461
-    log.info('Running test_clean_restart...')
a77461
-
a77461
-    # Disable supplier 4
a77461
-    log.info('test_clean: disable supplier 4...')
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_clean", topology_m4)
a77461
-
a77461
-    # Stop supplier 3 to keep the task running, so we can stop supplier 1...
a77461
-    topology_m4.ms["supplier3"].stop()
a77461
-
a77461
-    # Run the task
a77461
-    log.info('test_clean: run the cleanAllRUV task...')
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'no',
a77461
-        'replica-certify-all': 'yes'
a77461
-        })
a77461
-
a77461
-    # Sleep a bit, then stop supplier 1
a77461
-    time.sleep(5)
a77461
-    topology_m4.ms["supplier1"].stop()
a77461
-
a77461
-    # Now start supplier 3 & 1, and make sure we didn't crash
a77461
-    topology_m4.ms["supplier3"].start()
a77461
-    if topology_m4.ms["supplier3"].detectDisorderlyShutdown():
a77461
-        log.fatal('test_clean_restart: Supplier 3 previously crashed!')
a77461
-        assert False
a77461
-
a77461
-    topology_m4.ms["supplier1"].start(timeout=30)
a77461
-    if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
a77461
-        log.fatal('test_clean_restart: Supplier 1 previously crashed!')
a77461
-        assert False
a77461
-
a77461
-    # Check the other supplier's RUV for 'replica 4'
a77461
-    log.info('test_clean_restart: check all the suppliers have been cleaned...')
a77461
-    clean = check_ruvs("test_clean_restart", topology_m4, m4rid)
a77461
-    assert clean
a77461
-
a77461
-    log.info('test_clean_restart PASSED, restoring supplier 4...')
a77461
-
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_clean_force(topology_m4, m4rid):
a77461
-    """Check that multiple tasks with a 'force' option work properly
a77461
-
a77461
-    :id: f8810dfe-d2d2-4dd9-ba03-5fc14896fabe
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Stop supplier 3
a77461
-        2. Add a bunch of updates to supplier 4
a77461
-        3. Disable replication on supplier 4
a77461
-        4. Start supplier 3
a77461
-        5. Remove agreements to supplier 4 from other suppliers
a77461
-        6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
a77461
-        7. Check that everything was cleaned
a77461
-    :expectedresults:
a77461
-        1. Supplier 3 should be stopped
a77461
-        2. Operation should be successful
a77461
-        3. Replication on supplier 4 should be disabled
a77461
-        4. Supplier 3 should be started
a77461
-        5. Agreements to supplier 4 should be removed
a77461
-        6. Operation should be successful
a77461
-        7. Everything should be cleaned
a77461
-    """
a77461
-
a77461
-    log.info('Running test_clean_force...')
a77461
-
a77461
-    # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
a77461
-    topology_m4.ms["supplier3"].stop()
a77461
-
a77461
-    # Add a bunch of updates to supplier 4
a77461
-    m4_add_users = AddUsers(topology_m4.ms["supplier4"], 1500)
a77461
-    m4_add_users.start()
a77461
-    m4_add_users.join()
a77461
-
a77461
-    # Start supplier 3, it should be out of sync with the other replicas...
a77461
-    topology_m4.ms["supplier3"].start()
a77461
-
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_clean_force", topology_m4)
a77461
-
a77461
-    # Run the task, use "force" because supplier 3 is not in sync with the other replicas
a77461
-    # in regards to the replica 4 RUV
a77461
-    log.info('test_clean: run the cleanAllRUV task...')
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'yes'
a77461
-        })
a77461
-    cruv_task.wait()
a77461
-
a77461
-    # Check the other supplier's RUV for 'replica 4'
a77461
-    log.info('test_clean_force: check all the suppliers have been cleaned...')
a77461
-    clean = check_ruvs("test_clean_force", topology_m4, m4rid)
a77461
-    assert clean
a77461
-
a77461
-    log.info('test_clean_force PASSED, restoring supplier 4...')
a77461
-
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_abort(topology_m4, m4rid):
a77461
-    """Test the abort task basic functionality
a77461
-
a77461
-    :id: b09a6887-8de0-4fac-8e41-73ccbaaf7a08
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Disable replication on supplier 4
a77461
-        2. Remove agreements to supplier 4 from other suppliers
a77461
-        3. Stop supplier 2
a77461
-        4. Run a cleanallruv task on supplier 1
a77461
-        5. Run a cleanallruv abort task on supplier 1
a77461
-    :expectedresults: No hanging tasks left
a77461
-        1. Replication on supplier 4 should be disabled
a77461
-        2. Agreements to supplier 4 should be removed
a77461
-        3. Supplier 2 should be stopped
a77461
-        4. Operation should be successful
a77461
-        5. Operation should be successful
a77461
-    """
a77461
-
a77461
-    log.info('Running test_abort...')
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_abort", topology_m4)
a77461
-
a77461
-    # Stop supplier 2
a77461
-    log.info('test_abort: stop supplier 2 to freeze the cleanAllRUV task...')
a77461
-    topology_m4.ms["supplier2"].stop()
a77461
-
a77461
-    # Run the task
a77461
-    log.info('test_abort: add the cleanAllRUV task...')
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'no',
a77461
-        'replica-certify-all': 'yes'
a77461
-        })
a77461
-    # Wait a bit
a77461
-    time.sleep(2)
a77461
-
a77461
-    # Abort the task
a77461
-    cruv_task.abort()
a77461
-
a77461
-    # Check supplier 1 does not have the clean task running
a77461
-    log.info('test_abort: check supplier 1 no longer has a cleanAllRUV task...')
a77461
-    if not task_done(topology_m4, cruv_task.dn):
a77461
-        log.fatal('test_abort: CleanAllRUV task was not aborted')
a77461
-        assert False
a77461
-
a77461
-    # Start supplier 2
a77461
-    log.info('test_abort: start supplier 2 to begin the restore process...')
a77461
-    topology_m4.ms["supplier2"].start()
a77461
-
a77461
-    log.info('test_abort PASSED, restoring supplier 4...')
a77461
-
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_abort_restart(topology_m4, m4rid):
a77461
-    """Test the abort task can handle a restart, and then resume
a77461
-
a77461
-    :id: b66e33d4-fe85-4e1c-b882-75da80f70ab3
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Disable replication on supplier 4
a77461
-        2. Remove agreements to supplier 4 from other suppliers
a77461
-        3. Stop supplier 3
a77461
-        4. Run a cleanallruv task on supplier 1
a77461
-        5. Run a cleanallruv abort task on supplier 1
a77461
-        6. Restart supplier 1
a77461
-        7. Make sure that no crash happened
a77461
-        8. Start supplier 3
a77461
-        9. Check supplier 1 does not have the clean task running
a77461
-        10. Check that errors log doesn't have 'Aborting abort task' message
a77461
-    :expectedresults:
a77461
-        1. Replication on supplier 4 should be disabled
a77461
-        2. Agreements to supplier 4 should be removed
a77461
-        3. Supplier 3 should be stopped
a77461
-        4. Operation should be successful
a77461
-        5. Operation should be successful
a77461
-        6. Supplier 1 should be restarted
a77461
-        7. No crash should happened
a77461
-        8. Supplier 3 should be started
a77461
-        9. Check supplier 1 shouldn't have the clean task running
a77461
-        10. Errors log shouldn't have 'Aborting abort task' message
a77461
-    """
a77461
-
a77461
-    log.info('Running test_abort_restart...')
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_abort", topology_m4)
a77461
-
a77461
-    # Stop supplier 3
a77461
-    log.info('test_abort_restart: stop supplier 3 to freeze the cleanAllRUV task...')
a77461
-    topology_m4.ms["supplier3"].stop()
a77461
-
a77461
-    # Run the task
a77461
-    log.info('test_abort_restart: add the cleanAllRUV task...')
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'no',
a77461
-        'replica-certify-all': 'yes'
a77461
-        })
a77461
-    # Wait a bit
a77461
-    time.sleep(2)
a77461
-
a77461
-    # Abort the task
a77461
-    cruv_task.abort(certify=True)
a77461
-
a77461
-    # Check supplier 1 does not have the clean task running
a77461
-    log.info('test_abort_abort: check supplier 1 no longer has a cleanAllRUV task...')
a77461
-    if not task_done(topology_m4, cruv_task.dn):
a77461
-        log.fatal('test_abort_restart: CleanAllRUV task was not aborted')
a77461
-        assert False
a77461
-
a77461
-    # Now restart supplier 1, and make sure the abort process completes
a77461
-    topology_m4.ms["supplier1"].restart()
a77461
-    if topology_m4.ms["supplier1"].detectDisorderlyShutdown():
a77461
-        log.fatal('test_abort_restart: Supplier 1 previously crashed!')
a77461
-        assert False
a77461
-
a77461
-    # Start supplier 3
a77461
-    topology_m4.ms["supplier3"].start()
a77461
-
a77461
-    # Need to wait 5 seconds before server processes any leftover tasks
a77461
-    time.sleep(6)
a77461
-
a77461
-    # Check supplier 1 tried to run abort task.  We expect the abort task to be aborted.
a77461
-    if not topology_m4.ms["supplier1"].searchErrorsLog('Aborting abort task'):
a77461
-        log.fatal('test_abort_restart: Abort task did not restart')
a77461
-        assert False
a77461
-
a77461
-    log.info('test_abort_restart PASSED, restoring supplier 4...')
a77461
-
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_abort_certify(topology_m4, m4rid):
a77461
-    """Test the abort task with a replica-certify-all option
a77461
-
a77461
-    :id: 78959966-d644-44a8-b98c-1fcf21b45eb0
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Disable replication on supplier 4
a77461
-        2. Remove agreements to supplier 4 from other suppliers
a77461
-        3. Stop supplier 2
a77461
-        4. Run a cleanallruv task on supplier 1
a77461
-        5. Run a cleanallruv abort task on supplier 1 with a replica-certify-all option
a77461
-    :expectedresults: No hanging tasks left
a77461
-        1. Replication on supplier 4 should be disabled
a77461
-        2. Agreements to supplier 4 should be removed
a77461
-        3. Supplier 2 should be stopped
a77461
-        4. Operation should be successful
a77461
-        5. Operation should be successful
a77461
-    """
a77461
-
a77461
-    log.info('Running test_abort_certify...')
a77461
-
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_abort_certify", topology_m4)
a77461
-
a77461
-    # Stop supplier 2
a77461
-    log.info('test_abort_certify: stop supplier 2 to freeze the cleanAllRUV task...')
a77461
-    topology_m4.ms["supplier2"].stop()
a77461
-
a77461
-    # Run the task
a77461
-    log.info('test_abort_certify: add the cleanAllRUV task...')
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'no',
a77461
-        'replica-certify-all': 'yes'
a77461
-        })
a77461
-    # Wait a bit
a77461
-    time.sleep(2)
a77461
-
a77461
-    # Abort the task
a77461
-    log.info('test_abort_certify: abort the cleanAllRUV task...')
a77461
-    abort_task = cruv_task.abort(certify=True)
a77461
-
a77461
-    # Wait a while and make sure the abort task is still running
a77461
-    log.info('test_abort_certify...')
a77461
-
a77461
-    if task_done(topology_m4, abort_task.dn, 10):
a77461
-        log.fatal('test_abort_certify: abort task incorrectly finished')
a77461
-        assert False
a77461
-
a77461
-    # Now start supplier 2 so it can be aborted
a77461
-    log.info('test_abort_certify: start supplier 2 to allow the abort task to finish...')
a77461
-    topology_m4.ms["supplier2"].start()
a77461
-
a77461
-    # Wait for the abort task to stop
a77461
-    if not task_done(topology_m4, abort_task.dn, 90):
a77461
-        log.fatal('test_abort_certify: The abort CleanAllRUV task was not aborted')
a77461
-        assert False
a77461
-
a77461
-    # Check supplier 1 does not have the clean task running
a77461
-    log.info('test_abort_certify: check supplier 1 no longer has a cleanAllRUV task...')
a77461
-    if not task_done(topology_m4, cruv_task.dn):
a77461
-        log.fatal('test_abort_certify: CleanAllRUV task was not aborted')
a77461
-        assert False
a77461
-
a77461
-    log.info('test_abort_certify PASSED, restoring supplier 4...')
a77461
-
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_stress_clean(topology_m4, m4rid):
a77461
-    """Put each server(m1 - m4) under a stress, and perform the entire clean process
a77461
-
a77461
-    :id: a8263cd6-f068-4357-86e0-e7c34504c8c5
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Add a bunch of updates to all suppliers
a77461
-        2. Put supplier 4 to read-only mode
a77461
-        3. Disable replication on supplier 4
a77461
-        4. Remove agreements to supplier 4 from other suppliers
a77461
-        5. Run a cleanallruv task on supplier 1
a77461
-        6. Check that everything was cleaned
a77461
-    :expectedresults:
a77461
-        1. Operation should be successful
a77461
-        2. Supplier 4 should be put to read-only mode
a77461
-        3. Replication on supplier 4 should be disabled
a77461
-        4. Agreements to supplier 4 should be removed
a77461
-        5. Operation should be successful
a77461
-        6. Everything should be cleaned
a77461
-    """
a77461
-
a77461
-    log.info('Running test_stress_clean...')
a77461
-    log.info('test_stress_clean: put all the suppliers under load...')
a77461
-
a77461
-    ldbm_config = LDBMConfig(topology_m4.ms["supplier4"])
a77461
-
a77461
-    # not too high load else it takes a long time to converge and
a77461
-    # the test result becomes instable
a77461
-    m1_add_users = AddUsers(topology_m4.ms["supplier1"], 500)
a77461
-    m1_add_users.start()
a77461
-    m2_add_users = AddUsers(topology_m4.ms["supplier2"], 500)
a77461
-    m2_add_users.start()
a77461
-    m3_add_users = AddUsers(topology_m4.ms["supplier3"], 500)
a77461
-    m3_add_users.start()
a77461
-    m4_add_users = AddUsers(topology_m4.ms["supplier4"], 500)
a77461
-    m4_add_users.start()
a77461
-
a77461
-    # Allow sometime to get replication flowing in all directions
a77461
-    log.info('test_stress_clean: allow some time for replication to get flowing...')
a77461
-    time.sleep(5)
a77461
-
a77461
-    # Put supplier 4 into read only mode
a77461
-    ldbm_config.set('nsslapd-readonly', 'on')
a77461
-    # We need to wait for supplier 4 to push its changes out
a77461
-    log.info('test_stress_clean: allow some time for supplier 4 to push changes out (60 seconds)...')
a77461
-    time.sleep(30)
a77461
-
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_stress_clean", topology_m4)
a77461
-
a77461
-    # Run the task
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'no'
a77461
-        })
a77461
-    cruv_task.wait()
a77461
-
a77461
-    # Wait for the update to finish
a77461
-    log.info('test_stress_clean: wait for all the updates to finish...')
a77461
-    m1_add_users.join()
a77461
-    m2_add_users.join()
a77461
-    m3_add_users.join()
a77461
-    m4_add_users.join()
a77461
-
a77461
-    # Check the other supplier's RUV for 'replica 4'
a77461
-    log.info('test_stress_clean: check if all the replicas have been cleaned...')
a77461
-    clean = check_ruvs("test_stress_clean", topology_m4, m4rid)
a77461
-    assert clean
a77461
-
a77461
-    log.info('test_stress_clean:  PASSED, restoring supplier 4...')
a77461
-
a77461
-    # Sleep for a bit to replication complete
a77461
-    log.info("Sleep for 120 seconds to allow replication to complete...")
a77461
-    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
-    repl.test_replication_topology([
a77461
-        topology_m4.ms["supplier1"],
a77461
-        topology_m4.ms["supplier2"],
a77461
-        topology_m4.ms["supplier3"],
a77461
-        ], timeout=120)
a77461
-
a77461
-    # Turn off readonly mode
a77461
-    ldbm_config.set('nsslapd-readonly', 'off')
a77461
-
a77461
-
a77461
-@pytest.mark.flaky(max_runs=2, min_passes=1)
a77461
-def test_multiple_tasks_with_force(topology_m4, m4rid):
a77461
-    """Check that multiple tasks with a 'force' option work properly
a77461
-
a77461
-    :id: eb76a93d-8d1c-405e-9f25-6e8d5a781098
a77461
-    :setup: Replication setup with four suppliers
a77461
-    :steps:
a77461
-        1. Stop supplier 3
a77461
-        2. Add a bunch of updates to supplier 4
a77461
-        3. Disable replication on supplier 4
a77461
-        4. Start supplier 3
a77461
-        5. Remove agreements to supplier 4 from other suppliers
a77461
-        6. Run a cleanallruv task on supplier 1 with a 'force' option 'on'
a77461
-        7. Run one more cleanallruv task on supplier 1 with a 'force' option 'off'
a77461
-        8. Check that everything was cleaned
a77461
-    :expectedresults:
a77461
-        1. Supplier 3 should be stopped
a77461
-        2. Operation should be successful
a77461
-        3. Replication on supplier 4 should be disabled
a77461
-        4. Supplier 3 should be started
a77461
-        5. Agreements to supplier 4 should be removed
a77461
-        6. Operation should be successful
a77461
-        7. Operation should be successful
a77461
-        8. Everything should be cleaned
a77461
-    """
a77461
-
a77461
-    log.info('Running test_multiple_tasks_with_force...')
a77461
-
a77461
-    # Stop supplier 3, while we update supplier 4, so that 3 is behind the other suppliers
a77461
-    topology_m4.ms["supplier3"].stop()
a77461
-
a77461
-    # Add a bunch of updates to supplier 4
a77461
-    m4_add_users = AddUsers(topology_m4.ms["supplier4"], 1500)
a77461
-    m4_add_users.start()
a77461
-    m4_add_users.join()
a77461
-
a77461
-    # Start supplier 3, it should be out of sync with the other replicas...
a77461
-    topology_m4.ms["supplier3"].start()
a77461
-
a77461
-    # Disable supplier 4
a77461
-    # Remove the agreements from the other suppliers that point to supplier 4
a77461
-    remove_supplier4_agmts("test_multiple_tasks_with_force", topology_m4)
a77461
-
a77461
-    # Run the task, use "force" because supplier 3 is not in sync with the other replicas
a77461
-    # in regards to the replica 4 RUV
a77461
-    log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" on...')
a77461
-    cruv_task = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': m4rid,
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'yes',
a77461
-        'replica-certify-all': 'no'
a77461
-        })
a77461
-
a77461
-    log.info('test_multiple_tasks_with_force: run the cleanAllRUV task with "force" off...')
a77461
-
a77461
-    # NOTE: This must be try not py.test raises, because the above may or may
a77461
-    # not have completed yet ....
a77461
-    try:
a77461
-        cruv_task_fail = CleanAllRUVTask(topology_m4.ms["supplier1"])
a77461
-        cruv_task_fail.create(properties={
a77461
-            'replica-id': m4rid,
a77461
-            'replica-base-dn': DEFAULT_SUFFIX,
a77461
-            'replica-force-cleaning': 'no',
a77461
-            'replica-certify-all': 'no'
a77461
-            })
a77461
-        cruv_task_fail.wait()
a77461
-    except ldap.UNWILLING_TO_PERFORM:
a77461
-        pass
a77461
-    # Wait for the force task ....
a77461
-    cruv_task.wait()
a77461
-
a77461
-    # Check the other supplier's RUV for 'replica 4'
a77461
-    log.info('test_multiple_tasks_with_force: check all the suppliers have been cleaned...')
a77461
-    clean = check_ruvs("test_clean_force", topology_m4, m4rid)
a77461
-    assert clean
a77461
-    # Check supplier 1 does not have the clean task running
a77461
-    log.info('test_abort: check supplier 1 no longer has a cleanAllRUV task...')
a77461
-    if not task_done(topology_m4, cruv_task.dn):
a77461
-        log.fatal('test_abort: CleanAllRUV task was not aborted')
a77461
-        assert False
a77461
-
a77461
-
a77461
-@pytest.mark.bz1466441
a77461
-@pytest.mark.ds50370
a77461
-def test_clean_shutdown_crash(topology_m2):
a77461
-    """Check that server didn't crash after shutdown when running CleanAllRUV task
a77461
-
a77461
-    :id: c34d0b40-3c3e-4f53-8656-5e4c2a310aaf
a77461
-    :setup: Replication setup with two suppliers
a77461
-    :steps:
a77461
-        1. Enable TLS on both suppliers
a77461
-        2. Reconfigure both agreements to use TLS Client auth
a77461
-        3. Stop supplier2
a77461
-        4. Run the CleanAllRUV task
a77461
-        5. Restart supplier1
a77461
-        6. Check if supplier1 didn't crash
a77461
-        7. Restart supplier1 again
a77461
-        8. Check if supplier1 didn't crash
a77461
-
a77461
-    :expectedresults:
a77461
-        1. Success
a77461
-        2. Success
a77461
-        3. Success
a77461
-        4. Success
a77461
-        5. Success
a77461
-        6. Success
a77461
-        7. Success
a77461
-        8. Success
a77461
-    """
a77461
-
a77461
-    m1 = topology_m2.ms["supplier1"]
a77461
-    m2 = topology_m2.ms["supplier2"]
a77461
-
a77461
-    repl = ReplicationManager(DEFAULT_SUFFIX)
a77461
-
a77461
-    cm_m1 = CertmapLegacy(m1)
a77461
-    cm_m2 = CertmapLegacy(m2)
a77461
-
a77461
-    certmaps = cm_m1.list()
a77461
-    certmaps['default']['DNComps'] = None
a77461
-    certmaps['default']['CmapLdapAttr'] = 'nsCertSubjectDN'
a77461
-
a77461
-    cm_m1.set(certmaps)
a77461
-    cm_m2.set(certmaps)
a77461
-
a77461
-    log.info('Enabling TLS')
a77461
-    [i.enable_tls() for i in topology_m2]
a77461
-
a77461
-    log.info('Creating replication dns')
a77461
-    services = ServiceAccounts(m1, DEFAULT_SUFFIX)
a77461
-    repl_m1 = services.get('%s:%s' % (m1.host, m1.sslport))
a77461
-    repl_m1.set('nsCertSubjectDN', m1.get_server_tls_subject())
a77461
-
a77461
-    repl_m2 = services.get('%s:%s' % (m2.host, m2.sslport))
a77461
-    repl_m2.set('nsCertSubjectDN', m2.get_server_tls_subject())
a77461
-
a77461
-    log.info('Changing auth type')
a77461
-    replica_m1 = Replicas(m1).get(DEFAULT_SUFFIX)
a77461
-    agmt_m1 = replica_m1.get_agreements().list()[0]
a77461
-    agmt_m1.replace_many(
a77461
-        ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
a77461
-        ('nsDS5ReplicaTransportInfo', 'SSL'),
a77461
-        ('nsDS5ReplicaPort', '%s' % m2.sslport),
a77461
-    )
a77461
-
a77461
-    agmt_m1.remove_all('nsDS5ReplicaBindDN')
a77461
-
a77461
-    replica_m2 = Replicas(m2).get(DEFAULT_SUFFIX)
a77461
-    agmt_m2 = replica_m2.get_agreements().list()[0]
a77461
-
a77461
-    agmt_m2.replace_many(
a77461
-        ('nsDS5ReplicaBindMethod', 'SSLCLIENTAUTH'),
a77461
-        ('nsDS5ReplicaTransportInfo', 'SSL'),
a77461
-        ('nsDS5ReplicaPort', '%s' % m1.sslport),
a77461
-    )
a77461
-    agmt_m2.remove_all('nsDS5ReplicaBindDN')
a77461
-
a77461
-    log.info('Stopping supplier2')
a77461
-    m2.stop()
a77461
-
a77461
-    log.info('Run the cleanAllRUV task')
a77461
-    cruv_task = CleanAllRUVTask(m1)
a77461
-    cruv_task.create(properties={
a77461
-        'replica-id': repl.get_rid(m1),
a77461
-        'replica-base-dn': DEFAULT_SUFFIX,
a77461
-        'replica-force-cleaning': 'no',
a77461
-        'replica-certify-all': 'yes'
a77461
-    })
a77461
-
a77461
-    m1.restart()
a77461
-
a77461
-    log.info('Check if supplier1 crashed')
a77461
-    assert not m1.detectDisorderlyShutdown()
a77461
-
a77461
-    log.info('Repeat')
a77461
-    m1.restart()
a77461
-    assert not m1.detectDisorderlyShutdown()
a77461
-
a77461
 
a77461
 if __name__ == '__main__':
a77461
     # Run isolated
a77461
diff --git a/dirsrvtests/tests/suites/replication/regression_m2_test.py b/dirsrvtests/tests/suites/replication/regression_m2_test.py
a77461
index bbf9c8486..65c299a0c 100644
a77461
--- a/dirsrvtests/tests/suites/replication/regression_m2_test.py
a77461
+++ b/dirsrvtests/tests/suites/replication/regression_m2_test.py
a77461
@@ -240,8 +240,12 @@ def test_double_delete(topo_m2, create_entry):
a77461
     log.info('Deleting entry {} from supplier1'.format(create_entry.dn))
a77461
     topo_m2.ms["supplier1"].delete_s(create_entry.dn)
a77461
 
a77461
-    log.info('Deleting entry {} from supplier2'.format(create_entry.dn))
a77461
-    topo_m2.ms["supplier2"].delete_s(create_entry.dn)
a77461
+    try:
a77461
+        log.info('Deleting entry {} from supplier2'.format(create_entry.dn))
a77461
+        topo_m2.ms["supplier2"].delete_s(create_entry.dn)
a77461
+    except ldap.NO_SUCH_OBJECT:
a77461
+        # replication was too fast (DEBUGGING is probably set)
a77461
+        pass
a77461
 
a77461
     repl.enable_to_supplier(m2, [m1])
a77461
     repl.enable_to_supplier(m1, [m2])
a77461
@@ -813,8 +817,9 @@ def test_keepalive_entries(topo_m2):
a77461
     keep_alive_s1 = str(entries[0].data['keepalivetimestamp'])
a77461
     keep_alive_s2 = str(entries[1].data['keepalivetimestamp'])
a77461
 
a77461
-    # Wait for event interval (60 secs) to pass
a77461
-    time.sleep(61)
a77461
+    # Wait for the event interval (60 secs) to pass, but the first update
a77461
+    # doesn't start until 30 seconds after startup
a77461
+    time.sleep(91)
a77461
 
a77461
     # Check keep alives entries have been updated
a77461
     entries = verify_keepalive_entries(topo_m2, True);
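
The 91-second sleep above follows directly from the server-side scheduling visible in the repl5_replica.c hunks below: the first keep-alive update fires 30 seconds after startup and then repeats at the 60-second event interval. A minimal Python sketch of that arithmetic (the constant names here are illustrative, not lib389 API):

    START_UPDATE_DELAY = 30   # first keep-alive update fires 30 s after startup
    EVENT_INTERVAL = 60       # keep-alive update interval, in seconds
    SLACK = 1                 # small margin so the test does not race the event
    assert START_UPDATE_DELAY + EVENT_INTERVAL + SLACK == 91
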
a77461
diff --git a/dirsrvtests/tests/suites/replication/regression_m2c2_test.py b/dirsrvtests/tests/suites/replication/regression_m2c2_test.py
a77461
index 97b35c7ab..f9de7383c 100644
a77461
--- a/dirsrvtests/tests/suites/replication/regression_m2c2_test.py
a77461
+++ b/dirsrvtests/tests/suites/replication/regression_m2c2_test.py
a77461
@@ -289,6 +289,7 @@ def test_csngen_state_not_updated_if_different_uuid(topo_m2c2):
a77461
         log.error(f"c1 csngen state has unexpectedly been synchronized with m2: time skew {c1_timeSkew}")
a77461
         assert False
a77461
     c1.start()
a77461
+    time.sleep(5)
a77461
 
a77461
     # Step 8: Check that c2 has time skew
a77461
     # Stop server to insure that dse.ldif is uptodate
a77461
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
a77461
index 5dab57de4..d67f1bc71 100644
a77461
--- a/ldap/servers/plugins/replication/repl5_replica.c
a77461
+++ b/ldap/servers/plugins/replication/repl5_replica.c
a77461
@@ -239,8 +239,8 @@ replica_new_from_entry(Slapi_Entry *e, char *errortext, PRBool is_add_operation,
a77461
     /* create supplier update event */
a77461
     if (r->repl_eqcxt_ka_update == NULL && replica_get_type(r) == REPLICA_TYPE_UPDATABLE) {
a77461
         r->repl_eqcxt_ka_update = slapi_eq_repeat_rel(replica_subentry_update, r,
a77461
-                                                   slapi_current_rel_time_t() + 30,
a77461
-                                                   replica_get_keepalive_update_interval(r));
a77461
+                                                      slapi_current_rel_time_t() + 30,
a77461
+                                                      1000 * replica_get_keepalive_update_interval(r));
a77461
     }
a77461
 
a77461
     if (r->tombstone_reap_interval > 0) {
a77461
@@ -518,7 +518,7 @@ replica_subentry_update(time_t when __attribute__((unused)), void *arg)
a77461
     replica_subentry_check(repl_root, rid);
a77461
 
a77461
     slapi_timestamp_utc_hr(buf, SLAPI_TIMESTAMP_BUFSIZE);
a77461
-    slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "replica_subentry_update called at %s\n", buf);
a77461
+    slapi_log_err(SLAPI_LOG_REPL, "NSMMReplicationPlugin", "replica_subentry_update called at %s\n", buf);
a77461
     val.bv_val = buf;
a77461
     val.bv_len = strlen(val.bv_val);
a77461
     vals[0] = &val;
a77461
@@ -542,7 +542,7 @@ replica_subentry_update(time_t when __attribute__((unused)), void *arg)
a77461
                       "Failure (%d) to update replication keep alive entry \"%s: %s\"\n",
a77461
                       ldrc, KEEP_ALIVE_ATTR, buf);
a77461
     } else {
a77461
-        slapi_log_err(SLAPI_LOG_PLUGIN, repl_plugin_name,
a77461
+        slapi_log_err(SLAPI_LOG_REPL, "NSMMReplicationPlugin",
a77461
                       "replica_subentry_update - "
a77461
                       "Successful update of replication keep alive entry \"%s: %s\"\n",
a77461
                       KEEP_ALIVE_ATTR, buf);
a77461
@@ -1536,7 +1536,7 @@ replica_set_enabled(Replica *r, PRBool enable)
a77461
         if (r->repl_eqcxt_ka_update == NULL && replica_get_type(r) == REPLICA_TYPE_UPDATABLE) {
a77461
             r->repl_eqcxt_ka_update = slapi_eq_repeat_rel(replica_subentry_update, r,
a77461
                                                        slapi_current_rel_time_t() + START_UPDATE_DELAY,
a77461
-                                                       replica_get_keepalive_update_interval(r));
a77461
+                                                       1000 * replica_get_keepalive_update_interval(r));
a77461
         }
a77461
     } else /* disable */
a77461
     {
a77461
@@ -1546,7 +1546,7 @@ replica_set_enabled(Replica *r, PRBool enable)
a77461
             r->repl_eqcxt_rs = NULL;
a77461
         }
a77461
         /* Remove supplier update event */
a77461
-        if (replica_get_type(r) == REPLICA_TYPE_PRIMARY) {
a77461
+        if (replica_get_type(r) == REPLICA_TYPE_UPDATABLE) {
a77461
             slapi_eq_cancel_rel(r->repl_eqcxt_ka_update);
a77461
             r->repl_eqcxt_ka_update = NULL;
a77461
         }
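
The two slapi_eq_repeat_rel() hunks above are the core of the fix named in the subject line: the event queue takes its repeat interval in milliseconds, while replica_get_keepalive_update_interval() returns seconds, so without the 1000x factor a 60-second interval was treated as 60 ms. A hedged Python sketch of the unit conversion (values illustrative):

    keepalive_interval_s = 60                   # interval as configured, in seconds
    interval_ms = 1000 * keepalive_interval_s   # what the event queue expects
    assert interval_ms == 60000                 # one update per minute, not per 60 ms
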
a77461
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
a77461
index 70c45ec50..b32d00941 100644
a77461
--- a/ldap/servers/plugins/replication/repl_extop.c
a77461
+++ b/ldap/servers/plugins/replication/repl_extop.c
a77461
@@ -493,7 +493,7 @@ free_and_return:
a77461
         slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
a77461
                 "decode_startrepl_extop - decoded csn: %s\n", *csnstr);
a77461
         ruv_dump_to_log(*supplier_ruv, "decode_startrepl_extop");
a77461
-        for (size_t i = 0; *extra_referrals && *extra_referrals[i]; i++) {
a77461
+        for (size_t i = 0; *extra_referrals && extra_referrals[i]; i++) {
a77461
             slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "decode_startrepl_extop - "
a77461
                 "decoded referral: %s\n", *extra_referrals[i]);
a77461
         }
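
The loop-guard change above drops one level of dereference: the old condition evaluated *extra_referrals[i], dereferencing the very slot whose end-of-array NULL it was supposed to detect, which is the referral-decoding crash mentioned in the patch description. A rough Python analogue of walking a C-style NULL-terminated array (purely illustrative, not code from the patch):

    referrals = ['ldap://hostA:636', 'ldap://hostB:636', None]   # NULL-terminated
    i = 0
    while referrals[i] is not None:   # test the slot itself, not its dereference
        print('decoded referral:', referrals[i])
        i += 1
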
a77461
@@ -1661,7 +1661,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
a77461
          *  Launch the cleanruv monitoring thread.  Once all the replicas are cleaned it will release the rid
a77461
          */
a77461
 
a77461
-        cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_ERR, "Launching cleanAllRUV thread...");
a77461
+        cleanruv_log(NULL, rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Launching cleanAllRUV thread...");
a77461
         data = (cleanruv_data *)slapi_ch_calloc(1, sizeof(cleanruv_data));
a77461
         if (data == NULL) {
a77461
             slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "multimaster_extop_cleanruv - CleanAllRUV Task - Failed to allocate "
a77461
diff --git a/ldap/servers/slapd/task.c b/ldap/servers/slapd/task.c
a77461
index 4c7262ab3..71d5a2fb5 100644
a77461
--- a/ldap/servers/slapd/task.c
a77461
+++ b/ldap/servers/slapd/task.c
a77461
@@ -742,7 +742,7 @@ get_internal_entry(Slapi_PBlock *pb, char *dn)
a77461
     slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &ret);
a77461
     if (ret != LDAP_SUCCESS) {
a77461
         slapi_log_err(SLAPI_LOG_WARNING, "get_internal_entry",
a77461
-                      "Can't find task entry '%s'\n", dn);
a77461
+                      "Failed to search for task entry '%s' error: %d\n", dn, ret);
a77461
         return NULL;
a77461
     }
a77461
 
a77461
@@ -786,9 +786,9 @@ modify_internal_entry(char *dn, LDAPMod **mods)
a77461
              * entry -- try at least 3 times before giving up.
a77461
              */
a77461
             tries++;
a77461
-            if (tries == 3) {
a77461
-                slapi_log_err(SLAPI_LOG_WARNING, "modify_internal_entry", "Can't modify task "
a77461
-                                                                          "entry '%s'; %s (%d)\n",
a77461
+            if (tries == 5) {
a77461
+                slapi_log_err(SLAPI_LOG_WARNING, "modify_internal_entry",
a77461
+                              "Can't modify task entry '%s'; %s (%d)\n",
a77461
                               dn, ldap_err2string(ret), ret);
a77461
                 slapi_pblock_destroy(pb);
a77461
                 return;
a77461
diff --git a/src/lib389/lib389/instance/remove.py b/src/lib389/lib389/instance/remove.py
a77461
index e96db3896..5668f375b 100644
a77461
--- a/src/lib389/lib389/instance/remove.py
a77461
+++ b/src/lib389/lib389/instance/remove.py
a77461
@@ -90,6 +90,12 @@ def remove_ds_instance(dirsrv, force=False):
a77461
     # Remove parent (/var/lib/dirsrv/slapd-INST)
a77461
     shutil.rmtree(remove_paths['db_dir'].replace('db', ''), ignore_errors=True)
a77461
 
a77461
+    # Remove the /run/slapd-<serverid>.socket file
a77461
+    try:
a77461
+        os.remove(f'/run/slapd-{dirsrv.serverid}.socket')
a77461
+    except OSError as e:
a77461
+        _log.debug("Failed to remove socket file: " + str(e))
a77461
+
a77461
     # We can not assume we have systemd ...
a77461
     if dirsrv.ds_paths.with_systemd:
a77461
         # Remove the systemd symlink
a77461
-- 
a77461
2.37.1
a77461