|
|
be9751 |
From 29c9e1c3c760f0941b022d45d14c248e9ceb9738 Mon Sep 17 00:00:00 2001
|
|
|
be9751 |
From: progier389 <72748589+progier389@users.noreply.github.com>
|
|
|
be9751 |
Date: Tue, 3 Nov 2020 12:18:50 +0100
|
|
|
be9751 |
Subject: [PATCH 2/3] ticket 2058: Add keep alive entry after on-line
|
|
|
be9751 |
initialization - second version (#4399)
|
|
|
be9751 |
|
|
|
be9751 |
Bug description:
|
|
|
be9751 |
Keep alive entry is not created on target master after on line initialization,
|
|
|
be9751 |
and its RUVelement stays empty until a direct update is issued on that master
|
|
|
be9751 |
|
|
|
be9751 |
Fix description:
|
|
|
be9751 |
The patch allows a consumer (configured as a master) to create (if it did not
|
|
|
be9751 |
exist before) the consumer's keep alive entry. It creates it at the end of a
|
|
|
be9751 |
replication session at a time we are sure the changelog exists and will not
|
|
|
be9751 |
be reset. It allows a consumer to have RUVelement with csn in the RUV at the
|
|
|
be9751 |
first incoming replication session.
|
|
|
be9751 |
|
|
|
be9751 |
That is basically lkrispen's proposal with an associated pytest testcase
|
|
|
be9751 |
|
|
|
be9751 |
Second version changes:
|
|
|
be9751 |
- moved the testcase to suites/replication/regression_test.py
|
|
|
be9751 |
- set up the topology from a 2 master topology then
|
|
|
be9751 |
reinitialized the replicas from an ldif without replication metadata
|
|
|
be9751 |
rather than using the cli.
|
|
|
be9751 |
- search for keepalive entries using search_s instead of getEntry
|
|
|
be9751 |
- add a comment about keep alive entries purpose
|
|
|
be9751 |
|
|
|
be9751 |
last commit:
|
|
|
be9751 |
- wait until the RUVs are in sync before checking keep alive entries
|
|
|
be9751 |
|
|
|
be9751 |
Reviewed by: droideck, Firstyear
|
|
|
be9751 |
|
|
|
be9751 |
Platforms tested: F32
|
|
|
be9751 |
|
|
|
be9751 |
relates: #2058
|
|
|
be9751 |
---
|
|
|
be9751 |
.../suites/replication/regression_test.py | 130 ++++++++++++++++++
|
|
|
be9751 |
.../plugins/replication/repl5_replica.c | 14 ++
|
|
|
be9751 |
ldap/servers/plugins/replication/repl_extop.c | 4 +
|
|
|
be9751 |
3 files changed, 148 insertions(+)
|
|
|
be9751 |
|
|
|
be9751 |
diff --git a/dirsrvtests/tests/suites/replication/regression_test.py b/dirsrvtests/tests/suites/replication/regression_test.py
|
|
|
be9751 |
index 844d762b9..14b9d6a44 100644
|
|
|
be9751 |
--- a/dirsrvtests/tests/suites/replication/regression_test.py
|
|
|
be9751 |
+++ b/dirsrvtests/tests/suites/replication/regression_test.py
|
|
|
be9751 |
@@ -98,6 +98,30 @@ def _move_ruv(ldif_file):
|
|
|
be9751 |
for dn, entry in ldif_list:
|
|
|
be9751 |
ldif_writer.unparse(dn, entry)
|
|
|
be9751 |
|
|
|
be9751 |
+def _remove_replication_data(ldif_file):
|
|
|
be9751 |
+ """ Remove the replication data from ldif file:
|
|
|
be9751 |
+ db2ldif without -r includes some of the replica data like
|
|
|
be9751 |
+ - nsUniqueId
|
|
|
be9751 |
+ - keepalive entries
|
|
|
be9751 |
+ This function filters the ldif file to remove this data
|
|
|
be9751 |
+ """
|
|
|
be9751 |
+
|
|
|
be9751 |
+ with open(ldif_file) as f:
|
|
|
be9751 |
+ parser = ldif.LDIFRecordList(f)
|
|
|
be9751 |
+ parser.parse()
|
|
|
be9751 |
+
|
|
|
be9751 |
+ ldif_list = parser.all_records
|
|
|
be9751 |
+ # Iterate on a copy of the ldif entry list
|
|
|
be9751 |
+ for dn, entry in ldif_list[:]:
|
|
|
be9751 |
+ if dn.startswith('cn=repl keep alive'):
|
|
|
be9751 |
+ ldif_list.remove((dn,entry))
|
|
|
be9751 |
+ else:
|
|
|
be9751 |
+ entry.pop('nsUniqueId')
|
|
|
be9751 |
+ with open(ldif_file, 'w') as f:
|
|
|
be9751 |
+ ldif_writer = ldif.LDIFWriter(f)
|
|
|
be9751 |
+ for dn, entry in ldif_list:
|
|
|
be9751 |
+ ldif_writer.unparse(dn, entry)
|
|
|
be9751 |
+
|
|
|
be9751 |
|
|
|
be9751 |
@pytest.fixture(scope="module")
|
|
|
be9751 |
def topo_with_sigkill(request):
|
|
|
be9751 |
@@ -897,6 +921,112 @@ def test_moving_entry_make_online_init_fail(topology_m2):
|
|
|
be9751 |
assert len(m1entries) == len(m2entries)
|
|
|
be9751 |
|
|
|
be9751 |
|
|
|
be9751 |
+def get_keepalive_entries(instance,replica):
|
|
|
be9751 |
+ # Returns the keep alive entries that exist under the suffix of the server instance
|
|
|
be9751 |
+ try:
|
|
|
be9751 |
+ entries = instance.search_s(replica.get_suffix(), ldap.SCOPE_ONELEVEL,
|
|
|
be9751 |
+ "(&(objectclass=ldapsubentry)(cn=repl keep alive*))",
|
|
|
be9751 |
+ ['cn', 'nsUniqueId', 'modifierTimestamp'])
|
|
|
be9751 |
+ except ldap.LDAPError as e:
|
|
|
be9751 |
+ log.fatal('Failed to retrieve keepalive entry (%s) on instance %s: error %s' % (dn, instance, str(e)))
|
|
|
be9751 |
+ assert False
|
|
|
be9751 |
+ # No error, so lets log the keepalive entries
|
|
|
be9751 |
+ if log.isEnabledFor(logging.DEBUG):
|
|
|
be9751 |
+ for ret in entries:
|
|
|
be9751 |
+ log.debug("Found keepalive entry:\n"+str(ret));
|
|
|
be9751 |
+ return entries
|
|
|
be9751 |
+
|
|
|
be9751 |
+def verify_keepalive_entries(topo, expected):
|
|
|
be9751 |
+ # Check that keep alive entries exist (or do not exist) for every master on every master
|
|
|
be9751 |
+ #Note: The testing method is quite basic: counting that there is one keepalive entry per master.
|
|
|
be9751 |
+ # that is ok for simple test cases like test_online_init_should_create_keepalive_entries but
|
|
|
be9751 |
+ # not for the general case as keep alive entries associated with no longer existing masters may exist
|
|
|
be9751 |
+ # (for example after: db2ldif / demote a master / ldif2db / init other masters)
|
|
|
be9751 |
+ # ==> if the function is somehow pushed in lib389, a check better than simply counting the entries
|
|
|
be9751 |
+ # should be done.
|
|
|
be9751 |
+ for masterId in topo.ms:
|
|
|
be9751 |
+ master=topo.ms[masterId]
|
|
|
be9751 |
+ for replica in Replicas(master).list():
|
|
|
be9751 |
+ if (replica.get_role() != ReplicaRole.MASTER):
|
|
|
be9751 |
+ continue
|
|
|
be9751 |
+ replica_info = f'master: {masterId} RID: {replica.get_rid()} suffix: {replica.get_suffix()}'
|
|
|
be9751 |
+ log.debug(f'Checking keepAliveEntries on {replica_info}')
|
|
|
be9751 |
+ keepaliveEntries = get_keepalive_entries(master, replica);
|
|
|
be9751 |
+ expectedCount = len(topo.ms) if expected else 0
|
|
|
be9751 |
+ foundCount = len(keepaliveEntries)
|
|
|
be9751 |
+ if (foundCount == expectedCount):
|
|
|
be9751 |
+ log.debug(f'Found {foundCount} keepalive entries as expected on {replica_info}.')
|
|
|
be9751 |
+ else:
|
|
|
be9751 |
+ log.error(f'{foundCount} Keepalive entries are found '
|
|
|
be9751 |
+ f'while {expectedCount} were expected on {replica_info}.')
|
|
|
be9751 |
+ assert False
|
|
|
be9751 |
+
|
|
|
be9751 |
+
|
|
|
be9751 |
+def test_online_init_should_create_keepalive_entries(topo_m2):
|
|
|
be9751 |
+ """Check that keep alive entries are created when initializing a master from another one
|
|
|
be9751 |
+
|
|
|
be9751 |
+ :id: d5940e71-d18a-4b71-aaf7-b9185361fffe
|
|
|
be9751 |
+ :setup: Two masters replication setup
|
|
|
be9751 |
+ :steps:
|
|
|
be9751 |
+ 1. Generate ldif without replication data
|
|
|
be9751 |
+ 2. Init both masters from that ldif
|
|
|
be9751 |
+ 3. Check that keep alive entries do not exist
|
|
|
be9751 |
+ 4. Perform on line init of master2 from master1
|
|
|
be9751 |
+ 5. Check that keep alive entries exist
|
|
|
be9751 |
+ :expectedresults:
|
|
|
be9751 |
+ 1. No error while generating ldif
|
|
|
be9751 |
+ 2. No error while importing the ldif file
|
|
|
be9751 |
+ 3. No keepalive entries should exist on any master
|
|
|
be9751 |
+ 4. No error while initializing master2
|
|
|
be9751 |
+ 5. All keepalive entries should exist on every master
|
|
|
be9751 |
+
|
|
|
be9751 |
+ """
|
|
|
be9751 |
+
|
|
|
be9751 |
+ repl = ReplicationManager(DEFAULT_SUFFIX)
|
|
|
be9751 |
+ m1 = topo_m2.ms["master1"]
|
|
|
be9751 |
+ m2 = topo_m2.ms["master2"]
|
|
|
be9751 |
+ # Step 1: Generate ldif without replication data
|
|
|
be9751 |
+ m1.stop()
|
|
|
be9751 |
+ m2.stop()
|
|
|
be9751 |
+ ldif_file = '%s/norepl.ldif' % m1.get_ldif_dir()
|
|
|
be9751 |
+ m1.db2ldif(bename=DEFAULT_BENAME, suffixes=[DEFAULT_SUFFIX],
|
|
|
be9751 |
+ excludeSuffixes=None, repl_data=False,
|
|
|
be9751 |
+ outputfile=ldif_file, encrypt=False)
|
|
|
be9751 |
+ # Remove replication metadata that are still in the ldif
|
|
|
be9751 |
+ _remove_replication_data(ldif_file)
|
|
|
be9751 |
+
|
|
|
be9751 |
+ # Step 2: Init both masters from that ldif
|
|
|
be9751 |
+ m1.ldif2db(DEFAULT_BENAME, None, None, None, ldif_file)
|
|
|
be9751 |
+ m2.ldif2db(DEFAULT_BENAME, None, None, None, ldif_file)
|
|
|
be9751 |
+ m1.start()
|
|
|
be9751 |
+ m2.start()
|
|
|
be9751 |
+
|
|
|
be9751 |
+ """ Replica state is now as if CLI setup has been done using:
|
|
|
be9751 |
+ dsconf master1 replication enable --suffix "${SUFFIX}" --role master
|
|
|
be9751 |
+ dsconf master2 replication enable --suffix "${SUFFIX}" --role master
|
|
|
be9751 |
+ dsconf master1 replication create-manager --name "${REPLICATION_MANAGER_NAME}" --passwd "${REPLICATION_MANAGER_PASSWORD}"
|
|
|
be9751 |
+ dsconf master2 replication create-manager --name "${REPLICATION_MANAGER_NAME}" --passwd "${REPLICATION_MANAGER_PASSWORD}"
|
|
|
be9751 |
+ dsconf master1 repl-agmt create --suffix "${SUFFIX}"
|
|
|
be9751 |
+ dsconf master2 repl-agmt create --suffix "${SUFFIX}"
|
|
|
be9751 |
+ """
|
|
|
be9751 |
+
|
|
|
be9751 |
+ # Step 3: No keepalive entries should exist on any master
|
|
|
be9751 |
+ verify_keepalive_entries(topo_m2, False)
|
|
|
be9751 |
+
|
|
|
be9751 |
+ # Step 4: Perform on line init of master2 from master1
|
|
|
be9751 |
+ agmt = Agreements(m1).list()[0]
|
|
|
be9751 |
+ agmt.begin_reinit()
|
|
|
be9751 |
+ (done, error) = agmt.wait_reinit()
|
|
|
be9751 |
+ assert done is True
|
|
|
be9751 |
+ assert error is False
|
|
|
be9751 |
+
|
|
|
be9751 |
+ # Step 5: All keepalive entries should exist on every master
|
|
|
be9751 |
+ # Verify the keep alive entry once replication is in sync
|
|
|
be9751 |
+ # (that is the step that fails when bug is not fixed)
|
|
|
be9751 |
+ repl.wait_for_ruv(m2,m1)
|
|
|
be9751 |
+ verify_keepalive_entries(topo_m2, True);
|
|
|
be9751 |
+
|
|
|
be9751 |
+
|
|
|
be9751 |
if __name__ == '__main__':
|
|
|
be9751 |
# Run isolated
|
|
|
be9751 |
# -s for DEBUG mode
|
|
|
be9751 |
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
be9751 |
index f01782330..f0ea0f8ef 100644
|
|
|
be9751 |
--- a/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
be9751 |
+++ b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
be9751 |
@@ -373,6 +373,20 @@ replica_destroy(void **arg)
|
|
|
be9751 |
slapi_ch_free((void **)arg);
|
|
|
be9751 |
}
|
|
|
be9751 |
|
|
|
be9751 |
+/******************************************************************************
|
|
|
be9751 |
+ ******************** REPLICATION KEEP ALIVE ENTRIES **************************
|
|
|
be9751 |
+ ******************************************************************************
|
|
|
be9751 |
+ * They are subentries of the replicated suffix and there is one per master. *
|
|
|
be9751 |
+ * These entries exist only to trigger a change that get replicated over the *
|
|
|
be9751 |
+ * topology. *
|
|
|
be9751 |
+ * Their main purpose is to generate records in the changelog and they are *
|
|
|
be9751 |
+ * updated from time to time by fractional replication to ensure that at *
|
|
|
be9751 |
+ * least a change must be replicated by FR after a great number of not *
|
|
|
be9751 |
+ * replicated changes are found in the changelog. The interest is that the *
|
|
|
be9751 |
+ * fractional RUV get then updated so less changes need to be walked in the *
|
|
|
be9751 |
+ * changelog when searching for the first change to send *
|
|
|
be9751 |
+ ******************************************************************************/
|
|
|
be9751 |
+
|
|
|
be9751 |
#define KEEP_ALIVE_ATTR "keepalivetimestamp"
|
|
|
be9751 |
#define KEEP_ALIVE_ENTRY "repl keep alive"
|
|
|
be9751 |
#define KEEP_ALIVE_DN_FORMAT "cn=%s %d,%s"
|
|
|
be9751 |
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
be9751 |
index 14c8e0bcc..af486f730 100644
|
|
|
be9751 |
--- a/ldap/servers/plugins/replication/repl_extop.c
|
|
|
be9751 |
+++ b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
be9751 |
@@ -1173,6 +1173,10 @@ multimaster_extop_EndNSDS50ReplicationRequest(Slapi_PBlock *pb)
|
|
|
be9751 |
*/
|
|
|
be9751 |
if (cl5GetState() == CL5_STATE_OPEN) {
|
|
|
be9751 |
replica_log_ruv_elements(r);
|
|
|
be9751 |
+ /* now that the changelog is open and started, we can also create the
|
|
|
be9751 |
+ * keep alive entry without risk that db and cl will not match
|
|
|
be9751 |
+ */
|
|
|
be9751 |
+ replica_subentry_check(replica_get_root(r), replica_get_rid(r));
|
|
|
be9751 |
}
|
|
|
be9751 |
|
|
|
be9751 |
/* ONREPL code that dealt with new RUV, etc was moved into the code
|
|
|
be9751 |
--
|
|
|
be9751 |
2.26.2
|
|
|
be9751 |
|