andykimpe / rpms / 389-ds-base

Forked from rpms/389-ds-base 5 months ago
Clone
Blob Blame History Raw
From 83dfeca912daf72b2bb73288423e634091747979 Mon Sep 17 00:00:00 2001
From: progier389 <progier@redhat.com>
Date: Mon, 25 Oct 2021 17:09:57 +0200
Subject: [PATCH] Issue 4943 - Fix csn generator to limit time skew drift - PR
 4946

---
 ldap/servers/slapd/csngen.c       | 459 +++++++++++++++++-------------
 ldap/servers/slapd/slapi-plugin.h |   9 +
 2 files changed, 268 insertions(+), 200 deletions(-)

diff --git a/ldap/servers/slapd/csngen.c b/ldap/servers/slapd/csngen.c
index b08d8b25c..c7c5c2ba8 100644
--- a/ldap/servers/slapd/csngen.c
+++ b/ldap/servers/slapd/csngen.c
@@ -18,8 +18,9 @@
 #include "prcountr.h"
 #include "slap.h"
 
+
 #define CSN_MAX_SEQNUM 0xffff              /* largest sequence number */
-#define CSN_MAX_TIME_ADJUST 24 * 60 * 60   /* maximum allowed time adjustment (in seconds) = 1 day */
+#define CSN_MAX_TIME_ADJUST _SEC_PER_DAY   /* maximum allowed time adjustment (in seconds) = 1 day */
 #define ATTR_CSN_GENERATOR_STATE "nsState" /* attribute that stores csn state information */
 #define STATE_FORMAT "%8x%8x%8x%4hx%4hx"
 #define STATE_LENGTH 32
@@ -27,6 +28,8 @@
 #define CSN_CALC_TSTAMP(gen) ((gen)->state.sampled_time + \
                               (gen)->state.local_offset + \
                               (gen)->state.remote_offset)
+#define TIME_DIFF_WARNING_DELAY  (30*_SEC_PER_DAY)  /* log an info message when difference
+                                                       between clock is greater than this delay */
 
 /*
  * **************************************************************************
@@ -63,6 +66,7 @@ typedef struct csngen_state
 struct csngen
 {
     csngen_state state;      /* persistent state of the generator */
+    int32_t (*gettime)(struct timespec *tp); /* Get local time */
     callback_list callbacks; /* list of callbacks registered with the generator */
     Slapi_RWLock *lock;      /* concurrency control */
 };
@@ -78,7 +82,7 @@ static int _csngen_init_callbacks(CSNGen *gen);
 static void _csngen_call_callbacks(const CSNGen *gen, const CSN *csn, PRBool abort);
 static int _csngen_cmp_callbacks(const void *el1, const void *el2);
 static void _csngen_free_callbacks(CSNGen *gen);
-static int _csngen_adjust_local_time(CSNGen *gen, time_t cur_time);
+static int _csngen_adjust_local_time(CSNGen *gen);
 
 /*
  * **************************************************************************
@@ -121,6 +125,7 @@ csngen_new(ReplicaId rid, Slapi_Attr *state)
     _csngen_init_callbacks(gen);
 
     gen->state.rid = rid;
+    gen->gettime = slapi_clock_utc_gettime;
 
     if (state) {
         rc = _csngen_parse_state(gen, state);
@@ -164,10 +169,7 @@ csngen_free(CSNGen **gen)
 int
 csngen_new_csn(CSNGen *gen, CSN **csn, PRBool notify)
 {
-    struct timespec now = {0};
     int rc = CSN_SUCCESS;
-    time_t cur_time;
-    int delta;
 
     if (gen == NULL || csn == NULL) {
         slapi_log_err(SLAPI_LOG_ERR, "csngen_new_csn", "Invalid argument\n");
@@ -180,39 +182,13 @@ csngen_new_csn(CSNGen *gen, CSN **csn, PRBool notify)
         return CSN_MEMORY_ERROR;
     }
 
-    if ((rc = slapi_clock_gettime(&now)) != 0) {
-        /* Failed to get system time, we must abort */
-        slapi_log_err(SLAPI_LOG_ERR, "csngen_new_csn",
-                "Failed to get system time (%s)\n",
-                slapd_system_strerror(rc));
-        return CSN_TIME_ERROR;
-    }
-    cur_time = now.tv_sec;
-
     slapi_rwlock_wrlock(gen->lock);
 
-    /* check if the time should be adjusted */
-    delta = cur_time - gen->state.sampled_time;
-    if (delta > _SEC_PER_DAY || delta < (-1 * _SEC_PER_DAY)) {
-        /* We had a jump larger than a day */
-        slapi_log_err(SLAPI_LOG_INFO, "csngen_new_csn",
-                "Detected large jump in CSN time.  Delta: %d (current time: %ld  vs  previous time: %ld)\n",
-                delta, cur_time, gen->state.sampled_time);
-    }
-    if (delta > 0) {
-        rc = _csngen_adjust_local_time(gen, cur_time);
-        if (rc != CSN_SUCCESS) {
-            slapi_rwlock_unlock(gen->lock);
-            return rc;
-        }
+    rc = _csngen_adjust_local_time(gen);
+    if (rc != CSN_SUCCESS) {
+        slapi_rwlock_unlock(gen->lock);
+        return rc;
     }
-    /* if (delta < 0) this means the local system time was set back
-     * the new csn will be generated based on sampled time, which is
-     * ahead of system time and previously generated csns.
-     * the time stamp of the csn will not change until system time
-     * catches up or is corrected by remote csns.
-     * But we need to ensure that the seq_num does not overflow.
-     */
 
     if (gen->state.seq_num == CSN_MAX_SEQNUM) {
         slapi_log_err(SLAPI_LOG_INFO, "csngen_new_csn", "Sequence rollover; "
@@ -261,13 +237,36 @@ csngen_rewrite_rid(CSNGen *gen, ReplicaId rid)
 }
 
 /* this function should be called when a remote CSN for the same part of
-   the dit becomes known to the server (for instance, as part of RUV during
-   replication session. In response, the generator would adjust its notion
-   of time so that it does not generate smaller csns */
+ * the dit becomes known to the server (for instance, as part of RUV during
+ * replication session. In response, the generator would adjust its notion
+ * of time so that it does not generate smaller csns
+ *
+ * The following counters are updated
+ *   - when a new csn is generated
+ *   - when csngen is adjusted (beginning of a incoming (extop) or outgoing
+ *     (inc_protocol) session)
+ *
+ * sampled_time: It takes the value of current system time.
+ *
+ * remote offset: it is updated when 'csn' argument is ahead of the next csn
+ * that the csn generator will generate. It is the MAX jump ahead, it is not
+ * cumulative counter (e.g. if remote_offset=7 and 'csn' is 5sec ahead
+ * remote_offset stays the same. The jump ahead (5s) pour into the local offset.
+ * It is not clear of the interest of this counter. It gives an indication of
+ * the maximum jump ahead but not much.
+ *
+ * local offset: it is increased if
+ *   - system time is going backward (compare sampled_time)
+ *   - if 'csn' argument is ahead of csn that the csn generator would generate
+ *     AND diff('csn', csngen.new_csn) < remote_offset
+ *     then the diff "pour" into local_offset
+ *  It is decreased as the clock is ticking, local offset is "consumed" as
+ *  sampled_time progresses.
+ */
 int
 csngen_adjust_time(CSNGen *gen, const CSN *csn)
 {
-    time_t remote_time, remote_offset, cur_time;
+    time_t remote_time, remote_offset, cur_time, old_time, new_time;
     PRUint16 remote_seqnum;
     int rc;
     extern int config_get_ignore_time_skew(void);
@@ -281,6 +280,11 @@ csngen_adjust_time(CSNGen *gen, const CSN *csn)
 
     slapi_rwlock_wrlock(gen->lock);
 
+    /* Get last local csn time */
+    old_time = CSN_CALC_TSTAMP(gen);
+    /* update local offset and sample_time */
+    rc = _csngen_adjust_local_time(gen);
+
     if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
         cur_time = CSN_CALC_TSTAMP(gen);
         slapi_log_err(SLAPI_LOG_REPL, "csngen_adjust_time",
@@ -290,79 +294,60 @@ csngen_adjust_time(CSNGen *gen, const CSN *csn)
                       gen->state.local_offset,
                       gen->state.remote_offset);
     }
-    /* make sure we have the current time */
-    cur_time = slapi_current_utc_time();
-
-    /* make sure sampled_time is current */
-    /* must only call adjust_local_time if the current time is greater than
-       the generator state time */
-    if ((cur_time > gen->state.sampled_time) &&
-        (CSN_SUCCESS != (rc = _csngen_adjust_local_time(gen, cur_time)))) {
+    if (rc != CSN_SUCCESS) {
         /* _csngen_adjust_local_time will log error */
         slapi_rwlock_unlock(gen->lock);
-        csngen_dump_state(gen);
+        csngen_dump_state(gen, SLAPI_LOG_DEBUG);
         return rc;
     }
 
-    cur_time = CSN_CALC_TSTAMP(gen);
-    if (remote_time >= cur_time) {
-        time_t new_time = 0;
-
-        if (remote_seqnum > gen->state.seq_num) {
-            if (remote_seqnum < CSN_MAX_SEQNUM) {
-                gen->state.seq_num = remote_seqnum + 1;
-            } else {
-                remote_time++;
-            }
-        }
-
-        remote_offset = remote_time - cur_time;
-        if (remote_offset > gen->state.remote_offset) {
-            if (ignore_time_skew || (remote_offset <= CSN_MAX_TIME_ADJUST)) {
-                gen->state.remote_offset = remote_offset;
-            } else /* remote_offset > CSN_MAX_TIME_ADJUST */
-            {
-                slapi_log_err(SLAPI_LOG_ERR, "csngen_adjust_time",
-                              "Adjustment limit exceeded; value - %ld, limit - %ld\n",
-                              remote_offset, (long)CSN_MAX_TIME_ADJUST);
-                slapi_rwlock_unlock(gen->lock);
-                csngen_dump_state(gen);
-                return CSN_LIMIT_EXCEEDED;
-            }
-        } else if (remote_offset > 0) { /* still need to account for this */
-            gen->state.local_offset += remote_offset;
+    remote_offset = remote_time - CSN_CALC_TSTAMP(gen);
+    if (remote_offset > 0) {
+        if (!ignore_time_skew && (gen->state.remote_offset + remote_offset > CSN_MAX_TIME_ADJUST)) {
+            slapi_log_err(SLAPI_LOG_ERR, "csngen_adjust_time",
+                          "Adjustment limit exceeded; value - %ld, limit - %ld\n",
+                          remote_offset, (long)CSN_MAX_TIME_ADJUST);
+            slapi_rwlock_unlock(gen->lock);
+            csngen_dump_state(gen, SLAPI_LOG_DEBUG);
+            return CSN_LIMIT_EXCEEDED;
         }
-
-        new_time = CSN_CALC_TSTAMP(gen);
-        /* let's revisit the seq num - if the new time is > the old
-           tiem, we should reset the seq number to remote + 1 if
-           this won't cause a wrap around */
-        if (new_time >= cur_time) {
-            /* just set seq_num regardless of whether the current one
-               is < or > than the remote one - the goal of this function
-               is to make sure we generate CSNs > the remote CSN - if
-               we have increased the time, we can decrease the seqnum
-               and still guarantee that any new CSNs generated will be
-               > any current CSNs we have generated */
-            if (remote_seqnum < gen->state.seq_num) {
-                gen->state.seq_num ++;
-            } else {
-                gen->state.seq_num = remote_seqnum + 1;
-            }
+        gen->state.remote_offset += remote_offset;
+        /* To avoid beat phenomena between suppliers let put 1 second in local_offset
+         * it will be eaten at next clock tick rather than increasing remote offset
+         * If we do not do that we will have a time skew drift of 1 second per 2 seconds
+         * if suppliers are desynchronized by 0.5 second 
+         */
+        if (gen->state.local_offset == 0) {
+            gen->state.local_offset++;
+            gen->state.remote_offset--;
         }
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
-            slapi_log_err(SLAPI_LOG_REPL, "csngen_adjust_time",
-                          "gen state after %08lx%04x:%ld:%ld:%ld\n",
-                          new_time, gen->state.seq_num,
-                          gen->state.sampled_time,
-                          gen->state.local_offset,
-                          gen->state.remote_offset);
+    }
+    /* Time to compute seqnum so that 
+     *   new csn >= remote csn and new csn >= old local csn 
+     */
+    new_time = CSN_CALC_TSTAMP(gen);
+    PR_ASSERT(new_time >= old_time);
+    PR_ASSERT(new_time >= remote_time);
+    if (new_time > old_time) {
+        /* Can reset (local) seqnum */
+        gen->state.seq_num = 0;
+    }
+    if (new_time == remote_time && remote_seqnum >= gen->state.seq_num) {
+        if (remote_seqnum >= CSN_MAX_SEQNUM) {
+            gen->state.seq_num = 0;
+            gen->state.local_offset++;
+        } else {
+            gen->state.seq_num = remote_seqnum + 1;
         }
-    } else if (gen->state.remote_offset > 0) {
-        /* decrease remote offset? */
-        /* how to decrease remote offset but ensure that we don't
-           generate a duplicate CSN, or a CSN smaller than one we've already
-           generated? */
+    }
+
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+        slapi_log_err(SLAPI_LOG_REPL, "csngen_adjust_time",
+                      "gen state after %08lx%04x:%ld:%ld:%ld\n",
+                      new_time, gen->state.seq_num,
+                      gen->state.sampled_time,
+                      gen->state.local_offset,
+                      gen->state.remote_offset);
     }
 
     slapi_rwlock_unlock(gen->lock);
@@ -435,16 +420,16 @@ csngen_unregister_callbacks(CSNGen *gen, void *cookie)
 
 /* debugging function */
 void
-csngen_dump_state(const CSNGen *gen)
+csngen_dump_state(const CSNGen *gen, int severity)
 {
     if (gen) {
         slapi_rwlock_rdlock(gen->lock);
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "CSN generator's state:\n");
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\treplica id: %d\n", gen->state.rid);
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tsampled time: %ld\n", gen->state.sampled_time);
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tlocal offset: %ld\n", gen->state.local_offset);
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tremote offset: %ld\n", gen->state.remote_offset);
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tsequence number: %d\n", gen->state.seq_num);
+        slapi_log_err(severity, "csngen_dump_state", "CSN generator's state:\n");
+        slapi_log_err(severity, "csngen_dump_state", "\treplica id: %d\n", gen->state.rid);
+        slapi_log_err(severity, "csngen_dump_state", "\tsampled time: %ld\n", gen->state.sampled_time);
+        slapi_log_err(severity, "csngen_dump_state", "\tlocal offset: %ld\n", gen->state.local_offset);
+        slapi_log_err(severity, "csngen_dump_state", "\tremote offset: %ld\n", gen->state.remote_offset);
+        slapi_log_err(severity, "csngen_dump_state", "\tsequence number: %d\n", gen->state.seq_num);
         slapi_rwlock_unlock(gen->lock);
     }
 }
@@ -459,15 +444,17 @@ csngen_test()
     CSNGen *gen = csngen_new(255, NULL);
 
     slapi_log_err(SLAPI_LOG_DEBUG, "csngen_test", "staring csn generator test ...");
-    csngen_dump_state(gen);
+    csngen_dump_state(gen, SLAPI_LOG_INFO);
 
     rc = _csngen_start_test_threads(gen);
     if (rc == 0) {
-        DS_Sleep(PR_SecondsToInterval(TEST_TIME));
+        for (size_t i = 0; i < TEST_TIME && !slapi_is_shutting_down(); i++) {
+            DS_Sleep(PR_SecondsToInterval(1));
+        }
     }
 
     _csngen_stop_test_threads();
-    csngen_dump_state(gen);
+    csngen_dump_state(gen, SLAPI_LOG_INFO);
     slapi_log_err(SLAPI_LOG_DEBUG, "csngen_test", "csn generator test is complete...");
 }
 
@@ -572,94 +559,93 @@ _csngen_cmp_callbacks(const void *el1, const void *el2)
         return 1;
 }
 
+/* Get time and adjust local offset */
 static int
-_csngen_adjust_local_time(CSNGen *gen, time_t cur_time)
+_csngen_adjust_local_time(CSNGen *gen)
 {
     extern int config_get_ignore_time_skew(void);
     int ignore_time_skew = config_get_ignore_time_skew();
-    time_t time_diff = cur_time - gen->state.sampled_time;
+    struct timespec now = {0};
+    time_t time_diff;
+    time_t cur_time;
+    int rc;
 
+    
+    if ((rc = gen->gettime(&now)) != 0) {
+        /* Failed to get system time, we must abort */
+        slapi_log_err(SLAPI_LOG_ERR, "csngen_new_csn",
+                "Failed to get system time (%s)\n",
+                slapd_system_strerror(rc));
+        return CSN_TIME_ERROR;
+    }
+    cur_time = now.tv_sec;
+    time_diff = cur_time - gen->state.sampled_time;
+
+    /* check if the time should be adjusted */
     if (time_diff == 0) {
         /* This is a no op - _csngen_adjust_local_time should never be called
            in this case, because there is nothing to adjust - but just return
            here to protect ourselves
         */
         return CSN_SUCCESS;
-    } else if (time_diff > 0) {
-        time_t ts_before = CSN_CALC_TSTAMP(gen);
-        time_t ts_after = 0;
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
-            time_t new_time = CSN_CALC_TSTAMP(gen);
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
-                          "gen state before %08lx%04x:%ld:%ld:%ld\n",
-                          new_time, gen->state.seq_num,
-                          gen->state.sampled_time,
-                          gen->state.local_offset,
-                          gen->state.remote_offset);
-        }
-
-        gen->state.sampled_time = cur_time;
-        if (time_diff > gen->state.local_offset)
-            gen->state.local_offset = 0;
-        else
-            gen->state.local_offset = gen->state.local_offset - time_diff;
-
-        /* only reset the seq_num if the new timestamp part of the CSN
-           is going to be greater than the old one - if they are the
-           same after the above adjustment (which can happen if
-           csngen_adjust_time has to store the offset in the
-           local_offset field) we must not allow the CSN to regress or
-           generate duplicate numbers */
-        ts_after = CSN_CALC_TSTAMP(gen);
-        if (ts_after > ts_before) {
-            gen->state.seq_num = 0; /* only reset if new time > old time */
-        }
-
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
-            time_t new_time = CSN_CALC_TSTAMP(gen);
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
-                          "gen state after %08lx%04x:%ld:%ld:%ld\n",
-                          new_time, gen->state.seq_num,
-                          gen->state.sampled_time,
-                          gen->state.local_offset,
-                          gen->state.remote_offset);
-        }
-        return CSN_SUCCESS;
-    } else /* time was turned back */
-    {
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
-            time_t new_time = CSN_CALC_TSTAMP(gen);
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
-                          "gen state back before %08lx%04x:%ld:%ld:%ld\n",
-                          new_time, gen->state.seq_num,
-                          gen->state.sampled_time,
-                          gen->state.local_offset,
-                          gen->state.remote_offset);
-        }
+    }
+    if (labs(time_diff) > TIME_DIFF_WARNING_DELAY) {
+        /* We had a jump larger than a day */
+        slapi_log_err(SLAPI_LOG_INFO, "csngen_new_csn",
+                "Detected large jump in CSN time.  Delta: %ld (current time: %ld  vs  previous time: %ld)\n",
+                time_diff, cur_time, gen->state.sampled_time);
+    }
+    if (!ignore_time_skew && (gen->state.local_offset - time_diff > CSN_MAX_TIME_ADJUST)) {
+        slapi_log_err(SLAPI_LOG_ERR, "_csngen_adjust_local_time",
+                      "Adjustment limit exceeded; value - %ld, limit - %d\n",
+                      gen->state.local_offset - time_diff, CSN_MAX_TIME_ADJUST);
+        return CSN_LIMIT_EXCEEDED;
+    }
 
-        if (!ignore_time_skew && (labs(time_diff) > CSN_MAX_TIME_ADJUST)) {
-            slapi_log_err(SLAPI_LOG_ERR, "_csngen_adjust_local_time",
-                          "Adjustment limit exceeded; value - %ld, limit - %d\n",
-                          labs(time_diff), CSN_MAX_TIME_ADJUST);
-            return CSN_LIMIT_EXCEEDED;
-        }
+    time_t ts_before = CSN_CALC_TSTAMP(gen);
+    time_t ts_after = 0;
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+        time_t new_time = CSN_CALC_TSTAMP(gen);
+        slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
+                      "gen state before %08lx%04x:%ld:%ld:%ld\n",
+                      new_time, gen->state.seq_num,
+                      gen->state.sampled_time,
+                      gen->state.local_offset,
+                      gen->state.remote_offset);
+    }
 
-        gen->state.sampled_time = cur_time;
-        gen->state.local_offset = MAX_VAL(gen->state.local_offset, labs(time_diff));
-        gen->state.seq_num = 0;
+    gen->state.sampled_time = cur_time;
+    gen->state.local_offset = MAX_VAL(0, gen->state.local_offset - time_diff);
+    /* new local_offset = MAX_VAL(0, old sample_time + old local_offset - cur_time)
+     * ==> new local_offset >= 0 and 
+     *     new local_offset + cur_time >= old sample_time + old local_offset
+     * ==> new local_offset + cur_time + remote_offset >=
+     *            sample_time + old local_offset + remote_offset
+     * ==> CSN_CALC_TSTAMP(new gen) >= CSN_CALC_TSTAMP(old gen)
+     */
 
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
-            time_t new_time = CSN_CALC_TSTAMP(gen);
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
-                          "gen state back after %08lx%04x:%ld:%ld:%ld\n",
-                          new_time, gen->state.seq_num,
-                          gen->state.sampled_time,
-                          gen->state.local_offset,
-                          gen->state.remote_offset);
-        }
+    /* only reset the seq_num if the new timestamp part of the CSN
+       is going to be greater than the old one - if they are the
+       same after the above adjustment (which can happen if
+       csngen_adjust_time has to store the offset in the
+       local_offset field) we must not allow the CSN to regress or
+       generate duplicate numbers */
+    ts_after = CSN_CALC_TSTAMP(gen);
+    PR_ASSERT(ts_after >= ts_before);
+    if (ts_after > ts_before) {
+        gen->state.seq_num = 0; /* only reset if new time > old time */
+    }
 
-        return CSN_SUCCESS;
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+        time_t new_time = CSN_CALC_TSTAMP(gen);
+        slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
+                      "gen state after %08lx%04x:%ld:%ld:%ld\n",
+                      new_time, gen->state.seq_num,
+                      gen->state.sampled_time,
+                      gen->state.local_offset,
+                      gen->state.remote_offset);
     }
+    return CSN_SUCCESS;
 }
 
 /*
@@ -670,8 +656,8 @@ _csngen_adjust_local_time(CSNGen *gen, time_t cur_time)
 #define DEFAULT_THREAD_STACKSIZE 0
 
 #define GEN_TREAD_COUNT 20
-int s_thread_count;
-int s_must_exit;
+static int s_thread_count;
+static int s_must_exit;
 
 static int
 _csngen_start_test_threads(CSNGen *gen)
@@ -736,8 +722,8 @@ _csngen_stop_test_threads(void)
     s_must_exit = 1;
 
     while (s_thread_count > 0) {
-        /* sleep for 30 seconds */
-        DS_Sleep(PR_SecondsToInterval(20));
+        /* sleep for 5 seconds */
+        DS_Sleep(PR_SecondsToInterval(5));
     }
 }
 
@@ -752,7 +738,7 @@ _csngen_gen_tester_main(void *data)
 
     PR_ASSERT(gen);
 
-    while (!s_must_exit) {
+    while (!s_must_exit && !slapi_is_shutting_down()) {
         rc = csngen_new_csn(gen, &csn, PR_FALSE);
         if (rc != CSN_SUCCESS) {
             slapi_log_err(SLAPI_LOG_ERR, "_csngen_gen_tester_main",
@@ -764,7 +750,7 @@ _csngen_gen_tester_main(void *data)
         csn_free(&csn);
 
         /* sleep for 30 seconds */
-        DS_Sleep(PR_SecondsToInterval(10));
+        DS_Sleep(PR_SecondsToInterval(30));
     }
 
     PR_AtomicDecrement(&s_thread_count);
@@ -782,7 +768,7 @@ _csngen_remote_tester_main(void *data)
 
     PR_ASSERT(gen);
 
-    while (!s_must_exit) {
+    while (!s_must_exit && !slapi_is_shutting_down()) {
         rc = csngen_new_csn(gen, &csn, PR_FALSE);
         if (rc != CSN_SUCCESS) {
             slapi_log_err(SLAPI_LOG_ERR, "_csngen_remote_tester_main",
@@ -797,12 +783,12 @@ _csngen_remote_tester_main(void *data)
                               "Failed to adjust generator's time; csn error - %d\n", rc);
             }
 
-            csngen_dump_state(gen);
+            csngen_dump_state(gen, SLAPI_LOG_INFO);
         }
         csn_free(&csn);
 
         /* sleep for 30 seconds */
-        DS_Sleep(PR_SecondsToInterval(60));
+        DS_Sleep(PR_SecondsToInterval(30));
     }
 
     PR_AtomicDecrement(&s_thread_count);
@@ -816,17 +802,90 @@ _csngen_local_tester_main(void *data)
 
     PR_ASSERT(gen);
 
-
-    while (!s_must_exit) {
+    while (!s_must_exit && !slapi_is_shutting_down()) {
         /* sleep for 30 seconds */
-        DS_Sleep(PR_SecondsToInterval(60));
+        DS_Sleep(PR_SecondsToInterval(30));
 
         /*
          * g_sampled_time -= slapi_rand () % 100;
          */
-
-        csngen_dump_state(gen);
+        csngen_dump_state(gen, SLAPI_LOG_INFO);
     }
 
     PR_AtomicDecrement(&s_thread_count);
 }
+
+int _csngen_tester_state;
+int _csngen_tester_state_rid;
+
+static int
+_mynoise(int time, int len, double height)
+{
+   if (((time/len) % 2) == 0) {
+        return -height + 2 * height * ( time % len ) / (len-1);
+   } else {
+        return height - 2 * height * ( time % len ) / (len-1);
+   }
+}
+
+
+int32_t _csngen_tester_gettime(struct timespec *tp)
+{
+    int vtime = _csngen_tester_state ;
+    tp->tv_sec = 0x1000000 + vtime + 2 * _csngen_tester_state_rid;
+    if (_csngen_tester_state_rid == 3) {
+        /* tp->tv_sec += _mynoise(vtime, 10, 1.5); */
+        tp->tv_sec += _mynoise(vtime, 30, 15);
+    }
+    return 0;
+}
+
+/* Mimic a fully meshed multi suplier topology */
+void csngen_multi_suppliers_test(void)
+{
+#define NB_TEST_MASTERS	6
+#define NB_TEST_STATES	500
+    CSNGen *gen[NB_TEST_MASTERS];
+    struct timespec now = {0};
+    CSN *last_csn = NULL;
+    CSN *csn = NULL;
+    int i,j,rc;
+
+    _csngen_tester_gettime(&now);
+
+    for (i=0; i< NB_TEST_MASTERS; i++) {
+        gen[i] = csngen_new(i+1, NULL);
+        gen[i]->gettime = _csngen_tester_gettime;
+        gen[i]->state.sampled_time = now.tv_sec;
+    }
+
+    for (_csngen_tester_state=0; _csngen_tester_state < NB_TEST_STATES; _csngen_tester_state++) {
+        for (i=0; i< NB_TEST_MASTERS; i++) {
+            _csngen_tester_state_rid = i+1;
+            rc = csngen_new_csn(gen[i], &csn, PR_FALSE);
+            if (rc) {
+                continue;
+            }
+            csngen_dump_state(gen[i], SLAPI_LOG_INFO);
+
+            if (csn_compare(csn, last_csn) <= 0) {
+                slapi_log_err(SLAPI_LOG_ERR, "csngen_multi_suppliers_test",
+                              "CSN generated in disorder state=%d rid=%d\n", _csngen_tester_state, _csngen_tester_state_rid);
+                _csngen_tester_state = NB_TEST_STATES;
+                break;
+            }
+            last_csn = csn;
+
+            for (j=0; j< NB_TEST_MASTERS; j++) {
+                if (i==j) {
+                    continue;
+                }
+                _csngen_tester_state_rid = j+1;
+                rc = csngen_adjust_time(gen[j], csn);
+                if (rc) {
+                    continue;
+                }
+            }
+        }
+    }
+}
diff --git a/ldap/servers/slapd/slapi-plugin.h b/ldap/servers/slapd/slapi-plugin.h
index 9f7971ec1..3ba3df910 100644
--- a/ldap/servers/slapd/slapi-plugin.h
+++ b/ldap/servers/slapd/slapi-plugin.h
@@ -6778,8 +6778,17 @@ time_t slapi_current_time(void) __attribute__((deprecated));
  *
  * \param tp - a timespec struct where the system time is set
  * \return result code, upon success tp is set to the system time
+ * as a clock in UTC timezone. This clock adjusts with ntp steps,
+ * and should NOT be used for timer information.
  */
 int32_t slapi_clock_gettime(struct timespec *tp);
+/* 
+ * slapi_clock_gettime should have better been called
+ * slapi_clock_utc_gettime but sice the function pre-existed
+ * we are just adding an alias (to avoid risking to break
+ * some custom plugins)
+ */
+#define slapi_clock_utc_gettime slapi_clock_gettime
 
 /**
  * Returns the current system time as a hr clock relative to uptime
-- 
2.31.1