Blame SOURCES/0002-Issue-4943-Fix-csn-generator-to-limit-time-skew-drif.patch

9f2552
From 968ad6b5039d839bfbc61da755c252cc7598415b Mon Sep 17 00:00:00 2001
9f2552
From: progier389 <progier@redhat.com>
9f2552
Date: Mon, 25 Oct 2021 17:09:57 +0200
9f2552
Subject: [PATCH 02/12] Issue 4943 - Fix csn generator to limit time skew drift
9f2552
 - PR 4946
9f2552
9f2552
---
9f2552
 ldap/servers/slapd/csngen.c       | 433 +++++++++++++++++-------------
9f2552
 ldap/servers/slapd/slapi-plugin.h |   9 +
9f2552
 2 files changed, 255 insertions(+), 187 deletions(-)
9f2552
9f2552
diff --git a/ldap/servers/slapd/csngen.c b/ldap/servers/slapd/csngen.c
9f2552
index fcd88b4cc..c7c5c2ba8 100644
9f2552
--- a/ldap/servers/slapd/csngen.c
9f2552
+++ b/ldap/servers/slapd/csngen.c
9f2552
@@ -18,8 +18,9 @@
9f2552
 #include "prcountr.h"
9f2552
 #include "slap.h"
9f2552
 
9f2552
+
9f2552
 #define CSN_MAX_SEQNUM 0xffff              /* largest sequence number */
9f2552
-#define CSN_MAX_TIME_ADJUST 24 * 60 * 60   /* maximum allowed time adjustment (in seconds) = 1 day */
9f2552
+#define CSN_MAX_TIME_ADJUST _SEC_PER_DAY   /* maximum allowed time adjustment (in seconds) = 1 day */
9f2552
 #define ATTR_CSN_GENERATOR_STATE "nsState" /* attribute that stores csn state information */
9f2552
 #define STATE_FORMAT "%8x%8x%8x%4hx%4hx"
9f2552
 #define STATE_LENGTH 32
9f2552
@@ -27,6 +28,8 @@
9f2552
 #define CSN_CALC_TSTAMP(gen) ((gen)->state.sampled_time + \
9f2552
                               (gen)->state.local_offset + \
9f2552
                               (gen)->state.remote_offset)
9f2552
+#define TIME_DIFF_WARNING_DELAY  (30*_SEC_PER_DAY)  /* log an info message when difference
9f2552
+                                                       between clock is greater than this delay */
9f2552
 
9f2552
 /*
9f2552
  * **************************************************************************
9f2552
@@ -63,6 +66,7 @@ typedef struct csngen_state
9f2552
 struct csngen
9f2552
 {
9f2552
     csngen_state state;      /* persistent state of the generator */
9f2552
+    int32_t (*gettime)(struct timespec *tp); /* Get local time */
9f2552
     callback_list callbacks; /* list of callbacks registered with the generator */
9f2552
     Slapi_RWLock *lock;      /* concurrency control */
9f2552
 };
9f2552
@@ -78,7 +82,7 @@ static int _csngen_init_callbacks(CSNGen *gen);
9f2552
 static void _csngen_call_callbacks(const CSNGen *gen, const CSN *csn, PRBool abort);
9f2552
 static int _csngen_cmp_callbacks(const void *el1, const void *el2);
9f2552
 static void _csngen_free_callbacks(CSNGen *gen);
9f2552
-static int _csngen_adjust_local_time(CSNGen *gen, time_t cur_time);
9f2552
+static int _csngen_adjust_local_time(CSNGen *gen);
9f2552
 
9f2552
 /*
9f2552
  * **************************************************************************
9f2552
@@ -121,6 +125,7 @@ csngen_new(ReplicaId rid, Slapi_Attr *state)
9f2552
     _csngen_init_callbacks(gen);
9f2552
 
9f2552
     gen->state.rid = rid;
9f2552
+    gen->gettime = slapi_clock_utc_gettime;
9f2552
 
9f2552
     if (state) {
9f2552
         rc = _csngen_parse_state(gen, state);
9f2552
@@ -164,10 +169,7 @@ csngen_free(CSNGen **gen)
9f2552
 int
9f2552
 csngen_new_csn(CSNGen *gen, CSN **csn, PRBool notify)
9f2552
 {
9f2552
-    struct timespec now = {0};
9f2552
     int rc = CSN_SUCCESS;
9f2552
-    time_t cur_time;
9f2552
-    int delta;
9f2552
 
9f2552
     if (gen == NULL || csn == NULL) {
9f2552
         slapi_log_err(SLAPI_LOG_ERR, "csngen_new_csn", "Invalid argument\n");
9f2552
@@ -180,39 +182,13 @@ csngen_new_csn(CSNGen *gen, CSN **csn, PRBool notify)
9f2552
         return CSN_MEMORY_ERROR;
9f2552
     }
9f2552
 
9f2552
-    if ((rc = slapi_clock_gettime(&now)) != 0) {
9f2552
-        /* Failed to get system time, we must abort */
9f2552
-        slapi_log_err(SLAPI_LOG_ERR, "csngen_new_csn",
9f2552
-                "Failed to get system time (%s)\n",
9f2552
-                slapd_system_strerror(rc));
9f2552
-        return CSN_TIME_ERROR;
9f2552
-    }
9f2552
-    cur_time = now.tv_sec;
9f2552
-
9f2552
     slapi_rwlock_wrlock(gen->lock);
9f2552
 
9f2552
-    /* check if the time should be adjusted */
9f2552
-    delta = cur_time - gen->state.sampled_time;
9f2552
-    if (delta > _SEC_PER_DAY || delta < (-1 * _SEC_PER_DAY)) {
9f2552
-        /* We had a jump larger than a day */
9f2552
-        slapi_log_err(SLAPI_LOG_INFO, "csngen_new_csn",
9f2552
-                "Detected large jump in CSN time.  Delta: %d (current time: %ld  vs  previous time: %ld)\n",
9f2552
-                delta, cur_time, gen->state.sampled_time);
9f2552
-    }
9f2552
-    if (delta > 0) {
9f2552
-        rc = _csngen_adjust_local_time(gen, cur_time);
9f2552
-        if (rc != CSN_SUCCESS) {
9f2552
-            slapi_rwlock_unlock(gen->lock);
9f2552
-            return rc;
9f2552
-        }
9f2552
+    rc = _csngen_adjust_local_time(gen);
9f2552
+    if (rc != CSN_SUCCESS) {
9f2552
+        slapi_rwlock_unlock(gen->lock);
9f2552
+        return rc;
9f2552
     }
9f2552
-    /* if (delta < 0) this means the local system time was set back
9f2552
-     * the new csn will be generated based on sampled time, which is
9f2552
-     * ahead of system time and previously generated csns.
9f2552
-     * the time stamp of the csn will not change until system time
9f2552
-     * catches up or is corrected by remote csns.
9f2552
-     * But we need to ensure that the seq_num does not overflow.
9f2552
-     */
9f2552
 
9f2552
     if (gen->state.seq_num == CSN_MAX_SEQNUM) {
9f2552
         slapi_log_err(SLAPI_LOG_INFO, "csngen_new_csn", "Sequence rollover; "
9f2552
@@ -261,13 +237,36 @@ csngen_rewrite_rid(CSNGen *gen, ReplicaId rid)
9f2552
 }
9f2552
 
9f2552
 /* this function should be called when a remote CSN for the same part of
9f2552
-   the dit becomes known to the server (for instance, as part of RUV during
9f2552
-   replication session. In response, the generator would adjust its notion
9f2552
-   of time so that it does not generate smaller csns */
9f2552
+ * the dit becomes known to the server (for instance, as part of RUV during
9f2552
+ * replication session. In response, the generator would adjust its notion
9f2552
+ * of time so that it does not generate smaller csns
9f2552
+ *
9f2552
+ * The following counters are updated
9f2552
+ *   - when a new csn is generated
9f2552
+ *   - when csngen is adjusted (beginning of an incoming (extop) or outgoing
9f2552
+ *     (inc_protocol) session)
9f2552
+ *
9f2552
+ * sampled_time: It takes the value of current system time.
9f2552
+ *
9f2552
+ * remote offset: it is updated when 'csn' argument is ahead of the next csn
9f2552
+ * that the csn generator will generate. It is the MAX jump ahead, it is not
9f2552
+ * cumulative counter (e.g. if remote_offset=7 and 'csn' is 5sec ahead
9f2552
+ * remote_offset stays the same). The jump ahead (5s) pours into the local offset.
9f2552
+ * The usefulness of this counter is unclear. It gives an indication of
9f2552
+ * the maximum jump ahead but not much.
9f2552
+ *
9f2552
+ * local offset: it is increased if
9f2552
+ *   - system time is going backward (compare sampled_time)
9f2552
+ *   - if 'csn' argument is ahead of csn that the csn generator would generate
9f2552
+ *     AND diff('csn', csngen.new_csn) < remote_offset
9f2552
+ *     then the diff "pour" into local_offset
9f2552
+ *  It is decreased as the clock is ticking, local offset is "consumed" as
9f2552
+ *  sampled_time progresses.
9f2552
+ */
9f2552
 int
9f2552
 csngen_adjust_time(CSNGen *gen, const CSN *csn)
9f2552
 {
9f2552
-    time_t remote_time, remote_offset, cur_time;
9f2552
+    time_t remote_time, remote_offset, cur_time, old_time, new_time;
9f2552
     PRUint16 remote_seqnum;
9f2552
     int rc;
9f2552
     extern int config_get_ignore_time_skew(void);
9f2552
@@ -281,6 +280,11 @@ csngen_adjust_time(CSNGen *gen, const CSN *csn)
9f2552
 
9f2552
     slapi_rwlock_wrlock(gen->lock);
9f2552
 
9f2552
+    /* Get last local csn time */
9f2552
+    old_time = CSN_CALC_TSTAMP(gen);
9f2552
+    /* update local offset and sampled_time */
9f2552
+    rc = _csngen_adjust_local_time(gen);
9f2552
+
9f2552
     if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
         cur_time = CSN_CALC_TSTAMP(gen);
9f2552
         slapi_log_err(SLAPI_LOG_REPL, "csngen_adjust_time",
9f2552
@@ -290,79 +294,60 @@ csngen_adjust_time(CSNGen *gen, const CSN *csn)
9f2552
                       gen->state.local_offset,
9f2552
                       gen->state.remote_offset);
9f2552
     }
9f2552
-    /* make sure we have the current time */
9f2552
-    cur_time = slapi_current_utc_time();
9f2552
-
9f2552
-    /* make sure sampled_time is current */
9f2552
-    /* must only call adjust_local_time if the current time is greater than
9f2552
-       the generator state time */
9f2552
-    if ((cur_time > gen->state.sampled_time) &&
9f2552
-        (CSN_SUCCESS != (rc = _csngen_adjust_local_time(gen, cur_time)))) {
9f2552
+    if (rc != CSN_SUCCESS) {
9f2552
         /* _csngen_adjust_local_time will log error */
9f2552
         slapi_rwlock_unlock(gen->lock);
9f2552
-        csngen_dump_state(gen);
9f2552
+        csngen_dump_state(gen, SLAPI_LOG_DEBUG);
9f2552
         return rc;
9f2552
     }
9f2552
 
9f2552
-    cur_time = CSN_CALC_TSTAMP(gen);
9f2552
-    if (remote_time >= cur_time) {
9f2552
-        time_t new_time = 0;
9f2552
-
9f2552
-        if (remote_seqnum > gen->state.seq_num) {
9f2552
-            if (remote_seqnum < CSN_MAX_SEQNUM) {
9f2552
-                gen->state.seq_num = remote_seqnum + 1;
9f2552
-            } else {
9f2552
-                remote_time++;
9f2552
-            }
9f2552
-        }
9f2552
-
9f2552
-        remote_offset = remote_time - cur_time;
9f2552
-        if (remote_offset > gen->state.remote_offset) {
9f2552
-            if (ignore_time_skew || (remote_offset <= CSN_MAX_TIME_ADJUST)) {
9f2552
-                gen->state.remote_offset = remote_offset;
9f2552
-            } else /* remote_offset > CSN_MAX_TIME_ADJUST */
9f2552
-            {
9f2552
-                slapi_log_err(SLAPI_LOG_ERR, "csngen_adjust_time",
9f2552
-                              "Adjustment limit exceeded; value - %ld, limit - %ld\n",
9f2552
-                              remote_offset, (long)CSN_MAX_TIME_ADJUST);
9f2552
-                slapi_rwlock_unlock(gen->lock);
9f2552
-                csngen_dump_state(gen);
9f2552
-                return CSN_LIMIT_EXCEEDED;
9f2552
-            }
9f2552
-        } else if (remote_offset > 0) { /* still need to account for this */
9f2552
-            gen->state.local_offset += remote_offset;
9f2552
+    remote_offset = remote_time - CSN_CALC_TSTAMP(gen);
9f2552
+    if (remote_offset > 0) {
9f2552
+        if (!ignore_time_skew && (gen->state.remote_offset + remote_offset > CSN_MAX_TIME_ADJUST)) {
9f2552
+            slapi_log_err(SLAPI_LOG_ERR, "csngen_adjust_time",
9f2552
+                          "Adjustment limit exceeded; value - %ld, limit - %ld\n",
9f2552
+                          remote_offset, (long)CSN_MAX_TIME_ADJUST);
9f2552
+            slapi_rwlock_unlock(gen->lock);
9f2552
+            csngen_dump_state(gen, SLAPI_LOG_DEBUG);
9f2552
+            return CSN_LIMIT_EXCEEDED;
9f2552
         }
9f2552
-
9f2552
-        new_time = CSN_CALC_TSTAMP(gen);
9f2552
-        /* let's revisit the seq num - if the new time is > the old
9f2552
-           tiem, we should reset the seq number to remote + 1 if
9f2552
-           this won't cause a wrap around */
9f2552
-        if (new_time >= cur_time) {
9f2552
-            /* just set seq_num regardless of whether the current one
9f2552
-               is < or > than the remote one - the goal of this function
9f2552
-               is to make sure we generate CSNs > the remote CSN - if
9f2552
-               we have increased the time, we can decrease the seqnum
9f2552
-               and still guarantee that any new CSNs generated will be
9f2552
-               > any current CSNs we have generated */
9f2552
-            if (remote_seqnum < gen->state.seq_num) {
9f2552
-                gen->state.seq_num ++;
9f2552
-            } else {
9f2552
-                gen->state.seq_num = remote_seqnum + 1;
9f2552
-            }
9f2552
+        gen->state.remote_offset += remote_offset;
9f2552
+        /* To avoid beat phenomena between suppliers, let's put 1 second in local_offset
9f2552
+         * it will be eaten at next clock tick rather than increasing remote offset
9f2552
+         * If we do not do that we will have a time skew drift of 1 second per 2 seconds
9f2552
+         * if suppliers are desynchronized by 0.5 second 
9f2552
+         */
9f2552
+        if (gen->state.local_offset == 0) {
9f2552
+            gen->state.local_offset++;
9f2552
+            gen->state.remote_offset--;
9f2552
         }
9f2552
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
-            slapi_log_err(SLAPI_LOG_REPL, "csngen_adjust_time",
9f2552
-                          "gen state after %08lx%04x:%ld:%ld:%ld\n",
9f2552
-                          new_time, gen->state.seq_num,
9f2552
-                          gen->state.sampled_time,
9f2552
-                          gen->state.local_offset,
9f2552
-                          gen->state.remote_offset);
9f2552
+    }
9f2552
+    /* Time to compute seqnum so that 
9f2552
+     *   new csn >= remote csn and new csn >= old local csn 
9f2552
+     */
9f2552
+    new_time = CSN_CALC_TSTAMP(gen);
9f2552
+    PR_ASSERT(new_time >= old_time);
9f2552
+    PR_ASSERT(new_time >= remote_time);
9f2552
+    if (new_time > old_time) {
9f2552
+        /* Can reset (local) seqnum */
9f2552
+        gen->state.seq_num = 0;
9f2552
+    }
9f2552
+    if (new_time == remote_time && remote_seqnum >= gen->state.seq_num) {
9f2552
+        if (remote_seqnum >= CSN_MAX_SEQNUM) {
9f2552
+            gen->state.seq_num = 0;
9f2552
+            gen->state.local_offset++;
9f2552
+        } else {
9f2552
+            gen->state.seq_num = remote_seqnum + 1;
9f2552
         }
9f2552
-    } else if (gen->state.remote_offset > 0) {
9f2552
-        /* decrease remote offset? */
9f2552
-        /* how to decrease remote offset but ensure that we don't
9f2552
-           generate a duplicate CSN, or a CSN smaller than one we've already
9f2552
-           generated? */
9f2552
+    }
9f2552
+
9f2552
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
+        slapi_log_err(SLAPI_LOG_REPL, "csngen_adjust_time",
9f2552
+                      "gen state after %08lx%04x:%ld:%ld:%ld\n",
9f2552
+                      new_time, gen->state.seq_num,
9f2552
+                      gen->state.sampled_time,
9f2552
+                      gen->state.local_offset,
9f2552
+                      gen->state.remote_offset);
9f2552
     }
9f2552
 
9f2552
     slapi_rwlock_unlock(gen->lock);
9f2552
@@ -435,16 +420,16 @@ csngen_unregister_callbacks(CSNGen *gen, void *cookie)
9f2552
 
9f2552
 /* debugging function */
9f2552
 void
9f2552
-csngen_dump_state(const CSNGen *gen)
9f2552
+csngen_dump_state(const CSNGen *gen, int severity)
9f2552
 {
9f2552
     if (gen) {
9f2552
         slapi_rwlock_rdlock(gen->lock);
9f2552
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "CSN generator's state:\n");
9f2552
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\treplica id: %d\n", gen->state.rid);
9f2552
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tsampled time: %ld\n", gen->state.sampled_time);
9f2552
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tlocal offset: %ld\n", gen->state.local_offset);
9f2552
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tremote offset: %ld\n", gen->state.remote_offset);
9f2552
-        slapi_log_err(SLAPI_LOG_DEBUG, "csngen_dump_state", "\tsequence number: %d\n", gen->state.seq_num);
9f2552
+        slapi_log_err(severity, "csngen_dump_state", "CSN generator's state:\n");
9f2552
+        slapi_log_err(severity, "csngen_dump_state", "\treplica id: %d\n", gen->state.rid);
9f2552
+        slapi_log_err(severity, "csngen_dump_state", "\tsampled time: %ld\n", gen->state.sampled_time);
9f2552
+        slapi_log_err(severity, "csngen_dump_state", "\tlocal offset: %ld\n", gen->state.local_offset);
9f2552
+        slapi_log_err(severity, "csngen_dump_state", "\tremote offset: %ld\n", gen->state.remote_offset);
9f2552
+        slapi_log_err(severity, "csngen_dump_state", "\tsequence number: %d\n", gen->state.seq_num);
9f2552
         slapi_rwlock_unlock(gen->lock);
9f2552
     }
9f2552
 }
9f2552
@@ -459,7 +444,7 @@ csngen_test()
9f2552
     CSNGen *gen = csngen_new(255, NULL);
9f2552
 
9f2552
     slapi_log_err(SLAPI_LOG_DEBUG, "csngen_test", "staring csn generator test ...");
9f2552
-    csngen_dump_state(gen);
9f2552
+    csngen_dump_state(gen, SLAPI_LOG_INFO);
9f2552
 
9f2552
     rc = _csngen_start_test_threads(gen);
9f2552
     if (rc == 0) {
9f2552
@@ -469,7 +454,7 @@ csngen_test()
9f2552
     }
9f2552
 
9f2552
     _csngen_stop_test_threads();
9f2552
-    csngen_dump_state(gen);
9f2552
+    csngen_dump_state(gen, SLAPI_LOG_INFO);
9f2552
     slapi_log_err(SLAPI_LOG_DEBUG, "csngen_test", "csn generator test is complete...");
9f2552
 }
9f2552
 
9f2552
@@ -574,94 +559,93 @@ _csngen_cmp_callbacks(const void *el1, const void *el2)
9f2552
         return 1;
9f2552
 }
9f2552
 
9f2552
+/* Get time and adjust local offset */
9f2552
 static int
9f2552
-_csngen_adjust_local_time(CSNGen *gen, time_t cur_time)
9f2552
+_csngen_adjust_local_time(CSNGen *gen)
9f2552
 {
9f2552
     extern int config_get_ignore_time_skew(void);
9f2552
     int ignore_time_skew = config_get_ignore_time_skew();
9f2552
-    time_t time_diff = cur_time - gen->state.sampled_time;
9f2552
+    struct timespec now = {0};
9f2552
+    time_t time_diff;
9f2552
+    time_t cur_time;
9f2552
+    int rc;
9f2552
 
9f2552
+    
9f2552
+    if ((rc = gen->gettime(&now)) != 0) {
9f2552
+        /* Failed to get system time, we must abort */
9f2552
+        slapi_log_err(SLAPI_LOG_ERR, "csngen_new_csn",
9f2552
+                "Failed to get system time (%s)\n",
9f2552
+                slapd_system_strerror(rc));
9f2552
+        return CSN_TIME_ERROR;
9f2552
+    }
9f2552
+    cur_time = now.tv_sec;
9f2552
+    time_diff = cur_time - gen->state.sampled_time;
9f2552
+
9f2552
+    /* check if the time should be adjusted */
9f2552
     if (time_diff == 0) {
9f2552
         /* This is a no op - _csngen_adjust_local_time should never be called
9f2552
            in this case, because there is nothing to adjust - but just return
9f2552
            here to protect ourselves
9f2552
         */
9f2552
         return CSN_SUCCESS;
9f2552
-    } else if (time_diff > 0) {
9f2552
-        time_t ts_before = CSN_CALC_TSTAMP(gen);
9f2552
-        time_t ts_after = 0;
9f2552
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
-            time_t new_time = CSN_CALC_TSTAMP(gen);
9f2552
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
9f2552
-                          "gen state before %08lx%04x:%ld:%ld:%ld\n",
9f2552
-                          new_time, gen->state.seq_num,
9f2552
-                          gen->state.sampled_time,
9f2552
-                          gen->state.local_offset,
9f2552
-                          gen->state.remote_offset);
9f2552
-        }
9f2552
-
9f2552
-        gen->state.sampled_time = cur_time;
9f2552
-        if (time_diff > gen->state.local_offset)
9f2552
-            gen->state.local_offset = 0;
9f2552
-        else
9f2552
-            gen->state.local_offset = gen->state.local_offset - time_diff;
9f2552
-
9f2552
-        /* only reset the seq_num if the new timestamp part of the CSN
9f2552
-           is going to be greater than the old one - if they are the
9f2552
-           same after the above adjustment (which can happen if
9f2552
-           csngen_adjust_time has to store the offset in the
9f2552
-           local_offset field) we must not allow the CSN to regress or
9f2552
-           generate duplicate numbers */
9f2552
-        ts_after = CSN_CALC_TSTAMP(gen);
9f2552
-        if (ts_after > ts_before) {
9f2552
-            gen->state.seq_num = 0; /* only reset if new time > old time */
9f2552
-        }
9f2552
-
9f2552
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
-            time_t new_time = CSN_CALC_TSTAMP(gen);
9f2552
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
9f2552
-                          "gen state after %08lx%04x:%ld:%ld:%ld\n",
9f2552
-                          new_time, gen->state.seq_num,
9f2552
-                          gen->state.sampled_time,
9f2552
-                          gen->state.local_offset,
9f2552
-                          gen->state.remote_offset);
9f2552
-        }
9f2552
-        return CSN_SUCCESS;
9f2552
-    } else /* time was turned back */
9f2552
-    {
9f2552
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
-            time_t new_time = CSN_CALC_TSTAMP(gen);
9f2552
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
9f2552
-                          "gen state back before %08lx%04x:%ld:%ld:%ld\n",
9f2552
-                          new_time, gen->state.seq_num,
9f2552
-                          gen->state.sampled_time,
9f2552
-                          gen->state.local_offset,
9f2552
-                          gen->state.remote_offset);
9f2552
-        }
9f2552
+    }
9f2552
+    if (labs(time_diff) > TIME_DIFF_WARNING_DELAY) {
9f2552
+        /* We had a jump larger than TIME_DIFF_WARNING_DELAY (30 days) */
9f2552
+        slapi_log_err(SLAPI_LOG_INFO, "csngen_new_csn",
9f2552
+                "Detected large jump in CSN time.  Delta: %ld (current time: %ld  vs  previous time: %ld)\n",
9f2552
+                time_diff, cur_time, gen->state.sampled_time);
9f2552
+    }
9f2552
+    if (!ignore_time_skew && (gen->state.local_offset - time_diff > CSN_MAX_TIME_ADJUST)) {
9f2552
+        slapi_log_err(SLAPI_LOG_ERR, "_csngen_adjust_local_time",
9f2552
+                      "Adjustment limit exceeded; value - %ld, limit - %d\n",
9f2552
+                      gen->state.local_offset - time_diff, CSN_MAX_TIME_ADJUST);
9f2552
+        return CSN_LIMIT_EXCEEDED;
9f2552
+    }
9f2552
 
9f2552
-        if (!ignore_time_skew && (labs(time_diff) > CSN_MAX_TIME_ADJUST)) {
9f2552
-            slapi_log_err(SLAPI_LOG_ERR, "_csngen_adjust_local_time",
9f2552
-                          "Adjustment limit exceeded; value - %ld, limit - %d\n",
9f2552
-                          labs(time_diff), CSN_MAX_TIME_ADJUST);
9f2552
-            return CSN_LIMIT_EXCEEDED;
9f2552
-        }
9f2552
+    time_t ts_before = CSN_CALC_TSTAMP(gen);
9f2552
+    time_t ts_after = 0;
9f2552
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
+        time_t new_time = CSN_CALC_TSTAMP(gen);
9f2552
+        slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
9f2552
+                      "gen state before %08lx%04x:%ld:%ld:%ld\n",
9f2552
+                      new_time, gen->state.seq_num,
9f2552
+                      gen->state.sampled_time,
9f2552
+                      gen->state.local_offset,
9f2552
+                      gen->state.remote_offset);
9f2552
+    }
9f2552
 
9f2552
-        gen->state.sampled_time = cur_time;
9f2552
-        gen->state.local_offset = MAX_VAL(gen->state.local_offset, labs(time_diff));
9f2552
-        gen->state.seq_num = 0;
9f2552
+    gen->state.sampled_time = cur_time;
9f2552
+    gen->state.local_offset = MAX_VAL(0, gen->state.local_offset - time_diff);
9f2552
+    /* new local_offset = MAX_VAL(0, old sampled_time + old local_offset - cur_time)
9f2552
+     * ==> new local_offset >= 0 and
9f2552
+     *     new local_offset + cur_time >= old sampled_time + old local_offset
9f2552
+     * ==> new local_offset + cur_time + remote_offset >=
9f2552
+     *            old sampled_time + old local_offset + remote_offset
9f2552
+     * ==> CSN_CALC_TSTAMP(new gen) >= CSN_CALC_TSTAMP(old gen)
9f2552
+     */
9f2552
 
9f2552
-        if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
-            time_t new_time = CSN_CALC_TSTAMP(gen);
9f2552
-            slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
9f2552
-                          "gen state back after %08lx%04x:%ld:%ld:%ld\n",
9f2552
-                          new_time, gen->state.seq_num,
9f2552
-                          gen->state.sampled_time,
9f2552
-                          gen->state.local_offset,
9f2552
-                          gen->state.remote_offset);
9f2552
-        }
9f2552
+    /* only reset the seq_num if the new timestamp part of the CSN
9f2552
+       is going to be greater than the old one - if they are the
9f2552
+       same after the above adjustment (which can happen if
9f2552
+       csngen_adjust_time has to store the offset in the
9f2552
+       local_offset field) we must not allow the CSN to regress or
9f2552
+       generate duplicate numbers */
9f2552
+    ts_after = CSN_CALC_TSTAMP(gen);
9f2552
+    PR_ASSERT(ts_after >= ts_before);
9f2552
+    if (ts_after > ts_before) {
9f2552
+        gen->state.seq_num = 0; /* only reset if new time > old time */
9f2552
+    }
9f2552
 
9f2552
-        return CSN_SUCCESS;
9f2552
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
9f2552
+        time_t new_time = CSN_CALC_TSTAMP(gen);
9f2552
+        slapi_log_err(SLAPI_LOG_REPL, "_csngen_adjust_local_time",
9f2552
+                      "gen state after %08lx%04x:%ld:%ld:%ld\n",
9f2552
+                      new_time, gen->state.seq_num,
9f2552
+                      gen->state.sampled_time,
9f2552
+                      gen->state.local_offset,
9f2552
+                      gen->state.remote_offset);
9f2552
     }
9f2552
+    return CSN_SUCCESS;
9f2552
 }
9f2552
 
9f2552
 /*
9f2552
@@ -799,7 +783,7 @@ _csngen_remote_tester_main(void *data)
9f2552
                               "Failed to adjust generator's time; csn error - %d\n", rc);
9f2552
             }
9f2552
 
9f2552
-            csngen_dump_state(gen);
9f2552
+            csngen_dump_state(gen, SLAPI_LOG_INFO);
9f2552
         }
9f2552
         csn_free(&csn);
9f2552
 
9f2552
@@ -825,8 +809,83 @@ _csngen_local_tester_main(void *data)
9f2552
         /*
9f2552
          * g_sampled_time -= slapi_rand () % 100;
9f2552
          */
9f2552
-        csngen_dump_state(gen);
9f2552
+        csngen_dump_state(gen, SLAPI_LOG_INFO);
9f2552
     }
9f2552
 
9f2552
     PR_AtomicDecrement(&s_thread_count);
9f2552
 }
9f2552
+
9f2552
+int _csngen_tester_state;
9f2552
+int _csngen_tester_state_rid;
9f2552
+
9f2552
+static int
9f2552
+_mynoise(int time, int len, double height)
9f2552
+{
9f2552
+   if (((time/len) % 2) == 0) {
9f2552
+        return -height + 2 * height * ( time % len ) / (len-1);
9f2552
+   } else {
9f2552
+        return height - 2 * height * ( time % len ) / (len-1);
9f2552
+   }
9f2552
+}
9f2552
+
9f2552
+
9f2552
+int32_t _csngen_tester_gettime(struct timespec *tp)
9f2552
+{
9f2552
+    int vtime = _csngen_tester_state ;
9f2552
+    tp->tv_sec = 0x1000000 + vtime + 2 * _csngen_tester_state_rid;
9f2552
+    if (_csngen_tester_state_rid == 3) {
9f2552
+        /* tp->tv_sec += _mynoise(vtime, 10, 1.5); */
9f2552
+        tp->tv_sec += _mynoise(vtime, 30, 15);
9f2552
+    }
9f2552
+    return 0;
9f2552
+}
9f2552
+
9f2552
+/* Mimic a fully meshed multi-supplier topology */
9f2552
+void csngen_multi_suppliers_test(void)
9f2552
+{
9f2552
+#define NB_TEST_MASTERS	6
9f2552
+#define NB_TEST_STATES	500
9f2552
+    CSNGen *gen[NB_TEST_MASTERS];
9f2552
+    struct timespec now = {0};
9f2552
+    CSN *last_csn = NULL;
9f2552
+    CSN *csn = NULL;
9f2552
+    int i,j,rc;
9f2552
+
9f2552
+    _csngen_tester_gettime(&now);
9f2552
+
9f2552
+    for (i=0; i< NB_TEST_MASTERS; i++) {
9f2552
+        gen[i] = csngen_new(i+1, NULL);
9f2552
+        gen[i]->gettime = _csngen_tester_gettime;
9f2552
+        gen[i]->state.sampled_time = now.tv_sec;
9f2552
+    }
9f2552
+
9f2552
+    for (_csngen_tester_state=0; _csngen_tester_state < NB_TEST_STATES; _csngen_tester_state++) {
9f2552
+        for (i=0; i< NB_TEST_MASTERS; i++) {
9f2552
+            _csngen_tester_state_rid = i+1;
9f2552
+            rc = csngen_new_csn(gen[i], &csn, PR_FALSE);
9f2552
+            if (rc) {
9f2552
+                continue;
9f2552
+            }
9f2552
+            csngen_dump_state(gen[i], SLAPI_LOG_INFO);
9f2552
+
9f2552
+            if (csn_compare(csn, last_csn) <= 0) {
9f2552
+                slapi_log_err(SLAPI_LOG_ERR, "csngen_multi_suppliers_test",
9f2552
+                              "CSN generated in disorder state=%d rid=%d\n", _csngen_tester_state, _csngen_tester_state_rid);
9f2552
+                _csngen_tester_state = NB_TEST_STATES;
9f2552
+                break;
9f2552
+            }
9f2552
+            last_csn = csn;
9f2552
+
9f2552
+            for (j=0; j< NB_TEST_MASTERS; j++) {
9f2552
+                if (i==j) {
9f2552
+                    continue;
9f2552
+                }
9f2552
+                _csngen_tester_state_rid = j+1;
9f2552
+                rc = csngen_adjust_time(gen[j], csn);
9f2552
+                if (rc) {
9f2552
+                    continue;
9f2552
+                }
9f2552
+            }
9f2552
+        }
9f2552
+    }
9f2552
+}
9f2552
diff --git a/ldap/servers/slapd/slapi-plugin.h b/ldap/servers/slapd/slapi-plugin.h
9f2552
index 56765fdfb..59c5ec9ab 100644
9f2552
--- a/ldap/servers/slapd/slapi-plugin.h
9f2552
+++ b/ldap/servers/slapd/slapi-plugin.h
9f2552
@@ -6762,8 +6762,17 @@ time_t slapi_current_time(void) __attribute__((deprecated));
9f2552
  *
9f2552
  * \param tp - a timespec struct where the system time is set
9f2552
  * \return result code, upon success tp is set to the system time
9f2552
+ * as a clock in UTC timezone. This clock adjusts with ntp steps,
9f2552
+ * and should NOT be used for timer information.
9f2552
  */
9f2552
 int32_t slapi_clock_gettime(struct timespec *tp);
9f2552
+/* 
9f2552
+ * slapi_clock_gettime should have better been called
9f2552
+ * slapi_clock_utc_gettime but since the function pre-existed
9f2552
+ * we are just adding an alias (to avoid risking to break
9f2552
+ * some custom plugins)
9f2552
+ */
9f2552
+#define slapi_clock_utc_gettime slapi_clock_gettime
9f2552
 
9f2552
 /**
9f2552
  * Returns the current system time as a hr clock relative to uptime
9f2552
-- 
9f2552
2.31.1
9f2552