|
|
ed9856 |
From 905d243347e13a342ce39491927f158b5337fd43 Mon Sep 17 00:00:00 2001
|
|
|
ed9856 |
From: tbordaz <tbordaz@redhat.com>
|
|
|
ed9856 |
Date: Tue, 23 Feb 2021 13:42:31 +0100
|
|
|
ed9856 |
Subject: [PATCH] Issue 4644 - Large updates can reset the CLcache to the
|
|
|
ed9856 |
beginning of the changelog (#4647)
|
|
|
ed9856 |
|
|
|
ed9856 |
Bug description:
|
|
|
ed9856 |
The replication agreements are using bulk load to load updates.
|
|
|
ed9856 |
For bulk load it uses a cursor with DB_MULTIPLE_KEY and DB_NEXT.
|
|
|
ed9856 |
Before using the cursor, it must be initialized with DB_SET.
|
|
|
ed9856 |
|
|
|
ed9856 |
If during the cursor/DB_SET the CSN refers to an update that is larger than
|
|
|
ed9856 |
the size of the provided buffer, then the cursor remains uninitialized and
|
|
|
ed9856 |
c_get returns DB_BUFFER_SMALL.
|
|
|
ed9856 |
|
|
|
ed9856 |
The consequence is that the next c_get(DB_MULTIPLE_KEY and DB_NEXT) will return the
|
|
|
ed9856 |
first record in the changelog DB. This breaks the CLcache.
|
|
|
ed9856 |
|
|
|
ed9856 |
Fix description:
|
|
|
ed9856 |
The fix is to harden cursor initialization so that if DB_SET fails
|
|
|
ed9856 |
because of DB_BUFFER_SMALL, it reallocates buf_data and retries the DB_SET.
|
|
|
ed9856 |
If the cursor still cannot be positioned with DB_SET, it logs a warning.
|
|
|
ed9856 |
|
|
|
ed9856 |
The patch also changes the behaviour of the fix #4492.
|
|
|
ed9856 |
#4492 detected a massive (1 day) jump prior to the starting csn and ended the
|
|
|
ed9856 |
replication session. If the jump was systematic, for example
|
|
|
ed9856 |
if the CLcache got broken because of a too large update, then
|
|
|
ed9856 |
replication was systematically stopped.
|
|
|
ed9856 |
This patch suppresses the systematic stop, letting the RA do a big jump.
|
|
|
ed9856 |
Only the warning from #4492 remains.
|
|
|
ed9856 |
|
|
|
ed9856 |
relates: https://github.com/389ds/389-ds-base/issues/4644
|
|
|
ed9856 |
|
|
|
ed9856 |
Reviewed by: Pierre Rogier (Thanks !!!!)
|
|
|
ed9856 |
|
|
|
ed9856 |
Platforms tested: F31
|
|
|
ed9856 |
---
|
|
|
ed9856 |
.../servers/plugins/replication/cl5_clcache.c | 68 +++++++++++++++----
|
|
|
ed9856 |
1 file changed, 53 insertions(+), 15 deletions(-)
|
|
|
ed9856 |
|
|
|
ed9856 |
diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
|
|
|
ed9856 |
index 43b7c77d8..0b1f48f0c 100644
|
|
|
ed9856 |
--- a/ldap/servers/plugins/replication/cl5_clcache.c
|
|
|
ed9856 |
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
|
|
|
ed9856 |
@@ -370,9 +370,7 @@ clcache_load_buffer(CLC_Buffer *buf, CSN **anchorCSN, int *continue_on_miss, cha
|
|
|
ed9856 |
}
|
|
|
ed9856 |
csn_as_string(buf->buf_current_csn, 0, curr);
|
|
|
ed9856 |
slapi_log_err(loglevel, buf->buf_agmt_name,
|
|
|
ed9856 |
- "clcache_load_buffer - bulk load cursor (%s) is lower than starting csn %s. Ending session.\n", curr, initial_starting_csn);
|
|
|
ed9856 |
- /* it just end the session with UPDATE_NO_MORE_UPDATES */
|
|
|
ed9856 |
- rc = CLC_STATE_DONE;
|
|
|
ed9856 |
+ "clcache_load_buffer - bulk load cursor (%s) is lower than starting csn %s.\n", curr, initial_starting_csn);
|
|
|
ed9856 |
}
|
|
|
ed9856 |
}
|
|
|
ed9856 |
|
|
|
ed9856 |
@@ -413,10 +411,7 @@ clcache_load_buffer(CLC_Buffer *buf, CSN **anchorCSN, int *continue_on_miss, cha
|
|
|
ed9856 |
}
|
|
|
ed9856 |
csn_as_string(buf->buf_current_csn, 0, curr);
|
|
|
ed9856 |
slapi_log_err(loglevel, buf->buf_agmt_name,
|
|
|
ed9856 |
- "clcache_load_buffer - (DB_SET_RANGE) bulk load cursor (%s) is lower than starting csn %s. Ending session.\n", curr, initial_starting_csn);
|
|
|
ed9856 |
- rc = DB_NOTFOUND;
|
|
|
ed9856 |
-
|
|
|
ed9856 |
- return rc;
|
|
|
ed9856 |
+ "clcache_load_buffer - (DB_SET_RANGE) bulk load cursor (%s) is lower than starting csn %s.\n", curr, initial_starting_csn);
|
|
|
ed9856 |
}
|
|
|
ed9856 |
}
|
|
|
ed9856 |
}
|
|
|
ed9856 |
@@ -444,6 +439,42 @@ clcache_load_buffer(CLC_Buffer *buf, CSN **anchorCSN, int *continue_on_miss, cha
|
|
|
ed9856 |
return rc;
|
|
|
ed9856 |
}
|
|
|
ed9856 |
|
|
|
ed9856 |
+/* Set a cursor to a specific key (buf->buf_key)
|
|
|
ed9856 |
+ * In case buf_data is too small to receive the value, DB_SET fails
|
|
|
ed9856 |
+ * (DB_BUFFER_SMALL). This let the cursor uninitialized that is
|
|
|
ed9856 |
+ * problematic because further cursor DB_NEXT will reset the cursor
|
|
|
ed9856 |
+ * to the beginning of the CL.
|
|
|
ed9856 |
+ * If buf_data is too small, this function reallocates enough space
|
|
|
ed9856 |
+ *
|
|
|
ed9856 |
+ * It returns the return code of cursor->c_get
|
|
|
ed9856 |
+ */
|
|
|
ed9856 |
+static int
|
|
|
ed9856 |
+clcache_cursor_set(DBC *cursor, CLC_Buffer *buf)
|
|
|
ed9856 |
+{
|
|
|
ed9856 |
+ int rc;
|
|
|
ed9856 |
+ uint32_t ulen;
|
|
|
ed9856 |
+ uint32_t dlen;
|
|
|
ed9856 |
+ uint32_t size;
|
|
|
ed9856 |
+
|
|
|
ed9856 |
+ rc = cursor->c_get(cursor, &buf->buf_key, &buf->buf_data, DB_SET);
|
|
|
ed9856 |
+ if (rc == DB_BUFFER_SMALL) {
|
|
|
ed9856 |
+ uint32_t ulen;
|
|
|
ed9856 |
+
|
|
|
ed9856 |
+ /* Fortunately, buf->buf_data.size has been set by
|
|
|
ed9856 |
+ * c_get() to the actual data size needed. So we can
|
|
|
ed9856 |
+ * reallocate the data buffer and try to set again.
|
|
|
ed9856 |
+ */
|
|
|
ed9856 |
+ ulen = buf->buf_data.ulen;
|
|
|
ed9856 |
+ buf->buf_data.ulen = (buf->buf_data.size / DEFAULT_CLC_BUFFER_PAGE_SIZE + 1) * DEFAULT_CLC_BUFFER_PAGE_SIZE;
|
|
|
ed9856 |
+ buf->buf_data.data = slapi_ch_realloc(buf->buf_data.data, buf->buf_data.ulen);
|
|
|
ed9856 |
+ slapi_log_err(SLAPI_LOG_REPL, buf->buf_agmt_name,
|
|
|
ed9856 |
+ "clcache_cursor_set - buf data len reallocated %d -> %d bytes (DB_BUFFER_SMALL)\n",
|
|
|
ed9856 |
+ ulen, buf->buf_data.ulen);
|
|
|
ed9856 |
+ rc = cursor->c_get(cursor, &buf->buf_key, &buf->buf_data, DB_SET);
|
|
|
ed9856 |
+ }
|
|
|
ed9856 |
+ return rc;
|
|
|
ed9856 |
+}
|
|
|
ed9856 |
+
|
|
|
ed9856 |
static int
|
|
|
ed9856 |
clcache_load_buffer_bulk(CLC_Buffer *buf, int flag)
|
|
|
ed9856 |
{
|
|
|
ed9856 |
@@ -478,17 +509,24 @@ retry:
|
|
|
ed9856 |
|
|
|
ed9856 |
if (use_flag == DB_NEXT) {
|
|
|
ed9856 |
/* For bulk read, position the cursor before read the next block */
|
|
|
ed9856 |
- rc = cursor->c_get(cursor,
|
|
|
ed9856 |
- &buf->buf_key,
|
|
|
ed9856 |
- &buf->buf_data,
|
|
|
ed9856 |
- DB_SET);
|
|
|
ed9856 |
+ rc = clcache_cursor_set(cursor, buf);
|
|
|
ed9856 |
}
|
|
|
ed9856 |
|
|
|
ed9856 |
- /*
|
|
|
ed9856 |
- * Continue if the error is no-mem since we don't need to
|
|
|
ed9856 |
- * load in the key record anyway with DB_SET.
|
|
|
ed9856 |
- */
|
|
|
ed9856 |
if (0 == rc || DB_BUFFER_SMALL == rc) {
|
|
|
ed9856 |
+ /*
|
|
|
ed9856 |
+ * It should not have failed with DB_BUFFER_SMALL as we tried
|
|
|
ed9856 |
+ * to adjust buf_data in clcache_cursor_set.
|
|
|
ed9856 |
+ * But if it failed with DB_BUFFER_SMALL, there is a risk in clcache_cursor_get
|
|
|
ed9856 |
+ * that the cursor will be reset to the beginning of the changelog.
|
|
|
ed9856 |
+ * Returning an error at this point will stop replication that is
|
|
|
ed9856 |
+ * a risk. So just accept the risk of a reset to the beginning of the CL
|
|
|
ed9856 |
+ * and log an alarming message.
|
|
|
ed9856 |
+ */
|
|
|
ed9856 |
+ if (rc == DB_BUFFER_SMALL) {
|
|
|
ed9856 |
+ slapi_log_err(SLAPI_LOG_WARNING, buf->buf_agmt_name,
|
|
|
ed9856 |
+ "clcache_load_buffer_bulk - Fail to position on csn=%s from the changelog (too large update ?). Risk of full CL evaluation.\n",
|
|
|
ed9856 |
+ (char *)buf->buf_key.data);
|
|
|
ed9856 |
+ }
|
|
|
ed9856 |
rc = clcache_cursor_get(cursor, buf, use_flag);
|
|
|
ed9856 |
}
|
|
|
ed9856 |
}
|
|
|
ed9856 |
--
|
|
|
ed9856 |
2.26.2
|
|
|
ed9856 |
|