|
|
ba46c7 |
From e98e41731051b7bf4a443b51a9d3563fc1853773 Mon Sep 17 00:00:00 2001
|
|
|
ba46c7 |
From: Rich Megginson <rmeggins@redhat.com>
|
|
|
ba46c7 |
Date: Wed, 6 Nov 2013 14:22:31 -0700
|
|
|
ba46c7 |
Subject: [PATCH 47/49] Ticket #47585 Replication Failures related to skipped
|
|
|
ba46c7 |
entries due to cleaned rids
|
|
|
ba46c7 |
|
|
|
ba46c7 |
https://fedorahosted.org/389/ticket/47585
|
|
|
ba46c7 |
Reviewed by: nhosoi (Thanks!)
|
|
|
ba46c7 |
Branch: 389-ds-base-1.3.1
|
|
|
ba46c7 |
Fix Description: If a change was found in the changelog buffer that is
|
|
|
ba46c7 |
skipped due to having an unknown replica ID (rid), the entire buffer was
|
|
|
ba46c7 |
marked as CLC_STATE_NEW_RID. When the buffer is exhausted and the iterator
|
|
|
ba46c7 |
code goes to read in the new buffer, it would not read in the new buffer
|
|
|
ba46c7 |
because it only loads a new buffer if the current buffer state is
|
|
|
ba46c7 |
CLC_STATE_READY. I don't know why the entire buffer would be marked as
|
|
|
ba46c7 |
CLC_STATE_NEW_RID and stop iteration. It seems to me that just the update
|
|
|
ba46c7 |
should be skipped, but new buffers should be loaded in order to keep sending
|
|
|
ba46c7 |
non-skipped updates to the consumer.
|
|
|
ba46c7 |
It is possible for a CSN with an unknown RID to get into the changelog if
|
|
|
ba46c7 |
the server with that RID had been removed by cleanruv/cleanallruv. In that
|
|
|
ba46c7 |
case, the CSN should be skipped. It is assumed that the change was already
|
|
|
ba46c7 |
sent - cleanallruv is supposed to wait until all known changes have been
|
|
|
ba46c7 |
seen before removing the RID from the RUV - so it is safe to skip it.
|
|
|
ba46c7 |
Added additional debugging, so that we can better tell why changelog entries
|
|
|
ba46c7 |
were skipped.
|
|
|
ba46c7 |
Platforms tested: RHEL6 x86_64
|
|
|
ba46c7 |
Flag Day: no
|
|
|
ba46c7 |
Doc impact: no
|
|
|
ba46c7 |
(cherry picked from commit cf08f1274404e4796966011a98a6a0acbbfd6070)
|
|
|
ba46c7 |
(cherry picked from commit 30bb98fb693ea1aac9774bdc43b923eacd72570a)
|
|
|
ba46c7 |
(cherry picked from commit fc70e4ac6accaa14d140e333829e98897f6ff164)
|
|
|
ba46c7 |
---
|
|
|
ba46c7 |
ldap/servers/plugins/replication/cl5_clcache.c | 48 ++++++++++++++++++++++----
|
|
|
ba46c7 |
1 file changed, 42 insertions(+), 6 deletions(-)
|
|
|
ba46c7 |
|
|
|
ba46c7 |
diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
|
|
|
ba46c7 |
index 7a6a446..8218312 100644
|
|
|
ba46c7 |
--- a/ldap/servers/plugins/replication/cl5_clcache.c
|
|
|
ba46c7 |
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
|
|
|
ba46c7 |
@@ -120,6 +120,11 @@ struct clc_buffer {
|
|
|
ba46c7 |
int buf_load_cnt; /* number of loads for session */
|
|
|
ba46c7 |
int buf_record_cnt; /* number of changes for session */
|
|
|
ba46c7 |
int buf_record_skipped; /* number of changes skipped */
|
|
|
ba46c7 |
+ int buf_skipped_new_rid; /* number of changes skipped due to new_rid */
|
|
|
ba46c7 |
+ int buf_skipped_csn_gt_cons_maxcsn; /* number of changes skipped because the csn is less than or equal to the consumer maxcsn */
|
|
|
ba46c7 |
+ int buf_skipped_up_to_date; /* number of changes skipped due to consumer being up-to-date for the given rid */
|
|
|
ba46c7 |
+ int buf_skipped_csn_gt_ruv; /* number of changes skipped because their precedents are not covered by the local RUV snapshot */
|
|
|
ba46c7 |
+ int buf_skipped_csn_covered; /* number of changes skipped due to CSNs already covered by consumer RUV */
|
|
|
ba46c7 |
|
|
|
ba46c7 |
/*
|
|
|
ba46c7 |
* fields that should be accessed via bl_lock or pl_lock
|
|
|
ba46c7 |
@@ -252,6 +257,11 @@ clcache_get_buffer ( CLC_Buffer **buf, DB *db, ReplicaId consumer_rid, const RUV
|
|
|
ba46c7 |
(*buf)->buf_record_skipped = 0;
|
|
|
ba46c7 |
(*buf)->buf_cursor = NULL;
|
|
|
ba46c7 |
(*buf)->buf_num_cscbs = 0;
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_new_rid = 0;
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_csn_gt_cons_maxcsn = 0;
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_up_to_date = 0;
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_csn_gt_ruv = 0;
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_csn_covered = 0;
|
|
|
ba46c7 |
}
|
|
|
ba46c7 |
else {
|
|
|
ba46c7 |
*buf = clcache_new_buffer ( consumer_rid );
|
|
|
ba46c7 |
@@ -287,11 +297,16 @@ clcache_return_buffer ( CLC_Buffer **buf )
|
|
|
ba46c7 |
int i;
|
|
|
ba46c7 |
|
|
|
ba46c7 |
slapi_log_error ( SLAPI_LOG_REPL, (*buf)->buf_agmt_name,
|
|
|
ba46c7 |
- "session end: state=%d load=%d sent=%d skipped=%d\n",
|
|
|
ba46c7 |
- (*buf)->buf_state,
|
|
|
ba46c7 |
- (*buf)->buf_load_cnt,
|
|
|
ba46c7 |
- (*buf)->buf_record_cnt - (*buf)->buf_record_skipped,
|
|
|
ba46c7 |
- (*buf)->buf_record_skipped );
|
|
|
ba46c7 |
+ "session end: state=%d load=%d sent=%d skipped=%d skipped_new_rid=%d "
|
|
|
ba46c7 |
+ "skipped_csn_gt_cons_maxcsn=%d skipped_up_to_date=%d "
|
|
|
ba46c7 |
+ "skipped_csn_gt_ruv=%d skipped_csn_covered=%d\n",
|
|
|
ba46c7 |
+ (*buf)->buf_state,
|
|
|
ba46c7 |
+ (*buf)->buf_load_cnt,
|
|
|
ba46c7 |
+ (*buf)->buf_record_cnt - (*buf)->buf_record_skipped,
|
|
|
ba46c7 |
+ (*buf)->buf_record_skipped, (*buf)->buf_skipped_new_rid,
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_csn_gt_cons_maxcsn,
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_up_to_date, (*buf)->buf_skipped_csn_gt_ruv,
|
|
|
ba46c7 |
+ (*buf)->buf_skipped_csn_covered);
|
|
|
ba46c7 |
|
|
|
ba46c7 |
for ( i = 0; i < (*buf)->buf_num_cscbs; i++ ) {
|
|
|
ba46c7 |
clcache_free_cscb ( &(*buf)->buf_cscbs[i] );
|
|
|
ba46c7 |
@@ -676,6 +691,8 @@ clcache_skip_change ( CLC_Buffer *buf )
|
|
|
ba46c7 |
ReplicaId rid;
|
|
|
ba46c7 |
int skip = 1;
|
|
|
ba46c7 |
int i;
|
|
|
ba46c7 |
+ char buf_cur_csn_str[CSN_STRSIZE];
|
|
|
ba46c7 |
+ char oth_csn_str[CSN_STRSIZE];
|
|
|
ba46c7 |
|
|
|
ba46c7 |
do {
|
|
|
ba46c7 |
|
|
|
ba46c7 |
@@ -697,6 +714,14 @@ clcache_skip_change ( CLC_Buffer *buf )
|
|
|
ba46c7 |
* The consumer must have been "restored" and needs this newer update.
|
|
|
ba46c7 |
*/
|
|
|
ba46c7 |
skip = 0;
|
|
|
ba46c7 |
+ } else if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
|
|
|
ba46c7 |
+ csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str);
|
|
|
ba46c7 |
+ csn_as_string(cons_maxcsn, 0, oth_csn_str);
|
|
|
ba46c7 |
+ slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name,
|
|
|
ba46c7 |
+ "Skipping update because the changelog buffer current csn [%s] is "
|
|
|
ba46c7 |
+ "less than or equal to the consumer max csn [%s]\n",
|
|
|
ba46c7 |
+ buf_cur_csn_str, oth_csn_str);
|
|
|
ba46c7 |
+ buf->buf_skipped_csn_gt_cons_maxcsn++;
|
|
|
ba46c7 |
}
|
|
|
ba46c7 |
csn_free(&cons_maxcsn);
|
|
|
ba46c7 |
break;
|
|
|
ba46c7 |
@@ -714,7 +739,14 @@ clcache_skip_change ( CLC_Buffer *buf )
|
|
|
ba46c7 |
|
|
|
ba46c7 |
/* Skip CSN whose RID is unknown to the local RUV snapshot */
|
|
|
ba46c7 |
if ( i >= buf->buf_num_cscbs ) {
|
|
|
ba46c7 |
- buf->buf_state = CLC_STATE_NEW_RID;
|
|
|
ba46c7 |
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
|
|
|
ba46c7 |
+ csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str);
|
|
|
ba46c7 |
+ slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name,
|
|
|
ba46c7 |
+ "Skipping update because the changelog buffer current csn [%s] rid "
|
|
|
ba46c7 |
+ "[%d] is not in the list of changelog csn buffers (length %d)\n",
|
|
|
ba46c7 |
+ buf_cur_csn_str, rid, buf->buf_num_cscbs);
|
|
|
ba46c7 |
+ }
|
|
|
ba46c7 |
+ buf->buf_skipped_new_rid++;
|
|
|
ba46c7 |
break;
|
|
|
ba46c7 |
}
|
|
|
ba46c7 |
|
|
|
ba46c7 |
@@ -722,17 +754,20 @@ clcache_skip_change ( CLC_Buffer *buf )
|
|
|
ba46c7 |
|
|
|
ba46c7 |
/* Skip if the consumer is already up-to-date for the RID */
|
|
|
ba46c7 |
if ( cscb->state == CLC_STATE_UP_TO_DATE ) {
|
|
|
ba46c7 |
+ buf->buf_skipped_up_to_date++;
|
|
|
ba46c7 |
break;
|
|
|
ba46c7 |
}
|
|
|
ba46c7 |
|
|
|
ba46c7 |
/* Skip CSN whose preceedents are not covered by local RUV snapshot */
|
|
|
ba46c7 |
if ( cscb->state == CLC_STATE_CSN_GT_RUV ) {
|
|
|
ba46c7 |
+ buf->buf_skipped_csn_gt_ruv++;
|
|
|
ba46c7 |
break;
|
|
|
ba46c7 |
}
|
|
|
ba46c7 |
|
|
|
ba46c7 |
/* Skip CSNs already covered by consumer RUV */
|
|
|
ba46c7 |
if ( cscb->consumer_maxcsn &&
|
|
|
ba46c7 |
csn_compare ( buf->buf_current_csn, cscb->consumer_maxcsn ) <= 0 ) {
|
|
|
ba46c7 |
+ buf->buf_skipped_csn_covered++;
|
|
|
ba46c7 |
break;
|
|
|
ba46c7 |
}
|
|
|
ba46c7 |
|
|
|
ba46c7 |
@@ -762,6 +797,7 @@ clcache_skip_change ( CLC_Buffer *buf )
|
|
|
ba46c7 |
|
|
|
ba46c7 |
/* Skip CSNs not covered by local RUV snapshot */
|
|
|
ba46c7 |
cscb->state = CLC_STATE_CSN_GT_RUV;
|
|
|
ba46c7 |
+ buf->buf_skipped_csn_gt_ruv++;
|
|
|
ba46c7 |
|
|
|
ba46c7 |
} while (0);
|
|
|
ba46c7 |
|
|
|
ba46c7 |
--
|
|
|
ba46c7 |
1.8.1.4
|
|
|
ba46c7 |
|