|
|
96373c |
From 127e0d954eb7741c4afdc0305f7970b7ea164e8d Mon Sep 17 00:00:00 2001
|
|
|
96373c |
From: Ludwig Krispenz <lkrispen@redhat.com>
|
|
|
96373c |
Date: Thu, 9 Nov 2017 11:28:34 +0100
|
|
|
96373c |
Subject: [PATCH] Ticket 48118 - At startup, changelog can be erronously
|
|
|
96373c |
rebuilt after a normal shutdown
|
|
|
96373c |
|
|
|
96373c |
Problem: There are two problems that can lead to inconsistent database and changelog maxruv:
|
|
|
96373c |
1] the database ruv is written periodically in th ehouskeeping thread and at shutdown. It
|
|
|
96373c |
relies on teh ruv_dirty flag, but due to a race condition this can be reset befor writing
|
|
|
96373c |
the ruv
|
|
|
96373c |
2] the changelog max ruv is updated whenever an operation is commutted, but in case of internal
|
|
|
96373c |
operations inside the txn for a client operation, if the operation is aborted the cl maxruv
|
|
|
96373c |
is not reset. Since it is only written at shutdown this normally is no problem, but if the
|
|
|
96373c |
aborted operation is the last before shutdown or is aborted by shutdown the cl ruv is incorrect
|
|
|
96373c |
|
|
|
96373c |
Fix: the fix is in two parts:
|
|
|
96373c |
1] remove the use of the dirty flag, ensure that the ruv is always written. The overhead for writing
|
|
|
96373c |
a database ruv that has not changed is minimal
|
|
|
96373c |
2] when writing the changelog maxruv check if the macsns it contains are really present in the
|
|
|
96373c |
changelog. If not the maxruv is not written, it will be reconstructed at the next startup
|
|
|
96373c |
|
|
|
96373c |
Reviewed by: William,Thierry - Thanks
|
|
|
96373c |
---
|
|
|
96373c |
ldap/servers/plugins/replication/cl5_api.c | 39 ++++++++++++++++++++++
|
|
|
96373c |
ldap/servers/plugins/replication/repl5.h | 1 -
|
|
|
96373c |
ldap/servers/plugins/replication/repl5_replica.c | 32 +-----------------
|
|
|
96373c |
.../plugins/replication/repl5_replica_config.c | 2 --
|
|
|
96373c |
4 files changed, 40 insertions(+), 34 deletions(-)
|
|
|
96373c |
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c
|
|
|
96373c |
index ec648c014..55032dfb0 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/cl5_api.c
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/cl5_api.c
|
|
|
96373c |
@@ -250,6 +250,8 @@ static void _cl5ReadBerval(struct berval *bv, char **buff);
|
|
|
96373c |
static void _cl5WriteBerval(struct berval *bv, char **buff);
|
|
|
96373c |
static int _cl5ReadBervals(struct berval ***bv, char **buff, unsigned int size);
|
|
|
96373c |
static int _cl5WriteBervals(struct berval **bv, char **buff, u_int32_t *size);
|
|
|
96373c |
+static int64_t _cl5CheckMaxRUV(CL5DBFile *file, RUV *maxruv);
|
|
|
96373c |
+static int64_t _cl5CheckCSNinCL(const ruv_enum_data *element, void *arg);
|
|
|
96373c |
|
|
|
96373c |
/* replay iteration */
|
|
|
96373c |
#ifdef FOR_DEBUGGING
|
|
|
96373c |
@@ -2716,6 +2718,36 @@ _cl5WriteBervals(struct berval **bv, char **buff, u_int32_t *size)
|
|
|
96373c |
return CL5_SUCCESS;
|
|
|
96373c |
}
|
|
|
96373c |
|
|
|
96373c |
+static int64_t
|
|
|
96373c |
+_cl5CheckCSNinCL(const ruv_enum_data *element, void *arg)
|
|
|
96373c |
+{
|
|
|
96373c |
+ CL5DBFile *file = (CL5DBFile *)arg;
|
|
|
96373c |
+ int rc = 0;
|
|
|
96373c |
+
|
|
|
96373c |
+ DBT key = {0}, data = {0};
|
|
|
96373c |
+ char csnStr[CSN_STRSIZE];
|
|
|
96373c |
+
|
|
|
96373c |
+ /* construct the key */
|
|
|
96373c |
+ key.data = csn_as_string(element->csn, PR_FALSE, csnStr);
|
|
|
96373c |
+ key.size = CSN_STRSIZE;
|
|
|
96373c |
+
|
|
|
96373c |
+ data.flags = DB_DBT_MALLOC;
|
|
|
96373c |
+
|
|
|
96373c |
+ rc = file->db->get(file->db, NULL /*txn*/, &key, &data, 0);
|
|
|
96373c |
+
|
|
|
96373c |
+ slapi_ch_free(&(data.data));
|
|
|
96373c |
+ return rc;
|
|
|
96373c |
+}
|
|
|
96373c |
+
|
|
|
96373c |
+static int64_t
|
|
|
96373c |
+_cl5CheckMaxRUV(CL5DBFile *file, RUV *maxruv)
|
|
|
96373c |
+{
|
|
|
96373c |
+ int rc = 0;
|
|
|
96373c |
+
|
|
|
96373c |
+ rc = ruv_enumerate_elements(maxruv, _cl5CheckCSNinCL, (void *)file);
|
|
|
96373c |
+
|
|
|
96373c |
+ return rc;
|
|
|
96373c |
+}
|
|
|
96373c |
/* upgrade from db33 to db41
|
|
|
96373c |
* 1. Run recovery on the database environment using the DB_ENV->open method
|
|
|
96373c |
* 2. Remove any Berkeley DB environment using the DB_ENV->remove method
|
|
|
96373c |
@@ -4010,6 +4042,13 @@ _cl5WriteRUV(CL5DBFile *file, PRBool purge)
|
|
|
96373c |
rc = ruv_to_bervals(file->maxRUV, &vals);
|
|
|
96373c |
}
|
|
|
96373c |
|
|
|
96373c |
+ if (!purge && _cl5CheckMaxRUV(file, file->maxRUV)) {
|
|
|
96373c |
+ slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name_cl,
|
|
|
96373c |
+ "_cl5WriteRUV - changelog maxRUV not found in changelog for file %s\n",
|
|
|
96373c |
+ file->name);
|
|
|
96373c |
+ return CL5_DB_ERROR;
|
|
|
96373c |
+ }
|
|
|
96373c |
+
|
|
|
96373c |
key.size = CSN_STRSIZE;
|
|
|
96373c |
|
|
|
96373c |
rc = _cl5WriteBervals(vals, &buff, &data.size);
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
|
|
|
96373c |
index c6e79b7e2..4e206a0fc 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl5.h
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl5.h
|
|
|
96373c |
@@ -725,7 +725,6 @@ Object *replica_get_for_backend(const char *be_name);
|
|
|
96373c |
void replica_set_purge_delay(Replica *r, uint32_t purge_delay);
|
|
|
96373c |
void replica_set_tombstone_reap_interval(Replica *r, long interval);
|
|
|
96373c |
void replica_update_ruv_consumer(Replica *r, RUV *supplier_ruv);
|
|
|
96373c |
-void replica_set_ruv_dirty(Replica *r);
|
|
|
96373c |
Slapi_Entry *get_in_memory_ruv(Slapi_DN *suffix_sdn);
|
|
|
96373c |
int replica_write_ruv(Replica *r);
|
|
|
96373c |
char *replica_get_dn(Replica *r);
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
96373c |
index e5296bf1c..77f4f18e4 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
96373c |
@@ -41,7 +41,6 @@ struct replica
|
|
|
96373c |
ReplicaType repl_type; /* is this replica read-only ? */
|
|
|
96373c |
ReplicaId repl_rid; /* replicaID */
|
|
|
96373c |
Object *repl_ruv; /* replica update vector */
|
|
|
96373c |
- PRBool repl_ruv_dirty; /* Dirty flag for ruv */
|
|
|
96373c |
CSNPL *min_csn_pl; /* Pending list for minimal CSN */
|
|
|
96373c |
void *csn_pl_reg_id; /* registration assignment for csn callbacks */
|
|
|
96373c |
unsigned long repl_state_flags; /* state flags */
|
|
|
96373c |
@@ -788,7 +787,6 @@ replica_set_ruv(Replica *r, RUV *ruv)
|
|
|
96373c |
}
|
|
|
96373c |
|
|
|
96373c |
r->repl_ruv = object_new((void *)ruv, (FNFree)ruv_destroy);
|
|
|
96373c |
- r->repl_ruv_dirty = PR_TRUE;
|
|
|
96373c |
|
|
|
96373c |
replica_unlock(r->repl_lock);
|
|
|
96373c |
}
|
|
|
96373c |
@@ -860,9 +858,6 @@ replica_update_ruv(Replica *r, const CSN *updated_csn, const char *replica_purl)
|
|
|
96373c |
"to update RUV for replica %s, csn = %s\n",
|
|
|
96373c |
slapi_sdn_get_dn(r->repl_root),
|
|
|
96373c |
csn_as_string(updated_csn, PR_FALSE, csn_str));
|
|
|
96373c |
- } else {
|
|
|
96373c |
- /* RUV updated - mark as dirty */
|
|
|
96373c |
- r->repl_ruv_dirty = PR_TRUE;
|
|
|
96373c |
}
|
|
|
96373c |
} else {
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name,
|
|
|
96373c |
@@ -1347,8 +1342,6 @@ replica_dump(Replica *r)
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\tupdate dn: %s\n",
|
|
|
96373c |
updatedn_list ? updatedn_list : "not configured");
|
|
|
96373c |
slapi_ch_free_string(&updatedn_list);
|
|
|
96373c |
- slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\truv: %s configured and is %sdirty\n",
|
|
|
96373c |
- r->repl_ruv ? "" : "not", r->repl_ruv_dirty ? "" : "not ");
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "\tCSN generator: %s configured\n",
|
|
|
96373c |
r->repl_csngen ? "" : "not");
|
|
|
96373c |
/* JCMREPL - Dump Referrals */
|
|
|
96373c |
@@ -1675,7 +1668,6 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
|
|
|
96373c |
|
|
|
96373c |
ruv_force_csn_update_from_ruv(upper_bound_ruv, r_ruv,
|
|
|
96373c |
"Force update of database RUV (from CL RUV) -> ", SLAPI_LOG_NOTICE);
|
|
|
96373c |
- replica_set_ruv_dirty(r);
|
|
|
96373c |
}
|
|
|
96373c |
|
|
|
96373c |
} else {
|
|
|
96373c |
@@ -2778,11 +2770,6 @@ replica_write_ruv(Replica *r)
|
|
|
96373c |
|
|
|
96373c |
replica_lock(r->repl_lock);
|
|
|
96373c |
|
|
|
96373c |
- if (!r->repl_ruv_dirty) {
|
|
|
96373c |
- replica_unlock(r->repl_lock);
|
|
|
96373c |
- return rc;
|
|
|
96373c |
- }
|
|
|
96373c |
-
|
|
|
96373c |
PR_ASSERT(r->repl_ruv);
|
|
|
96373c |
|
|
|
96373c |
ruv_to_smod((RUV *)object_get_data(r->repl_ruv), &smod);
|
|
|
96373c |
@@ -2817,14 +2804,10 @@ replica_write_ruv(Replica *r)
|
|
|
96373c |
/* ruv does not exist - create one */
|
|
|
96373c |
replica_lock(r->repl_lock);
|
|
|
96373c |
|
|
|
96373c |
- if (rc == LDAP_SUCCESS) {
|
|
|
96373c |
- r->repl_ruv_dirty = PR_FALSE;
|
|
|
96373c |
- } else if (rc == LDAP_NO_SUCH_OBJECT) {
|
|
|
96373c |
+ if (rc == LDAP_NO_SUCH_OBJECT) {
|
|
|
96373c |
/* this includes an internal operation - but since this only happens
|
|
|
96373c |
during server startup - its ok that we have lock around it */
|
|
|
96373c |
rc = _replica_configure_ruv(r, PR_TRUE);
|
|
|
96373c |
- if (rc == 0)
|
|
|
96373c |
- r->repl_ruv_dirty = PR_FALSE;
|
|
|
96373c |
} else /* error */
|
|
|
96373c |
{
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
|
|
|
96373c |
@@ -3325,7 +3308,6 @@ replica_create_ruv_tombstone(Replica *r)
|
|
|
96373c |
|
|
|
96373c |
if (ruv_init_new(csnstr, r->repl_rid, purl, &ruv) == RUV_SUCCESS) {
|
|
|
96373c |
r->repl_ruv = object_new((void *)ruv, (FNFree)ruv_destroy);
|
|
|
96373c |
- r->repl_ruv_dirty = PR_TRUE;
|
|
|
96373c |
return_value = LDAP_SUCCESS;
|
|
|
96373c |
} else {
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_ERR, repl_plugin_name, "replica_create_ruv_tombstone - "
|
|
|
96373c |
@@ -3365,8 +3347,6 @@ replica_create_ruv_tombstone(Replica *r)
|
|
|
96373c |
slapi_add_internal_pb(pb);
|
|
|
96373c |
e = NULL; /* add consumes e, upon success or failure */
|
|
|
96373c |
slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &return_value);
|
|
|
96373c |
- if (return_value == LDAP_SUCCESS)
|
|
|
96373c |
- r->repl_ruv_dirty = PR_FALSE;
|
|
|
96373c |
|
|
|
96373c |
done:
|
|
|
96373c |
slapi_entry_free(e);
|
|
|
96373c |
@@ -3630,7 +3610,6 @@ replica_strip_cleaned_rids(Replica *r)
|
|
|
96373c |
ruv_get_cleaned_rids(ruv, rid);
|
|
|
96373c |
while (rid[i] != 0) {
|
|
|
96373c |
ruv_delete_replica(ruv, rid[i]);
|
|
|
96373c |
- replica_set_ruv_dirty(r);
|
|
|
96373c |
if (replica_write_ruv(r)) {
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
|
|
|
96373c |
"replica_strip_cleaned_rids - Failed to write RUV\n");
|
|
|
96373c |
@@ -3744,15 +3723,6 @@ replica_update_ruv_consumer(Replica *r, RUV *supplier_ruv)
|
|
|
96373c |
}
|
|
|
96373c |
}
|
|
|
96373c |
|
|
|
96373c |
-void
|
|
|
96373c |
-replica_set_ruv_dirty(Replica *r)
|
|
|
96373c |
-{
|
|
|
96373c |
- PR_ASSERT(r);
|
|
|
96373c |
- replica_lock(r->repl_lock);
|
|
|
96373c |
- r->repl_ruv_dirty = PR_TRUE;
|
|
|
96373c |
- replica_unlock(r->repl_lock);
|
|
|
96373c |
-}
|
|
|
96373c |
-
|
|
|
96373c |
PRBool
|
|
|
96373c |
replica_is_state_flag_set(Replica *r, int32_t flag)
|
|
|
96373c |
{
|
|
|
96373c |
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
96373c |
index 9c8d6adbb..e025f34d8 100644
|
|
|
96373c |
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
96373c |
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
|
|
|
96373c |
@@ -937,7 +937,6 @@ replica_config_change_type_and_id(Replica *r, const char *new_type, const char *
|
|
|
96373c |
replica_reset_csn_pl(r);
|
|
|
96373c |
}
|
|
|
96373c |
ruv_delete_replica(ruv, oldrid);
|
|
|
96373c |
- replica_set_ruv_dirty(r);
|
|
|
96373c |
cl5CleanRUV(oldrid);
|
|
|
96373c |
replica_set_csn_assigned(r);
|
|
|
96373c |
}
|
|
|
96373c |
@@ -1323,7 +1322,6 @@ replica_execute_cleanruv_task(Object *r, ReplicaId rid, char *returntext __attri
|
|
|
96373c |
return LDAP_UNWILLING_TO_PERFORM;
|
|
|
96373c |
}
|
|
|
96373c |
rc = ruv_delete_replica(local_ruv, rid);
|
|
|
96373c |
- replica_set_ruv_dirty(replica);
|
|
|
96373c |
if (replica_write_ruv(replica)) {
|
|
|
96373c |
slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "cleanAllRUV_task - Could not write RUV\n");
|
|
|
96373c |
}
|
|
|
96373c |
--
|
|
|
96373c |
2.13.6
|
|
|
96373c |
|