|
|
74ca47 |
From 4182dd8bbff22f9e0e45b763a4619c0bc8dcb153 Mon Sep 17 00:00:00 2001
|
|
|
74ca47 |
From: Mark Reynolds <mreynolds@redhat.com>
|
|
|
74ca47 |
Date: Tue, 9 May 2017 12:31:58 -0400
|
|
|
74ca47 |
Subject: [PATCH] Ticket 49238 - AddressSanitizer: heap-use-after-free in
|
|
|
74ca47 |
libreplication
|
|
|
74ca47 |
|
|
|
74ca47 |
Bug Description:
|
|
|
74ca47 |
The bug is detected in csn pending list component, when
|
|
|
74ca47 |
accessing a csn that has already been freed.
|
|
|
74ca47 |
|
|
|
74ca47 |
The bug is mostly detectable under ASAN because under normal run
|
|
|
74ca47 |
the read access to the csn would only crash if the csn was in
|
|
|
74ca47 |
an unmapped page (that is quite difficult to acheive).
|
|
|
74ca47 |
|
|
|
74ca47 |
The bug was observed under the following conditions:
|
|
|
74ca47 |
- very slow machine
|
|
|
74ca47 |
- all instances running on the same machine
|
|
|
74ca47 |
|
|
|
74ca47 |
The patch address 2 issues
|
|
|
74ca47 |
|
|
|
74ca47 |
Issue - 1
|
|
|
74ca47 |
Under specfic circumstance (failure, like "db_deadlock" during changelog update),
|
|
|
74ca47 |
the csn was freed but still present in the pending list (fix-1).
|
|
|
74ca47 |
|
|
|
74ca47 |
Issue - 2
|
|
|
74ca47 |
Further investigations, showed an other corner case where a
|
|
|
74ca47 |
replica could be updated by several suppliers in parallel.
|
|
|
74ca47 |
In such scenario, an update (on one thread-2) with a higher csn (let csn-2)
|
|
|
74ca47 |
may be applied before an update (on another thread-1) with a smaller
|
|
|
74ca47 |
csn (let csn-1).
|
|
|
74ca47 |
csn-2 is freed when thread-2 complete but the csn-2 will remain
|
|
|
74ca47 |
in the pending list until csn-1 is commited.
|
|
|
74ca47 |
so followup of pending list may access a csn that was freed
|
|
|
74ca47 |
|
|
|
74ca47 |
Fix Description:
|
|
|
74ca47 |
Issue - 1
|
|
|
74ca47 |
The fix in repl5_plugins.c, frees the csn (thread private area)
|
|
|
74ca47 |
at the condition pending list was roll up for that csn (ruv update).
|
|
|
74ca47 |
|
|
|
74ca47 |
Issue - 2
|
|
|
74ca47 |
The fix is in two parts:
|
|
|
74ca47 |
If a supplier tries to acquire a replica while it is
|
|
|
74ca47 |
already owner of it, the replica is granted.
|
|
|
74ca47 |
|
|
|
74ca47 |
If a supplier owns a replica and is asking again for it,
|
|
|
74ca47 |
but this time the replica is not granted, the replica is release and
|
|
|
74ca47 |
the supplier disconnected.
|
|
|
74ca47 |
|
|
|
74ca47 |
https://pagure.io/389-ds-base/issue/49238
|
|
|
74ca47 |
|
|
|
74ca47 |
Reviewed by: Mark Reynolds, Ludwig Krispenz, William Brown (thanks to you all !!)
|
|
|
74ca47 |
|
|
|
74ca47 |
Platforms tested: 7.4
|
|
|
74ca47 |
|
|
|
74ca47 |
Flag Day: no
|
|
|
74ca47 |
|
|
|
74ca47 |
Doc impact: no
|
|
|
74ca47 |
---
|
|
|
74ca47 |
ldap/servers/plugins/replication/repl5.h | 1 +
|
|
|
74ca47 |
ldap/servers/plugins/replication/repl5_plugins.c | 7 +++-
|
|
|
74ca47 |
ldap/servers/plugins/replication/repl5_replica.c | 49 +++++++++++++++++++-----
|
|
|
74ca47 |
ldap/servers/plugins/replication/repl_extop.c | 42 ++++++++++++++++++--
|
|
|
74ca47 |
4 files changed, 86 insertions(+), 13 deletions(-)
|
|
|
74ca47 |
|
|
|
74ca47 |
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
|
|
|
74ca47 |
index c3bd10c..1d8989c 100644
|
|
|
74ca47 |
--- a/ldap/servers/plugins/replication/repl5.h
|
|
|
74ca47 |
+++ b/ldap/servers/plugins/replication/repl5.h
|
|
|
74ca47 |
@@ -549,6 +549,7 @@ void replica_relinquish_exclusive_access(Replica *r, PRUint64 connid, int opid);
|
|
|
74ca47 |
PRBool replica_get_tombstone_reap_active(const Replica *r);
|
|
|
74ca47 |
const Slapi_DN *replica_get_root(const Replica *r);
|
|
|
74ca47 |
const char *replica_get_name(const Replica *r);
|
|
|
74ca47 |
+uint64_t replica_get_locking_conn(const Replica *r);
|
|
|
74ca47 |
ReplicaId replica_get_rid (const Replica *r);
|
|
|
74ca47 |
void replica_set_rid (Replica *r, ReplicaId rid);
|
|
|
74ca47 |
PRBool replica_is_initialized (const Replica *r);
|
|
|
74ca47 |
diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c
|
|
|
74ca47 |
index ebcc230..9ef06af 100644
|
|
|
74ca47 |
--- a/ldap/servers/plugins/replication/repl5_plugins.c
|
|
|
74ca47 |
+++ b/ldap/servers/plugins/replication/repl5_plugins.c
|
|
|
74ca47 |
@@ -1224,7 +1224,12 @@ common_return:
|
|
|
74ca47 |
opcsn = operation_get_csn(op);
|
|
|
74ca47 |
prim_csn = get_thread_primary_csn();
|
|
|
74ca47 |
if (csn_is_equal(opcsn, prim_csn)) {
|
|
|
74ca47 |
- set_thread_primary_csn(NULL);
|
|
|
74ca47 |
+ if (return_value == 0) {
|
|
|
74ca47 |
+ /* the primary csn was succesfully committed
|
|
|
74ca47 |
+ * unset it in the thread local data
|
|
|
74ca47 |
+ */
|
|
|
74ca47 |
+ set_thread_primary_csn(NULL);
|
|
|
74ca47 |
+ }
|
|
|
74ca47 |
}
|
|
|
74ca47 |
if (repl_obj) {
|
|
|
74ca47 |
object_release (repl_obj);
|
|
|
74ca47 |
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
74ca47 |
index a106f8b..1bdc138 100644
|
|
|
74ca47 |
--- a/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
74ca47 |
+++ b/ldap/servers/plugins/replication/repl5_replica.c
|
|
|
74ca47 |
@@ -64,6 +64,7 @@ struct replica {
|
|
|
74ca47 |
PRBool state_update_inprogress; /* replica state is being updated */
|
|
|
74ca47 |
PRLock *agmt_lock; /* protects agreement creation, start and stop */
|
|
|
74ca47 |
char *locking_purl; /* supplier who has exclusive access */
|
|
|
74ca47 |
+ uint64_t locking_conn; /* The supplier's connection id */
|
|
|
74ca47 |
Slapi_Counter *protocol_timeout;/* protocol shutdown timeout */
|
|
|
74ca47 |
Slapi_Counter *backoff_min; /* backoff retry minimum */
|
|
|
74ca47 |
Slapi_Counter *backoff_max; /* backoff retry maximum */
|
|
|
74ca47 |
@@ -602,19 +603,32 @@ replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, int opi
|
|
|
74ca47 |
slapi_sdn_get_dn(r->repl_root),
|
|
|
74ca47 |
r->locking_purl ? r->locking_purl : "unknown");
|
|
|
74ca47 |
rval = PR_FALSE;
|
|
|
74ca47 |
+ if (!(r->repl_state_flags & REPLICA_TOTAL_IN_PROGRESS)) {
|
|
|
74ca47 |
+ /* inc update */
|
|
|
74ca47 |
+ if (r->locking_purl && r->locking_conn == connid) {
|
|
|
74ca47 |
+ /* This is the same supplier connection, reset the replica
|
|
|
74ca47 |
+ * purl, and return success */
|
|
|
74ca47 |
+ slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name,
|
|
|
74ca47 |
+ "replica_get_exclusive_access - "
|
|
|
74ca47 |
+ "This is a second acquire attempt from the same replica connection "
|
|
|
74ca47 |
+ " - return success instead of busy\n");
|
|
|
74ca47 |
+ slapi_ch_free_string(&r->locking_purl);
|
|
|
74ca47 |
+ r->locking_purl = slapi_ch_strdup(locking_purl);
|
|
|
74ca47 |
+ rval = PR_TRUE;
|
|
|
74ca47 |
+ goto done;
|
|
|
74ca47 |
+ }
|
|
|
74ca47 |
+ if (replica_get_release_timeout(r)) {
|
|
|
74ca47 |
+ /*
|
|
|
74ca47 |
+ * Abort the current session so other replicas can acquire
|
|
|
74ca47 |
+ * this server.
|
|
|
74ca47 |
+ */
|
|
|
74ca47 |
+ r->abort_session = ABORT_SESSION;
|
|
|
74ca47 |
+ }
|
|
|
74ca47 |
+ }
|
|
|
74ca47 |
if (current_purl)
|
|
|
74ca47 |
{
|
|
|
74ca47 |
*current_purl = slapi_ch_strdup(r->locking_purl);
|
|
|
74ca47 |
}
|
|
|
74ca47 |
- if (!(r->repl_state_flags & REPLICA_TOTAL_IN_PROGRESS) &&
|
|
|
74ca47 |
- replica_get_release_timeout(r))
|
|
|
74ca47 |
- {
|
|
|
74ca47 |
- /*
|
|
|
74ca47 |
- * We are not doing a total update, so abort the current session
|
|
|
74ca47 |
- * so other replicas can acquire this server.
|
|
|
74ca47 |
- */
|
|
|
74ca47 |
- r->abort_session = ABORT_SESSION;
|
|
|
74ca47 |
- }
|
|
|
74ca47 |
}
|
|
|
74ca47 |
else
|
|
|
74ca47 |
{
|
|
|
74ca47 |
@@ -642,7 +656,9 @@ replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, int opi
|
|
|
74ca47 |
}
|
|
|
74ca47 |
slapi_ch_free_string(&r->locking_purl);
|
|
|
74ca47 |
r->locking_purl = slapi_ch_strdup(locking_purl);
|
|
|
74ca47 |
+ r->locking_conn = connid;
|
|
|
74ca47 |
}
|
|
|
74ca47 |
+done:
|
|
|
74ca47 |
replica_unlock(r->repl_lock);
|
|
|
74ca47 |
return rval;
|
|
|
74ca47 |
}
|
|
|
74ca47 |
@@ -720,6 +736,18 @@ replica_get_name(const Replica *r) /* ONREPL - should we return copy instead? */
|
|
|
74ca47 |
return(r->repl_name);
|
|
|
74ca47 |
}
|
|
|
74ca47 |
|
|
|
74ca47 |
+/*
|
|
|
74ca47 |
+ * Returns locking_conn of this replica
|
|
|
74ca47 |
+ */
|
|
|
74ca47 |
+uint64_t
|
|
|
74ca47 |
+replica_get_locking_conn(const Replica *r)
|
|
|
74ca47 |
+{
|
|
|
74ca47 |
+ uint64_t connid;
|
|
|
74ca47 |
+ replica_lock(r->repl_lock);
|
|
|
74ca47 |
+ connid = r->locking_conn;
|
|
|
74ca47 |
+ replica_unlock(r->repl_lock);
|
|
|
74ca47 |
+ return connid;
|
|
|
74ca47 |
+}
|
|
|
74ca47 |
/*
|
|
|
74ca47 |
* Returns replicaid of this replica
|
|
|
74ca47 |
*/
|
|
|
74ca47 |
@@ -2251,6 +2279,9 @@ _replica_init_from_config (Replica *r, Slapi_Entry *e, char *errortext)
|
|
|
74ca47 |
}
|
|
|
74ca47 |
|
|
|
74ca47 |
r->tombstone_reap_stop = r->tombstone_reap_active = PR_FALSE;
|
|
|
74ca47 |
+
|
|
|
74ca47 |
+ /* No supplier holding the replica */
|
|
|
74ca47 |
+ r->locking_conn = ULONG_MAX;
|
|
|
74ca47 |
|
|
|
74ca47 |
return (_replica_check_validity (r));
|
|
|
74ca47 |
}
|
|
|
74ca47 |
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
74ca47 |
index 412caec..a39d918 100644
|
|
|
74ca47 |
--- a/ldap/servers/plugins/replication/repl_extop.c
|
|
|
74ca47 |
+++ b/ldap/servers/plugins/replication/repl_extop.c
|
|
|
74ca47 |
@@ -1138,9 +1138,45 @@ send_response:
|
|
|
74ca47 |
*/
|
|
|
74ca47 |
if (NULL != connext && NULL != connext->replica_acquired)
|
|
|
74ca47 |
{
|
|
|
74ca47 |
- Object *r_obj = (Object*)connext->replica_acquired;
|
|
|
74ca47 |
- replica_relinquish_exclusive_access((Replica*)object_get_data (r_obj),
|
|
|
74ca47 |
- connid, opid);
|
|
|
74ca47 |
+ Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired);
|
|
|
74ca47 |
+ uint64_t r_locking_conn;
|
|
|
74ca47 |
+
|
|
|
74ca47 |
+ /* At this point the supplier runs a Replica Agreement for
|
|
|
74ca47 |
+ * the specific replica connext->replica_acquired.
|
|
|
74ca47 |
+ * The RA does not know it holds the replica (because it is
|
|
|
74ca47 |
+ * sending this request).
|
|
|
74ca47 |
+ * The situation is confused
|
|
|
74ca47 |
+ */
|
|
|
74ca47 |
+ slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "multimaster_extop_StartNSDS50ReplicationRequest - "
|
|
|
74ca47 |
+ "already acquired replica: replica not ready (%d) (replica=%s)\n", response, replica_get_name(r) ? replica_get_name(r) : "no name");
|
|
|
74ca47 |
+
|
|
|
74ca47 |
+ /*
|
|
|
74ca47 |
+ * On consumer side, we release the exclusive access at the
|
|
|
74ca47 |
+ * condition this is this RA that holds the replica
|
|
|
74ca47 |
+ */
|
|
|
74ca47 |
+ if (r) {
|
|
|
74ca47 |
+
|
|
|
74ca47 |
+ r_locking_conn = replica_get_locking_conn(r);
|
|
|
74ca47 |
+ slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "multimaster_extop_StartNSDS50ReplicationRequest - "
|
|
|
74ca47 |
+ "already acquired replica: locking_conn=%d, current connid=%d\n", (int) r_locking_conn, (int) connid);
|
|
|
74ca47 |
+
|
|
|
74ca47 |
+ if ((r_locking_conn != ULONG_MAX) && (r_locking_conn == connid)) {
|
|
|
74ca47 |
+ replica_relinquish_exclusive_access(r, connid, opid);
|
|
|
74ca47 |
+ object_release((Object*) connext->replica_acquired);
|
|
|
74ca47 |
+ connext->replica_acquired = NULL;
|
|
|
74ca47 |
+ }
|
|
|
74ca47 |
+ }
|
|
|
74ca47 |
+ /*
|
|
|
74ca47 |
+ * On consumer side we should not keep a incoming connection
|
|
|
74ca47 |
+ * with replica_acquired set although the supplier is not aware of
|
|
|
74ca47 |
+ *
|
|
|
74ca47 |
+ * On the supplier, we need to close the connection so
|
|
|
74ca47 |
+ * that the RA will restart a new session in a clear state
|
|
|
74ca47 |
+ */
|
|
|
74ca47 |
+ slapi_log_err(SLAPI_LOG_REPL, repl_plugin_name, "multimaster_extop_StartNSDS50ReplicationRequest - "
|
|
|
74ca47 |
+ "already acquired replica: disconnect conn=%d\n", connid);
|
|
|
74ca47 |
+ slapi_disconnect_server(conn);
|
|
|
74ca47 |
+
|
|
|
74ca47 |
}
|
|
|
74ca47 |
/* Remove any flags that would indicate repl session in progress */
|
|
|
74ca47 |
if (NULL != connext)
|
|
|
74ca47 |
--
|
|
|
74ca47 |
2.9.4
|
|
|
74ca47 |
|