From 5b6deac35adbae20d0821a4530d30f0908ad7478 Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Mon, 31 Mar 2014 15:17:59 -0400
Subject: [PATCH] Ticket 47759 - Crash in replication when server is under
write load
Bug Description: When the server is under alot of load, a race condition allows
a replication connection LDAP struct to be freed(unbind) while
it is being used by another thread. This leads to a crash.
Fix Description: Extend the connection lock to also cover ldap client interaction
(e.g. conn->ld struct).
https://fedorahosted.org/389/ticket/47759
Reviewed by: nhosoi & rmeggins(Thanks!!)
(cherry picked from commit 9940ca29ca258891c52640a23adc2851afe59d0e)
(cherry picked from commit 0e576c85c34826c4d63d9578db55f8179b4a1a60)
(cherry picked from commit 2a80b7152823ca16628c2da48614166b8d2104a4)
---
.../servers/plugins/replication/repl5_connection.c | 89 ++++++++++++----------
ldap/servers/slapd/ldaputil.c | 39 +++++-----
2 files changed, 69 insertions(+), 59 deletions(-)
diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c
index 668abda..17d1d9c 100644
--- a/ldap/servers/plugins/replication/repl5_connection.c
+++ b/ldap/servers/plugins/replication/repl5_connection.c
@@ -138,6 +138,7 @@ static void repl5_debug_timeout_callback(time_t when, void *arg);
/* Forward declarations */
static void close_connection_internal(Repl_Connection *conn);
+static void conn_delete_internal(Repl_Connection *conn);
/*
* Create a new connection object. Returns a pointer to the object, or
@@ -182,11 +183,22 @@ conn_new(Repl_Agmt *agmt)
rpc->plain = NULL;
return rpc;
loser:
- conn_delete(rpc);
+ conn_delete_internal(rpc);
slapi_ch_free((void**)&rpc);
return NULL;
}
+static PRBool
+conn_connected_locked(Repl_Connection *conn, int locked)
+{
+ PRBool return_value;
+
+ if(!locked) PR_Lock(conn->lock);
+ return_value = STATE_CONNECTED == conn->state;
+ if(!locked) PR_Unlock(conn->lock);
+
+ return return_value;
+}
/*
* Return PR_TRUE if the connection is in the connected state
@@ -194,14 +206,9 @@ loser:
static PRBool
conn_connected(Repl_Connection *conn)
{
- PRBool return_value;
- PR_Lock(conn->lock);
- return_value = STATE_CONNECTED == conn->state;
- PR_Unlock(conn->lock);
- return return_value;
+ return conn_connected_locked(conn, 1);
}
-
/*
* Destroy a connection object.
*/
@@ -243,7 +250,6 @@ conn_delete(Repl_Connection *conn)
if (slapi_eq_cancel(conn->linger_event) == 1)
{
/* Event was found and cancelled. Destroy the connection object. */
- PR_Unlock(conn->lock);
destroy_it = PR_TRUE;
}
else
@@ -254,16 +260,15 @@ conn_delete(Repl_Connection *conn)
* off, so arrange for the event to destroy the object .
*/
conn->delete_after_linger = PR_TRUE;
- PR_Unlock(conn->lock);
}
}
if (destroy_it)
{
conn_delete_internal(conn);
}
+ PR_Unlock(conn->lock);
}
-
/*
* Return the last operation type processed by the connection
* object, and the LDAP error encountered.
@@ -327,17 +332,18 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
while (!slapi_is_shutting_down())
{
/* we have to make sure the update sending thread does not
- attempt to call conn_disconnect while we are reading
+ attempt to close connection while we are reading
results - so lock the conn while we get the results */
PR_Lock(conn->lock);
+
if ((STATE_CONNECTED != conn->state) || !conn->ld) {
rc = -1;
return_value = CONN_NOT_CONNECTED;
PR_Unlock(conn->lock);
break;
}
-
rc = ldap_result(conn->ld, send_msgid, 1, &local_timeout, &res);
+
PR_Unlock(conn->lock);
if (0 != rc)
@@ -661,8 +667,10 @@ perform_operation(Repl_Connection *conn, int optype, const char *dn,
server_controls[1] = update_control;
server_controls[2] = NULL;
- /* lock the conn to prevent the result reader thread
- from closing the connection out from under us */
+ /*
+ * Lock the conn to prevent the result reader thread
+ * from closing the connection out from under us.
+ */
PR_Lock(conn->lock);
if (STATE_CONNECTED == conn->state)
{
@@ -804,7 +812,6 @@ conn_send_rename(Repl_Connection *conn, const char *dn,
NULL /* extop OID */, NULL /* extop payload */, message_id);
}
-
/*
* Send an LDAP extended operation.
*/
@@ -818,7 +825,6 @@ conn_send_extended_operation(Repl_Connection *conn, const char *extop_oid,
update_control, extop_oid, payload, message_id);
}
-
/*
* Synchronously read an entry and return a specific attribute's values.
* Returns CONN_OPERATION_SUCCESS if successful. Returns
@@ -838,6 +844,8 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
LDAPMessage *res = NULL;
char *attrs[2];
+ PR_Lock(conn->lock);
+
PR_ASSERT(NULL != type);
if (conn_connected(conn))
{
@@ -860,7 +868,7 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
}
else if (IS_DISCONNECT_ERROR(ldap_rc))
{
- conn_disconnect(conn);
+ close_connection_internal(conn);
return_value = CONN_NOT_CONNECTED;
}
else
@@ -878,10 +886,11 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
{
return_value = CONN_NOT_CONNECTED;
}
+ PR_Unlock(conn->lock);
+
return return_value;
}
-
/*
* Return an pointer to a string describing the connection's status.
*/
@@ -892,8 +901,6 @@ conn_get_status(Repl_Connection *conn)
return conn->status;
}
-
-
/*
* Cancel any outstanding linger timer. Should be called when
* a replication session is beginning.
@@ -925,7 +932,6 @@ conn_cancel_linger(Repl_Connection *conn)
PR_Unlock(conn->lock);
}
-
/*
* Called when our linger timeout timer expires. This means
* we should check to see if perhaps the connection's become
@@ -957,7 +963,6 @@ linger_timeout(time_t event_time, void *arg)
}
}
-
/*
* Indicate that a session is ending. The linger timer starts when
* this function is called.
@@ -995,8 +1000,6 @@ conn_start_linger(Repl_Connection *conn)
PR_Unlock(conn->lock);
}
-
-
/*
* If no connection is currently active, opens a connection and binds to
* the remote server. If a connection is open (e.g. lingering) then
@@ -1015,10 +1018,14 @@ conn_connect(Repl_Connection *conn)
ConnResult return_value = CONN_OPERATION_SUCCESS;
int pw_ret = 1;
- /** Connection already open just return SUCCESS **/
- if(conn->state == STATE_CONNECTED) goto done;
-
PR_Lock(conn->lock);
+
+ /* Connection already open, just return SUCCESS */
+ if(conn->state == STATE_CONNECTED){
+ PR_Unlock(conn->lock);
+ return return_value;
+ }
+
if (conn->flag_agmt_changed) {
/* So far we cannot change Hostname and Port */
/* slapi_ch_free((void **)&conn->hostname); */
@@ -1033,7 +1040,6 @@ conn_connect(Repl_Connection *conn)
conn->port = agmt_get_port(conn->agmt); /* port could be updated */
slapi_ch_free((void **)&conn->plain);
}
- PR_Unlock(conn->lock);
creds = agmt_get_credentials(conn->agmt);
@@ -1174,6 +1180,7 @@ done:
{
close_connection_internal(conn);
}
+ PR_Unlock(conn->lock);
return return_value;
}
@@ -1209,7 +1216,6 @@ conn_disconnect(Repl_Connection *conn)
PR_Unlock(conn->lock);
}
-
/*
* Determine if the remote replica supports DS 5.0 replication.
* Return codes:
@@ -1226,6 +1232,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
ConnResult return_value;
int ldap_rc;
+ PR_Lock(conn->lock);
if (conn_connected(conn))
{
if (conn->supports_ds50_repl == -1) {
@@ -1273,7 +1280,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
if (IS_DISCONNECT_ERROR(ldap_rc))
{
conn->last_ldap_error = ldap_rc; /* specific reason */
- conn_disconnect(conn);
+ close_connection_internal(conn);
return_value = CONN_NOT_CONNECTED;
}
else
@@ -1293,10 +1300,11 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
/* Not connected */
return_value = CONN_NOT_CONNECTED;
}
+ PR_Unlock(conn->lock);
+
return return_value;
}
-
/*
* Determine if the remote replica supports DS 7.1 replication.
* Return codes:
@@ -1313,6 +1321,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
ConnResult return_value;
int ldap_rc;
+ PR_Lock(conn->lock);
if (conn_connected(conn))
{
if (conn->supports_ds71_repl == -1) {
@@ -1344,7 +1353,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
if (IS_DISCONNECT_ERROR(ldap_rc))
{
conn->last_ldap_error = ldap_rc; /* specific reason */
- conn_disconnect(conn);
+ close_connection_internal(conn);
return_value = CONN_NOT_CONNECTED;
}
else
@@ -1364,6 +1373,8 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
/* Not connected */
return_value = CONN_NOT_CONNECTED;
}
+ PR_Unlock(conn->lock);
+
return return_value;
}
@@ -1383,6 +1394,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
ConnResult return_value;
int ldap_rc;
+ PR_Lock(conn->lock);
if (conn_connected(conn))
{
if (conn->supports_ds90_repl == -1) {
@@ -1414,7 +1426,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
if (IS_DISCONNECT_ERROR(ldap_rc))
{
conn->last_ldap_error = ldap_rc; /* specific reason */
- conn_disconnect(conn);
+ close_connection_internal(conn);
return_value = CONN_NOT_CONNECTED;
}
else
@@ -1423,7 +1435,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
}
}
if (NULL != res)
- ldap_msgfree(res);
+ ldap_msgfree(res);
}
else
{
@@ -1435,6 +1447,8 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
/* Not connected */
return_value = CONN_NOT_CONNECTED;
}
+ PR_Unlock(conn->lock);
+
return return_value;
}
@@ -1452,7 +1466,6 @@ conn_replica_is_readonly(Repl_Connection *conn)
}
}
-
/*
* Return 1 if "value" is a value of attribute type "type" in entry "entry".
* Otherwise, return 0.
@@ -1501,9 +1514,6 @@ attribute_string_value_present(LDAP *ld, LDAPMessage *entry, const char *type,
return return_value;
}
-
-
-
/*
* Read the remote server's schema entry, then read the local schema entry,
* and compare the nsschemacsn attribute. If the local csn is newer, or
@@ -1533,7 +1543,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn)
return_value = CONN_OPERATION_FAILED;
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "NULL remote CSN\n");
}
- else if (!conn_connected(conn))
+ else if (!conn_connected_locked(conn, 0 /* not locked */))
{
return_value = CONN_NOT_CONNECTED;
slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
@@ -1699,6 +1709,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn)
{
csn_free(&localcsn);
}
+
return return_value;
}
diff --git a/ldap/servers/slapd/ldaputil.c b/ldap/servers/slapd/ldaputil.c
index edc8267..08601bd 100644
--- a/ldap/servers/slapd/ldaputil.c
+++ b/ldap/servers/slapd/ldaputil.c
@@ -1011,8 +1011,8 @@ slapi_ldap_bind(
than the currently unused clientctrls */
ldap_get_option(ld, LDAP_OPT_CLIENT_CONTROLS, &clientctrls);
if (clientctrls && clientctrls[0] &&
- slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) {
- secure = 2;
+ slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) {
+ secure = 2;
} else {
#if defined(USE_OPENLDAP)
/* openldap doesn't have a SSL/TLS yes/no flag - so grab the
@@ -1051,12 +1051,12 @@ slapi_ldap_bind(
slapi_log_error(SLAPI_LOG_SHELL, "slapi_ldap_bind",
"Set up conn to use client auth\n");
}
- bvcreds.bv_val = NULL; /* ignore username and passed in creds */
- bvcreds.bv_len = 0; /* for external auth */
- bindid = NULL;
+ bvcreds.bv_val = NULL; /* ignore username and passed in creds */
+ bvcreds.bv_len = 0; /* for external auth */
+ bindid = NULL;
} else { /* other type of auth */
- bvcreds.bv_val = (char *)creds;
- bvcreds.bv_len = creds ? strlen(creds) : 0;
+ bvcreds.bv_val = (char *)creds;
+ bvcreds.bv_len = creds ? strlen(creds) : 0;
}
if (secure == 2) { /* send start tls */
@@ -1084,31 +1084,29 @@ slapi_ldap_bind(
bindid, creds);
if ((rc = ldap_sasl_bind(ld, bindid, mech, &bvcreds, serverctrls,
NULL /* clientctrls */, &mymsgid))) {
- char *myhostname = NULL;
- char *copy = NULL;
+ char *hostname = NULL;
+ char *host_port = NULL;
char *ptr = NULL;
int myerrno = errno;
int gaierr = 0;
- ldap_get_option(ld, LDAP_OPT_HOST_NAME, &myhostname);
- if (myhostname) {
- ptr = strchr(myhostname, ':');
+ ldap_get_option(ld, LDAP_OPT_HOST_NAME, &host_port);
+ if (host_port) {
+ ptr = strchr(host_port, ':');
if (ptr) {
- copy = slapi_ch_strdup(myhostname);
- *(copy + (ptr - myhostname)) = '\0';
- slapi_ch_free_string(&myhostname);
- myhostname = copy;
+ hostname = slapi_ch_strdup(host_port);
+ *(hostname + (ptr - host_port)) = '\0';
}
}
-
if (0 == myerrno) {
struct addrinfo *result = NULL;
- gaierr = getaddrinfo(myhostname, NULL, NULL, &result);
+ gaierr = getaddrinfo(hostname, NULL, NULL, &result);
myerrno = errno;
if (result) {
freeaddrinfo(result);
}
}
+
slapi_log_error(SLAPI_LOG_FATAL, "slapi_ldap_bind",
"Error: could not send bind request for id "
"[%s] authentication mechanism [%s]: error %d (%s), system error %d (%s), "
@@ -1119,8 +1117,9 @@ slapi_ldap_bind(
PR_GetError(), slapd_pr_strerror(PR_GetError()),
myerrno ? myerrno : gaierr,
myerrno ? slapd_system_strerror(myerrno) : gai_strerror(gaierr),
- myhostname ? myhostname : "unknown host");
- slapi_ch_free_string(&myhostname);
+ host_port ? host_port : "unknown host");
+ slapi_ch_free_string(&hostname);
+ slapi_ch_free_string(&host_port);
goto done;
}
--
1.8.1.4