From 6b7f87a557170164518d7c3b8e408304f2a9c1f4 Mon Sep 17 00:00:00 2001
From: Thierry Bordaz <tbordaz@redhat.com>
Date: Fri, 17 May 2019 14:31:45 +0200
Subject: [PATCH] Ticket 50389 - ns-slapd craches while two threads are polling
the same connection
Bug Description:
nspr IO is not multi-threaded safe.
389-ds should not be in a situation where several threads are polling
a same connection at the same time.
The scenario is a worker send back an operation result at the same time
another worker wants to read an incoming request.
Fix Description:
The fix consist in synchonizing polling with c_pdumutex.
The thread that sends data (flush_ber) hold c_pdumutex.
The thread that reads the data does a non blocking read. It then
enforce ioblocktimeout with iteration of poll.
The reading thread must hold c_pdumutex during poll to synchronize
with the reader thread.
The reading thread must poll with a small timeout
(CONN_TURBO_TIMEOUT_INTERVAL). In order to not block
the thread that send back data, the fix reduces the delay to 0.1s.
https://pagure.io/389-ds-base/issue/50389
Reviewed by: Mark Reynolds, Matus Honek, William Brown
Platforms tested: F28
Flag Day: no
Doc impact: no
(cherry picked from commit 2886ba77f664e4734a7ddfe4146f229caca49ce4)
---
ldap/servers/slapd/connection.c | 5 ++++-
ldap/servers/slapd/daemon.c | 2 ++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c
index 188383b97..945602f20 100644
--- a/ldap/servers/slapd/connection.c
+++ b/ldap/servers/slapd/connection.c
@@ -932,7 +932,8 @@ connection_free_private_buffer(Connection *conn)
#define CONN_DONE 3
#define CONN_TIMEDOUT 4
-#define CONN_TURBO_TIMEOUT_INTERVAL 1000 /* milliseconds */
+#define CONN_TURBO_TIMEOUT_INTERVAL 100 /* milliseconds */
+#define CONN_TURBO_TIMEOUT_MAXIMUM 5 /* attempts * interval IE 2000ms with 400 * 5 */
#define CONN_TURBO_CHECK_INTERVAL 5 /* seconds */
#define CONN_TURBO_PERCENTILE 50 /* proportion of threads allowed to be in turbo mode */
#define CONN_TURBO_HYSTERESIS 0 /* avoid flip flopping in and out of turbo mode */
@@ -1207,7 +1208,9 @@ connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, int *
pr_pd.fd = (PRFileDesc *)conn->c_prfd;
pr_pd.in_flags = PR_POLL_READ;
pr_pd.out_flags = 0;
+ PR_Lock(conn->c_pdumutex);
ret = PR_Poll(&pr_pd, 1, timeout);
+ PR_Unlock(conn->c_pdumutex);
waits_done++;
/* Did we time out ? */
if (0 == ret) {
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
index c77e1f15c..4841a8a5c 100644
--- a/ldap/servers/slapd/daemon.c
+++ b/ldap/servers/slapd/daemon.c
@@ -1943,6 +1943,8 @@ ns_handle_pr_read_ready(struct ns_job_t *job)
* or something goes seriously wrong. Otherwise, return 0.
* If -1 is returned, PR_GetError() explains why.
* Revision: handle changed to void * to allow 64bit support
+ *
+ * Caller (flush_ber) must hold conn->c_pdumutex
*/
static int
slapd_poll(void *handle, int output)
--
2.17.2