Tree - rpms/389-ds-base - CentOS Git server

amoralej / rpms / 389-ds-base

Forked from rpms/389-ds-base 5 years ago

Source
Stats

The permissions on this repository are being updated. This may take a while. During this time, you or some of the project's contributors may not be able to push to this repository.

Blame SOURCES/0083-Ticket-48184-clean-up-and-delete-connections-at-shut.patch

Blob History Raw

		b663b9	`From 5a5d3dffd0b36edb543fd31fa53d7128dd5161c2 Mon Sep 17 00:00:00 2001`
		b663b9	`From: Thierry Bordaz <tbordaz@redhat.com>`
		b663b9	`Date: Fri, 18 May 2018 10:13:46 +0200`
		b663b9	`Subject: [PATCH] Ticket 48184 - clean up and delete connections at shutdown`
		b663b9	`(2nd try)`
		b663b9
		b663b9	`Bug description:`
		b663b9	`During shutdown we would not close connections.`
		b663b9	`In the past this may have just been an annoyance, but now with the way`
		b663b9	`nunc-stans works, io events can still trigger on open xeisting connectinos`
		b663b9	`during shutdown.`
		b663b9
		b663b9	`Because of NS dynamic it can happen that several jobs wants to work on the`
		b663b9	`same connection. In such case (a job is already set in c_job) we delay the`
		b663b9	`new job that will retry.`
		b663b9	`In addition:`
		b663b9	`- some call needed c_mutex`
		b663b9	`- test uninitialized nunc-stans in case of shutdown while startup is not completed`
		b663b9
		b663b9	`Fix Description: Close connections during shutdown rather than`
		b663b9	`leaving them alive.`
		b663b9
		b663b9	`https://pagure.io/389-ds-base/issue/48184`
		b663b9
		b663b9	`Reviewed by:`
		b663b9	`Original was Ludwig and Viktor`
		b663b9	`Second fix reviewed by Mark`
		b663b9
		b663b9	`Platforms tested: F26`
		b663b9
		b663b9	`Flag Day: no`
		b663b9
		b663b9	`Doc impact: no`
		b663b9
		b663b9	`(cherry picked from commit e562157ca3e97867d902996cc18fb04f90dc10a8)`
		b663b9	`---`
		b663b9	`ldap/servers/slapd/connection.c \| 2 +`
		b663b9	`ldap/servers/slapd/conntable.c \| 13 ++++`
		b663b9	`ldap/servers/slapd/daemon.c \| 131 ++++++++++++++++++++++++++++------------`
		b663b9	`ldap/servers/slapd/fe.h \| 1 +`
		b663b9	`ldap/servers/slapd/slap.h \| 1 +`
		b663b9	`5 files changed, 108 insertions(+), 40 deletions(-)`
		b663b9
		b663b9	`diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c`
		b663b9	`index b5030f0cb..76e83112b 100644`
		b663b9	`--- a/ldap/servers/slapd/connection.c`
		b663b9	`+++ b/ldap/servers/slapd/connection.c`
		b663b9	`@@ -1716,7 +1716,9 @@ connection_threadmain()`
		b663b9	`if ((tag != LDAP_REQ_UNBIND) && !thread_turbo_flag && !replication_connection) {`
		b663b9	`if (!more_data) {`
		b663b9	`conn->c_flags &= ~CONN_FLAG_MAX_THREADS;`
		b663b9	`+ PR_EnterMonitor(conn->c_mutex);`
		b663b9	`connection_make_readable_nolock(conn);`
		b663b9	`+ PR_ExitMonitor(conn->c_mutex);`
		b663b9	`/* once the connection is readable, another thread may access conn,`
		b663b9	`* so need locking from here on */`
		b663b9	`signal_listner();`
		b663b9	`diff --git a/ldap/servers/slapd/conntable.c b/ldap/servers/slapd/conntable.c`
		b663b9	`index 7c57b47cd..f2f763dfa 100644`
		b663b9	`--- a/ldap/servers/slapd/conntable.c`
		b663b9	`+++ b/ldap/servers/slapd/conntable.c`
		b663b9	`@@ -91,6 +91,19 @@ connection_table_abandon_all_operations(Connection_Table *ct)`
		b663b9	`}`
		b663b9	`}`
		b663b9
		b663b9	`+void`
		b663b9	`+connection_table_disconnect_all(Connection_Table *ct)`
		b663b9	`+{`
		b663b9	`+ for (size_t i = 0; i < ct->size; i++) {`
		b663b9	`+ if (ct->c[i].c_mutex) {`
		b663b9	`+ Connection *c = &(ct->c[i]);`
		b663b9	`+ PR_EnterMonitor(c->c_mutex);`
		b663b9	`+ disconnect_server_nomutex(c, c->c_connid, -1, SLAPD_DISCONNECT_ABORT, ECANCELED);`
		b663b9	`+ PR_ExitMonitor(c->c_mutex);`
		b663b9	`+ }`
		b663b9	`+ }`
		b663b9	`+}`
		b663b9	`+`
		b663b9	`/* Given a file descriptor for a socket, this function will return`
		b663b9	`* a slot in the connection table to use.`
		b663b9	`*`
		b663b9	`diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c`
		b663b9	`index fcc461a90..50e67474e 100644`
		b663b9	`--- a/ldap/servers/slapd/daemon.c`
		b663b9	`+++ b/ldap/servers/slapd/daemon.c`
		b663b9	`@@ -1087,12 +1087,18 @@ slapd_daemon(daemon_ports_t ports, ns_thrpool_t tp)`
		b663b9	`/* we have exited from ns_thrpool_wait. This means we are shutting down! */`
		b663b9	`/* Please see https://firstyear.fedorapeople.org/nunc-stans/md_docs_job-safety.html */`
		b663b9	`/* tldr is shutdown needs to run first to allow job_done on an ARMED job */`
		b663b9	`- for (size_t i = 0; i < listeners; i++) {`
		b663b9	`- PRStatus shutdown_status = ns_job_done(listener_idxs[i].ns_job);`
		b663b9	`- if (shutdown_status != PR_SUCCESS) {`
		b663b9	`- slapi_log_err(SLAPI_LOG_CRIT, "ns_set_shutdown", "Failed to shutdown listener idx %" PRIu64 " !\n", i);`
		b663b9	`+ for (uint64_t i = 0; i < listeners; i++) {`
		b663b9	`+ PRStatus shutdown_status;`
		b663b9	`+`
		b663b9	`+ if (listener_idxs[i].ns_job) {`
		b663b9	`+ shutdown_status = ns_job_done(listener_idxs[i].ns_job);`
		b663b9	`+ if (shutdown_status != PR_SUCCESS) {`
		b663b9	`+ slapi_log_err(SLAPI_LOG_CRIT, "ns_set_shutdown", "Failed to shutdown listener idx %" PRIu64 " !\n", i);`
		b663b9	`+ }`
		b663b9	`+ PR_ASSERT(shutdown_status == PR_SUCCESS);`
		b663b9	`+ } else {`
		b663b9	`+ slapi_log_err(SLAPI_LOG_CRIT, "slapd_daemon", "Listeners uninitialized. Possibly the server was shutdown while starting\n");`
		b663b9	`}`
		b663b9	`- PR_ASSERT(shutdown_status == PR_SUCCESS);`
		b663b9	`listener_idxs[i].ns_job = NULL;`
		b663b9	`}`
		b663b9	`} else {`
		b663b9	`@@ -1176,6 +1182,32 @@ slapd_daemon(daemon_ports_t ports, ns_thrpool_t tp)`
		b663b9	`housekeeping_stop(); /* Run this after op_thread_cleanup() logged sth */`
		b663b9	`disk_monitoring_stop();`
		b663b9
		b663b9	`+ /*`
		b663b9	`+ * Now that they are abandonded, we need to mark them as done.`
		b663b9	`+ * In NS while it's safe to allow excess jobs to be cleaned by`
		b663b9	`+ * by the walk and ns_job_done of remaining queued events, the`
		b663b9	`+ * issue is that if we allow something to live past this point`
		b663b9	`+ * the CT is freed from underneath, and bad things happen (tm).`
		b663b9	`+ *`
		b663b9	`+ * NOTE: We do this after we stop psearch, because there could`
		b663b9	`+ * be a race between flagging the psearch done, and users still`
		b663b9	`+ * try to send on the connection. Similar with op_threads.`
		b663b9	`+ */`
		b663b9	`+ connection_table_disconnect_all(the_connection_table);`
		b663b9	`+`
		b663b9	`+ /*`
		b663b9	`+ * WARNING: Normally we should close the tp in main`
		b663b9	`+ * but because of issues in the current connection design`
		b663b9	`+ * we need to close it here to guarantee events won't fire!`
		b663b9	`+ *`
		b663b9	`+ * All the connection close jobs "should" complete before`
		b663b9	`+ * shutdown at least.`
		b663b9	`+ */`
		b663b9	`+ if (enable_nunc_stans) {`
		b663b9	`+ ns_thrpool_shutdown(tp);`
		b663b9	`+ ns_thrpool_wait(tp);`
		b663b9	`+ }`
		b663b9	`+`
		b663b9	`threads = g_get_active_threadcnt();`
		b663b9	`if (threads > 0) {`
		b663b9	`slapi_log_err(SLAPI_LOG_INFO, "slapd_daemon",`
		b663b9	`@@ -1628,25 +1660,18 @@ ns_handle_closure(struct ns_job_t *job)`
		b663b9	`Connection c = (Connection )ns_job_get_data(job);`
		b663b9	`int do_yield = 0;`
		b663b9
		b663b9	`-/* this function must be called from the event loop thread */`
		b663b9	`-#ifdef DEBUG`
		b663b9	`- PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job)));`
		b663b9	`-#else`
		b663b9	`- /* This doesn't actually confirm it's in the event loop thread, but it's a start */`
		b663b9	`- if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) {`
		b663b9	`- slapi_log_err(SLAPI_LOG_ERR, "ns_handle_closure", "Attempt to close outside of event loop thread %" PRIu64 " for fd=%d\n",`
		b663b9	`- c->c_connid, c->c_sd);`
		b663b9	`- return;`
		b663b9	`- }`
		b663b9	`-#endif`
		b663b9	`-`
		b663b9	`PR_EnterMonitor(c->c_mutex);`
		b663b9	`+ /* Assert we really have the right job state. */`
		b663b9	`+ PR_ASSERT(job == c->c_job);`
		b663b9
		b663b9	`connection_release_nolock_ext(c, 1); /* release ref acquired for event framework */`
		b663b9	`PR_ASSERT(c->c_ns_close_jobs == 1); /* should be exactly 1 active close job - this one */`
		b663b9	`c->c_ns_close_jobs--; /* this job is processing closure */`
		b663b9	`+ /* Because handle closure will add a new job, we need to detach our current one. */`
		b663b9	`+ c->c_job = NULL;`
		b663b9	`do_yield = ns_handle_closure_nomutex(c);`
		b663b9	`PR_ExitMonitor(c->c_mutex);`
		b663b9	`+ /* Remove this task now. */`
		b663b9	`ns_job_done(job);`
		b663b9	`if (do_yield) {`
		b663b9	`/* closure not done - another reference still outstanding */`
		b663b9	`@@ -1659,14 +1684,25 @@ ns_handle_closure(struct ns_job_t *job)`
		b663b9	`/**`
		b663b9	`* Schedule more I/O for this connection, or make sure that it`
		b663b9	`* is closed in the event loop.`
		b663b9	`+ * caller must hold c_mutex`
		b663b9	`+ * It returns`
		b663b9	`+ * 0 on success`
		b663b9	`+ * 1 on need to retry`
		b663b9	`*/`
		b663b9	`-void`
		b663b9	`-ns_connection_post_io_or_closing(Connection *conn)`
		b663b9	`+static int`
		b663b9	`+ns_connection_post_io_or_closing_try(Connection *conn)`
		b663b9	`{`
		b663b9	`struct timeval tv;`
		b663b9
		b663b9	`if (!enable_nunc_stans) {`
		b663b9	`- return;`
		b663b9	`+ return 0;`
		b663b9	`+ }`
		b663b9	`+`
		b663b9	`+ /*`
		b663b9	`+ * Cancel any existing ns jobs we have registered.`
		b663b9	`+ */`
		b663b9	`+ if (conn->c_job != NULL) {`
		b663b9	`+ return 1;`
		b663b9	`}`
		b663b9
		b663b9	`if (CONN_NEEDS_CLOSING(conn)) {`
		b663b9	`@@ -1676,15 +1712,12 @@ ns_connection_post_io_or_closing(Connection *conn)`
		b663b9	`slapi_log_err(SLAPI_LOG_CONNS, "ns_connection_post_io_or_closing", "Already a close "`
		b663b9	`"job in progress on conn %" PRIu64 " for fd=%d\n",`
		b663b9	`conn->c_connid, conn->c_sd);`
		b663b9	`- return;`
		b663b9	`+ return 0;`
		b663b9	`} else {`
		b663b9	`- /* just make sure we schedule the event to be closed in a timely manner */`
		b663b9	`- tv.tv_sec = 0;`
		b663b9	`- tv.tv_usec = slapd_wakeup_timer * 1000;`
		b663b9	`conn->c_ns_close_jobs++; /* now 1 active closure job */`
		b663b9	`connection_acquire_nolock_ext(conn, 1 /* allow acquire even when closing /); / event framework now has a reference */`
		b663b9	`- ns_result_t job_result = ns_add_timeout_job(conn->c_tp, &tv, NS_JOB_TIMER,`
		b663b9	`- ns_handle_closure, conn, NULL);`
		b663b9	`+ /* Close the job asynchronously. Why? */`
		b663b9	`+ ns_result_t job_result = ns_add_job(conn->c_tp, NS_JOB_TIMER, ns_handle_closure, conn, &(conn->c_job));`
		b663b9	`if (job_result != NS_SUCCESS) {`
		b663b9	`if (job_result == NS_SHUTDOWN) {`
		b663b9	`slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post closure job "`
		b663b9	`@@ -1723,12 +1756,12 @@ ns_connection_post_io_or_closing(Connection *conn)`
		b663b9	`* The error occurs when we get a connection in a closing state.`
		b663b9	`* For now we return, but there is probably a better way to handle the error case.`
		b663b9	`*/`
		b663b9	`- return;`
		b663b9	`+ return 0;`
		b663b9	`}`
		b663b9	`#endif`
		b663b9	`ns_result_t job_result = ns_add_io_timeout_job(conn->c_tp, conn->c_prfd, &tv,`
		b663b9	`NS_JOB_READ \| NS_JOB_PRESERVE_FD,`
		b663b9	`- ns_handle_pr_read_ready, conn, NULL);`
		b663b9	`+ ns_handle_pr_read_ready, conn, &(conn->c_job));`
		b663b9	`if (job_result != NS_SUCCESS) {`
		b663b9	`if (job_result == NS_SHUTDOWN) {`
		b663b9	`slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post I/O job for "`
		b663b9	`@@ -1745,6 +1778,28 @@ ns_connection_post_io_or_closing(Connection *conn)`
		b663b9	`conn->c_connid, conn->c_sd);`
		b663b9	`}`
		b663b9	`}`
		b663b9	`+ return 0;`
		b663b9	`+}`
		b663b9	`+void`
		b663b9	`+ns_connection_post_io_or_closing(Connection *conn)`
		b663b9	`+{`
		b663b9	`+ while (ns_connection_post_io_or_closing_try(conn)) {`
		b663b9	`+ /* we should retry later */`
		b663b9	`+`
		b663b9	`+ /* We are not suppose to work immediately on the connection that is taken by`
		b663b9	`+ * another job`
		b663b9	`+ * release the lock and give some time`
		b663b9	`+ */`
		b663b9	`+`
		b663b9	`+ if (CONN_NEEDS_CLOSING(conn) && conn->c_ns_close_jobs) {`
		b663b9	`+ return;`
		b663b9	`+ } else {`
		b663b9	`+ PR_ExitMonitor(conn->c_mutex);`
		b663b9	`+ DS_Sleep(PR_MillisecondsToInterval(100));`
		b663b9	`+`
		b663b9	`+ PR_EnterMonitor(conn->c_mutex);`
		b663b9	`+ }`
		b663b9	`+ }`
		b663b9	`}`
		b663b9
		b663b9	`/* This function must be called without the thread flag, in the`
		b663b9	`@@ -1757,19 +1812,12 @@ ns_handle_pr_read_ready(struct ns_job_t *job)`
		b663b9	`int maxthreads = config_get_maxthreadsperconn();`
		b663b9	`Connection c = (Connection )ns_job_get_data(job);`
		b663b9
		b663b9	`-/* this function must be called from the event loop thread */`
		b663b9	`-#ifdef DEBUG`
		b663b9	`- PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job)));`
		b663b9	`-#else`
		b663b9	`- /* This doesn't actually confirm it's in the event loop thread, but it's a start */`
		b663b9	`- if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) {`
		b663b9	`- slapi_log_err(SLAPI_LOG_ERR, "ns_handle_pr_read_ready", "Attempt to handle read ready outside of event loop thread %" PRIu64 " for fd=%d\n",`
		b663b9	`- c->c_connid, c->c_sd);`
		b663b9	`- return;`
		b663b9	`- }`
		b663b9	`-#endif`
		b663b9	`-`
		b663b9	`PR_EnterMonitor(c->c_mutex);`
		b663b9	`+ /* Assert we really have the right job state. */`
		b663b9	`+ PR_ASSERT(job == c->c_job);`
		b663b9	`+`
		b663b9	`+ /* On all code paths we remove the job, so set it null now */`
		b663b9	`+ c->c_job = NULL;`
		b663b9
		b663b9	`slapi_log_err(SLAPI_LOG_CONNS, "ns_handle_pr_read_ready", "activity on conn %" PRIu64 " for fd=%d\n",`
		b663b9	`c->c_connid, c->c_sd);`
		b663b9	`@@ -1829,6 +1877,7 @@ ns_handle_pr_read_ready(struct ns_job_t *job)`
		b663b9	`slapi_log_err(SLAPI_LOG_CONNS, "ns_handle_pr_read_ready", "queued conn %" PRIu64 " for fd=%d\n",`
		b663b9	`c->c_connid, c->c_sd);`
		b663b9	`}`
		b663b9	`+ /* Since we call done on the job, we need to remove it here. */`
		b663b9	`PR_ExitMonitor(c->c_mutex);`
		b663b9	`ns_job_done(job);`
		b663b9	`return;`
		b663b9	`@@ -2451,7 +2500,9 @@ ns_handle_new_connection(struct ns_job_t *job)`
		b663b9	`* that poll() was avoided, even at the expense of putting this new fd back`
		b663b9	`* in nunc-stans to poll for read ready.`
		b663b9	`*/`
		b663b9	`+ PR_EnterMonitor(c->c_mutex);`
		b663b9	`ns_connection_post_io_or_closing(c);`
		b663b9	`+ PR_ExitMonitor(c->c_mutex);`
		b663b9	`return;`
		b663b9	`}`
		b663b9
		b663b9	`diff --git a/ldap/servers/slapd/fe.h b/ldap/servers/slapd/fe.h`
		b663b9	`index 4d25a9fb8..f47bb6145 100644`
		b663b9	`--- a/ldap/servers/slapd/fe.h`
		b663b9	`+++ b/ldap/servers/slapd/fe.h`
		b663b9	`@@ -100,6 +100,7 @@ extern Connection_Table the_connection_table; / JCM - Exported from globals.c`
		b663b9	`Connection_Table *connection_table_new(int table_size);`
		b663b9	`void connection_table_free(Connection_Table *ct);`
		b663b9	`void connection_table_abandon_all_operations(Connection_Table *ct);`
		b663b9	`+void connection_table_disconnect_all(Connection_Table *ct);`
		b663b9	`Connection connection_table_get_connection(Connection_Table ct, int sd);`
		b663b9	`int connection_table_move_connection_out_of_active_list(Connection_Table ct, Connection c);`
		b663b9	`void connection_table_move_connection_on_to_active_list(Connection_Table ct, Connection c);`
		b663b9	`diff --git a/ldap/servers/slapd/slap.h b/ldap/servers/slapd/slap.h`
		b663b9	`index 03355f5fe..de4ac35c0 100644`
		b663b9	`--- a/ldap/servers/slapd/slap.h`
		b663b9	`+++ b/ldap/servers/slapd/slap.h`
		b663b9	`@@ -1650,6 +1650,7 @@ typedef struct conn`
		b663b9	`void c_io_layer_cb_data; / callback data */`
		b663b9	`struct connection_table c_ct; / connection table that this connection belongs to */`
		b663b9	`ns_thrpool_t c_tp; / thread pool for this connection */`
		b663b9	`+ struct ns_job_t c_job; / If it exists, the current ns_job_t */`
		b663b9	`int c_ns_close_jobs; /* number of current close jobs */`
		b663b9	`char c_ipaddr; / ip address str - used by monitor */`
		b663b9	`} Connection;`
		b663b9	`--`
		b663b9	`2.13.6`
		b663b9

amoralej / rpms / 389-ds-base

Source Code

Blame SOURCES/0083-Ticket-48184-clean-up-and-delete-connections-at-shut.patch