From bf3f506d14cf89b9d1d1e3504524b231a6cef2b1 Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Wed, 8 May 2019 23:16:07 +0200 Subject: [PATCH 35/48] BE: Send refresh requests in batches As we extend the background refresh into larger domains, the amount of data that SSSD refreshes on the background might be larger. And refreshing all expired entries in a single request might block sssd_be for a long time, either triggering the watchdog or starving other legitimate requests. Therefore the background refresh will be done in batches of 200 entries. The first batch of every type (up to 200 users, up to 200 groups, ...) will be scheduled imediatelly and subsequent batches with a 0.5 second delay. Related: https://pagure.io/SSSD/sssd/issue/4012 Reviewed-by: Sumit Bose --- src/providers/be_refresh.c | 131 ++++++++++++++++++++++---- src/tests/cmocka/test_expire_common.c | 6 +- src/tests/sss_idmap-tests.c | 8 +- src/util/util.h | 8 ++ 4 files changed, 128 insertions(+), 25 deletions(-) diff --git a/src/providers/be_refresh.c b/src/providers/be_refresh.c index c4ff71e1f..5d86509bb 100644 --- a/src/providers/be_refresh.c +++ b/src/providers/be_refresh.c @@ -204,8 +204,21 @@ struct be_refresh_state { struct sss_domain_info *domain; enum be_refresh_type index; time_t period; + + char **refresh_values; + size_t refresh_val_size; + size_t refresh_index; + + size_t batch_size; + char **refresh_batch; }; +static errno_t be_refresh_batch_step(struct tevent_req *req, + uint32_t msec_delay); +static void be_refresh_batch_step_wakeup(struct tevent_context *ev, + struct tevent_timer *tt, + struct timeval tv, + void *pvt); static errno_t be_refresh_step(struct tevent_req *req); static void be_refresh_done(struct tevent_req *subreq); @@ -236,6 +249,13 @@ struct tevent_req *be_refresh_send(TALLOC_CTX *mem_ctx, goto immediately; } + state->batch_size = 200; + state->refresh_batch = talloc_zero_array(state, char *, state->batch_size+1); + if (state->refresh_batch == NULL) { + ret = ENOMEM; + goto immediately; + } + ret = be_refresh_step(req); if (ret == EOK) { goto immediately; @@ -261,8 +281,6 @@ immediately: static errno_t be_refresh_step(struct tevent_req *req) { struct be_refresh_state *state = NULL; - struct tevent_req *subreq = NULL; - char **values = NULL; errno_t ret; state = tevent_req_data(req, struct be_refresh_state); @@ -289,42 +307,103 @@ static errno_t be_refresh_step(struct tevent_req *req) goto done; } + talloc_zfree(state->refresh_values); ret = be_refresh_get_values(state, state->index, state->ctx->attr_name, - state->domain, state->period, &values); + state->domain, state->period, + &state->refresh_values); if (ret != EOK) { DEBUG(SSSDBG_CRIT_FAILURE, "Unable to obtain DN list [%d]: %s\n", ret, sss_strerror(ret)); goto done; } - DEBUG(SSSDBG_TRACE_FUNC, "Refreshing %s in domain %s\n", - state->cb->name, state->domain->name); + for (state->refresh_val_size = 0; + state->refresh_values[state->refresh_val_size] != NULL; + state->refresh_val_size++); + + DEBUG(SSSDBG_TRACE_FUNC, "Refreshing %zu %s in domain %s\n", + state->refresh_val_size, state->cb->name, state->domain->name); - subreq = state->cb->send_fn(state, state->ev, state->be_ctx, - state->domain, values, state->cb->pvt); - if (subreq == NULL) { - ret = ENOMEM; + ret = be_refresh_batch_step(req, 0); + if (ret == EOK) { + state->index++; + continue; + } else if (ret != EAGAIN) { goto done; } - - /* make the list disappear with subreq */ - talloc_steal(subreq, values); - - tevent_req_set_callback(subreq, be_refresh_done, req); + /* EAGAIN only, refreshing something.. */ state->index++; - ret = EAGAIN; goto done; } ret = EOK; done: - if (ret != EOK && ret != EAGAIN) { - talloc_free(values); + return ret; +} + +static errno_t be_refresh_batch_step(struct tevent_req *req, + uint32_t msec_delay) +{ + struct be_refresh_state *state = tevent_req_data(req, struct be_refresh_state); + struct timeval tv; + struct tevent_timer *timeout = NULL; + + size_t remaining; + size_t batch_size; + + memset(state->refresh_batch, 0, sizeof(char *) * state->batch_size); + + if (state->refresh_index >= state->refresh_val_size) { + DEBUG(SSSDBG_FUNC_DATA, "The batch is done\n"); + state->refresh_index = 0; + return EOK; } - return ret; + remaining = state->refresh_val_size - state->refresh_index; + batch_size = MIN(remaining, state->batch_size); + DEBUG(SSSDBG_FUNC_DATA, + "This batch will refresh %zu entries (so far %zu/%zu)\n", + batch_size, state->refresh_index, state->refresh_val_size); + + for (size_t i = 0; i < batch_size; i++) { + state->refresh_batch[i] = state->refresh_values[state->refresh_index]; + state->refresh_index++; + } + + tv = tevent_timeval_current_ofs(0, msec_delay * 1000); + timeout = tevent_add_timer(state->be_ctx->ev, req, tv, + be_refresh_batch_step_wakeup, req); + if (timeout == NULL) { + return ENOMEM; + } + + return EAGAIN; +} + +static void be_refresh_batch_step_wakeup(struct tevent_context *ev, + struct tevent_timer *tt, + struct timeval tv, + void *pvt) +{ + struct tevent_req *req; + struct tevent_req *subreq = NULL; + struct be_refresh_state *state = NULL; + + req = talloc_get_type(pvt, struct tevent_req); + state = tevent_req_data(req, struct be_refresh_state); + + DEBUG(SSSDBG_TRACE_INTERNAL, "Issuing refresh\n"); + subreq = state->cb->send_fn(state, state->ev, state->be_ctx, + state->domain, + state->refresh_batch, + state->cb->pvt); + if (subreq == NULL) { + tevent_req_error(req, ENOMEM); + return; + } + tevent_req_set_callback(subreq, be_refresh_done, req); } static void be_refresh_done(struct tevent_req *subreq) @@ -342,8 +421,24 @@ static void be_refresh_done(struct tevent_req *subreq) goto done; } + ret = be_refresh_batch_step(req, 500); + if (ret == EAGAIN) { + DEBUG(SSSDBG_TRACE_INTERNAL, + "Another batch in this step in progress\n"); + return; + } else if (ret != EOK) { + DEBUG(SSSDBG_OP_FAILURE, + "be_refresh_batch_step failed [%d]: %s\n", + ret, sss_strerror(ret)); + goto done; + } + + DEBUG(SSSDBG_TRACE_INTERNAL, "All batches in this step refreshed\n"); + + /* Proceed to the next step */ ret = be_refresh_step(req); if (ret == EAGAIN) { + DEBUG(SSSDBG_TRACE_INTERNAL, "Another step in progress\n"); return; } diff --git a/src/tests/cmocka/test_expire_common.c b/src/tests/cmocka/test_expire_common.c index 5d3ea02f3..4f6168190 100644 --- a/src/tests/cmocka/test_expire_common.c +++ b/src/tests/cmocka/test_expire_common.c @@ -32,7 +32,7 @@ #include "tests/common_check.h" #include "tests/cmocka/test_expire_common.h" -#define MAX 100 +#define MAX_VAL 100 static char *now_str(TALLOC_CTX *mem_ctx, const char* format, int s) { @@ -41,10 +41,10 @@ static char *now_str(TALLOC_CTX *mem_ctx, const char* format, int s) size_t len; char *timestr; - timestr = talloc_array(mem_ctx, char, MAX); + timestr = talloc_array(mem_ctx, char, MAX_VAL); tm = gmtime(&t); - len = strftime(timestr, MAX, format, tm); + len = strftime(timestr, MAX_VAL, format, tm); if (len == 0) { return NULL; } diff --git a/src/tests/sss_idmap-tests.c b/src/tests/sss_idmap-tests.c index 96f0861ac..e5f3f7041 100644 --- a/src/tests/sss_idmap-tests.c +++ b/src/tests/sss_idmap-tests.c @@ -140,8 +140,8 @@ void idmap_add_domain_with_sec_slices_setup_cb_fail(void) } -#define MAX 1000 -char data[MAX]; +#define DATA_MAX 1000 +char data[DATA_MAX]; enum idmap_error_code cb2(const char *dom_name, const char *dom_sid, @@ -154,10 +154,10 @@ enum idmap_error_code cb2(const char *dom_name, char *p = (char*)pvt; size_t len; - len = snprintf(p, MAX, "%s, %s %s, %"PRIu32", %"PRIu32", %" PRIu32, + len = snprintf(p, DATA_MAX, "%s, %s %s, %"PRIu32", %"PRIu32", %" PRIu32, dom_name, dom_sid, range_id, min_id, max_id, first_rid); - if (len >= MAX) { + if (len >= DATA_MAX) { return IDMAP_OUT_OF_MEMORY; } return IDMAP_SUCCESS; diff --git a/src/util/util.h b/src/util/util.h index c5680d89a..13e434b62 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -67,6 +67,14 @@ #define NULL 0 #endif +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + #define SSSD_MAIN_OPTS SSSD_DEBUG_OPTS #define SSSD_SERVER_OPTS(uid, gid) \ -- 2.20.1