|
|
4c04d8 |
From d8875611eefff661ad3f92b6f75b0c90c22918a6 Mon Sep 17 00:00:00 2001
|
|
|
e79480 |
From: Mark Reynolds <mreynolds@redhat.com>
|
|
|
e79480 |
Date: Wed, 16 Oct 2019 20:27:30 -0400
|
|
|
e79480 |
Subject: [PATCH] Issue 49850 - ldbm_get_nonleaf_ids() slow for databases with
|
|
|
e79480 |
many non-leaf entries
|
|
|
e79480 |
|
|
|
e79480 |
Bug Description: The logs from an LDIF import indicated that gathering non-leaf IDs
|
|
|
e79480 |
for creating the ancestorid index took an enormous amount of time,
|
|
|
e79480 |
over 10hrs. The root cause is that the parentid index btree ordering
|
|
|
e79480 |
is lexical, but the IDList being built up from it is sorted numerically.
|
|
|
e79480 |
In the existing code, the IDList is maintained in constantly sorted
|
|
|
e79480 |
order by idl_insert().
|
|
|
e79480 |
|
|
|
e79480 |
Fix Description: ldbm_get_nonleaf_ids() switches to idl_append_extend() instead of idl_insert()
|
|
|
e79480 |
for building up the IDList and then sorts the result only once, using
|
|
|
e79480 |
qsort with idl_sort_cmp, after the entire list has been gathered.
|
|
|
e79480 |
|
|
|
e79480 |
The improvement on identical hardware is for the operation to take 10
|
|
|
e79480 |
seconds rather than 10 hours.
|
|
|
e79480 |
|
|
|
e79480 |
Patch Author: Thomas Lackey <telackey@bozemanpass.com> Thanks for the great contribution!!!
|
|
|
e79480 |
|
|
|
e79480 |
relates: https://pagure.io/389-ds-base/issue/49850
|
|
|
e79480 |
|
|
|
e79480 |
Reviewed by: mreynolds, tbordaz, and firstyear
|
|
|
e79480 |
---
|
|
|
e79480 |
ldap/servers/slapd/back-ldbm/ancestorid.c | 20 +++++++++++++++++++-
|
|
|
e79480 |
1 file changed, 19 insertions(+), 1 deletion(-)
|
|
|
e79480 |
|
|
|
e79480 |
diff --git a/ldap/servers/slapd/back-ldbm/ancestorid.c b/ldap/servers/slapd/back-ldbm/ancestorid.c
|
|
|
e79480 |
index 24642923d..254a3aa3b 100644
|
|
|
e79480 |
--- a/ldap/servers/slapd/back-ldbm/ancestorid.c
|
|
|
e79480 |
+++ b/ldap/servers/slapd/back-ldbm/ancestorid.c
|
|
|
e79480 |
@@ -82,7 +82,14 @@ ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl, ImportJob *job)
|
|
|
e79480 |
ret = dbc->c_get(dbc, &key, &data, DB_NEXT_NODUP);
|
|
|
e79480 |
if ((ret == 0) && (*(char *)key.data == EQ_PREFIX)) {
|
|
|
e79480 |
id = (ID)strtoul((char *)key.data + 1, NULL, 10);
|
|
|
e79480 |
- idl_insert(&nodes, id);
|
|
|
e79480 |
+ /*
|
|
|
e79480 |
+ * TEL 20180711 - switch to idl_append_extend instead of idl_insert because there is no
|
|
|
e79480 |
+ * need to keep the list constantly sorted, which can be very expensive with
|
|
|
e79480 |
+ * large databases (exacerbated by the fact that the parentid btree ordering is
|
|
|
e79480 |
+ * lexical, but the idl_insert ordering is numeric). It is enough to gather them
|
|
|
e79480 |
+ * all together and sort them once at the end.
|
|
|
e79480 |
+ */
|
|
|
e79480 |
+ idl_append_extend(&nodes, id);
|
|
|
e79480 |
}
|
|
|
e79480 |
key_count++;
|
|
|
e79480 |
if (!(key_count % PROGRESS_INTERVAL)) {
|
|
|
e79480 |
@@ -107,6 +114,17 @@ ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl, ImportJob *job)
|
|
|
e79480 |
if (ret != 0)
|
|
|
e79480 |
ldbm_nasty("ldbm_get_nonleaf_ids", sourcefile, 13030, ret);
|
|
|
e79480 |
|
|
|
e79480 |
+ if (ret == 0) {
|
|
|
e79480 |
+ /* now sort it */
|
|
|
e79480 |
+ import_log_notice(job, SLAPI_LOG_INFO, "ldbm_get_nonleaf_ids",
|
|
|
e79480 |
+ "Starting sort of ancestorid non-leaf IDs...");
|
|
|
e79480 |
+
|
|
|
e79480 |
+ qsort((void *)&nodes->b_ids[0], nodes->b_nids, (size_t)sizeof(ID), idl_sort_cmp);
|
|
|
e79480 |
+
|
|
|
e79480 |
+ import_log_notice(job, SLAPI_LOG_INFO, "ldbm_get_nonleaf_ids",
|
|
|
e79480 |
+ "Finished sort of ancestorid non-leaf IDs.");
|
|
|
e79480 |
+ }
|
|
|
e79480 |
+
|
|
|
e79480 |
out:
|
|
|
e79480 |
/* Close the cursor */
|
|
|
e79480 |
if (dbc != NULL) {
|
|
|
e79480 |
--
|
|
|
e79480 |
2.21.0
|
|
|
e79480 |
|