Blame SOURCES/0011-Issue-49850-ldbm_get_nonleaf_ids-slow-for-databases-.patch

4c04d8
From d8875611eefff661ad3f92b6f75b0c90c22918a6 Mon Sep 17 00:00:00 2001
e79480
From: Mark Reynolds <mreynolds@redhat.com>
e79480
Date: Wed, 16 Oct 2019 20:27:30 -0400
e79480
Subject: [PATCH] Issue 49850 -  ldbm_get_nonleaf_ids() slow for databases with
e79480
 many non-leaf entries
e79480
e79480
Bug Description:  The logs from an LDIF import indicated that gathering non-leaf IDs
e79480
                  for creating the ancestorid index took an enormous amount of time,
e79480
                  over 10hrs.  The root cause is that the parentid index btree ordering
e79480
                  is lexical, but the IDList being built up from it is sorted numerically.
e79480
                  In the existing code, the IDList is maintained in constantly sorted
e79480
                  order by idl_insert().
e79480
e79480
Fix Description:  ldbm_get_nonleaf_ids() switches to idl_append_extend() instead of idl_insert()
e79480
                  for building up the IDList and then sorts the result only once, using
e79480
                  qsort with idl_sort_cmp, after the entire list has been gathered.
e79480
e79480
                  The improvement on identical hardware is for the operation to take 10
e79480
                  seconds rather than 10 hours
e79480
e79480
Patch Author:  Thomas Lackey <telackey@bozemanpass.com>  Thanks for the great contribution!!!
e79480
e79480
relates: https://pagure.io/389-ds-base/issue/49850
e79480
e79480
Reviewed by: mreynolds, tbordaz, and firstyear
e79480
---
e79480
 ldap/servers/slapd/back-ldbm/ancestorid.c | 20 +++++++++++++++++++-
e79480
 1 file changed, 19 insertions(+), 1 deletion(-)
e79480
e79480
diff --git a/ldap/servers/slapd/back-ldbm/ancestorid.c b/ldap/servers/slapd/back-ldbm/ancestorid.c
e79480
index 24642923d..254a3aa3b 100644
e79480
--- a/ldap/servers/slapd/back-ldbm/ancestorid.c
e79480
+++ b/ldap/servers/slapd/back-ldbm/ancestorid.c
e79480
@@ -82,7 +82,14 @@ ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl, ImportJob *job)
e79480
         ret = dbc->c_get(dbc, &key, &data, DB_NEXT_NODUP);
e79480
         if ((ret == 0) && (*(char *)key.data == EQ_PREFIX)) {
e79480
             id = (ID)strtoul((char *)key.data + 1, NULL, 10);
e79480
-            idl_insert(&nodes, id);
e79480
+            /*
e79480
+             * TEL 20180711 - switch to idl_append instead of idl_insert because there is no
e79480
+             * need to keep the list constantly sorted, which can be very expensive with
e79480
+             * large databases (exacerbated by the fact that the parentid btree ordering is
e79480
+             * lexical, but the idl_insert ordering is numeric).  It is enough to gather them
e79480
+             * all together and sort them once at the end.
e79480
+             */
e79480
+            idl_append_extend(&nodes, id);
e79480
         }
e79480
         key_count++;
e79480
         if (!(key_count % PROGRESS_INTERVAL)) {
e79480
@@ -107,6 +114,17 @@ ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl, ImportJob *job)
e79480
     if (ret != 0)
e79480
         ldbm_nasty("ldbm_get_nonleaf_ids", sourcefile, 13030, ret);
e79480
 
e79480
+    if (ret == 0) {
e79480
+        /* now sort it */
e79480
+        import_log_notice(job, SLAPI_LOG_INFO, "ldbm_get_nonleaf_ids",
e79480
+            "Starting sort of ancestorid non-leaf IDs...");
e79480
+
e79480
+        qsort((void *)&nodes->b_ids[0], nodes->b_nids, (size_t)sizeof(ID), idl_sort_cmp);
e79480
+
e79480
+        import_log_notice(job, SLAPI_LOG_INFO, "ldbm_get_nonleaf_ids",
e79480
+            "Finished sort of ancestorid non-leaf IDs.");
e79480
+    }
e79480
+
e79480
 out:
e79480
     /* Close the cursor */
e79480
     if (dbc != NULL) {
e79480
-- 
e79480
2.21.0
e79480