|
|
40cd75 |
From 64b9d015523b4ae379ff2d72fc73da173be8a712 Mon Sep 17 00:00:00 2001
|
|
|
40cd75 |
From: Mohammad Nweider <nweiderm@amazon.com>
|
|
|
40cd75 |
Date: Wed, 18 Oct 2017 13:02:15 +0000
|
|
|
40cd75 |
Subject: [PATCH] Ticket 49401 - improve valueset sorted performance on delete
|
|
|
40cd75 |
|
|
|
40cd75 |
Bug Description: valueset sorted maintains a list of syntax sorted
|
|
|
40cd75 |
references to the attributes of the entry. During addition these are
|
|
|
40cd75 |
modified and added properly, so they stay sorted.
|
|
|
40cd75 |
|
|
|
40cd75 |
However, in the past to maintain the sorted property, during a delete
|
|
|
40cd75 |
we would need to remove the vs->sorted array, and recreate it via qsort,
|
|
|
40cd75 |
|
|
|
40cd75 |
While this was an improvement from past (where we would removed
|
|
|
40cd75 |
vs->sorted during an attr delete), it still has performance implications
|
|
|
40cd75 |
on very very large datasets, IE 50,000 member groups with
|
|
|
40cd75 |
addition/deletion, large entry caches and replication.
|
|
|
40cd75 |
|
|
|
40cd75 |
Fix Description: Implement a new algorithm that is able to maintain
|
|
|
40cd75 |
existing sort data in a near linear time.
|
|
|
40cd75 |
|
|
|
40cd75 |
https://pagure.io/389-ds-base/issue/49401
|
|
|
40cd75 |
|
|
|
40cd75 |
Author: nweiderm, wibrown
|
|
|
40cd75 |
|
|
|
40cd75 |
Review by: wibrown, lkrispen, tbordaz (Thanks nweiderm!)
|
|
|
40cd75 |
|
|
|
40cd75 |
(cherry picked from commit a43a8efc7907116146b505ac40f18fac71f474b0)
|
|
|
40cd75 |
---
|
|
|
40cd75 |
ldap/servers/slapd/valueset.c | 171 +++++++++++++++++++++++++-----------------
|
|
|
40cd75 |
1 file changed, 102 insertions(+), 69 deletions(-)
|
|
|
40cd75 |
|
|
|
40cd75 |
diff --git a/ldap/servers/slapd/valueset.c b/ldap/servers/slapd/valueset.c
|
|
|
40cd75 |
index e22bc9c39..ae0a13fdc 100644
|
|
|
40cd75 |
--- a/ldap/servers/slapd/valueset.c
|
|
|
40cd75 |
+++ b/ldap/servers/slapd/valueset.c
|
|
|
40cd75 |
@@ -741,7 +741,10 @@ valueset_array_purge(const Slapi_Attr *a, Slapi_ValueSet *vs, const CSN *csn)
|
|
|
40cd75 |
size_t i = 0;
|
|
|
40cd75 |
size_t j = 0;
|
|
|
40cd75 |
int nextValue = 0;
|
|
|
40cd75 |
+ int nv = 0;
|
|
|
40cd75 |
int numValues = 0;
|
|
|
40cd75 |
+ Slapi_Value **va2 = NULL;
|
|
|
40cd75 |
+ int *sorted2 = NULL;
|
|
|
40cd75 |
|
|
|
40cd75 |
/* Loop over all the values freeing the old ones. */
|
|
|
40cd75 |
for(i = 0; i < vs->num; i++)
|
|
|
40cd75 |
@@ -752,91 +755,122 @@ valueset_array_purge(const Slapi_Attr *a, Slapi_ValueSet *vs, const CSN *csn)
|
|
|
40cd75 |
} else {
|
|
|
40cd75 |
j = i;
|
|
|
40cd75 |
}
|
|
|
40cd75 |
- csnset_purge(&(vs->va[j]->v_csnset),csn);
|
|
|
40cd75 |
- if (vs->va[j]->v_csnset == NULL) {
|
|
|
40cd75 |
- slapi_value_free(&vs->va[j]);
|
|
|
40cd75 |
- vs->va[j] = NULL;
|
|
|
40cd75 |
- } else if (vs->va[j] != NULL) {
|
|
|
40cd75 |
- /* This value survived, we should count it. */
|
|
|
40cd75 |
- numValues++;
|
|
|
40cd75 |
+ if (vs->va[j]) {
|
|
|
40cd75 |
+ csnset_purge(&(vs->va[j]->v_csnset),csn);
|
|
|
40cd75 |
+ if (vs->va[j]->v_csnset == NULL) {
|
|
|
40cd75 |
+ slapi_value_free(&vs->va[j]);
|
|
|
40cd75 |
+ /* Set the removed value to NULL so we know later to skip it */
|
|
|
40cd75 |
+ vs->va[j] = NULL;
|
|
|
40cd75 |
+ if (vs->sorted) {
|
|
|
40cd75 |
+ /* Mark the value in sorted for removal */
|
|
|
40cd75 |
+ vs->sorted[i] = -1;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
+ } else {
|
|
|
40cd75 |
+ /* This value survived, we should count it. */
|
|
|
40cd75 |
+ numValues++;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
}
|
|
|
40cd75 |
}
|
|
|
40cd75 |
|
|
|
40cd75 |
- /* Now compact the value/sorted list. */
|
|
|
40cd75 |
+ /* Compact vs->va and vs->sorted only when there're
|
|
|
40cd75 |
+ * remaining values ie: numValues is greater than 0 */
|
|
|
40cd75 |
/*
|
|
|
40cd75 |
- * Because we want to preserve the sorted array, this is complicated.
|
|
|
40cd75 |
+ * Algorithm explination: We start with a pair of arrays - the attrs, and the sorted array that provides
|
|
|
40cd75 |
+ * a lookup into it:
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * va: [d e a c b] sorted: [2 4 3 0 1]
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * When we remove the element b, we NULL it, and we have to mark the place where it "was" as a -1 to
|
|
|
40cd75 |
+ * flag it's removal.
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * va: [d e a c NULL] sorted: [2 -1 3 0 1]
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * Now a second va is created with the reduced allocation,
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * va2: [ X X X X ] ....
|
|
|
40cd75 |
*
|
|
|
40cd75 |
- * We have an array of values:
|
|
|
40cd75 |
- * [ b, a, c, NULL, e, NULL, NULL, d]
|
|
|
40cd75 |
- * And an array of indicies that are sorted.
|
|
|
40cd75 |
- * [ 1, 0, 2, 7, 4, 3, 5, 6 ]
|
|
|
40cd75 |
- * Were we to iterate over the sorted array, we get refs to the values in
|
|
|
40cd75 |
- * some order.
|
|
|
40cd75 |
- * The issue is now we must *remove* from both the values *and* the sorted.
|
|
|
40cd75 |
+ * Now we loop over sorted, skipping -1 that we find. In a new counter we create new sorted
|
|
|
40cd75 |
+ * references, and move the values compacting them in the process.
|
|
|
40cd75 |
+ * va: [d e a c NULL]
|
|
|
40cd75 |
+ * va2: [a x x x]
|
|
|
40cd75 |
+ * sorted: [_0 -1 3 0 1]
|
|
|
40cd75 |
*
|
|
|
40cd75 |
- * Previously, we just discarded this, because too hard. Now we try to keep
|
|
|
40cd75 |
- * it. The issue is that this is surprisingly hard to actually keep in
|
|
|
40cd75 |
- * sync.
|
|
|
40cd75 |
+ * Looping a few more times would yield:
|
|
|
40cd75 |
*
|
|
|
40cd75 |
- * We can't just blindly move the values down: That breaks the sorted array
|
|
|
40cd75 |
- * and we would need to iterate over the sorted array multiple times to
|
|
|
40cd75 |
- * achieve this.
|
|
|
40cd75 |
+ * va2: [a c x x]
|
|
|
40cd75 |
+ * sorted: [_0 _1 3 0 1]
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * va2: [a c d x]
|
|
|
40cd75 |
+ * sorted: [_0 _1 _2 0 1]
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * va2: [a c d e]
|
|
|
40cd75 |
+ * sorted: [_0 _1 _2 _3 1]
|
|
|
40cd75 |
+ *
|
|
|
40cd75 |
+ * Not only does this sort va, but with sorted, we have a faster lookup, and it will benefit cache
|
|
|
40cd75 |
+ * lookup.
|
|
|
40cd75 |
*
|
|
|
40cd75 |
- * It's actually going to be easier to just ditch the sorted, compact vs
|
|
|
40cd75 |
- * and then qsort the array.
|
|
|
40cd75 |
*/
|
|
|
40cd75 |
+ if (numValues > 0) {
|
|
|
40cd75 |
+ if(vs->sorted) {
|
|
|
40cd75 |
+ /* Let's allocate va2 and sorted2 */
|
|
|
40cd75 |
+ va2 = (Slapi_Value **) slapi_ch_malloc( (numValues + 1) * sizeof(Slapi_Value *));
|
|
|
40cd75 |
+ sorted2 = (int *) slapi_ch_malloc( (numValues + 1)* sizeof(int));
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
|
|
|
40cd75 |
- j = 0;
|
|
|
40cd75 |
- while (nextValue < numValues && j < vs->num)
|
|
|
40cd75 |
- {
|
|
|
40cd75 |
- /* nextValue is what we are looking at now
|
|
|
40cd75 |
- * j tracks along the array getting next elements.
|
|
|
40cd75 |
- *
|
|
|
40cd75 |
- * [ b, a, c, NULL, e, NULL, NULL, d]
|
|
|
40cd75 |
- * ^nv ^j
|
|
|
40cd75 |
- * [ b, a, c, e, NULL, NULL, NULL, d]
|
|
|
40cd75 |
- * ^nv ^j
|
|
|
40cd75 |
- * [ b, a, c, e, NULL, NULL, NULL, d]
|
|
|
40cd75 |
- * ^nv ^j
|
|
|
40cd75 |
- * [ b, a, c, e, NULL, NULL, NULL, d]
|
|
|
40cd75 |
- * ^nv ^j
|
|
|
40cd75 |
- * [ b, a, c, e, NULL, NULL, NULL, d]
|
|
|
40cd75 |
- * ^nv ^j
|
|
|
40cd75 |
- * [ b, a, c, e, d, NULL, NULL, NULL]
|
|
|
40cd75 |
- * ^nv ^j
|
|
|
40cd75 |
- */
|
|
|
40cd75 |
- if (vs->va[nextValue] == NULL) {
|
|
|
40cd75 |
- /* Advance j till we find something */
|
|
|
40cd75 |
- while (vs->va[j] == NULL) {
|
|
|
40cd75 |
- j++;
|
|
|
40cd75 |
+ /* I is the index for the *new* va2 array */
|
|
|
40cd75 |
+ for(i=0; i<vs->num; i++) {
|
|
|
40cd75 |
+ if (vs->sorted) {
|
|
|
40cd75 |
+ /* Skip any removed values from the index */
|
|
|
40cd75 |
+ while((nv < vs->num) && (-1 == vs->sorted[nv])) {
|
|
|
40cd75 |
+ nv++;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
+ /* We have a remaining value, add it to the va */
|
|
|
40cd75 |
+ if(nv < vs->num) {
|
|
|
40cd75 |
+ va2[i] = vs->va[vs->sorted[nv]];
|
|
|
40cd75 |
+ sorted2[i] = i;
|
|
|
40cd75 |
+ nv++;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
+ } else {
|
|
|
40cd75 |
+ while((nextValue < vs->num) && (NULL == vs->va[nextValue])) {
|
|
|
40cd75 |
+ nextValue++;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
+
|
|
|
40cd75 |
+ if(nextValue < vs->num) {
|
|
|
40cd75 |
+ vs->va[i] = vs->va[nextValue];
|
|
|
40cd75 |
+ nextValue++;
|
|
|
40cd75 |
+ } else {
|
|
|
40cd75 |
+ break;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
}
|
|
|
40cd75 |
- /* We have something! */
|
|
|
40cd75 |
- vs->va[nextValue] = vs->va[j];
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
+
|
|
|
40cd75 |
+ if (vs->sorted) {
|
|
|
40cd75 |
+ /* Finally replace the valuearray and adjust num, max */
|
|
|
40cd75 |
+ slapi_ch_free((void **)&vs->va);
|
|
|
40cd75 |
+ slapi_ch_free((void **)&vs->sorted);
|
|
|
40cd75 |
+ vs->va = va2;
|
|
|
40cd75 |
+ vs->sorted = sorted2;
|
|
|
40cd75 |
+ vs->num = numValues;
|
|
|
40cd75 |
+ vs->max = vs->num + 1;
|
|
|
40cd75 |
+ } else {
|
|
|
40cd75 |
+ vs->num = numValues;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
+
|
|
|
40cd75 |
+ for (j = vs->num; j < vs->max; j++) {
|
|
|
40cd75 |
vs->va[j] = NULL;
|
|
|
40cd75 |
+ if (vs->sorted) {
|
|
|
40cd75 |
+ vs->sorted[j] = -1;
|
|
|
40cd75 |
+ }
|
|
|
40cd75 |
}
|
|
|
40cd75 |
- nextValue++;
|
|
|
40cd75 |
- }
|
|
|
40cd75 |
- /* Fix up the number of values */
|
|
|
40cd75 |
- vs->num = numValues;
|
|
|
40cd75 |
- /* Should we re-alloc values to be smaller? */
|
|
|
40cd75 |
- /* Other parts of DS are lazy. Lets clean our list */
|
|
|
40cd75 |
- for (j = vs->num; j < vs->max; j++) {
|
|
|
40cd75 |
- vs->va[j] = NULL;
|
|
|
40cd75 |
+ } else {
|
|
|
40cd75 |
+ slapi_valueset_done(vs);
|
|
|
40cd75 |
}
|
|
|
40cd75 |
|
|
|
40cd75 |
- /* All the values were deleted, we can discard the whole array. */
|
|
|
40cd75 |
- if(vs->num == 0) {
|
|
|
40cd75 |
- if(vs->sorted) {
|
|
|
40cd75 |
- slapi_ch_free ((void **)&vs->sorted);
|
|
|
40cd75 |
- }
|
|
|
40cd75 |
- slapi_ch_free ((void **)&vs->va);
|
|
|
40cd75 |
- vs->va = NULL;
|
|
|
40cd75 |
- vs->max = 0;
|
|
|
40cd75 |
- } else if (vs->sorted != NULL) {
|
|
|
40cd75 |
- /* We still have values! rebuild the sorted array */
|
|
|
40cd75 |
+ /* We still have values but not sorted array! rebuild it */
|
|
|
40cd75 |
+ if(vs->num > VALUESET_ARRAY_SORT_THRESHOLD && vs->sorted == NULL) {
|
|
|
40cd75 |
+ vs->sorted = (int *) slapi_ch_malloc( vs->max* sizeof(int));
|
|
|
40cd75 |
valueset_array_to_sorted(a, vs);
|
|
|
40cd75 |
}
|
|
|
40cd75 |
-
|
|
|
40cd75 |
#ifdef DEBUG
|
|
|
40cd75 |
PR_ASSERT(vs->num == 0 || (vs->num > 0 && vs->va[0] != NULL));
|
|
|
40cd75 |
size_t index = 0;
|
|
|
40cd75 |
@@ -847,7 +881,6 @@ valueset_array_purge(const Slapi_Attr *a, Slapi_ValueSet *vs, const CSN *csn)
|
|
|
40cd75 |
PR_ASSERT(vs->va[index] == NULL);
|
|
|
40cd75 |
}
|
|
|
40cd75 |
#endif
|
|
|
40cd75 |
-
|
|
|
40cd75 |
/* return the number of remaining values */
|
|
|
40cd75 |
return numValues;
|
|
|
40cd75 |
}
|
|
|
40cd75 |
--
|
|
|
40cd75 |
2.13.6
|
|
|
40cd75 |
|