74ca47
From 9be74e83539e204e9a56721da5c22bd9abf38195 Mon Sep 17 00:00:00 2001
74ca47
From: Mark Reynolds <mreynolds@redhat.com>
74ca47
Date: Wed, 19 Apr 2017 13:41:22 -0400
74ca47
Subject: [PATCH] Ticket 49204 - Fix lower bounds on import autosize + On small
74ca47
 VM, autotune breaks the access of the suffixes
74ca47
74ca47
    Bug Description:
74ca47
        ldif2db in some cases may set a cache of 0, which may y break imports.
74ca47
74ca47
        Under memory pressure, the amount of available memory at startup
74ca47
        can be so low that the configured cachememsize will be rejected
74ca47
        (unwilling to perform).
74ca47
        This should leave the cachememsize being "0" (default)
74ca47
        This conduct to be unable to access the suffix pages.
74ca47
74ca47
    Fix Description:
74ca47
74ca47
     * autosize set an incorrect percentage which was too high.
74ca47
     * we did not check the lower bound of the allocation
74ca47
       so we now set that we must have a minimum allocation.
74ca47
     * Set entrycache to a minimal value, even if it looks insane
74ca47
     * add a cap on reduction of caches, so we always allocate a few pages
74ca47
       at least, and prevent returning 0 to the caller.
74ca47
74ca47
    https://pagure.io/389-ds-base/issue/49204
74ca47
74ca47
    Author: wibrown, tbordaz
74ca47
74ca47
    Review by: tbordaz (Thanks mate, great work with this :) )
74ca47
---
74ca47
 ldap/servers/slapd/back-ldbm/cache.c               |  4 +--
74ca47
 ldap/servers/slapd/back-ldbm/dblayer.c             | 33 +++++++++++++---------
74ca47
 ldap/servers/slapd/back-ldbm/dblayer.h             | 12 ++++----
74ca47
 ldap/servers/slapd/back-ldbm/ldbm_config.c         |  4 +--
74ca47
 .../servers/slapd/back-ldbm/ldbm_instance_config.c | 23 +++++++++++++--
74ca47
 ldap/servers/slapd/slapi-private.h                 |  2 +-
74ca47
 ldap/servers/slapd/util.c                          | 20 +++++++++----
74ca47
 7 files changed, 65 insertions(+), 33 deletions(-)
74ca47
74ca47
diff --git a/ldap/servers/slapd/back-ldbm/cache.c b/ldap/servers/slapd/back-ldbm/cache.c
74ca47
index 0f0cf3b..c6638a2 100644
74ca47
--- a/ldap/servers/slapd/back-ldbm/cache.c
74ca47
+++ b/ldap/servers/slapd/back-ldbm/cache.c
74ca47
@@ -65,7 +65,7 @@
74ca47
 
74ca47
 /* static functions */
74ca47
 static void entrycache_clear_int(struct cache *cache);
74ca47
-static void entrycache_set_max_size(struct cache *cache, size_t bytes);
74ca47
+static void entrycache_set_max_size(struct cache *cache, uint64_t bytes);
74ca47
 static int entrycache_remove_int(struct cache *cache, struct backentry *e);
74ca47
 static void entrycache_return(struct cache *cache, struct backentry **bep);
74ca47
 static int entrycache_replace(struct cache *cache, struct backentry *olde, struct backentry *newe);
74ca47
@@ -77,7 +77,7 @@ static void entry_lru_verify(struct cache *cache, struct backentry *e, int in);
74ca47
 
74ca47
 static int dn_same_id(const void *bdn, const void *k);
74ca47
 static void dncache_clear_int(struct cache *cache);
74ca47
-static void dncache_set_max_size(struct cache *cache, size_t bytes);
74ca47
+static void dncache_set_max_size(struct cache *cache, uint64_t bytes);
74ca47
 static int dncache_remove_int(struct cache *cache, struct backdn *dn);
74ca47
 static void dncache_return(struct cache *cache, struct backdn **bdn);
74ca47
 static int dncache_replace(struct cache *cache, struct backdn *olddn, struct backdn *newdn);
74ca47
diff --git a/ldap/servers/slapd/back-ldbm/dblayer.c b/ldap/servers/slapd/back-ldbm/dblayer.c
74ca47
index 3c1fbb0..f834322 100644
74ca47
--- a/ldap/servers/slapd/back-ldbm/dblayer.c
74ca47
+++ b/ldap/servers/slapd/back-ldbm/dblayer.c
74ca47
@@ -1237,8 +1237,8 @@ no_diskspace(struct ldbminfo *li, int dbenv_flags)
74ca47
     struct statvfs db_buf;
74ca47
     int using_region_files = !(dbenv_flags & ( DB_PRIVATE | DB_SYSTEM_MEM));
74ca47
     /* value of 10 == 10% == little more than the average overhead calculated for very large files on 64-bit system for bdb 4.7 */
74ca47
-    PRUint64 expected_siz = li->li_dbcachesize + li->li_dbcachesize/10; /* dbcache + region files */
74ca47
-    PRUint64 fsiz;
74ca47
+    uint64_t expected_siz = li->li_dbcachesize + li->li_dbcachesize/10; /* dbcache + region files */
74ca47
+    uint64_t fsiz;
74ca47
     char *region_dir;
74ca47
 
74ca47
     if (statvfs(li->li_directory, &db_buf) < 0){
74ca47
@@ -1263,7 +1263,7 @@ no_diskspace(struct ldbminfo *li, int dbenv_flags)
74ca47
                         li->li_dblayer_private->dblayer_dbhome_directory);
74ca47
                     return 1;
74ca47
                 }
74ca47
-                fsiz = ((PRUint64)dbhome_buf.f_bavail) * ((PRUint64)dbhome_buf.f_bsize);
74ca47
+                fsiz = ((uint64_t)dbhome_buf.f_bavail) * ((uint64_t)dbhome_buf.f_bsize);
74ca47
                 region_dir = li->li_dblayer_private->dblayer_dbhome_directory;
74ca47
             } else {
74ca47
                 /* Shared/private memory.  No need to check disk space, return success */
74ca47
@@ -1387,12 +1387,17 @@ dblayer_start(struct ldbminfo *li, int dbmode)
74ca47
     /* Sanity check on cache size on platforms which allow us to figure out
74ca47
      * the available phys mem */
74ca47
     slapi_pal_meminfo *mi = spal_meminfo_get();
74ca47
-    if (!util_is_cachesize_sane(mi, &(priv->dblayer_cachesize))) {
74ca47
+    util_cachesize_result result = util_is_cachesize_sane(mi, &(priv->dblayer_cachesize));
74ca47
+    if (result == UTIL_CACHESIZE_ERROR) {
74ca47
+        slapi_log_err(SLAPI_LOG_CRIT, "dblayer_start", "Unable to determine if cachesize was valid!!!");
74ca47
+    } else if (result == UTIL_CACHESIZE_REDUCED) {
74ca47
+        /* In some cases we saw this go to 0, prevent this. */
74ca47
+        if (priv->dblayer_cachesize < MINCACHESIZE) {
74ca47
+            priv->dblayer_cachesize = MINCACHESIZE;
74ca47
+        }
74ca47
         /* Oops---looks like the admin misconfigured, let's warn them */
74ca47
-        slapi_log_err(SLAPI_LOG_WARNING,"dblayer_start", "Likely CONFIGURATION ERROR -"
74ca47
-                  "dbcachesize is configured to use more than the available "
74ca47
-                  "physical memory, decreased to the largest available size (%"PRIu64" bytes).\n",
74ca47
-                  priv->dblayer_cachesize);
74ca47
+        slapi_log_err(SLAPI_LOG_WARNING, "dblayer_start", "Likely CONFIGURATION ERROR - dbcachesize is configured to use more than the available "
74ca47
+                  "memory, decreased to (%"PRIu64" bytes).\n", priv->dblayer_cachesize);
74ca47
         li->li_dbcachesize = priv->dblayer_cachesize;
74ca47
     }
74ca47
     spal_meminfo_destroy(mi);
74ca47
@@ -3816,7 +3821,7 @@ static const u_int32_t default_flags = DB_NEXT;
74ca47
 typedef struct txn_test_iter {
74ca47
     DB *db;
74ca47
     DBC *cur;
74ca47
-    size_t cnt;
74ca47
+    uint64_t cnt;
74ca47
     const char *attr;
74ca47
     u_int32_t flags;
74ca47
     backend *be;
74ca47
@@ -3938,10 +3943,10 @@ static int txn_test_threadmain(void *param)
74ca47
     Object *inst_obj;
74ca47
     int rc = 0;
74ca47
     txn_test_iter **ttilist = NULL;
74ca47
-    size_t tticnt = 0;
74ca47
+    uint64_t tticnt = 0;
74ca47
     DB_TXN *txn = NULL;
74ca47
     txn_test_cfg cfg = {0};
74ca47
-    size_t counter = 0;
74ca47
+    uint64_t counter = 0;
74ca47
     char keybuf[8192];
74ca47
     char databuf[8192];
74ca47
     int dbattempts = 0;
74ca47
@@ -4062,9 +4067,9 @@ retry_txn:
74ca47
         if (!rc) {
74ca47
             DBT key;
74ca47
             DBT data;
74ca47
-            size_t ii;
74ca47
-            size_t donecnt = 0;
74ca47
-            size_t cnt = 0;
74ca47
+            uint64_t ii;
74ca47
+            uint64_t donecnt = 0;
74ca47
+            uint64_t cnt = 0;
74ca47
 
74ca47
             /* phase 1 - open a cursor to each db */
74ca47
             if (cfg.verbose) {
74ca47
diff --git a/ldap/servers/slapd/back-ldbm/dblayer.h b/ldap/servers/slapd/back-ldbm/dblayer.h
74ca47
index 816c943..77b04fa 100644
74ca47
--- a/ldap/servers/slapd/back-ldbm/dblayer.h
74ca47
+++ b/ldap/servers/slapd/back-ldbm/dblayer.h
74ca47
@@ -90,8 +90,8 @@ struct dblayer_private
74ca47
     int dblayer_ncache;
74ca47
     int dblayer_previous_ncache;
74ca47
     int dblayer_tx_max;
74ca47
-    size_t dblayer_cachesize;
74ca47
-    size_t dblayer_previous_cachesize; /* Cache size when we last shut down--
74ca47
+    uint64_t dblayer_cachesize;
74ca47
+    uint64_t dblayer_previous_cachesize; /* Cache size when we last shut down--
74ca47
                                         * used to determine if we delete 
74ca47
                                         * the mpool */
74ca47
     int dblayer_recovery_required;
74ca47
@@ -102,15 +102,15 @@ struct dblayer_private
74ca47
     int dblayer_durable_transactions;
74ca47
     int dblayer_checkpoint_interval;
74ca47
     int dblayer_circular_logging;
74ca47
-    size_t dblayer_page_size;       /* db page size if configured,
74ca47
+    uint64_t dblayer_page_size;       /* db page size if configured,
74ca47
                                      * otherwise default to DBLAYER_PAGESIZE */
74ca47
-    size_t dblayer_index_page_size; /* db index page size if configured,
74ca47
+    uint64_t dblayer_index_page_size; /* db index page size if configured,
74ca47
                                      * otherwise default to 
74ca47
                                      * DBLAYER_INDEX_PAGESIZE */
74ca47
     int dblayer_idl_divisor;        /* divide page size by this to get IDL 
74ca47
                                      * size */
74ca47
-    size_t dblayer_logfile_size;    /* How large can one logfile be ? */
74ca47
-    size_t dblayer_logbuf_size;     /* how large log buffer can be */
74ca47
+    uint64_t dblayer_logfile_size;    /* How large can one logfile be ? */
74ca47
+    uint64_t dblayer_logbuf_size;     /* how large log buffer can be */
74ca47
     int dblayer_file_mode;          /* pmode for files we create */
74ca47
     int dblayer_verbose;            /* Get libdb to exhale debugging info */
74ca47
     int dblayer_debug;              /* Will libdb emit debugging info into 
74ca47
diff --git a/ldap/servers/slapd/back-ldbm/ldbm_config.c b/ldap/servers/slapd/back-ldbm/ldbm_config.c
74ca47
index d5120d3..401cd60 100644
74ca47
--- a/ldap/servers/slapd/back-ldbm/ldbm_config.c
74ca47
+++ b/ldap/servers/slapd/back-ldbm/ldbm_config.c
74ca47
@@ -1582,9 +1582,9 @@ static config_info ldbm_config[] = {
74ca47
     {CONFIG_DB_DEBUG_CHECKPOINTING, CONFIG_TYPE_ONOFF, "off", &ldbm_config_db_debug_checkpointing_get, &ldbm_config_db_debug_checkpointing_set, 0},
74ca47
     {CONFIG_DB_HOME_DIRECTORY, CONFIG_TYPE_STRING, "", &ldbm_config_db_home_directory_get, &ldbm_config_db_home_directory_set, 0},
74ca47
     {CONFIG_IMPORT_CACHE_AUTOSIZE, CONFIG_TYPE_INT, "-1", &ldbm_config_import_cache_autosize_get, &ldbm_config_import_cache_autosize_set, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
74ca47
-    {CONFIG_CACHE_AUTOSIZE, CONFIG_TYPE_INT, "0", &ldbm_config_cache_autosize_get, &ldbm_config_cache_autosize_set, 0},
74ca47
+    {CONFIG_CACHE_AUTOSIZE, CONFIG_TYPE_INT, "10", &ldbm_config_cache_autosize_get, &ldbm_config_cache_autosize_set, 0},
74ca47
     {CONFIG_CACHE_AUTOSIZE_SPLIT, CONFIG_TYPE_INT, "40", &ldbm_config_cache_autosize_split_get, &ldbm_config_cache_autosize_split_set, 0},
74ca47
-    {CONFIG_IMPORT_CACHESIZE, CONFIG_TYPE_SIZE_T, "20000000", &ldbm_config_import_cachesize_get, &ldbm_config_import_cachesize_set, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
74ca47
+    {CONFIG_IMPORT_CACHESIZE, CONFIG_TYPE_SIZE_T, "16777216", &ldbm_config_import_cachesize_get, &ldbm_config_import_cachesize_set, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
74ca47
     {CONFIG_IDL_SWITCH, CONFIG_TYPE_STRING, "new", &ldbm_config_idl_get_idl_new, &ldbm_config_idl_set_tune, CONFIG_FLAG_ALWAYS_SHOW},
74ca47
     {CONFIG_IDL_UPDATE, CONFIG_TYPE_ONOFF, "on", &ldbm_config_idl_get_update, &ldbm_config_idl_set_update, 0},
74ca47
     {CONFIG_BYPASS_FILTER_TEST, CONFIG_TYPE_STRING, "on", &ldbm_config_get_bypass_filter_test, &ldbm_config_set_bypass_filter_test, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
74ca47
diff --git a/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c b/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c
74ca47
index 62cdbc3..36d830d 100644
74ca47
--- a/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c
74ca47
+++ b/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c
74ca47
@@ -93,6 +93,7 @@ ldbm_instance_config_cachememsize_set(void *arg, void *value, char *errorbuf, in
74ca47
     int retval = LDAP_SUCCESS;
74ca47
     size_t val = (size_t) value;
74ca47
     uint64_t delta = 0;
74ca47
+    uint64_t delta_original = 0;
74ca47
 
74ca47
     /* Do whatever we can to make sure the data is ok. */
74ca47
     /* There is an error here. We check the new val against our current mem-alloc 
74ca47
@@ -108,18 +109,34 @@ ldbm_instance_config_cachememsize_set(void *arg, void *value, char *errorbuf, in
74ca47
     if (apply) {
74ca47
         if (val > inst->inst_cache.c_maxsize) {
74ca47
             delta = val - inst->inst_cache.c_maxsize;
74ca47
+            delta_original = delta;
74ca47
 
74ca47
             util_cachesize_result sane;
74ca47
             slapi_pal_meminfo *mi = spal_meminfo_get();
74ca47
             sane = util_is_cachesize_sane(mi, &delta);
74ca47
             spal_meminfo_destroy(mi);
74ca47
 
74ca47
-            if (sane != UTIL_CACHESIZE_VALID){
74ca47
-                slapi_create_errormsg(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, "Error: cachememsize value is too large.");
74ca47
-                slapi_log_err(SLAPI_LOG_ERR, "ldbm_instance_config_cachememsize_set", "cachememsize value is too large.\n");
74ca47
+            if (sane == UTIL_CACHESIZE_ERROR){
74ca47
+                slapi_create_errormsg(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, "Error: unable to determine system memory limits.");
74ca47
+                slapi_log_err(SLAPI_LOG_ERR, "ldbm_instance_config_cachememsize_set", "Enable to determine system memory limits.\n");
74ca47
                 return LDAP_UNWILLING_TO_PERFORM;
74ca47
+            } else if (sane == UTIL_CACHESIZE_REDUCED) {
74ca47
+                slapi_log_err(SLAPI_LOG_WARNING, "ldbm_instance_config_cachememsize_set", "delta +%"PRIu64" of request %"PRIu64" reduced to %"PRIu64"\n", delta_original, val, delta);
74ca47
+                /*
74ca47
+                 * This works as: value = 100
74ca47
+                 * delta_original to inst, 20;
74ca47
+                 * delta reduced to 5:
74ca47
+                 * 100 - (20 - 5) == 85;
74ca47
+                 * so if you recalculated delta now (val - inst), it would be 5.
74ca47
+                 */
74ca47
+                val = val - (delta_original - delta);
74ca47
             }
74ca47
         }
74ca47
+        if (inst->inst_cache.c_maxsize < MINCACHESIZE || val < MINCACHESIZE) {
74ca47
+            slapi_log_err(SLAPI_LOG_ERR, "ldbm_instance_config_cachememsize_set", "force a minimal value %"PRIu64"\n", MINCACHESIZE);
74ca47
+            /* This value will trigger an autotune next start up, but it should increase only */
74ca47
+            val = MINCACHESIZE;
74ca47
+        }
74ca47
         cache_set_max_size(&(inst->inst_cache), val, CACHE_TYPE_ENTRY);
74ca47
     }
74ca47
 
74ca47
diff --git a/ldap/servers/slapd/slapi-private.h b/ldap/servers/slapd/slapi-private.h
74ca47
index 0c76580..d9547d8 100644
74ca47
--- a/ldap/servers/slapd/slapi-private.h
74ca47
+++ b/ldap/servers/slapd/slapi-private.h
74ca47
@@ -1392,7 +1392,7 @@ typedef enum _util_cachesize_result {
74ca47
  * \return util_cachesize_result.
74ca47
  * \sa util_cachesize_result, spal_meminfo_get
74ca47
  */
74ca47
-util_cachesize_result util_is_cachesize_sane(slapi_pal_meminfo *mi, size_t *cachesize);
74ca47
+util_cachesize_result util_is_cachesize_sane(slapi_pal_meminfo *mi, uint64_t *cachesize);
74ca47
 
74ca47
 /**
74ca47
  * Retrieve the number of threads the server should run with based on this hardware.
74ca47
diff --git a/ldap/servers/slapd/util.c b/ldap/servers/slapd/util.c
74ca47
index 012e83d..4ff6d41 100644
74ca47
--- a/ldap/servers/slapd/util.c
74ca47
+++ b/ldap/servers/slapd/util.c
74ca47
@@ -1468,16 +1468,26 @@ util_is_cachesize_sane(slapi_pal_meminfo *mi, uint64_t *cachesize)
74ca47
         return UTIL_CACHESIZE_ERROR;
74ca47
     }
74ca47
 
74ca47
+    util_cachesize_result result = UTIL_CACHESIZE_VALID;
74ca47
     slapi_log_err(SLAPI_LOG_TRACE, "util_is_cachesize_sane", "Available bytes %"PRIu64", requested bytes %"PRIu64"\n", mi->system_available_bytes, *cachesize);
74ca47
     if (*cachesize > mi->system_available_bytes) {
74ca47
-        /* Since we are ask for more than what's available, we give 3/4 of the remaining.
74ca47
+        /* Since we are ask for more than what's available, we give 1/2 of the remaining.
74ca47
          * the remaining system mem to the cachesize instead, and log a warning
74ca47
          */
74ca47
-        *cachesize = (mi->system_available_bytes * 0.75);
74ca47
-        slapi_log_err(SLAPI_LOG_TRACE, "util_is_cachesize_sane", "Adjusted cachesize to %"PRIu64"\n", *cachesize);
74ca47
-        return UTIL_CACHESIZE_REDUCED;
74ca47
+        uint64_t adjust_cachesize = (mi->system_available_bytes * 0.5);
74ca47
+        if (adjust_cachesize > *cachesize) {
74ca47
+            slapi_log_err(SLAPI_LOG_CRIT, "util_is_cachesize_sane", "Invalid adjusted cachesize is greater than request %"PRIu64, adjust_cachesize);
74ca47
+            return UTIL_CACHESIZE_ERROR;
74ca47
+        }
74ca47
+        if (adjust_cachesize < (16 * mi->pagesize_bytes)) {
74ca47
+            /* At minimum respond with 16 pages - that's 64k on x86_64 */
74ca47
+            adjust_cachesize = 16 * mi->pagesize_bytes;
74ca47
+        }
74ca47
+        *cachesize = adjust_cachesize;
74ca47
+        slapi_log_err(SLAPI_LOG_TRACE, "util_is_cachesize_sane", "Adjusted cachesize down to %"PRIu64"\n", *cachesize);
74ca47
+        result = UTIL_CACHESIZE_REDUCED;
74ca47
     }
74ca47
-    return UTIL_CACHESIZE_VALID;
74ca47
+    return result;
74ca47
 }
74ca47
 
74ca47
 long
74ca47
-- 
74ca47
2.9.3
74ca47