From 74525da09050809a55e2c6bba4c42b27d5326a8f Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Sun, 12 Jan 2020 20:11:07 -0500
Subject: [PATCH] Issue 50599 - Remove db region files prior to db recovery

Bug Description:  If the server crashes then the region files can become
                  corrupted and this prevents the server from starting.

Fix Description:  If we encounter a disorderly shutdown, then remove
                  the region files so there is a clean slate to start
                  with.

                  Also renamed the misspelled slapi_disordely_shutdown() to slapi_disorderly_shutdown()

relates: https://pagure.io/389-ds-base/issue/50599

Reviewed by: firstyear & lkrispen (Thanks!!)
---
 .../plugins/replication/repl5_replica.c       |  4 +-
 .../slapd/back-ldbm/db-bdb/bdb_layer.c        | 88 ++++++++++---------
 ldap/servers/slapd/plugin.c                   |  8 +-
 ldap/servers/slapd/slapi-plugin.h             |  2 +-
 4 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
index 94507bff8..02caa88d9 100644
--- a/ldap/servers/plugins/replication/repl5_replica.c
+++ b/ldap/servers/plugins/replication/repl5_replica.c
@@ -1657,7 +1657,7 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
              * sessions.
              */
 
-            if (slapi_disordely_shutdown(PR_FALSE)) {
+            if (slapi_disorderly_shutdown(PR_FALSE)) {
                 slapi_log_err(SLAPI_LOG_WARNING, repl_plugin_name, "replica_check_for_data_reload - "
                                                                    "Disorderly shutdown for replica %s. Check if DB RUV needs to be updated\n",
                               slapi_sdn_get_dn(r->repl_root));
@@ -1701,7 +1701,7 @@ replica_check_for_data_reload(Replica *r, void *arg __attribute__((unused)))
                                   slapi_sdn_get_dn(r->repl_root));
                     rc = 0;
                 }
-            } /* slapi_disordely_shutdown */
+            } /* slapi_disorderly_shutdown */
 
             object_release(ruv_obj);
         } else /* we have no changes currently logged for this replica */
diff --git a/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c b/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c
index 10f6d401e..2103dac38 100644
--- a/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c
+++ b/ldap/servers/slapd/back-ldbm/db-bdb/bdb_layer.c
@@ -15,6 +15,8 @@
 #include <prclist.h>
 #include <sys/types.h>
 #include <sys/statvfs.h>
+#include <glob.h>
+
 
 #define DB_OPEN(oflags, db, txnid, file, database, type, flags, mode, rval)                                     \
     {                                                                                                           \
@@ -990,10 +992,9 @@ bdb_start(struct ldbminfo *li, int dbmode)
     return_value = dblayer_grok_directory(region_dir,
                                           DBLAYER_DIRECTORY_READWRITE_ACCESS);
     if (0 != return_value) {
-        slapi_log_err(SLAPI_LOG_CRIT, "bdb_start", "Can't start because the database "
-                                                       "directory \"%s\" either doesn't exist, or is not "
-                                                       "accessible\n",
-                      region_dir);
+        slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                "Can't start because the database directory \"%s\" either doesn't exist, or is not accessible\n",
+                region_dir);
         return return_value;
     }
 
@@ -1003,10 +1004,9 @@ bdb_start(struct ldbminfo *li, int dbmode)
         return_value = dblayer_grok_directory(log_dir,
                                               DBLAYER_DIRECTORY_READWRITE_ACCESS);
         if (0 != return_value) {
-            slapi_log_err(SLAPI_LOG_CRIT, "bdb_start", "Can't start because the log "
-                                                           "directory \"%s\" either doesn't exist, or is not "
-                                                           "accessible\n",
-                          log_dir);
+            slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                    "Can't start because the log directory \"%s\" either doesn't exist, or is not accessible\n",
+                    log_dir);
             return return_value;
         }
     }
@@ -1057,15 +1057,27 @@ bdb_start(struct ldbminfo *li, int dbmode)
         if (conf->bdb_recovery_required) {
             open_flags |= DB_RECOVER;
             if (DBLAYER_RESTORE_MODE & dbmode) {
-                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "Recovering database after restore "
-                                                                 "from archive.\n");
+                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start",
+                        "Recovering database after restore from archive.\n");
             } else if (DBLAYER_CLEAN_RECOVER_MODE & dbmode) {
-                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "Clean up db environment and start "
-                                                                 "from archive.\n");
+                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start",
+                        "Clean up db environment and start from archive.\n");
             } else {
-                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "Detected Disorderly Shutdown last "
-                                                                 "time Directory Server was running, recovering database.\n");
-                slapi_disordely_shutdown(PR_TRUE);
+                glob_t globbuf;
+                char file_pattern[MAXPATHLEN];
+
+                slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start",
+                        "Detected Disorderly Shutdown last time Directory Server was running, recovering database.\n");
+                slapi_disorderly_shutdown(PR_TRUE);
+                /* Best effort: wipe out the region files to help ensure a clean start.  glob flags stay 0:
+                 * GLOB_DOOFFS would read the unset gl_offs and put NULL slots at the head of gl_pathv. */
+                PR_snprintf(file_pattern, MAXPATHLEN, "%s/%s", region_dir, "__db.*");
+                if (glob(file_pattern, 0, NULL, &globbuf) == 0) {
+                    for (size_t i = 0; i < globbuf.gl_pathc; i++) {
+                        remove(globbuf.gl_pathv[i]);
+                    }
+                    globfree(&globbuf);
+                }
             }
         }
         switch (dbmode & DBLAYER_RESTORE_MASK) {
@@ -1121,7 +1133,7 @@ bdb_start(struct ldbminfo *li, int dbmode)
              */
             if (conf->bdb_lock_config <= BDB_LOCK_NB_MIN) {
                 slapi_log_err(SLAPI_LOG_NOTICE, "bdb_start", "New max db lock count is too small.  "
-                                                                 "Resetting it to the default value %d.\n",
+                              "Resetting it to the default value %d.\n",
                               BDB_LOCK_NB_MIN);
                 conf->bdb_lock_config = BDB_LOCK_NB_MIN;
             }
@@ -1165,29 +1177,26 @@ bdb_start(struct ldbminfo *li, int dbmode)
     if ((open_flags & DB_RECOVER) || (open_flags & DB_RECOVER_FATAL)) {
         /* Recover, then close, then open again */
         int recover_flags = open_flags & ~DB_THREAD;
-
         if (DBLAYER_CLEAN_RECOVER_MODE & dbmode) /* upgrade case */
         {
             DB_ENV *thisenv = pEnv->bdb_DB_ENV;
             return_value = thisenv->remove(thisenv, region_dir, DB_FORCE);
             if (0 != return_value) {
-                slapi_log_err(SLAPI_LOG_CRIT,
-                              "bdb_start", "Failed to remove old db env "
-                                               "in %s: %s\n",
-                              region_dir,
-                              dblayer_strerror(return_value));
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                        "Failed to remove old db env in %s: %s\n",
+                        region_dir, dblayer_strerror(return_value));
                 return return_value;
             }
             dbmode = DBLAYER_NORMAL_MODE;
 
             if ((return_value = bdb_make_env(&pEnv, li)) != 0) {
-                slapi_log_err(SLAPI_LOG_CRIT,
-                              "bdb_start", "Failed to create DBENV (returned: %d).\n",
-                              return_value);
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                        "Failed to create DBENV (returned: %d).\n", return_value);
                 return return_value;
             }
         }
 
+
         return_value = (pEnv->bdb_DB_ENV->open)(
             pEnv->bdb_DB_ENV,
             region_dir,
@@ -1201,27 +1210,25 @@ bdb_start(struct ldbminfo *li, int dbmode)
                  */
                 slapi_log_err(SLAPI_LOG_CRIT,
                               "bdb_start", "mmap in opening database environment (recovery mode) "
-                                               "failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
+                              "failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
                               li->li_dbcachesize, return_value, dblayer_strerror(return_value));
                 bdb_free_env(&priv->dblayer_env);
                 priv->dblayer_env = CATASTROPHIC;
             } else {
                 slapi_log_err(SLAPI_LOG_CRIT, "bdb_start", "Database Recovery Process FAILED. "
-                                                               "The database is not recoverable. err=%d: %s\n",
+                              "The database is not recoverable. err=%d: %s\n",
                               return_value, dblayer_strerror(return_value));
-                slapi_log_err(SLAPI_LOG_CRIT,
-                              "bdb_start", "Please make sure there is enough disk space for "
-                                               "dbcache (%" PRIu64 " bytes) and db region files\n",
-                              li->li_dbcachesize);
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                        "Please make sure there is enough disk space for dbcache (%" PRIu64 " bytes) and db region files\n",
+                        li->li_dbcachesize);
             }
             return return_value;
         } else {
             open_flags &= ~(DB_RECOVER | DB_RECOVER_FATAL);
             pEnv->bdb_DB_ENV->close(pEnv->bdb_DB_ENV, 0);
             if ((return_value = bdb_make_env(&pEnv, li)) != 0) {
-                slapi_log_err(SLAPI_LOG_CRIT,
-                              "bdb_start", "Failed to create DBENV (returned: %d).\n",
-                              return_value);
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                        "Failed to create DBENV (returned: %d).\n", return_value);
                 return return_value;
             }
             bdb_free_env(&priv->dblayer_env);
@@ -1288,16 +1295,15 @@ bdb_start(struct ldbminfo *li, int dbmode)
                  * https://blackflag.mcom.com/show_bug.cgi?id=557319
                  * Crash ns-slapd while running scalab01 after restart slapd
                  */
-                slapi_log_err(SLAPI_LOG_CRIT,
-                              "bdb_start", "mmap in opening database environment "
-                                               "failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
-                              li->li_dbcachesize, return_value, dblayer_strerror(return_value));
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                        "mmap in opening database environment failed trying to allocate %" PRIu64 " bytes. (OS err %d - %s)\n",
+                        li->li_dbcachesize, return_value, dblayer_strerror(return_value));
                 bdb_free_env(&priv->dblayer_env);
                 priv->dblayer_env = CATASTROPHIC;
             } else {
-                slapi_log_err(SLAPI_LOG_CRIT,
-                              "bdb_start", "Opening database environment (%s) failed. err=%d: %s\n",
-                              region_dir, return_value, dblayer_strerror(return_value));
+                slapi_log_err(SLAPI_LOG_CRIT, "bdb_start",
+                        "Opening database environment (%s) failed. err=%d: %s\n",
+                        region_dir, return_value, dblayer_strerror(return_value));
             }
         }
         return return_value;
diff --git a/ldap/servers/slapd/plugin.c b/ldap/servers/slapd/plugin.c
index b00c1bd8f..282b98738 100644
--- a/ldap/servers/slapd/plugin.c
+++ b/ldap/servers/slapd/plugin.c
@@ -4383,14 +4383,14 @@ slapi_set_plugin_open_rootdn_bind(Slapi_PBlock *pb)
 }
 
 PRBool
-slapi_disordely_shutdown(PRBool set)
+slapi_disorderly_shutdown(PRBool set)
 {
-    static PRBool is_disordely_shutdown = PR_FALSE;
+    static PRBool is_disorderly_shutdown = PR_FALSE;
 
     if (set) {
-        is_disordely_shutdown = PR_TRUE;
+        is_disorderly_shutdown = PR_TRUE;
     }
-    return (is_disordely_shutdown);
+    return (is_disorderly_shutdown);
 }
 
 /*
diff --git a/ldap/servers/slapd/slapi-plugin.h b/ldap/servers/slapd/slapi-plugin.h
index 29a6238d9..50b8d12c8 100644
--- a/ldap/servers/slapd/slapi-plugin.h
+++ b/ldap/servers/slapd/slapi-plugin.h
@@ -7900,7 +7900,7 @@ uint64_t slapi_str_to_u64(const char *s);
 
 void slapi_set_plugin_open_rootdn_bind(Slapi_PBlock *pb);
 
-PRBool slapi_disordely_shutdown(PRBool set);
+PRBool slapi_disorderly_shutdown(PRBool set);
 
 /*
  * Public entry extension getter/setter functions
-- 
2.21.1