923a60
From 7204e7f9ea3067bda7e5658a06e91b67c736f8ab Mon Sep 17 00:00:00 2001
923a60
From: Lennart Poettering <lennart@poettering.net>
923a60
Date: Mon, 12 Feb 2018 16:14:58 +0100
923a60
Subject: [PATCH] sd-journal: properly handle inotify queue overflow
923a60
923a60
This adds proper handling of IN_Q_OVERFLOW: when the inotify queue runs
923a60
over we'll reiterate all directories we are looking at. At the same time
923a60
we'll mark all files and directories we encounter that way with a
923a60
generation counter we first increased. All files and directories not
923a60
marked like this are then unloaded.
923a60
923a60
With this logic we do the best we can when the inotify queue overflows: we
923a60
synchronize our in-memory state again with what's on disk.  This
923a60
contains some refactoring of the directory logic, to share more code
923a60
between uuid directories and "root" directories and generally make
923a60
things a bit more readable by splitting things up into smaller bits.
923a60
923a60
See: #7998 #8032
923a60
923a60
(cherry-picked from commit 858749f7312bd0adb5433075a92e1c35a2fb56ac)
923a60
923a60
Resolves: #1540538
923a60
---
923a60
 src/journal/journal-file.h     |   2 +
923a60
 src/journal/journal-internal.h |   2 +
923a60
 src/journal/sd-journal.c       | 237 ++++++++++++++++++++++++++-------
923a60
 src/shared/path-util.c         |  14 ++
923a60
 src/shared/path-util.h         |   2 +
923a60
 5 files changed, 206 insertions(+), 51 deletions(-)
923a60
923a60
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
923a60
index c74ad5fc58..dd8ef52d2a 100644
923a60
--- a/src/journal/journal-file.h
923a60
+++ b/src/journal/journal-file.h
923a60
@@ -121,6 +121,8 @@ typedef struct JournalFile {
923a60
 
923a60
         void *fsprg_seed;
923a60
         size_t fsprg_seed_size;
923a60
+
923a60
+        unsigned last_seen_generation;
923a60
 #endif
923a60
 } JournalFile;
923a60
 
923a60
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
923a60
index eb23ac28ad..999e9d8cb6 100644
923a60
--- a/src/journal/journal-internal.h
923a60
+++ b/src/journal/journal-internal.h
923a60
@@ -81,6 +81,7 @@ struct Directory {
923a60
         char *path;
923a60
         int wd;
923a60
         bool is_root;
923a60
+        unsigned last_seen_generation;
923a60
 };
923a60
 
923a60
 struct sd_journal {
923a60
@@ -102,6 +103,7 @@ struct sd_journal {
923a60
         int inotify_fd;
923a60
         unsigned current_invalidate_counter, last_invalidate_counter;
923a60
         usec_t last_process_usec;
923a60
+        unsigned generation;
923a60
 
923a60
         char *unique_field;
923a60
         JournalFile *unique_file;
923a60
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
923a60
index 14b65cfedd..9186f5188e 100644
923a60
--- a/src/journal/sd-journal.c
923a60
+++ b/src/journal/sd-journal.c
923a60
@@ -1229,8 +1229,16 @@ static int add_any_file(sd_journal *j, const char *path) {
923a60
         assert(j);
923a60
         assert(path);
923a60
 
923a60
-        if (ordered_hashmap_get(j->files, path))
923a60
-                return 0;
923a60
+        if (path) {
923a60
+                f = ordered_hashmap_get(j->files, path);
923a60
+                if (f) {
923a60
+                        /* Mark this file as seen in this generation. This is used to GC old files in
923a60
+                         * process_q_overflow() to detect journal files that are still there and discern them from those that
923a60
+                         * are gone. */
923a60
+                        f->last_seen_generation = j->generation;
923a60
+                        return 0;
923a60
+                }
923a60
+        }
923a60
 
923a60
         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
923a60
                 log_debug("Too many open journal files, not adding %s.", path);
923a60
@@ -1252,6 +1260,8 @@ static int add_any_file(sd_journal *j, const char *path) {
923a60
                 goto fail;
923a60
         }
923a60
 
923a60
+        f->last_seen_generation = j->generation;
923a60
+
923a60
         log_debug("File %s added.", f->path);
923a60
 
923a60
         check_network(j, f->fd);
923a60
@@ -1346,10 +1356,96 @@ static int dirname_is_machine_id(const char *fn) {
923a60
         return sd_id128_equal(id, machine);
923a60
 }
923a60
 
923a60
+static bool dirent_is_journal_file(const struct dirent *de) {
923a60
+        assert(de);
923a60
+
923a60
+        if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
923a60
+                return false;
923a60
+
923a60
+        return endswith(de->d_name, ".journal") ||
923a60
+                endswith(de->d_name, ".journal~");
923a60
+}
923a60
+
923a60
+static bool dirent_is_id128_subdir(const struct dirent *de) {
923a60
+        assert(de);
923a60
+
923a60
+        if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
923a60
+                return false;
923a60
+
923a60
+        return id128_is_valid(de->d_name);
923a60
+}
923a60
+
923a60
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
923a60
+        DIR *d;
923a60
+
923a60
+        assert(j);
923a60
+        assert(path);
923a60
+        assert(ret);
923a60
+
923a60
+        d = opendir(path);
923a60
+        if (!d)
923a60
+                return -errno;
923a60
+
923a60
+        *ret = d;
923a60
+        return 0;
923a60
+}
923a60
+
923a60
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
923a60
+
923a60
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
923a60
+        struct dirent *de;
923a60
+
923a60
+        assert(j);
923a60
+        assert(m);
923a60
+        assert(d);
923a60
+
923a60
+        FOREACH_DIRENT_ALL(de, d, goto fail) {
923a60
+                if (dirent_is_journal_file(de))
923a60
+                        (void) add_file(j, m->path, de->d_name);
923a60
+
923a60
+                if (m->is_root && dirent_is_id128_subdir(de))
923a60
+                        (void) add_directory(j, m->path, de->d_name);
923a60
+        }
923a60
+
923a60
+        return;
923a60
+
923a60
+fail:
923a60
+        log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
923a60
+}
923a60
+
923a60
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
923a60
+        int r;
923a60
+
923a60
+        assert(j);
923a60
+        assert(m);
923a60
+        assert(fd >= 0);
923a60
+
923a60
+        /* Watch this directory if that's enabled and if it is not being watched yet. */
923a60
+
923a60
+        if (m->wd > 0) /* Already have a watch? */
923a60
+                return;
923a60
+        if (j->inotify_fd < 0) /* Not watching at all? */
923a60
+                return;
923a60
+
923a60
+        m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
923a60
+        if (m->wd < 0) {
923a60
+                log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
923a60
+                return;
923a60
+        }
923a60
+
923a60
+        r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
923a60
+        if (r == -EEXIST)
923a60
+                log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
923a60
+        if (r < 0) {
923a60
+                log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
923a60
+                (void) inotify_rm_watch(j->inotify_fd, m->wd);
923a60
+                m->wd = -1;
923a60
+        }
923a60
+}
923a60
+
923a60
 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
923a60
         _cleanup_free_ char *path = NULL;
923a60
         _cleanup_closedir_ DIR *d = NULL;
923a60
-        struct dirent *de = NULL;
923a60
         Directory *m;
923a60
         int r, k;
923a60
 
923a60
@@ -1357,7 +1453,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
923a60
         assert(prefix);
923a60
         assert(dirname);
923a60
 
923a60
-        log_debug("Considering %s/%s.", prefix, dirname);
923a60
+        log_debug("Considering '%s/%s'.", prefix, dirname);
923a60
 
923a60
         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
923a60
             !(dirname_is_machine_id(dirname) > 0 || path_startswith(prefix, "/run")))
923a60
@@ -1369,9 +1465,9 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
923a60
                 goto fail;
923a60
         }
923a60
 
923a60
-        d = opendir(path);
923a60
-        if (!d) {
923a60
-                r = log_debug_errno(errno, "Failed to open directory %s: %m", path);
923a60
+        r = directory_open(j, path, &d);
923a60
+        if (r < 0) {
923a60
+                r = log_debug_errno(errno, "Failed to open directory '%s': %m", path);
923a60
                 goto fail;
923a60
         }
923a60
 
923a60
@@ -1398,25 +1494,17 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
923a60
                 log_debug("Directory %s added.", m->path);
923a60
 
923a60
         } else if (m->is_root)
923a60
-                return 0;
923a60
-
923a60
-        if (m->wd <= 0 && j->inotify_fd >= 0) {
923a60
-
923a60
-                m->wd = inotify_add_watch(j->inotify_fd, m->path,
923a60
-                                          IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
923a60
-                                          IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
923a60
-                                          IN_ONLYDIR);
923a60
+                return 0; /* Don't 'downgrade' from root directory */
923a60
 
923a60
-                if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
923a60
-                        inotify_rm_watch(j->inotify_fd, m->wd);
923a60
-        }
923a60
+        m->last_seen_generation = j->generation;
923a60
 
923a60
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
923a60
+        directory_watch(j, m, dirfd(d),
923a60
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
923a60
+                        IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
923a60
+                        IN_ONLYDIR);
923a60
 
923a60
-                if (dirent_is_file_with_suffix(de, ".journal") ||
923a60
-                    dirent_is_file_with_suffix(de, ".journal~"))
923a60
-                        (void) add_file(j, m->path, de->d_name);
923a60
-        }
923a60
+        if (!j->no_new_files)
923a60
+                directory_enumerate(j, m, d);
923a60
 
923a60
         check_network(j, dirfd(d));
923a60
 
923a60
@@ -1432,13 +1520,14 @@ fail:
923a60
 
923a60
 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
923a60
         _cleanup_closedir_ DIR *d = NULL;
923a60
-        struct dirent *de;
923a60
         Directory *m;
923a60
         int r, k;
923a60
 
923a60
         assert(j);
923a60
         assert(p);
923a60
 
923a60
+        log_debug("Considering root directory '%s'.", p);
923a60
+
923a60
         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
923a60
             !path_startswith(p, "/run"))
923a60
                 return -EINVAL;
923a60
@@ -1446,12 +1535,11 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
923a60
         if (j->prefix)
923a60
                 p = strjoina(j->prefix, p);
923a60
 
923a60
-        d = opendir(p);
923a60
-        if (!d) {
923a60
-                if (errno == ENOENT && missing_ok)
923a60
-                        return 0;
923a60
-
923a60
-                r = log_debug_errno(errno, "Failed to open root directory %s: %m", p);
923a60
+        r = directory_open(j, p, &d);
923a60
+        if (r == -ENOENT && missing_ok)
923a60
+                return 0;
923a60
+        if (r < 0) {
923a60
+                log_debug_errno(r, "Failed to open root directory %s: %m", p);
923a60
                 goto fail;
923a60
         }
923a60
 
923a60
@@ -1495,19 +1583,12 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
923a60
                         inotify_rm_watch(j->inotify_fd, m->wd);
923a60
         }
923a60
 
923a60
-        if (j->no_new_files)
923a60
-                return 0;
923a60
-
923a60
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
923a60
-                sd_id128_t id;
923a60
+        directory_watch(j, m, dirfd(d),
923a60
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
923a60
+                        IN_ONLYDIR);
923a60
 
923a60
-                if (dirent_is_file_with_suffix(de, ".journal") ||
923a60
-                    dirent_is_file_with_suffix(de, ".journal~"))
923a60
-                        (void) add_file(j, m->path, de->d_name);
923a60
-                else if (IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN) &&
923a60
-                         sd_id128_from_string(de->d_name, &id) >= 0)
923a60
-                        (void) add_directory(j, m->path, de->d_name);
923a60
-        }
923a60
+        if (!j->no_new_files)
923a60
+                directory_enumerate(j, m, d);
923a60
 
923a60
         check_network(j, dirfd(d));
923a60
 
923a60
@@ -2068,6 +2149,18 @@ _public_ void sd_journal_restart_data(sd_journal *j) {
923a60
         j->current_field = 0;
923a60
 }
923a60
 
923a60
+static int reiterate_all_paths(sd_journal *j) {
923a60
+        assert(j);
923a60
+
923a60
+        if (j->no_new_files)
923a60
+                return add_current_paths(j);
923a60
+
923a60
+        if (j->path)
923a60
+                return add_root_directory(j, j->path, true);
923a60
+
923a60
+        return add_search_paths(j);
923a60
+}
923a60
+
923a60
 _public_ int sd_journal_get_fd(sd_journal *j) {
923a60
         int r;
923a60
 
923a60
@@ -2081,15 +2174,11 @@ _public_ int sd_journal_get_fd(sd_journal *j) {
923a60
         if (r < 0)
923a60
                 return r;
923a60
 
923a60
-        /* Iterate through all dirs again, to add them to the
923a60
-         * inotify */
923a60
-        if (j->no_new_files)
923a60
-                r = add_current_paths(j);
923a60
-        else if (j->path)
923a60
-                r = add_root_directory(j, j->path, true);
923a60
-        else
923a60
-                r = add_search_paths(j);
923a60
-        if (r < 0)
923a60
+         log_debug("Reiterating files to get inotify watches established.");
923a60
+
923a60
+        /* Iterate through all dirs again, to add them to the inotify */
923a60
+        r = reiterate_all_paths(j);
923a60
+         if (r < 0)
923a60
                 return r;
923a60
 
923a60
         return j->inotify_fd;
923a60
@@ -2131,12 +2220,58 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
923a60
         return 1;
923a60
 }
923a60
 
923a60
+static void process_q_overflow(sd_journal *j) {
923a60
+        JournalFile *f;
923a60
+        Directory *m;
923a60
+        Iterator i;
923a60
+
923a60
+        assert(j);
923a60
+
923a60
+        /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
923a60
+         * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
923a60
+         * journal files and directories we encounter. All journal files and all non-root directories that don't carry
923a60
+         * it after reenumeration are subject to unloading. */
923a60
+
923a60
+        log_debug("Inotify queue overrun, reiterating everything.");
923a60
+
923a60
+        j->generation++;
923a60
+        (void) reiterate_all_paths(j);
923a60
+
923a60
+        ORDERED_HASHMAP_FOREACH(f, j->files, i) {
923a60
+
923a60
+                if (f->last_seen_generation == j->generation)
923a60
+                        continue;
923a60
+
923a60
+                log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
923a60
+                remove_file_real(j, f);
923a60
+        }
923a60
+
923a60
+        HASHMAP_FOREACH(m, j->directories_by_path, i) {
923a60
+
923a60
+                if (m->last_seen_generation == j->generation)
923a60
+                        continue;
923a60
+
923a60
+                if (m->is_root) /* Never GC root directories */
923a60
+                        continue;
923a60
+
923a60
+                log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", m->path); /* m, not the file iterator f */
923a60
+                remove_directory(j, m);
923a60
+        }
923a60
+
923a60
+        log_debug("Reiteration complete.");
923a60
+}
923a60
+
923a60
 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
923a60
         Directory *d;
923a60
 
923a60
         assert(j);
923a60
         assert(e);
923a60
 
923a60
+        if (e->mask & IN_Q_OVERFLOW) {
923a60
+                process_q_overflow(j);
923a60
+                return;
923a60
+        }
923a60
+
923a60
         /* Is this a subdirectory we watch? */
923a60
         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
923a60
         if (d) {
923a60
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
923a60
index 5d4de9ec4d..fcc591686f 100644
923a60
--- a/src/shared/path-util.c
923a60
+++ b/src/shared/path-util.c
923a60
@@ -861,3 +861,17 @@ char *prefix_root(const char *root, const char *path) {
923a60
         strcpy(p, path);
923a60
         return n;
923a60
 }
923a60
+
923a60
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
923a60
+        char path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
923a60
+        int r;
923a60
+
923a60
+        /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
923a60
+        xsprintf(path, "/proc/self/fd/%i", what);
923a60
+
923a60
+        r = inotify_add_watch(fd, path, mask);
923a60
+        if (r < 0)
923a60
+                return -errno;
923a60
+
923a60
+        return r;
923a60
+}
923a60
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
923a60
index 34c016229c..96490e12b1 100644
923a60
--- a/src/shared/path-util.h
923a60
+++ b/src/shared/path-util.h
923a60
@@ -66,6 +66,8 @@ int fsck_exists(const char *fstype);
923a60
 
923a60
 char *prefix_root(const char *root, const char *path);
923a60
 
923a60
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
923a60
+
923a60
 /* Similar to prefix_root(), but returns an alloca() buffer, or
923a60
  * possibly a const pointer into the path parameter */
923a60
 #define prefix_roota(root, path)                                        \