dd65c9
From 7204e7f9ea3067bda7e5658a06e91b67c736f8ab Mon Sep 17 00:00:00 2001
de541a
From: Lennart Poettering <lennart@poettering.net>
de541a
Date: Mon, 12 Feb 2018 16:14:58 +0100
de541a
Subject: [PATCH] sd-journal: properly handle inotify queue overflow
de541a
de541a
This adds proper handling of IN_Q_OVERFLOW: when the inotify queue runs
de541a
over we'll reiterate all directories we are looking at. At the same time
de541a
we'll mark all files and directories we encounter that way with a
de541a
generation counter we first increased. All files and directories not
de541a
marked like this are then unloaded.
de541a
de541a
With this logic we do the best when the inotify queue overflows: we
de541a
synchronize our in-memory state again with what's on disk.  This
de541a
contains some refactoring of the directory logic, to share more code
de541a
between uuid directories and "root" directories and generally make
de541a
things a bit more readable by splitting things up into smaller bits.
de541a
de541a
See: #7998 #8032
de541a
de541a
(cherry-picked from commit 858749f7312bd0adb5433075a92e1c35a2fb56ac)
de541a
de541a
Resolves: #1540538
de541a
---
de541a
 src/journal/journal-file.h     |   2 +
de541a
 src/journal/journal-internal.h |   2 +
23b3cf
 src/journal/sd-journal.c       | 237 ++++++++++++++++++++++++++-------
23b3cf
 src/shared/path-util.c         |  14 ++
de541a
 src/shared/path-util.h         |   2 +
de541a
 5 files changed, 206 insertions(+), 51 deletions(-)
de541a
de541a
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
c62b8e
index c74ad5fc58..dd8ef52d2a 100644
de541a
--- a/src/journal/journal-file.h
de541a
+++ b/src/journal/journal-file.h
de541a
@@ -121,6 +121,8 @@ typedef struct JournalFile {
de541a
 
de541a
         void *fsprg_seed;
de541a
         size_t fsprg_seed_size;
de541a
+
de541a
+        unsigned last_seen_generation;
de541a
 #endif
de541a
 } JournalFile;
de541a
 
de541a
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
c62b8e
index eb23ac28ad..999e9d8cb6 100644
de541a
--- a/src/journal/journal-internal.h
de541a
+++ b/src/journal/journal-internal.h
de541a
@@ -81,6 +81,7 @@ struct Directory {
de541a
         char *path;
de541a
         int wd;
de541a
         bool is_root;
de541a
+        unsigned last_seen_generation;
de541a
 };
de541a
 
de541a
 struct sd_journal {
de541a
@@ -102,6 +103,7 @@ struct sd_journal {
de541a
         int inotify_fd;
de541a
         unsigned current_invalidate_counter, last_invalidate_counter;
de541a
         usec_t last_process_usec;
de541a
+        unsigned generation;
de541a
 
de541a
         char *unique_field;
de541a
         JournalFile *unique_file;
de541a
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
c62b8e
index 14b65cfedd..9186f5188e 100644
de541a
--- a/src/journal/sd-journal.c
de541a
+++ b/src/journal/sd-journal.c
de541a
@@ -1229,8 +1229,16 @@ static int add_any_file(sd_journal *j, const char *path) {
de541a
         assert(j);
de541a
         assert(path);
de541a
 
de541a
-        if (ordered_hashmap_get(j->files, path))
de541a
-                return 0;
de541a
+        if (path) {
de541a
+                f = ordered_hashmap_get(j->files, path);
de541a
+                if (f) {
de541a
+                        /* Mark this file as seen in this generation. This is used to GC old files in
de541a
+                         * process_q_overflow() to detect journal files that are still and discern them from those who
de541a
+                         * are gone. */
de541a
+                        f->last_seen_generation = j->generation;
de541a
+                        return 0;
de541a
+                }
de541a
+        }
de541a
 
de541a
         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
de541a
                 log_debug("Too many open journal files, not adding %s.", path);
de541a
@@ -1252,6 +1260,8 @@ static int add_any_file(sd_journal *j, const char *path) {
de541a
                 goto fail;
de541a
         }
de541a
 
de541a
+        f->last_seen_generation = j->generation;
de541a
+
de541a
         log_debug("File %s added.", f->path);
de541a
 
de541a
         check_network(j, f->fd);
de541a
@@ -1346,10 +1356,96 @@ static int dirname_is_machine_id(const char *fn) {
de541a
         return sd_id128_equal(id, machine);
de541a
 }
de541a
 
de541a
+static bool dirent_is_journal_file(const struct dirent *de) {
de541a
+        assert(de);
de541a
+
de541a
+        if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
de541a
+                return false;
de541a
+
de541a
+        return endswith(de->d_name, ".journal") ||
de541a
+                endswith(de->d_name, ".journal~");
de541a
+}
de541a
+
de541a
+static bool dirent_is_id128_subdir(const struct dirent *de) {
de541a
+        assert(de);
de541a
+
de541a
+        if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
de541a
+                return false;
de541a
+
de541a
+        return id128_is_valid(de->d_name);
de541a
+}
de541a
+
de541a
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
de541a
+        DIR *d;
de541a
+
de541a
+        assert(j);
de541a
+        assert(path);
de541a
+        assert(ret);
de541a
+
de541a
+        d = opendir(path);
de541a
+        if (!d)
de541a
+                return -errno;
de541a
+
de541a
+        *ret = d;
de541a
+        return 0;
de541a
+}
de541a
+
de541a
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
de541a
+
de541a
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
de541a
+        struct dirent *de;
de541a
+
de541a
+        assert(j);
de541a
+        assert(m);
de541a
+        assert(d);
de541a
+
de541a
+        FOREACH_DIRENT_ALL(de, d, goto fail) {
de541a
+                if (dirent_is_journal_file(de))
de541a
+                        (void) add_file(j, m->path, de->d_name);
de541a
+
de541a
+                if (m->is_root && dirent_is_id128_subdir(de))
de541a
+                        (void) add_directory(j, m->path, de->d_name);
de541a
+        }
de541a
+
de541a
+        return;
de541a
+
de541a
+fail:
de541a
+        log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
de541a
+}
de541a
+
de541a
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
de541a
+        int r;
de541a
+
de541a
+        assert(j);
de541a
+        assert(m);
de541a
+        assert(fd >= 0);
de541a
+
de541a
+        /* Watch this directory if that's enabled and if it not being watched yet. */
de541a
+
de541a
+        if (m->wd > 0) /* Already have a watch? */
de541a
+                return;
de541a
+        if (j->inotify_fd < 0) /* Not watching at all? */
de541a
+                return;
de541a
+
de541a
+        m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
de541a
+        if (m->wd < 0) {
de541a
+                log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
de541a
+                return;
de541a
+        }
de541a
+
de541a
+        r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
de541a
+        if (r == -EEXIST)
de541a
+                log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
de541a
+        if (r < 0) {
de541a
+                log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
de541a
+                (void) inotify_rm_watch(j->inotify_fd, m->wd);
de541a
+                m->wd = -1;
de541a
+        }
de541a
+}
de541a
+
de541a
 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
de541a
         _cleanup_free_ char *path = NULL;
de541a
         _cleanup_closedir_ DIR *d = NULL;
de541a
-        struct dirent *de = NULL;
de541a
         Directory *m;
de541a
         int r, k;
de541a
 
de541a
@@ -1357,7 +1453,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
de541a
         assert(prefix);
de541a
         assert(dirname);
de541a
 
de541a
-        log_debug("Considering %s/%s.", prefix, dirname);
de541a
+        log_debug("Considering '%s/%s'.", prefix, dirname);
de541a
 
de541a
         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
de541a
             !(dirname_is_machine_id(dirname) > 0 || path_startswith(prefix, "/run")))
de541a
@@ -1369,9 +1465,9 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
de541a
                 goto fail;
de541a
         }
de541a
 
de541a
-        d = opendir(path);
de541a
-        if (!d) {
de541a
-                r = log_debug_errno(errno, "Failed to open directory %s: %m", path);
de541a
+        r = directory_open(j, path, &d);
de541a
+        if (r < 0) {
de541a
+                r = log_debug_errno(errno, "Failed to open directory '%s': %m", path);
de541a
                 goto fail;
de541a
         }
de541a
 
de541a
@@ -1398,25 +1494,17 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
de541a
                 log_debug("Directory %s added.", m->path);
de541a
 
de541a
         } else if (m->is_root)
de541a
-                return 0;
de541a
-
de541a
-        if (m->wd <= 0 && j->inotify_fd >= 0) {
de541a
-
de541a
-                m->wd = inotify_add_watch(j->inotify_fd, m->path,
de541a
-                                          IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
de541a
-                                          IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
de541a
-                                          IN_ONLYDIR);
de541a
+                return 0; /* Don't 'downgrade' from root directory */
de541a
 
de541a
-                if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
de541a
-                        inotify_rm_watch(j->inotify_fd, m->wd);
de541a
-        }
de541a
+        m->last_seen_generation = j->generation;
de541a
 
de541a
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
de541a
+        directory_watch(j, m, dirfd(d),
de541a
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
de541a
+                        IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
de541a
+                        IN_ONLYDIR);
de541a
 
de541a
-                if (dirent_is_file_with_suffix(de, ".journal") ||
de541a
-                    dirent_is_file_with_suffix(de, ".journal~"))
de541a
-                        (void) add_file(j, m->path, de->d_name);
de541a
-        }
de541a
+        if (!j->no_new_files)
de541a
+                directory_enumerate(j, m, d);
de541a
 
de541a
         check_network(j, dirfd(d));
de541a
 
de541a
@@ -1432,13 +1520,14 @@ fail:
de541a
 
de541a
 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
de541a
         _cleanup_closedir_ DIR *d = NULL;
de541a
-        struct dirent *de;
de541a
         Directory *m;
de541a
         int r, k;
de541a
 
de541a
         assert(j);
de541a
         assert(p);
de541a
 
de541a
+        log_debug("Considering root directory '%s'.", p);
de541a
+
de541a
         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
de541a
             !path_startswith(p, "/run"))
de541a
                 return -EINVAL;
de541a
@@ -1446,12 +1535,11 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
de541a
         if (j->prefix)
de541a
                 p = strjoina(j->prefix, p);
de541a
 
de541a
-        d = opendir(p);
de541a
-        if (!d) {
de541a
-                if (errno == ENOENT && missing_ok)
de541a
-                        return 0;
de541a
-
de541a
-                r = log_debug_errno(errno, "Failed to open root directory %s: %m", p);
de541a
+        r = directory_open(j, p, &d);
de541a
+        if (r == -ENOENT && missing_ok)
de541a
+                return 0;
de541a
+        if (r < 0) {
de541a
+                log_debug_errno(r, "Failed to open root directory %s: %m", p);
de541a
                 goto fail;
de541a
         }
de541a
 
de541a
@@ -1495,19 +1583,12 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
de541a
                         inotify_rm_watch(j->inotify_fd, m->wd);
de541a
         }
de541a
 
de541a
-        if (j->no_new_files)
de541a
-                return 0;
de541a
-
de541a
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
de541a
-                sd_id128_t id;
de541a
+        directory_watch(j, m, dirfd(d),
de541a
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
de541a
+                        IN_ONLYDIR);
de541a
 
de541a
-                if (dirent_is_file_with_suffix(de, ".journal") ||
de541a
-                    dirent_is_file_with_suffix(de, ".journal~"))
de541a
-                        (void) add_file(j, m->path, de->d_name);
de541a
-                else if (IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN) &&
de541a
-                         sd_id128_from_string(de->d_name, &id) >= 0)
de541a
-                        (void) add_directory(j, m->path, de->d_name);
de541a
-        }
de541a
+        if (!j->no_new_files)
de541a
+                directory_enumerate(j, m, d);
de541a
 
de541a
         check_network(j, dirfd(d));
de541a
 
de541a
@@ -2068,6 +2149,18 @@ _public_ void sd_journal_restart_data(sd_journal *j) {
de541a
         j->current_field = 0;
de541a
 }
de541a
 
de541a
+static int reiterate_all_paths(sd_journal *j) {
de541a
+        assert(j);
de541a
+
de541a
+        if (j->no_new_files)
de541a
+                return add_current_paths(j);
de541a
+
de541a
+        if (j->path)
de541a
+                return add_root_directory(j, j->path, true);
de541a
+
de541a
+        return add_search_paths(j);
de541a
+}
de541a
+
de541a
 _public_ int sd_journal_get_fd(sd_journal *j) {
de541a
         int r;
de541a
 
de541a
@@ -2081,15 +2174,11 @@ _public_ int sd_journal_get_fd(sd_journal *j) {
de541a
         if (r < 0)
de541a
                 return r;
de541a
 
de541a
-        /* Iterate through all dirs again, to add them to the
de541a
-         * inotify */
de541a
-        if (j->no_new_files)
de541a
-                r = add_current_paths(j);
de541a
-        else if (j->path)
de541a
-                r = add_root_directory(j, j->path, true);
de541a
-        else
de541a
-                r = add_search_paths(j);
de541a
-        if (r < 0)
de541a
+         log_debug("Reiterating files to get inotify watches established.");
de541a
+
de541a
+        /* Iterate through all dirs again, to add them to the inotify */
de541a
+        r = reiterate_all_paths(j);
de541a
+         if (r < 0)
de541a
                 return r;
de541a
 
de541a
         return j->inotify_fd;
de541a
@@ -2131,12 +2220,58 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
de541a
         return 1;
de541a
 }
de541a
 
de541a
+static void process_q_overflow(sd_journal *j) {
de541a
+        JournalFile *f;
de541a
+        Directory *m;
de541a
+        Iterator i;
de541a
+
de541a
+        assert(j);
de541a
+
de541a
+        /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
de541a
+         * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
de541a
+         * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
de541a
+         * are subject for unloading. */
de541a
+
de541a
+        log_debug("Inotify queue overrun, reiterating everything.");
de541a
+
de541a
+        j->generation++;
de541a
+        (void) reiterate_all_paths(j);
de541a
+
de541a
+        ORDERED_HASHMAP_FOREACH(f, j->files, i) {
de541a
+
de541a
+                if (f->last_seen_generation == j->generation)
de541a
+                        continue;
de541a
+
de541a
+                log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
de541a
+                remove_file_real(j, f);
de541a
+        }
de541a
+
de541a
+        HASHMAP_FOREACH(m, j->directories_by_path, i) {
de541a
+
de541a
+                if (m->last_seen_generation == j->generation)
de541a
+                        continue;
de541a
+
de541a
+                if (m->is_root) /* Never GC root directories */
de541a
+                        continue;
de541a
+
de541a
+                log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
de541a
+                remove_directory(j, m);
de541a
+        }
de541a
+
de541a
+        log_debug("Reiteration complete.");
de541a
+}
de541a
+
de541a
 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
de541a
         Directory *d;
de541a
 
de541a
         assert(j);
de541a
         assert(e);
de541a
 
de541a
+        if (e->mask & IN_Q_OVERFLOW) {
de541a
+                process_q_overflow(j);
de541a
+                return;
de541a
+        }
de541a
+
de541a
         /* Is this a subdirectory we watch? */
de541a
         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
de541a
         if (d) {
de541a
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
c62b8e
index 5d4de9ec4d..fcc591686f 100644
de541a
--- a/src/shared/path-util.c
de541a
+++ b/src/shared/path-util.c
dd65c9
@@ -861,3 +861,17 @@ char *prefix_root(const char *root, const char *path) {
de541a
         strcpy(p, path);
de541a
         return n;
de541a
 }
de541a
+
de541a
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
de541a
+        char path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
de541a
+        int r;
de541a
+
de541a
+        /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
de541a
+        xsprintf(path, "/proc/self/fd/%i", what);
de541a
+
de541a
+        r = inotify_add_watch(fd, path, mask);
de541a
+        if (r < 0)
de541a
+                return -errno;
de541a
+
de541a
+        return r;
de541a
+}
de541a
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
c62b8e
index 34c016229c..96490e12b1 100644
de541a
--- a/src/shared/path-util.h
de541a
+++ b/src/shared/path-util.h
dd65c9
@@ -66,6 +66,8 @@ int fsck_exists(const char *fstype);
de541a
 
de541a
 char *prefix_root(const char *root, const char *path);
de541a
 
de541a
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
de541a
+
de541a
 /* Similar to prefix_root(), but returns an alloca() buffer, or
de541a
  * possibly a const pointer into the path parameter */
de541a
 #define prefix_roota(root, path)                                        \