dryang / rpms / systemd

Forked from rpms/systemd a year ago
Clone
Blob Blame History Raw
From 32244f8d21a3e06f6519c47234289da696f6b151 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Sun, 8 Oct 2017 09:05:59 +0200
Subject: [PATCH] journald: make maximum size of stream log lines configurable
 and bump it to 48K (#6838)

This adds a new setting LineMax= to journald.conf, and sets it by
default to 48K. When we convert stream-based stdout/stderr logging into
record-based log entries, read up to the specified amount of bytes
before forcing a line-break.

This also makes three related changes:

- When a NUL byte is read we'll not recognize this as alternative line
  break, instead of silently dropping everything after it. (see #4863)

- The reason for a line-break is now encoded in the log record, if it
  wasn't a plain newline. Specifically, we distuingish "nul",
  "line-max" and "eof", for line breaks due to NUL byte, due to the
  maximum line length as configured with LineMax= or due to end of
  stream. This data is stored in the new implicit _LINE_BREAK= field.
  It's not synthesized for plain \n line breaks.

- A randomized 128bit ID is assigned to each log stream.

With these three changes in place it's (mostly) possible to reconstruct
the original byte streams from log data, as (most) of the context of
the conversion from the byte stream to log records is saved now. (So,
the only bits we still drop are empty lines. Which might be something to
look into in a future change, and which is outside of the scope of this
work)

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=86465
See: #4863
Replaces: #4875

(cherry picked from commit ec20fe5ffb8a00469bab209fff6c069bb93c6db2)

Resolves: #1442262

[msekleta: I had to manually rewrite upstream commit, because git
did very poor job merging old and new code and identified a lot of merge
conflicts in a code that was totally unrelated.]
---
 man/journald.conf.xml            |  18 +++++
 man/systemd.journal-fields.xml   |  22 ++++++
 src/journal/journald-gperf.gperf |   1 +
 src/journal/journald-server.c    |  68 ++++++++++++++++++
 src/journal/journald-server.h    |   3 +
 src/journal/journald-stream.c    | 116 +++++++++++++++++++++++++------
 src/journal/journald.conf        |   1 +
 7 files changed, 209 insertions(+), 20 deletions(-)

diff --git a/man/journald.conf.xml b/man/journald.conf.xml
index 46a498b673..e2d6a12252 100644
--- a/man/journald.conf.xml
+++ b/man/journald.conf.xml
@@ -354,6 +354,24 @@
         <filename>/dev/console</filename>.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>LineMax=</varname></term>
+
+        <listitem><para>The maximum line length to permit when converting stream logs into record logs. When a systemd
+        unit's standard output/error are connected to the journal via a stream socket, the data read is split into
+        individual log records at newline (<literal>\n</literal>, ASCII 10) and NUL characters. If no such delimiter is
+        read for the specified number of bytes a hard log record boundary is artifically inserted, breaking up overly
+        long lines into multiple log records. Selecting overly large values increases the possible memory usage of the
+        Journal daemon for each stream client, as in the worst case the journal daemon needs to buffer the specified
+        number of bytes in memory before it can flush a new log record to disk. Also note that permitting overly large
+        line maximum line lengths affects compatibility with traditional log protocols as log records might not fit
+        anymore into a single <constant>AF_UNIX</constant> or <constant>AF_INET</constant> datagram. Takes a size in
+        bytes. If the value is suffixed with K, M, G or T, the specified size is parsed as Kilobytes, Megabytes,
+        Gigabytes, or Terabytes (with the base 1024), respectively. Defaults to 48K, which is relatively large but
+        still small enough so that log records likely fit into network datagrams along with extra room for
+        metadata. Note that values below 79 are not accepted and will be bumped to 79.</para></listitem>
+      </varlistentry>
+
     </variablelist>
 
   </refsect1>
diff --git a/man/systemd.journal-fields.xml b/man/systemd.journal-fields.xml
index 7d6c5c715f..a53f8def2d 100644
--- a/man/systemd.journal-fields.xml
+++ b/man/systemd.journal-fields.xml
@@ -311,6 +311,28 @@
           </variablelist>
         </listitem>
       </varlistentry>
+      <varlistentry>
+        <term><varname>_STREAM_ID=</varname></term>
+        <listitem>
+          <para>Only applies to <literal>_TRANSPORT=stream</literal> records: specifies a randomized 128bit ID assigned
+          to the stream connection when it was first created. This ID is useful to reconstruct individual log streams
+          from the log records: all log records carrying the same stream ID originate from the same stream.</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><varname>_LINE_BREAK=</varname></term>
+        <listitem>
+          <para>Only applies to <literal>_TRANSPORT=stream</literal> records: indicates that the log message in the
+          standard output/error stream was not terminated with a normal newline character (<literal>\n</literal>,
+          i.e. ASCII 10). Specifically, when set this field is one of <option>nul</option> (in case the line was
+          terminated by a NUL byte), <option>line-max</option> (in case the maximum log line length was reached, as
+          configured with <varname>LineMax=</varname> in
+          <citerefentry><refentrytitle>journald.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>) or
+          <option>eof</option> (if this was the last log record of a stream and the stream ended without a final
+          newline character). Note that this record is not generated when a normal newline character was used for
+          marking the log line end.</para>
+        </listitem>
+      </varlistentry>
     </variablelist>
   </refsect1>
 
diff --git a/src/journal/journald-gperf.gperf b/src/journal/journald-gperf.gperf
index 74554c1c34..73327c10e9 100644
--- a/src/journal/journald-gperf.gperf
+++ b/src/journal/journald-gperf.gperf
@@ -40,3 +40,4 @@ Journal.MaxLevelKMsg,       config_parse_log_level,  0, offsetof(Server, max_lev
 Journal.MaxLevelConsole,    config_parse_log_level,  0, offsetof(Server, max_level_console)
 Journal.MaxLevelWall,       config_parse_log_level,  0, offsetof(Server, max_level_wall)
 Journal.SplitMode,          config_parse_split_mode, 0, offsetof(Server, split_mode)
+Journal.LineMax,            config_parse_line_max,   0, offsetof(Server, line_max)
\ No newline at end of file
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index f6f8c30eb2..daeecd5191 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -67,6 +67,10 @@
 
 #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
 
+/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
++ * for a bit of additional metadata. */
+#define DEFAULT_LINE_MAX (48*1024)
+
 static const char* const storage_table[_STORAGE_MAX] = {
         [STORAGE_AUTO] = "auto",
         [STORAGE_VOLATILE] = "volatile",
@@ -83,9 +87,71 @@ static const char* const split_mode_table[_SPLIT_MAX] = {
         [SPLIT_NONE] = "none",
 };
 
+
 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
 
+int config_parse_line_max(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        size_t *sz = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (isempty(rvalue))
+                /* Empty assignment means default */
+                *sz = DEFAULT_LINE_MAX;
+        else {
+                uint64_t v;
+                off_t u;
+
+                r = parse_size(rvalue, 1024, &u);
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
+                        return 0;
+                }
+
+                /* Backport note */
+                /* Upstream ditched use of off_t however our parse_size implementation still takes off_t*
+                 * as an argument. Since we compile with -Werror, we have two choices, either disable sign-compare
+                 * warning or do this casting so we don't have to change rest of the code. I think it is
+                 * better to do cast here instead of rewriting the code so it deals with off_t instead of
+                 * uint64_t. Doing conversion off_t -> uint64_t is something that we should think about. */
+                v = (uint64_t) u;
+
+                if (v < 79) {
+                        /* Why specify 79 here as minimum line length? Simply, because the most common traditional
+                         * terminal size is 80ch, and it might make sense to break one character before the natural
+                         * line break would occur on that. */
+                        log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
+                        *sz = 79;
+                } else if (v > (uint64_t) (SSIZE_MAX-1)) {
+                        /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
+                         * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
+                         * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
+                         * fail much earlier anyway. */
+                        log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
+                        *sz = SSIZE_MAX-1;
+                } else
+                        *sz = (size_t) v;
+        }
+
+        return 0;
+}
+
 static uint64_t available_space(Server *s, bool verbose) {
         char ids[33];
         _cleanup_free_ char *p = NULL;
@@ -1518,6 +1584,8 @@ int server_init(Server *s) {
         s->max_level_console = LOG_INFO;
         s->max_level_wall = LOG_EMERG;
 
+        s->line_max = DEFAULT_LINE_MAX;
+
         memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
         memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
 
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index 7a456c2d54..b294107788 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -143,6 +143,8 @@ typedef struct Server {
 
         /* Cached cgroup root, so that we don't have to query that all the time */
         char *cgroup_root;
+
+        size_t line_max;
 } Server;
 
 #define N_IOVEC_META_FIELDS 20
@@ -157,6 +159,7 @@ void server_driver_message(Server *s, sd_id128_t message_id, const char *format,
 const struct ConfigPerfItem* journald_gperf_lookup(const char *key, unsigned length);
 
 int config_parse_storage(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_line_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 
 const char *storage_to_string(Storage s) _const_;
 Storage storage_from_string(const char *s) _pure_;
diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c
index 15c9110c0f..4d6b7ad189 100644
--- a/src/journal/journald-stream.c
+++ b/src/journal/journald-stream.c
@@ -53,6 +53,16 @@ typedef enum StdoutStreamState {
         STDOUT_STREAM_RUNNING
 } StdoutStreamState;
 
+/* The different types of log record terminators: a real \n was read, a NUL character was read, the maximum line length
+ * was reached, or the end of the stream was reached */
+
+typedef enum LineBreak {
+        LINE_BREAK_NEWLINE,
+        LINE_BREAK_NUL,
+        LINE_BREAK_LINE_MAX,
+        LINE_BREAK_EOF,
+} LineBreak;
+
 struct StdoutStream {
         Server *server;
         StdoutStreamState state;
@@ -71,14 +81,17 @@ struct StdoutStream {
 
         bool fdstore:1;
 
-        char buffer[LINE_MAX+1];
+        char *buffer;
         size_t length;
+        size_t allocated;
 
         sd_event_source *event_source;
 
         char *state_file;
 
         LIST_FIELDS(StdoutStream, stdout_stream);
+
+        char id_field[sizeof("_STREAM_ID=")-1 + SD_ID128_STRING_MAX];
 };
 
 void stdout_stream_free(StdoutStream *s) {
@@ -101,6 +114,7 @@ void stdout_stream_free(StdoutStream *s) {
         free(s->identifier);
         free(s->unit_id);
         free(s->state_file);
+        free(s->buffer);
 
         free(s);
 }
@@ -151,12 +165,14 @@ static int stdout_stream_save(StdoutStream *s) {
                 "LEVEL_PREFIX=%i\n"
                 "FORWARD_TO_SYSLOG=%i\n"
                 "FORWARD_TO_KMSG=%i\n"
-                "FORWARD_TO_CONSOLE=%i\n",
+                "FORWARD_TO_CONSOLE=%i\n"
+                "STREAM_ID=%s\n",
                 s->priority,
                 s->level_prefix,
                 s->forward_to_syslog,
                 s->forward_to_kmsg,
-                s->forward_to_console);
+                s->forward_to_console,
+                s->id_field + strlen("_STREAM_ID="));
 
         if (!isempty(s->identifier)) {
                 _cleanup_free_ char *escaped;
@@ -211,8 +227,8 @@ finish:
         return r;
 }
 
-static int stdout_stream_log(StdoutStream *s, const char *p) {
-        struct iovec iovec[N_IOVEC_META_FIELDS + 5];
+static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_break) {
+        struct iovec iovec[N_IOVEC_META_FIELDS + 7];
         int priority;
         char syslog_priority[] = "PRIORITY=\0";
         char syslog_facility[sizeof("SYSLOG_FACILITY=")-1 + DECIMAL_STR_MAX(int) + 1];
@@ -245,6 +261,8 @@ static int stdout_stream_log(StdoutStream *s, const char *p) {
 
         IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
 
+        IOVEC_SET_STRING(iovec[n++], s->id_field);
+
         syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
         IOVEC_SET_STRING(iovec[n++], syslog_priority);
 
@@ -259,6 +277,18 @@ static int stdout_stream_log(StdoutStream *s, const char *p) {
                         IOVEC_SET_STRING(iovec[n++], syslog_identifier);
         }
 
+        if (line_break != LINE_BREAK_NEWLINE) {
+                const char *c;
+
+                /* If this log message was generated due to an uncommon line break then mention this in the log
+                 * entry */
+
+                c =     line_break == LINE_BREAK_NUL ?      "_LINE_BREAK=nul" :
+                        line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
+                                                            "_LINE_BREAK=eof";
+                IOVEC_SET_STRING(iovec[n++], c);
+        }
+
         message = strappend("MESSAGE=", p);
         if (message)
                 IOVEC_SET_STRING(iovec[n++], message);
@@ -268,12 +298,18 @@ static int stdout_stream_log(StdoutStream *s, const char *p) {
         return 0;
 }
 
-static int stdout_stream_line(StdoutStream *s, char *p) {
+static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) {
         int r;
 
         assert(s);
         assert(p);
 
+        /* line breaks by NUL, line max length or EOF are not permissible during the negotiation part of the protocol */
+        if (line_break != LINE_BREAK_NEWLINE && s->state != STDOUT_STREAM_RUNNING) {
+                log_warning("Control protocol line not properly terminated.");
+                return -EINVAL;
+        }
+
         p = strstrip(p);
 
         switch (s->state) {
@@ -362,7 +398,7 @@ static int stdout_stream_line(StdoutStream *s, char *p) {
                 return 0;
 
         case STDOUT_STREAM_RUNNING:
-                return stdout_stream_log(s, p);
+                return stdout_stream_log(s, p, line_break);
         }
 
         assert_not_reached("Unknown stream state");
@@ -378,21 +414,32 @@ static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
         p = s->buffer;
         remaining = s->length;
         for (;;) {
-                char *end;
+                LineBreak line_break;
                 size_t skip;
 
-                end = memchr(p, '\n', remaining);
-                if (end)
-                        skip = end - p + 1;
-                else if (remaining >= sizeof(s->buffer) - 1) {
-                        end = p + sizeof(s->buffer) - 1;
+                char *end1, *end2;
+
+                end1 = memchr(p, '\n', remaining);
+                end2 = memchr(p, 0, end1 ? (size_t) (end1 - p) : remaining);
+
+                if (end2) {
+                        /* We found a NUL terminator */
+                        skip = end2 - p + 1;
+                        line_break = LINE_BREAK_NUL;
+                } else if (end1) {
+                        /* We found a \n terminator */
+                        *end1 = 0;
+                        skip = end1 - p + 1;
+                        line_break = LINE_BREAK_NEWLINE;
+                } else if (remaining >= s->server->line_max) {
+                        /* Force a line break after the maximum line length */
+                        *(p + s->server->line_max) = 0;
                         skip = remaining;
+                        line_break = LINE_BREAK_LINE_MAX;
                 } else
                         break;
 
-                *end = 0;
-
-                r = stdout_stream_line(s, p);
+                r = stdout_stream_line(s, p, line_break);
                 if (r < 0)
                         return r;
 
@@ -402,7 +449,7 @@ static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
 
         if (force_flush && remaining > 0) {
                 p[remaining] = 0;
-                r = stdout_stream_line(s, p);
+                r = stdout_stream_line(s, p, LINE_BREAK_EOF);
                 if (r < 0)
                         return r;
 
@@ -420,6 +467,7 @@ static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
 
 static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
         StdoutStream *s = userdata;
+        size_t limit;
         ssize_t l;
         int r;
 
@@ -430,9 +478,20 @@ static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents,
                 goto terminate;
         }
 
-        l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
-        if (l < 0) {
+        /* If the buffer is full already (discounting the extra NUL we need), add room for another 1K */
+        if (s->length + 1 >= s->allocated) {
+                if (!GREEDY_REALLOC(s->buffer, s->allocated, s->length + 1 + 1024)) {
+                        log_oom();
+                        goto terminate;
+                }
+        }
+
+        /* Try to make use of the allocated buffer in full, but never read more than the configured line size. Also,
+         * always leave room for a terminating NUL we might need to add. */
+        limit = MIN(s->allocated - 1, s->server->line_max);
 
+        l = read(s->fd, s->buffer + s->length, limit - s->length);
+        if (l < 0) {
                 if (errno == EAGAIN)
                         return 0;
 
@@ -459,11 +518,16 @@ terminate:
 
 static int stdout_stream_install(Server *s, int fd, StdoutStream **ret) {
         _cleanup_(stdout_stream_freep) StdoutStream *stream = NULL;
+        sd_id128_t id;
         int r;
 
         assert(s);
         assert(fd >= 0);
 
+        r = sd_id128_randomize(&id);
+        if (r < 0)
+                return log_error_errno(r, "Failed to generate stream ID: %m");
+
         stream = new0(StdoutStream, 1);
         if (!stream)
                 return log_oom();
@@ -471,6 +535,8 @@ static int stdout_stream_install(Server *s, int fd, StdoutStream **ret) {
         stream->fd = -1;
         stream->priority = LOG_INFO;
 
+        xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+
         r = getpeercred(fd, &stream->ucred);
         if (r < 0)
                 return log_error_errno(r, "Failed to determine peer credentials: %m");
@@ -545,7 +611,8 @@ static int stdout_stream_load(StdoutStream *stream, const char *fname) {
                 *level_prefix = NULL,
                 *forward_to_syslog = NULL,
                 *forward_to_kmsg = NULL,
-                *forward_to_console = NULL;
+                *forward_to_console = NULL,
+                *stream_id = NULL;
         int r;
 
         assert(stream);
@@ -565,6 +632,7 @@ static int stdout_stream_load(StdoutStream *stream, const char *fname) {
                            "FORWARD_TO_CONSOLE", &forward_to_console,
                            "IDENTIFIER", &stream->identifier,
                            "UNIT", &stream->unit_id,
+                           "STREAM_ID", &stream_id,
                            NULL);
         if (r < 0)
                 return log_error_errno(r, "Failed to read: %s", stream->state_file);
@@ -601,6 +669,14 @@ static int stdout_stream_load(StdoutStream *stream, const char *fname) {
                         stream->forward_to_console = r;
         }
 
+        if (stream_id) {
+                sd_id128_t id;
+
+                r = sd_id128_from_string(stream_id, &id);
+                if (r >= 0)
+                        xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+        }
+
         return 0;
 }
 
diff --git a/src/journal/journald.conf b/src/journal/journald.conf
index 3907dfb7ff..5355ec2b2f 100644
--- a/src/journal/journald.conf
+++ b/src/journal/journald.conf
@@ -37,3 +37,4 @@
 #MaxLevelKMsg=notice
 #MaxLevelConsole=info
 #MaxLevelWall=emerg
+#LineMax=48K