diff --git a/0001-core-add-new-PollLimit-settings-to-.socket-units.patch b/0001-core-add-new-PollLimit-settings-to-.socket-units.patch new file mode 100644 index 0000000..351f413 --- /dev/null +++ b/0001-core-add-new-PollLimit-settings-to-.socket-units.patch @@ -0,0 +1,243 @@ +From df25afd2cf5527fe1bb542bb146fef1be8d9a489 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering +Date: Sat, 9 Sep 2023 14:46:32 +0200 +Subject: [PATCH 1/3] core: add new "PollLimit" settings to .socket units + +This adds a new "PollLimit" pair of settings to .socket units, very +similar to existing "TriggerLimit" logic. The differences are: + +* PollLimit focusses on the polling on the sockets, and pauses that + temporarily if a ratelimit on that is reached. TriggerLimit otoh + focusses on the triggering effect of socket units, and stops + triggering once the ratelimit is hit. + +* While the trigger limit being hit is an action that causes the socket + unit to fail, the polling limit being reached will just temporarily + disable polling on the socket fd, and it is resumed once the ratelimit + interval is over. + +* When a socket unit operates on multiple socket fds (e.g. ListenStream= + on both some ipv6 and an ipv4 address or so), then the PollLimit will + be specific to each fd, while the trigger limit is specific to the + whole unit. + +Implementation-wise this is mostly a wrapper around sd-event's +sd_event_source_set_ratelimit(), which exposes the desired behaviour +directly. + +Usecase for all of this: socket services which when overloaded with +connections should just slow down reception of them, but not fail +persistently. 
+ +(cherry picked from commit 2bec84e7a5bf3687ae65205753ba3d8067cf2f0e) +--- + man/org.freedesktop.systemd1.xml | 12 ++++++++++ + src/core/dbus-socket.c | 8 +++++++ + src/core/load-fragment-gperf.gperf.in | 2 ++ + src/core/socket.c | 32 +++++++++++++++++++-------- + src/core/socket.h | 2 ++ + src/shared/bus-unit-util.c | 10 +++++---- + 6 files changed, 53 insertions(+), 13 deletions(-) + +diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml +index 56906e2f3b..0557dc2379 100644 +--- a/man/org.freedesktop.systemd1.xml ++++ b/man/org.freedesktop.systemd1.xml +@@ -4727,6 +4727,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + readonly t TriggerLimitIntervalUSec = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly u TriggerLimitBurst = ...; ++ @org.freedesktop.DBus.Property.EmitsChangedSignal("const") ++ readonly t PollLimitIntervalUSec = ...; ++ @org.freedesktop.DBus.Property.EmitsChangedSignal("const") ++ readonly u PollLimitBurst = ...; + readonly u UID = ...; + readonly u GID = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("invalidates") +@@ -5961,6 +5965,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + ++ ++ ++ ++ + + + +@@ -6497,6 +6505,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + ++ PollLimitIntervalUSec/PollLimitBurst properties configure the ++ polling limit for the socket unit. Expects a time in µs, resp. an unsigned integer. If either is set to ++ zero the limiting feature is turned off. 
++ + + Properties + +diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c +index 09a3a9502b..04552b7c60 100644 +--- a/src/core/dbus-socket.c ++++ b/src/core/dbus-socket.c +@@ -129,6 +129,8 @@ const sd_bus_vtable bus_socket_vtable[] = { + SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST), ++ SD_BUS_PROPERTY("PollLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, poll_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST), ++ SD_BUS_PROPERTY("PollLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, poll_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), +@@ -248,6 +250,9 @@ static int bus_socket_set_transient_property( + if (streq(name, "TriggerLimitBurst")) + return bus_set_transient_unsigned(u, name, &s->trigger_limit.burst, message, flags, error); + ++ if (streq(name, "PollLimitBurst")) ++ return bus_set_transient_unsigned(u, name, &s->poll_limit_burst, message, flags, error); ++ + if (streq(name, "SocketMode")) + return bus_set_transient_mode_t(u, name, &s->socket_mode, message, flags, error); + +@@ -275,6 +280,9 @@ static int bus_socket_set_transient_property( + if (streq(name, "TriggerLimitIntervalUSec")) + return bus_set_transient_usec(u, name, &s->trigger_limit.interval, message, flags, error); + ++ if (streq(name, 
"PollLimitIntervalUSec")) ++ return bus_set_transient_usec(u, name, &s->poll_limit_interval, message, flags, error); ++ + if (streq(name, "SmackLabel")) + return bus_set_transient_string(u, name, &s->smack, message, flags, error); + +diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in +index b66adf2811..0d1ee9c231 100644 +--- a/src/core/load-fragment-gperf.gperf.in ++++ b/src/core/load-fragment-gperf.gperf.in +@@ -507,6 +507,8 @@ Socket.FileDescriptorName, config_parse_fdname, + Socket.Service, config_parse_socket_service, 0, 0 + Socket.TriggerLimitIntervalSec, config_parse_sec, 0, offsetof(Socket, trigger_limit.interval) + Socket.TriggerLimitBurst, config_parse_unsigned, 0, offsetof(Socket, trigger_limit.burst) ++Socket.PollLimitIntervalSec, config_parse_sec, 0, offsetof(Socket, poll_limit_interval) ++Socket.PollLimitBurst, config_parse_unsigned, 0, offsetof(Socket, poll_limit_burst) + {% if ENABLE_SMACK %} + Socket.SmackLabel, config_parse_unit_string_printf, 0, offsetof(Socket, smack) + Socket.SmackLabelIPIn, config_parse_unit_string_printf, 0, offsetof(Socket, smack_ip_in) +diff --git a/src/core/socket.c b/src/core/socket.c +index 75034ac357..dc18744f54 100644 +--- a/src/core/socket.c ++++ b/src/core/socket.c +@@ -101,6 +101,9 @@ static void socket_init(Unit *u) { + + s->trigger_limit.interval = USEC_INFINITY; + s->trigger_limit.burst = UINT_MAX; ++ ++ s->poll_limit_interval = USEC_INFINITY; ++ s->poll_limit_burst = UINT_MAX; + } + + static void socket_unwatch_control_pid(Socket *s) { +@@ -310,17 +313,20 @@ static int socket_add_extras(Socket *s) { + * off the queues, which it might not necessarily do. Moreover, while Accept=no services are supposed to + * process whatever is queued in one go, and thus should normally never have to be started frequently. This is + * different for Accept=yes where each connection is processed by a new service instance, and thus frequent +- * service starts are typical. 
*/ ++ * service starts are typical. ++ * ++ * For the poll limit we follow a similar rule, but use 3/4th of the trigger limit parameters, to ++ * trigger this earlier. */ + + if (s->trigger_limit.interval == USEC_INFINITY) + s->trigger_limit.interval = 2 * USEC_PER_SEC; ++ if (s->trigger_limit.burst == UINT_MAX) ++ s->trigger_limit.burst = s->accept ? 200 : 20; + +- if (s->trigger_limit.burst == UINT_MAX) { +- if (s->accept) +- s->trigger_limit.burst = 200; +- else +- s->trigger_limit.burst = 20; +- } ++ if (s->poll_limit_interval == USEC_INFINITY) ++ s->poll_limit_interval = 2 * USEC_PER_SEC; ++ if (s->poll_limit_burst == UINT_MAX) ++ s->poll_limit_burst = s->accept ? 150 : 15; + + if (have_non_accept_socket(s)) { + +@@ -770,9 +776,13 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) { + + fprintf(f, + "%sTriggerLimitIntervalSec: %s\n" +- "%sTriggerLimitBurst: %u\n", ++ "%sTriggerLimitBurst: %u\n" ++ "%sPollLimitIntervalSec: %s\n" ++ "%sPollLimitBurst: %u\n", + prefix, FORMAT_TIMESPAN(s->trigger_limit.interval, USEC_PER_SEC), +- prefix, s->trigger_limit.burst); ++ prefix, s->trigger_limit.burst, ++ prefix, FORMAT_TIMESPAN(s->poll_limit_interval, USEC_PER_SEC), ++ prefix, s->poll_limit_burst); + + str = ip_protocol_to_name(s->socket_protocol); + if (str) +@@ -1765,6 +1775,10 @@ static int socket_watch_fds(Socket *s) { + + (void) sd_event_source_set_description(p->event_source, "socket-port-io"); + } ++ ++ r = sd_event_source_set_ratelimit(p->event_source, s->poll_limit_interval, s->poll_limit_burst); ++ if (r < 0) ++ log_unit_debug_errno(UNIT(s), r, "Failed to set poll limit on I/O event source, ignoring: %m"); + } + + return 0; +diff --git a/src/core/socket.h b/src/core/socket.h +index 191d27f46d..b03a291e4a 100644 +--- a/src/core/socket.h ++++ b/src/core/socket.h +@@ -158,6 +158,8 @@ struct Socket { + char *fdname; + + RateLimit trigger_limit; ++ usec_t poll_limit_interval; ++ unsigned poll_limit_burst; + }; + + SocketPeer 
*socket_peer_ref(SocketPeer *p); +diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c +index e7b44cc39b..9f0f37488d 100644 +--- a/src/shared/bus-unit-util.c ++++ b/src/shared/bus-unit-util.c +@@ -2170,10 +2170,10 @@ static int bus_append_path_property(sd_bus_message *m, const char *field, const + return 1; + } + +- if (streq(field, "TriggerLimitBurst")) ++ if (STR_IN_SET(field, "TriggerLimitBurst", "PollLimitBurst")) + return bus_append_safe_atou(m, field, eq); + +- if (streq(field, "TriggerLimitIntervalSec")) ++ if (STR_IN_SET(field, "TriggerLimitIntervalSec", "PollLimitIntervalSec")) + return bus_append_parse_sec_rename(m, field, eq); + + return 0; +@@ -2382,7 +2382,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons + "MaxConnections", + "MaxConnectionsPerSource", + "KeepAliveProbes", +- "TriggerLimitBurst")) ++ "TriggerLimitBurst", ++ "PollLimitBurst")) + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, "SocketMode", +@@ -2397,7 +2398,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons + "KeepAliveTimeSec", + "KeepAliveIntervalSec", + "DeferAcceptSec", +- "TriggerLimitIntervalSec")) ++ "TriggerLimitIntervalSec", ++ "PollLimitIntervalSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (STR_IN_SET(field, "ReceiveBuffer", diff --git a/0002-man-document-the-new-PollLimitIntervalSec-PollLimitB.patch b/0002-man-document-the-new-PollLimitIntervalSec-PollLimitB.patch new file mode 100644 index 0000000..e2e80e9 --- /dev/null +++ b/0002-man-document-the-new-PollLimitIntervalSec-PollLimitB.patch @@ -0,0 +1,80 @@ +From f6b09a2ed646f0a0b54605d4c19a898ab2bbf192 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering +Date: Mon, 18 Sep 2023 17:51:49 +0200 +Subject: [PATCH 2/3] man: document the new + PollLimitIntervalSec=/PollLimitBurst= settings + +(cherry picked from commit 9373fce68de183a615d44fe100dcf22e3c9b8c3e) +--- + man/systemd.socket.xml | 58 
++++++++++++++++++++++++++++++++++-------- + 1 file changed, 47 insertions(+), 11 deletions(-) + +diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml +index 45555302f1..462978d438 100644 +--- a/man/systemd.socket.xml ++++ b/man/systemd.socket.xml +@@ -830,17 +830,53 @@ + TriggerLimitIntervalSec= + TriggerLimitBurst= + +- Configures a limit on how often this socket unit may be activated within a specific time +- interval. The TriggerLimitIntervalSec= may be used to configure the length of the time +- interval in the usual time units us, ms, s, +- min, h, … and defaults to 2s (See +- systemd.time7 for details on +- the various time units understood). The TriggerLimitBurst= setting takes a positive integer +- value and specifies the number of permitted activations per time interval, and defaults to 200 for +- Accept=yes sockets (thus by default permitting 200 activations per 2s), and 20 otherwise (20 +- activations per 2s). Set either to 0 to disable any form of trigger rate limiting. If the limit is hit, the +- socket unit is placed into a failure mode, and will not be connectible anymore until restarted. Note that this +- limit is enforced before the service activation is enqueued. ++ Configures a limit on how often this socket unit may be activated within a specific ++ time interval. The TriggerLimitIntervalSec= setting may be used to configure the ++ length of the time interval in the usual time units us, ms, ++ s, min, h, … and defaults to 2s (See ++ systemd.time7 for ++ details on the various time units understood). The TriggerLimitBurst= setting ++ takes a positive integer value and specifies the number of permitted activations per time interval, ++ and defaults to 200 for Accept=yes sockets (thus by default permitting 200 ++ activations per 2s), and 20 otherwise (20 activations per 2s). Set either to 0 to disable any form of ++ trigger rate limiting. 
++ ++ If the limit is hit, the socket unit is placed into a failure mode, and will not be connectible ++ anymore until restarted. Note that this limit is enforced before the service activation is ++ enqueued. ++ ++ Compare with PollLimitIntervalSec=/PollLimitBurst= ++ described below, which implements a temporary slowdown if a socket unit is flooded with incoming ++ traffic, as opposed to the permanent failure state ++ TriggerLimitIntervalSec=/TriggerLimitBurst= results in. ++ ++ ++ ++ ++ PollLimitIntervalSec= ++ PollLimitBurst= ++ ++ Configures a limit on how often polling events on the file descriptors backing this ++ socket unit will be considered. This pair of settings is similar to ++ TriggerLimitIntervalSec=/TriggerLimitBurst= but instead of ++ putting a (fatal) limit on the activation frequency puts a (transient) limit on the polling ++ frequency. The expected parameter syntax and range are identical to that of the aforementioned ++ options, and can be disabled the same way. ++ ++ If the polling limit is hit, polling is temporarily disabled on the affected file descriptor until the specified time ++ window passes. The polling limit hence slows down connection attempts if hit, but unlike the trigger ++ limit won't cause permanent failures. It's the recommended mechanism to deal with DoS attempts ++ through packet flooding. ++ ++ The polling limit is enforced per file descriptor to listen on, as opposed to the trigger limit ++ which is enforced for the entire socket unit. This distinction matters for socket units that listen ++ on multiple file descriptors (i.e. have multiple ListenXYZ= stanzas). ++ ++ These settings default to 150 (in case of Accept=yes) and 15 (otherwise) ++ polling events per 2s. This is considerably lower than the default values for the trigger limit (see ++ above) and means that the polling limit should typically ensure the trigger limit is never hit, ++ unless one of them is reconfigured or disabled. 
++ + + + diff --git a/0003-ci-add-test-for-poll-limit.patch b/0003-ci-add-test-for-poll-limit.patch new file mode 100644 index 0000000..33e2178 --- /dev/null +++ b/0003-ci-add-test-for-poll-limit.patch @@ -0,0 +1,79 @@ +From ae92a9714744bbf92fe69ffe276a668b031a6d26 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering +Date: Mon, 18 Sep 2023 18:05:27 +0200 +Subject: [PATCH 3/3] ci: add test for poll limit + +(cherry picked from commit 065e478a4a8cc8e41a6e87756c081396f253e853) +--- + test/TEST-07-PID1/test.sh | 2 ++ + test/units/testsuite-07.poll-limit.sh | 48 +++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+) + create mode 100755 test/units/testsuite-07.poll-limit.sh + +diff --git a/test/TEST-07-PID1/test.sh b/test/TEST-07-PID1/test.sh +index 1c3d7137fe..d0e35d870f 100755 +--- a/test/TEST-07-PID1/test.sh ++++ b/test/TEST-07-PID1/test.sh +@@ -32,6 +32,8 @@ Alias=issue2730-alias.mount + EOF + "${SYSTEMCTL:?}" enable --root="$workspace" issue2730.mount + ln -svrf "$workspace/etc/systemd/system/issue2730.mount" "$workspace/etc/systemd/system/issue2730-alias.mount" ++ ++ image_install logger + } + + do_test "$@" +diff --git a/test/units/testsuite-07.poll-limit.sh b/test/units/testsuite-07.poll-limit.sh +new file mode 100755 +index 0000000000..480d7ee8df +--- /dev/null ++++ b/test/units/testsuite-07.poll-limit.sh +@@ -0,0 +1,48 @@ ++#!/usr/bin/env bash ++# SPDX-License-Identifier: LGPL-2.1-or-later ++set -eux ++set -o pipefail ++ ++systemd-analyze log-level debug ++ ++cat > /run/systemd/system/floodme@.service < /run/systemd/system/floodme.socket <