cc2231
From 80753fa3c0622fd8117c64e1ed2d7841edee3b00 Mon Sep 17 00:00:00 2001
cc2231
From: Lennart Poettering <lennart@poettering.net>
cc2231
Date: Thu, 13 Nov 2014 14:34:42 +0100
cc2231
Subject: [PATCH] core: introduce new Delegate=yes/no property controlling
cc2231
 creation of cgroup subhierarchies
cc2231
cc2231
For privileged units this resource control property ensures that the
cc2231
processes have all controllers systemd manages enabled.
cc2231
cc2231
For unprivileged services (those with User= set) this ensures that
cc2231
access rights to the service cgroup are granted to the user in question,
cc2231
to create further subgroups. Note that this only applies to the
cc2231
name=systemd hierarchy though, as access to other controllers is not
cc2231
safe for unprivileged processes.
cc2231
cc2231
Delegate=yes should be set for container scopes where a systemd instance
cc2231
inside the container shall manage the hierarchies below its own cgroup
cc2231
and have access to all controllers.
cc2231
cc2231
Delegate=yes should also be set for user@.service, so that systemd
cc2231
--user can run, controlling its own cgroup tree.
cc2231
cc2231
This commit changes machined, systemd-nspawn@.service and user@.service
cc2231
to set this boolean, in order to ensure that container management will
cc2231
just work, and the user systemd instance can run fine.
cc2231
cc2231
(cherry picked from a931ad47a8623163a29d898224d8a8c1177ffdaf)
cc2231
cc2231
Resolves: #1179715
cc2231
---
cc2231
 man/systemd.resource-control.xml      | 14 ++++++++++++
cc2231
 src/core/cgroup.c                     | 19 +++++++++++++++--
cc2231
 src/core/cgroup.h                     |  2 ++
cc2231
 src/core/dbus-cgroup.c                | 40 +++++++++++++++++++++++++++++++++++
cc2231
 src/core/execute.c                    | 23 +++++++++++++++++---
cc2231
 src/core/execute.h                    |  2 ++
cc2231
 src/core/load-fragment-gperf.gperf.m4 |  3 ++-
cc2231
 src/core/mount.c                      |  1 +
cc2231
 src/core/service.c                    |  1 +
cc2231
 src/core/socket.c                     |  1 +
cc2231
 src/core/swap.c                       |  1 +
cc2231
 src/machine/machined-dbus.c           | 10 +++++++++
cc2231
 src/shared/cgroup-util.h              |  3 ++-
cc2231
 units/systemd-nspawn@.service.in      |  1 +
cc2231
 14 files changed, 114 insertions(+), 7 deletions(-)
cc2231
cc2231
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
cc2231
index 8688905..3748c0c 100644
cc2231
--- a/man/systemd.resource-control.xml
cc2231
+++ b/man/systemd.resource-control.xml
cc2231
@@ -327,6 +327,20 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>.
cc2231
         </listitem>
cc2231
       </varlistentry>
cc2231
 
cc2231
+      <varlistentry>
cc2231
+        <term><varname>Delegate=</varname></term>
cc2231
+
cc2231
+        <listitem>
cc2231
+          <para>Turns on delegation of further resource control
cc2231
+          partitioning to processes of the unit. For unprivileged
cc2231
+          services (i.e. those using the <varname>User=</varname>
cc2231
+          setting) this allows processes to create a subhierarchy
cc2231
+          beneath its control group path. For privileged services and
cc2231
+          scopes this ensures the processes will have all control
cc2231
+          group controllers enabled.</para>
cc2231
+        </listitem>
cc2231
+      </varlistentry>
cc2231
+
cc2231
     </variablelist>
cc2231
   </refsect1>
cc2231
 
cc2231
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
cc2231
index 32e2599..764311f 100644
cc2231
--- a/src/core/cgroup.c
cc2231
+++ b/src/core/cgroup.c
cc2231
@@ -94,14 +94,16 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
cc2231
                 "%sCPUShares=%lu\n"
cc2231
                 "%sBlockIOWeight=%lu\n"
cc2231
                 "%sMemoryLimit=%" PRIu64 "\n"
cc2231
-                "%sDevicePolicy=%s\n",
cc2231
+                "%sDevicePolicy=%s\n"
cc2231
+                "%sDelegate=%s\n",
cc2231
                 prefix, yes_no(c->cpu_accounting),
cc2231
                 prefix, yes_no(c->blockio_accounting),
cc2231
                 prefix, yes_no(c->memory_accounting),
cc2231
                 prefix, c->cpu_shares,
cc2231
                 prefix, c->blockio_weight,
cc2231
                 prefix, c->memory_limit,
cc2231
-                prefix, cgroup_device_policy_to_string(c->device_policy));
cc2231
+                prefix, cgroup_device_policy_to_string(c->device_policy),
cc2231
+                prefix, yes_no(c->delegate));
cc2231
 
cc2231
         LIST_FOREACH(device_allow, a, c->device_allow)
cc2231
                 fprintf(f,
cc2231
@@ -342,6 +344,19 @@ static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
cc2231
         if (!c)
cc2231
                 return 0;
cc2231
 
cc2231
+        /* If delegation is turned on, then turn on all cgroups,
cc2231
+         * unless the process we fork into it is known to drop
cc2231
+         * privileges anyway, and shouldn't get access to the
cc2231
+         * controllers anyway. */
cc2231
+
cc2231
+        if (c->delegate) {
cc2231
+                ExecContext *e;
cc2231
+
cc2231
+                e = unit_get_exec_context(u);
cc2231
+                if (!e || exec_context_maintains_privileges(e))
cc2231
+                        return _CGROUP_CONTROLLER_MASK_ALL;
cc2231
+        }
cc2231
+
cc2231
         return cgroup_context_get_mask(c);
cc2231
 }
cc2231
 
cc2231
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
cc2231
index 0a079e9..d00bcac 100644
cc2231
--- a/src/core/cgroup.h
cc2231
+++ b/src/core/cgroup.h
cc2231
@@ -80,6 +80,8 @@ struct CGroupContext {
cc2231
 
cc2231
         CGroupDevicePolicy device_policy;
cc2231
         LIST_HEAD(CGroupDeviceAllow, device_allow);
cc2231
+
cc2231
+        bool delegate;
cc2231
 };
cc2231
 
cc2231
 #include "unit.h"
cc2231
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
cc2231
index 9ebcad9..a13c869 100644
cc2231
--- a/src/core/dbus-cgroup.c
cc2231
+++ b/src/core/dbus-cgroup.c
cc2231
@@ -124,6 +124,7 @@ static int bus_cgroup_append_device_allow(DBusMessageIter *i, const char *proper
cc2231
 }
cc2231
 
cc2231
 const BusProperty bus_cgroup_context_properties[] = {
cc2231
+        { "Delegate",                bus_property_append_bool,            "b",     offsetof(CGroupContext, delegate)           },
cc2231
         { "CPUAccounting",           bus_property_append_bool,            "b",     offsetof(CGroupContext, cpu_accounting)     },
cc2231
         { "CPUShares",               bus_property_append_ul,              "t",     offsetof(CGroupContext, cpu_shares)         },
cc2231
         { "BlockIOAccounting",       bus_property_append_bool,            "b",     offsetof(CGroupContext, blockio_accounting) },
cc2231
@@ -138,6 +139,38 @@ const BusProperty bus_cgroup_context_properties[] = {
cc2231
         {}
cc2231
 };
cc2231
 
cc2231
+static int bus_cgroup_set_transient_property(
cc2231
+                Unit *u,
cc2231
+                CGroupContext *c,
cc2231
+                const char *name,
cc2231
+                DBusMessageIter *i,
cc2231
+                UnitSetPropertiesMode mode,
cc2231
+                DBusError *error) {
cc2231
+
cc2231
+        assert(u);
cc2231
+        assert(c);
cc2231
+        assert(name);
cc2231
+        assert(i);
cc2231
+
cc2231
+        if (streq(name, "Delegate")) {
cc2231
+
cc2231
+                if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_BOOLEAN)
cc2231
+                        return -EINVAL;
cc2231
+
cc2231
+                if (mode != UNIT_CHECK) {
cc2231
+                        dbus_bool_t b;
cc2231
+
cc2231
+                        dbus_message_iter_get_basic(i, &b);
cc2231
+                        c->delegate = b;
cc2231
+                        unit_write_drop_in_private(u, mode, name, b ? "Delegate=yes" : "Delegate=no");
cc2231
+                }
cc2231
+
cc2231
+                return 1;
cc2231
+        }
cc2231
+
cc2231
+        return 0;
cc2231
+}
cc2231
+
cc2231
 int bus_cgroup_set_property(
cc2231
                 Unit *u,
cc2231
                 CGroupContext *c,
cc2231
@@ -550,5 +583,12 @@ int bus_cgroup_set_property(
cc2231
                 return 1;
cc2231
         }
cc2231
 
cc2231
+        if (u->transient && u->load_state == UNIT_STUB) {
cc2231
+                int r;
cc2231
+                r = bus_cgroup_set_transient_property(u, c, name, i, mode, error);
cc2231
+                if (r != 0)
cc2231
+                        return r;
cc2231
+        }
cc2231
+
cc2231
         return 0;
cc2231
 }
cc2231
diff --git a/src/core/execute.c b/src/core/execute.c
cc2231
index 981b9e4..c3fd6a8 100644
cc2231
--- a/src/core/execute.c
cc2231
+++ b/src/core/execute.c
cc2231
@@ -1035,6 +1035,7 @@ int exec_spawn(ExecCommand *command,
cc2231
                bool confirm_spawn,
cc2231
                CGroupControllerMask cgroup_supported,
cc2231
                const char *cgroup_path,
cc2231
+               bool cgroup_delegate,
cc2231
                const char *unit_id,
cc2231
                int idle_pipe[4],
cc2231
                pid_t *ret) {
cc2231
@@ -1299,8 +1300,10 @@ int exec_spawn(ExecCommand *command,
cc2231
                         }
cc2231
                 }
cc2231
 
cc2231
-#ifdef HAVE_PAM
cc2231
-                if (cgroup_path && context->user && context->pam_name) {
cc2231
+                /* If delegation is enabled we'll pass ownership of the cgroup
cc2231
+                 * (but only in systemd's own controller hierarchy!) to the
cc2231
+                 * user of the new process. */
cc2231
+               if (cgroup_path && context->user && cgroup_delegate) {
cc2231
                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
cc2231
                         if (err < 0) {
cc2231
                                 r = EXIT_CGROUP;
cc2231
@@ -1314,7 +1317,6 @@ int exec_spawn(ExecCommand *command,
cc2231
                                 goto fail_child;
cc2231
                         }
cc2231
                 }
cc2231
-#endif
cc2231
 
cc2231
                 if (apply_permissions) {
cc2231
                         err = enforce_groups(context, username, gid);
cc2231
@@ -2069,6 +2071,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
cc2231
                         prefix, c->utmp_id);
cc2231
 }
cc2231
 
cc2231
+bool exec_context_maintains_privileges(ExecContext *c) {
cc2231
+        assert(c);
cc2231
+
cc2231
+        /* Returns true if the process forked off would run under
cc2231
+         * an unchanged UID or as root. */
cc2231
+
cc2231
+        if (!c->user)
cc2231
+                return true;
cc2231
+
cc2231
+        if (streq(c->user, "root") || streq(c->user, "0"))
cc2231
+                return true;
cc2231
+
cc2231
+        return false;
cc2231
+}
cc2231
+
cc2231
 void exec_status_start(ExecStatus *s, pid_t pid) {
cc2231
         assert(s);
cc2231
 
cc2231
diff --git a/src/core/execute.h b/src/core/execute.h
cc2231
index c1e9717..eca9d7d 100644
cc2231
--- a/src/core/execute.h
cc2231
+++ b/src/core/execute.h
cc2231
@@ -173,6 +173,7 @@ int exec_spawn(ExecCommand *command,
cc2231
                bool confirm_spawn,
cc2231
                CGroupControllerMask cgroup_mask,
cc2231
                const char *cgroup_path,
cc2231
+               bool cgroup_delegate,
cc2231
                const char *unit_id,
cc2231
                int pipe_fd[2],
cc2231
                pid_t *ret);
cc2231
@@ -199,6 +200,7 @@ void exec_context_tty_reset(const ExecContext *context);
cc2231
 int exec_context_load_environment(const ExecContext *c, char ***l);
cc2231
 
cc2231
 bool exec_context_may_touch_console(ExecContext *c);
cc2231
+bool exec_context_maintains_privileges(ExecContext *c);
cc2231
 void exec_context_serialize(const ExecContext *c, Unit *u, FILE *f);
cc2231
 
cc2231
 void exec_status_start(ExecStatus *s, pid_t pid);
cc2231
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
cc2231
index 0991cb9..3e88c8a 100644
cc2231
--- a/src/core/load-fragment-gperf.gperf.m4
cc2231
+++ b/src/core/load-fragment-gperf.gperf.m4
cc2231
@@ -95,7 +95,8 @@ $1.BlockIOAccounting,            config_parse_bool,                  0,
cc2231
 $1.BlockIOWeight,                config_parse_blockio_weight,        0,                             offsetof($1, cgroup_context)
cc2231
 $1.BlockIODeviceWeight,          config_parse_blockio_device_weight, 0,                             offsetof($1, cgroup_context)
cc2231
 $1.BlockIOReadBandwidth,         config_parse_blockio_bandwidth,     0,                             offsetof($1, cgroup_context)
cc2231
-$1.BlockIOWriteBandwidth,        config_parse_blockio_bandwidth,     0,                             offsetof($1, cgroup_context)'
cc2231
+$1.BlockIOWriteBandwidth,        config_parse_blockio_bandwidth,     0,                             offsetof($1, cgroup_context)
cc2231
+$1.Delegate,                     config_parse_bool,                  0,                             offsetof($1, cgroup_context.delegate)'
cc2231
 )m4_dnl
cc2231
 Unit.Description,                config_parse_unit_string_printf,    0,                             offsetof(Unit, description)
cc2231
 Unit.Documentation,              config_parse_documentation,         0,                             offsetof(Unit, documentation)
cc2231
diff --git a/src/core/mount.c b/src/core/mount.c
cc2231
index 3672338..f7c98c4 100644
cc2231
--- a/src/core/mount.c
cc2231
+++ b/src/core/mount.c
cc2231
@@ -793,6 +793,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
cc2231
                        UNIT(m)->manager->confirm_spawn,
cc2231
                        UNIT(m)->manager->cgroup_supported,
cc2231
                        UNIT(m)->cgroup_path,
cc2231
+                       m->cgroup_context.delegate,
cc2231
                        UNIT(m)->id,
cc2231
                        NULL,
cc2231
                        &pid);
cc2231
diff --git a/src/core/service.c b/src/core/service.c
cc2231
index f6fdbbc..da30d77 100644
cc2231
--- a/src/core/service.c
cc2231
+++ b/src/core/service.c
cc2231
@@ -1869,6 +1869,7 @@ static int service_spawn(
cc2231
                        UNIT(s)->manager->confirm_spawn,
cc2231
                        UNIT(s)->manager->cgroup_supported,
cc2231
                        path,
cc2231
+                       s->cgroup_context.delegate,
cc2231
                        UNIT(s)->id,
cc2231
                        s->type == SERVICE_IDLE ? UNIT(s)->manager->idle_pipe : NULL,
cc2231
                        &pid);
cc2231
diff --git a/src/core/socket.c b/src/core/socket.c
cc2231
index 32e0d35..f365125 100644
cc2231
--- a/src/core/socket.c
cc2231
+++ b/src/core/socket.c
cc2231
@@ -1229,6 +1229,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
cc2231
                        UNIT(s)->manager->confirm_spawn,
cc2231
                        UNIT(s)->manager->cgroup_supported,
cc2231
                        UNIT(s)->cgroup_path,
cc2231
+                       s->cgroup_context.delegate,
cc2231
                        UNIT(s)->id,
cc2231
                        NULL,
cc2231
                        &pid);
cc2231
diff --git a/src/core/swap.c b/src/core/swap.c
cc2231
index 727bb95..597c8ca 100644
cc2231
--- a/src/core/swap.c
cc2231
+++ b/src/core/swap.c
cc2231
@@ -591,6 +591,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
cc2231
                        UNIT(s)->manager->confirm_spawn,
cc2231
                        UNIT(s)->manager->cgroup_supported,
cc2231
                        UNIT(s)->cgroup_path,
cc2231
+                       s->cgroup_context.delegate,
cc2231
                        UNIT(s)->id,
cc2231
                        NULL,
cc2231
                        &pid);
cc2231
diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c
cc2231
index 22caadf..0cebdc5 100644
cc2231
--- a/src/machine/machined-dbus.c
cc2231
+++ b/src/machine/machined-dbus.c
cc2231
@@ -739,9 +739,11 @@ int manager_start_scope(
cc2231
         DBusMessageIter iter, sub, sub2, sub3, sub4;
cc2231
         const char *timeout_stop_property = "TimeoutStopUSec";
cc2231
         const char *pids_property = "PIDs";
cc2231
+        const char *delegate_property = "Delegate";
cc2231
         uint64_t timeout = 500 * USEC_PER_MSEC;
cc2231
         const char *fail = "fail";
cc2231
         uint32_t u;
cc2231
+        dbus_bool_t b = 1;
cc2231
         int r;
cc2231
 
cc2231
         assert(manager);
cc2231
@@ -814,6 +816,14 @@ int manager_start_scope(
cc2231
             !dbus_message_iter_close_container(&sub, &sub2))
cc2231
                 return log_oom();
cc2231
 
cc2231
+        if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) ||
cc2231
+            !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &delegate_property) ||
cc2231
+            !dbus_message_iter_open_container(&sub2, DBUS_TYPE_VARIANT, "b", &sub3) ||
cc2231
+            !dbus_message_iter_append_basic(&sub3, DBUS_TYPE_BOOLEAN, &b) ||
cc2231
+            !dbus_message_iter_close_container(&sub2, &sub3) ||
cc2231
+            !dbus_message_iter_close_container(&sub, &sub2))
cc2231
+                return log_oom();
cc2231
+
cc2231
         if (more_properties) {
cc2231
                 r = copy_many_fields(&sub, more_properties);
cc2231
                 if (r < 0)
cc2231
diff --git a/src/shared/cgroup-util.h b/src/shared/cgroup-util.h
cc2231
index 0963450..0608b9a 100644
cc2231
--- a/src/shared/cgroup-util.h
cc2231
+++ b/src/shared/cgroup-util.h
cc2231
@@ -34,7 +34,8 @@ typedef enum CGroupControllerMask {
cc2231
         CGROUP_CPUACCT = 2,
cc2231
         CGROUP_BLKIO = 4,
cc2231
         CGROUP_MEMORY = 8,
cc2231
-        CGROUP_DEVICE = 16
cc2231
+        CGROUP_DEVICE = 16,
cc2231
+        _CGROUP_CONTROLLER_MASK_ALL = 31
cc2231
 } CGroupControllerMask;
cc2231
 
cc2231
 /*
cc2231
diff --git a/units/systemd-nspawn@.service.in b/units/systemd-nspawn@.service.in
cc2231
index 8e00736..bdfa89f 100644
cc2231
--- a/units/systemd-nspawn@.service.in
cc2231
+++ b/units/systemd-nspawn@.service.in
cc2231
@@ -12,6 +12,7 @@ Documentation=man:systemd-nspawn(1)
cc2231
 [Service]
cc2231
 ExecStart=@bindir@/systemd-nspawn -bjD /var/lib/container/%i
cc2231
 Type=notify
cc2231
+Delegate=yes
cc2231
 
cc2231
 [Install]
cc2231
 WantedBy=multi-user.target