05ad79
diff -up util-linux-2.23.2/include/pathnames.h.kzak util-linux-2.23.2/include/pathnames.h
05ad79
--- util-linux-2.23.2/include/pathnames.h.kzak	2015-06-26 10:00:19.111877564 +0200
05ad79
+++ util-linux-2.23.2/include/pathnames.h	2015-06-26 10:00:51.623630869 +0200
05ad79
@@ -85,6 +85,10 @@
05ad79
 #define _PATH_PROC_LOCKS        "/proc/locks"
05ad79
 #define _PATH_PROC_CDROMINFO	"/proc/sys/dev/cdrom/info"
05ad79
 
05ad79
+#define _PATH_PROC_UIDMAP	"/proc/self/uid_map"
05ad79
+#define _PATH_PROC_GIDMAP	"/proc/self/gid_map"
05ad79
+#define _PATH_PROC_SETGROUPS	"/proc/self/setgroups"
05ad79
+
05ad79
 #define _PATH_PROC_ATTR_CURRENT	"/proc/self/attr/current"
05ad79
 #define _PATH_PROC_ATTR_EXEC	"/proc/self/attr/exec"
05ad79
 #define _PATH_PROC_CAPLASTCAP	"/proc/sys/kernel/cap_last_cap"
05ad79
diff -up util-linux-2.23.2/sys-utils/Makemodule.am.kzak util-linux-2.23.2/sys-utils/Makemodule.am
05ad79
diff -up util-linux-2.23.2/sys-utils/nsenter.1.kzak util-linux-2.23.2/sys-utils/nsenter.1
05ad79
--- util-linux-2.23.2/sys-utils/nsenter.1.kzak	2015-06-26 09:58:39.468633643 +0200
05ad79
+++ util-linux-2.23.2/sys-utils/nsenter.1	2015-06-26 09:58:51.672541041 +0200
05ad79
@@ -1,44 +1,45 @@
05ad79
-.TH NSENTER 1 "January 2013" "util-linux" "User Commands"
05ad79
+.TH NSENTER 1 "June 2013" "util-linux" "User Commands"
05ad79
 .SH NAME
05ad79
 nsenter \- run program with namespaces of other processes
05ad79
 .SH SYNOPSIS
05ad79
 .B nsenter
05ad79
-.RI [ options ]
05ad79
-.RI [ program ]
05ad79
-.RI [ arguments ]
05ad79
+[options]
05ad79
+.RI [ program
05ad79
+.RI [ arguments ]]
05ad79
 .SH DESCRIPTION
05ad79
 Enters the namespaces of one or more other processes and then executes the specified
05ad79
 program.  Enterable namespaces are:
05ad79
 .TP
05ad79
 .B mount namespace
05ad79
-mounting and unmounting filesystems will not affect rest of the system
05ad79
+Mounting and unmounting filesystems will not affect the rest of the system
05ad79
 .RB ( CLONE_\:NEWNS
05ad79
-flag), except for filesystems which are explicitly marked as shared (by mount
05ad79
---make-\:shared).  See /proc\:/self\:/mountinfo for the shared flag.
05ad79
+flag), except for filesystems which are explicitly marked as shared (with
05ad79
+\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the
05ad79
+\fBshared\fP flag).
05ad79
 .TP
05ad79
 .B UTS namespace
05ad79
-setting hostname, domainname will not affect rest of the system
05ad79
+Setting hostname or domainname will not affect the rest of the system.
05ad79
 .RB ( CLONE_\:NEWUTS
05ad79
-flag).
05ad79
+flag)
05ad79
 .TP
05ad79
 .B IPC namespace
05ad79
-process will have independent namespace for System V message queues, semaphore
05ad79
-sets and shared memory segments
05ad79
+The process will have an independent namespace for System V message queues,
05ad79
+semaphore sets and shared memory segments.
05ad79
 .RB ( CLONE_\:NEWIPC
05ad79
-flag).
05ad79
+flag)
05ad79
 .TP
05ad79
 .B network namespace
05ad79
-process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall
05ad79
-rules, the
05ad79
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
05ad79
+firewall rules, the
05ad79
 .I /proc\:/net
05ad79
 and
05ad79
 .I /sys\:/class\:/net
05ad79
-directory trees, sockets etc.
05ad79
+directory trees, sockets, etc.
05ad79
 .RB ( CLONE_\:NEWNET
05ad79
-flag).
05ad79
+flag)
05ad79
 .TP
05ad79
 .B PID namespace
05ad79
-children will have a set of PID to process mappings separate from the
05ad79
+Children will have a set of PID to process mappings separate from the
05ad79
 .B nsenter
05ad79
 process
05ad79
 .RB ( CLONE_\:NEWPID
05ad79
@@ -46,18 +47,18 @@ flag).
05ad79
 .B nsenter
05ad79
 will fork by default if changing the PID namespace, so that the new program
05ad79
 and its children share the same PID namespace and are visible to each other.
05ad79
-If \-\-no\-fork is used, the new program will be exec'ed without forking.
05ad79
-.PP
05ad79
-See the
05ad79
-.BR clone (2)
05ad79
-for exact semantics of the flags.
05ad79
+If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking.
05ad79
 .TP
05ad79
-If program is not given, run ``${SHELL}'' (default: /bin\:/sh).
05ad79
+.B user namespace
05ad79
+The process will have a distinct set of UIDs, GIDs and capabilities.
05ad79
+.RB ( CLONE_\:NEWUSER
05ad79
+flag)
05ad79
+.TP
05ad79
+See \fBclone\fP(2) for the exact semantics of the flags.
05ad79
+.TP
05ad79
+If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh).
05ad79
 
05ad79
 .SH OPTIONS
05ad79
-Argument with square brakets, such as [\fIfile\fR], means optional argument.
05ad79
-Command line syntax to specify optional argument \-\-mount=/path\:/to\:/file.
05ad79
-Please notice the equals sign.
05ad79
 .TP
05ad79
 \fB\-t\fR, \fB\-\-target\fR \fIpid\fP
05ad79
 Specify a target process to get contexts from.  The paths to the contexts
05ad79
@@ -83,6 +84,9 @@ the network namespace
05ad79
 /proc/\fIpid\fR/ns/pid
05ad79
 the PID namespace
05ad79
 .TP
05ad79
+/proc/\fIpid\fR/ns/user
05ad79
+the user namespace
05ad79
+.TP
05ad79
 /proc/\fIpid\fR/root
05ad79
 the root directory
05ad79
 .TP
05ad79
@@ -91,51 +95,71 @@ the working directory respectively
05ad79
 .PD
05ad79
 .RE
05ad79
 .TP
05ad79
-\fB\-m\fR, \fB\-\-mount\fR [\fIfile\fR]
05ad79
-Enter the mount namespace.  If no file is specified enter the mount namespace
05ad79
-of the target process.  If file is specified enter the mount namespace
05ad79
+\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR]
05ad79
+Enter the mount namespace.  If no file is specified, enter the mount namespace
05ad79
+of the target process.  If file is specified, enter the mount namespace
05ad79
 specified by file.
05ad79
 .TP
05ad79
-\fB\-u\fR, \fB\-\-uts\fR [\fIfile\fR]
05ad79
-Enter the UTS namespace.  If no file is specified enter the UTS namespace of
05ad79
-the target process.  If file is specified enter the UTS namespace specified by
05ad79
+\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR]
05ad79
+Enter the UTS namespace.  If no file is specified, enter the UTS namespace of
05ad79
+the target process.  If file is specified, enter the UTS namespace specified by
05ad79
 file.
05ad79
 .TP
05ad79
-\fB\-i\fR, \fB\-\-ipc\fR [\fIfile\fR]
05ad79
-Enter the IPC namespace.  If no file is specified enter the IPC namespace of
05ad79
-the target process.  If file is specified enter the IPC namespace specified by
05ad79
+\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR]
05ad79
+Enter the IPC namespace.  If no file is specified, enter the IPC namespace of
05ad79
+the target process.  If file is specified, enter the IPC namespace specified by
05ad79
 file.
05ad79
 .TP
05ad79
-\fB\-n\fR, \fB\-\-net\fR [\fIfile\fR]
05ad79
-Enter the network namespace.  If no file is specified enter the network
05ad79
-namespace of the target process.  If file is specified enter the network
05ad79
+\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR]
05ad79
+Enter the network namespace.  If no file is specified, enter the network
05ad79
+namespace of the target process.  If file is specified, enter the network
05ad79
 namespace specified by file.
05ad79
 .TP
05ad79
-\fB\-p\fR, \fB\-\-pid\fR [\fIfile\fR]
05ad79
-Enter the PID namespace.  If no file is specified enter the PID namespace of
05ad79
-the target process.  If file is specified enter the PID namespace specified by
05ad79
+\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR]
05ad79
+Enter the PID namespace.  If no file is specified, enter the PID namespace of
05ad79
+the target process.  If file is specified, enter the PID namespace specified by
05ad79
 file.
05ad79
 .TP
05ad79
-\fB\-r\fR, \fB\-\-root\fR [\fIdirectory\fR]
05ad79
-Set the root directory.  If no directory is specified set the root directory to
05ad79
-the root directory of the target process.  If directory is specified set the
05ad79
+\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR]
05ad79
+Enter the user namespace.  If no file is specified, enter the user namespace of
05ad79
+the target process.  If file is specified, enter the user namespace specified by
05ad79
+file.  See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options.
05ad79
+.TP
05ad79
+\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR
05ad79
+Set the group ID which will be used in the entered namespace and drop
05ad79
+supplementary groups.
05ad79
+.BR nsenter (1)
05ad79
+always sets GID for user namespaces, the default is 0.
05ad79
+.TP
05ad79
+\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR
05ad79
+Set the user ID which will be used in the entered namespace.
05ad79
+.BR nsenter (1)
05ad79
+always sets UID for user namespaces, the default is 0.
05ad79
+.TP
05ad79
+\fB\-\-preserve\-credentials\fR
05ad79
+Don't modify UID and GID when entering a user namespace.  The default is to
05ad79
+drop supplementary groups and set GID and UID to 0.
05ad79
+.TP
05ad79
+\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR]
05ad79
+Set the root directory.  If no directory is specified, set the root directory to
05ad79
+the root directory of the target process.  If directory is specified, set the
05ad79
 root directory to the specified directory.
05ad79
 .TP
05ad79
-\fB\-w\fR, \fB\-\-wd\fR [\fIdirectory\fR]
05ad79
-Set the working directory.  If no directory is specified set the working
05ad79
+\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR]
05ad79
+Set the working directory.  If no directory is specified, set the working
05ad79
 directory to the working directory of the target process.  If directory is
05ad79
-specified set the working directory to the specified directory.
05ad79
+specified, set the working directory to the specified directory.
05ad79
 .TP
05ad79
-\fB\-F\fR, \fB\-\-no-fork\fR
05ad79
-Do not fork before exec'ing the specified program.  By default when entering a
05ad79
-pid namespace enter calls fork before calling exec so that the children will be
05ad79
-in the newly entered pid namespace.
05ad79
+\fB\-F\fR, \fB\-\-no\-fork\fR
05ad79
+Do not fork before exec'ing the specified program.  By default, when entering a
05ad79
+PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that
05ad79
+any children will also be in the newly entered PID namespace.
05ad79
 .TP
05ad79
 \fB\-V\fR, \fB\-\-version\fR
05ad79
 Display version information and exit.
05ad79
 .TP
05ad79
 \fB\-h\fR, \fB\-\-help\fR
05ad79
-Print a help message.
05ad79
+Display help text and exit.
05ad79
 .SH SEE ALSO
05ad79
 .BR setns (2),
05ad79
 .BR clone (2)
05ad79
diff -up util-linux-2.23.2/sys-utils/nsenter.c.kzak util-linux-2.23.2/sys-utils/nsenter.c
05ad79
--- util-linux-2.23.2/sys-utils/nsenter.c.kzak	2015-06-26 09:58:39.468633643 +0200
05ad79
+++ util-linux-2.23.2/sys-utils/nsenter.c	2015-06-26 09:58:51.673541033 +0200
05ad79
@@ -28,6 +28,7 @@
05ad79
 #include <assert.h>
05ad79
 #include <sys/types.h>
05ad79
 #include <sys/wait.h>
05ad79
+#include <grp.h>
05ad79
 
05ad79
 #include "strutils.h"
05ad79
 #include "nls.h"
05ad79
@@ -42,7 +43,12 @@ static struct namespace_file {
05ad79
 	int fd;
05ad79
 } namespace_files[] = {
05ad79
 	/* Careful the order is significant in this array.
05ad79
+	 *
05ad79
+	 * The user namespace comes first, so that it is entered
05ad79
+	 * first.  This gives an unprivileged user the potential to
05ad79
+	 * enter the other namespaces.
05ad79
 	 */
05ad79
+	{ .nstype = CLONE_NEWUSER, .name = "ns/user", .fd = -1 },
05ad79
 	{ .nstype = CLONE_NEWIPC,  .name = "ns/ipc",  .fd = -1 },
05ad79
 	{ .nstype = CLONE_NEWUTS,  .name = "ns/uts",  .fd = -1 },
05ad79
 	{ .nstype = CLONE_NEWNET,  .name = "ns/net",  .fd = -1 },
05ad79
@@ -56,18 +62,25 @@ static void usage(int status)
05ad79
 	FILE *out = status == EXIT_SUCCESS ? stdout : stderr;
05ad79
 
05ad79
 	fputs(USAGE_HEADER, out);
05ad79
-	fprintf(out, _(" %s [options] <program> [args...]\n"),
05ad79
+	fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
05ad79
 		program_invocation_short_name);
05ad79
 
05ad79
+	fputs(USAGE_SEPARATOR, out);
05ad79
+	fputs(_("Run a program with namespaces of other processes.\n"), out);
05ad79
+
05ad79
 	fputs(USAGE_OPTIONS, out);
05ad79
 	fputs(_(" -t, --target <pid>     target process to get namespaces from\n"), out);
05ad79
-	fputs(_(" -m, --mount [=<file>]  enter mount namespace\n"), out);
05ad79
-	fputs(_(" -u, --uts   [=<file>]  enter UTS namespace (hostname etc)\n"), out);
05ad79
-	fputs(_(" -i, --ipc   [=<file>]  enter System V IPC namespace\n"), out);
05ad79
-	fputs(_(" -n, --net   [=<file>]  enter network namespace\n"), out);
05ad79
-	fputs(_(" -p, --pid   [=<file>]  enter pid namespace\n"), out);
05ad79
-	fputs(_(" -r, --root  [=<dir>]   set the root directory\n"), out);
05ad79
-	fputs(_(" -w, --wd    [=<dir>]   set the working directory\n"), out);
05ad79
+	fputs(_(" -m, --mount[=<file>]   enter mount namespace\n"), out);
05ad79
+	fputs(_(" -u, --uts[=<file>]     enter UTS namespace (hostname etc)\n"), out);
05ad79
+	fputs(_(" -i, --ipc[=<file>]     enter System V IPC namespace\n"), out);
05ad79
+	fputs(_(" -n, --net[=<file>]     enter network namespace\n"), out);
05ad79
+	fputs(_(" -p, --pid[=<file>]     enter pid namespace\n"), out);
05ad79
+	fputs(_(" -U, --user[=<file>]    enter user namespace\n"), out);
05ad79
+	fputs(_(" -S, --setuid <uid>     set uid in entered namespace\n"), out);
05ad79
+	fputs(_(" -G, --setgid <gid>     set gid in entered namespace\n"), out);
05ad79
+	fputs(_("     --preserve-credentials do not touch uids or gids\n"), out);
05ad79
+	fputs(_(" -r, --root[=<dir>]     set the root directory\n"), out);
05ad79
+	fputs(_(" -w, --wd[=<dir>]       set the working directory\n"), out);
05ad79
 	fputs(_(" -F, --no-fork          do not fork before exec'ing <program>\n"), out);
05ad79
 
05ad79
 	fputs(USAGE_SEPARATOR, out);
05ad79
@@ -153,6 +166,9 @@ static void continue_as_child(void)
05ad79
 
05ad79
 int main(int argc, char *argv[])
05ad79
 {
05ad79
+	enum {
05ad79
+		OPT_PRESERVE_CRED = CHAR_MAX + 1
05ad79
+	};
05ad79
 	static const struct option longopts[] = {
05ad79
 		{ "help", no_argument, NULL, 'h' },
05ad79
 		{ "version", no_argument, NULL, 'V'},
05ad79
@@ -162,24 +178,30 @@ int main(int argc, char *argv[])
05ad79
 		{ "ipc", optional_argument, NULL, 'i' },
05ad79
 		{ "net", optional_argument, NULL, 'n' },
05ad79
 		{ "pid", optional_argument, NULL, 'p' },
05ad79
+		{ "user", optional_argument, NULL, 'U' },
05ad79
+		{ "setuid", required_argument, NULL, 'S' },
05ad79
+		{ "setgid", required_argument, NULL, 'G' },
05ad79
 		{ "root", optional_argument, NULL, 'r' },
05ad79
 		{ "wd", optional_argument, NULL, 'w' },
05ad79
 		{ "no-fork", no_argument, NULL, 'F' },
05ad79
+		{ "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED },
05ad79
 		{ NULL, 0, NULL, 0 }
05ad79
 	};
05ad79
 
05ad79
 	struct namespace_file *nsfile;
05ad79
-	int c, namespaces = 0;
05ad79
-	bool do_rd = false, do_wd = false;
05ad79
+	int c, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0;
05ad79
+	bool do_rd = false, do_wd = false, force_uid = false, force_gid = false;
05ad79
 	int do_fork = -1; /* unknown yet */
05ad79
+	uid_t uid = 0;
05ad79
+	gid_t gid = 0;
05ad79
 
05ad79
-	setlocale(LC_MESSAGES, "");
05ad79
+	setlocale(LC_ALL, "");
05ad79
 	bindtextdomain(PACKAGE, LOCALEDIR);
05ad79
 	textdomain(PACKAGE);
05ad79
 	atexit(close_stdout);
05ad79
 
05ad79
 	while ((c =
05ad79
-		getopt_long(argc, argv, "hVt:m::u::i::n::p::r::w::F",
05ad79
+		getopt_long(argc, argv, "+hVt:m::u::i::n::p::U::S:G:r::w::F",
05ad79
 			    longopts, NULL)) != -1) {
05ad79
 		switch (c) {
05ad79
 		case 'h':
05ad79
@@ -221,6 +243,20 @@ int main(int argc, char *argv[])
05ad79
 			else
05ad79
 				namespaces |= CLONE_NEWPID;
05ad79
 			break;
05ad79
+		case 'U':
05ad79
+			if (optarg)
05ad79
+				open_namespace_fd(CLONE_NEWUSER, optarg);
05ad79
+			else
05ad79
+				namespaces |= CLONE_NEWUSER;
05ad79
+			break;
05ad79
+		case 'S':
05ad79
+			uid = strtoul_or_err(optarg, _("failed to parse uid"));
05ad79
+			force_uid = true;
05ad79
+			break;
05ad79
+		case 'G':
05ad79
+			gid = strtoul_or_err(optarg, _("failed to parse gid"));
05ad79
+			force_gid = true;
05ad79
+			break;
05ad79
 		case 'F':
05ad79
 			do_fork = 0;
05ad79
 			break;
05ad79
@@ -236,6 +272,9 @@ int main(int argc, char *argv[])
05ad79
 			else
05ad79
 				do_wd = true;
05ad79
 			break;
05ad79
+		case OPT_PRESERVE_CRED:
05ad79
+			preserve_cred = 1;
05ad79
+			break;
05ad79
 		default:
05ad79
 			usage(EXIT_FAILURE);
05ad79
 		}
05ad79
@@ -253,6 +292,26 @@ int main(int argc, char *argv[])
05ad79
 		open_target_fd(&wd_fd, "cwd", NULL);
05ad79
 
05ad79
 	/*
05ad79
+	 * Update namespaces variable to contain all requested namespaces
05ad79
+	 */
05ad79
+	for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
05ad79
+		if (nsfile->fd < 0)
05ad79
+			continue;
05ad79
+		namespaces |= nsfile->nstype;
05ad79
+	}
05ad79
+
05ad79
+	/* for user namespaces we always set UID and GID (default is 0)
05ad79
+	 * and clear root's groups if --preserve-credentials is not specified */
05ad79
+	if ((namespaces & CLONE_NEWUSER) && !preserve_cred) {
05ad79
+		force_uid = true, force_gid = true;
05ad79
+
05ad79
+		/* We call setgroups() before and after we enter user namespace,
05ad79
+		 * let's complain only if both fail */
05ad79
+		if (setgroups(0, NULL) != 0)
05ad79
+			setgroups_nerrs++;
05ad79
+	}
05ad79
+
05ad79
+	/*
05ad79
 	 * Now that we know which namespaces we want to enter, enter them.
05ad79
 	 */
05ad79
 	for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
05ad79
@@ -302,6 +361,15 @@ int main(int argc, char *argv[])
05ad79
 	if (do_fork == 1)
05ad79
 		continue_as_child();
05ad79
 
05ad79
+	if (force_uid || force_gid) {
05ad79
+		if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs)	/* drop supplementary groups */
05ad79
+			err(EXIT_FAILURE, _("setgroups failed"));
05ad79
+		if (force_gid && setgid(gid) < 0)		/* change GID */
05ad79
+			err(EXIT_FAILURE, _("setgid failed"));
05ad79
+		if (force_uid && setuid(uid) < 0)		/* change UID */
05ad79
+			err(EXIT_FAILURE, _("setuid failed"));
05ad79
+	}
05ad79
+
05ad79
 	if (optind < argc) {
05ad79
 		execvp(argv[optind], argv + optind);
05ad79
 		err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]);
05ad79
diff -up util-linux-2.23.2/sys-utils/unshare.1.kzak util-linux-2.23.2/sys-utils/unshare.1
05ad79
--- util-linux-2.23.2/sys-utils/unshare.1.kzak	2015-06-26 09:58:39.484633521 +0200
05ad79
+++ util-linux-2.23.2/sys-utils/unshare.1	2015-06-26 09:58:51.673541033 +0200
05ad79
@@ -1,28 +1,27 @@
05ad79
-.\" Process this file with
05ad79
-.\" groff -man -Tascii lscpu.1
05ad79
-.\"
05ad79
-.TH UNSHARE 1 "July 2013" "util-linux" "User Commands"
05ad79
+.TH UNSHARE 1 "July 2014" "util-linux" "User Commands"
05ad79
 .SH NAME
05ad79
 unshare \- run program with some namespaces unshared from parent
05ad79
 .SH SYNOPSIS
05ad79
 .B unshare
05ad79
-.RI [ options ]
05ad79
+[options]
05ad79
 .I program
05ad79
 .RI [ arguments ]
05ad79
 .SH DESCRIPTION
05ad79
 Unshares the indicated namespaces from the parent process and then executes
05ad79
-the specified program.  The namespaces to be unshared are indicated via
05ad79
+the specified \fIprogram\fR.  The namespaces to be unshared are indicated via
05ad79
 options.  Unshareable namespaces are:
05ad79
 .TP
05ad79
 .BR "mount namespace"
05ad79
 Mounting and unmounting filesystems will not affect the rest of the system
05ad79
 (\fBCLONE_NEWNS\fP flag), except for filesystems which are explicitly marked as
05ad79
-shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP for the
05ad79
-\fBshared\fP flags).
05ad79
-
05ad79
-It's recommended to use \fBmount --make-rprivate\fP or \fBmount --make-rslave\fP
05ad79
-after \fBunshare --mount\fP to make sure that mountpoints in the new namespace
05ad79
-are really unshared from parental namespace.
05ad79
+shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP or
05ad79
+\fBfindmnt -o+PROPAGATION\fP for the \fBshared\fP flags).
05ad79
+.sp
05ad79
+.B unshare
05ad79
+automatically sets propagation to \fBprivate\fP
05ad79
+in the new mount namespace to make sure that the new namespace is really
05ad79
+unshared.  This feature can be disabled with the option \fB\-\-propagation unchanged\fP.
05ad79
+Note that \fBprivate\fP is the kernel default.
05ad79
 .TP
05ad79
 .BR "UTS namespace"
05ad79
 Setting hostname or domainname will not affect the rest of the system.
05ad79
@@ -40,13 +39,14 @@ sockets, etc.  (\fBCLONE_NEWNET\fP flag)
05ad79
 .BR "pid namespace"
05ad79
 Children will have a distinct set of PID to process mappings from their parent.
05ad79
 (\fBCLONE_NEWPID\fP flag)
05ad79
+.TP
05ad79
+.BR "user namespace"
05ad79
+The process will have a distinct set of UIDs, GIDs and capabilities.
05ad79
+(\fBCLONE_NEWUSER\fP flag)
05ad79
 .PP
05ad79
 See \fBclone\fR(2) for the exact semantics of the flags.
05ad79
 .SH OPTIONS
05ad79
 .TP
05ad79
-.BR \-h , " \-\-help"
05ad79
-Display help text and exit.
05ad79
-.TP
05ad79
 .BR \-i , " \-\-ipc"
05ad79
 Unshare the IPC namespace.
05ad79
 .TP
05ad79
@@ -63,16 +63,68 @@ See also the \fB--fork\fP and \fB--mount
05ad79
 .BR \-u , " \-\-uts"
05ad79
 Unshare the UTS namespace.
05ad79
 .TP
05ad79
+.BR \-U , " \-\-user"
05ad79
+Unshare the user namespace.
05ad79
+.TP
05ad79
 .BR \-f , " \-\-fork"
05ad79
 Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than
05ad79
 running it directly.  This is useful when creating a new pid namespace.
05ad79
 .TP
05ad79
-.BR \-\-mount-proc "[=\fImountpoint\fP]"
05ad79
-Just before running the program, mount the proc filesystem at the \fImountpoint\fP
05ad79
+.BR \-\-mount\-proc "[=\fImountpoint\fP]"
05ad79
+Just before running the program, mount the proc filesystem at \fImountpoint\fP
05ad79
 (default is /proc).  This is useful when creating a new pid namespace.  It also
05ad79
 implies creating a new mount namespace since the /proc mount would otherwise
05ad79
-mess up existing programs on the system. The new proc filesystem is explicitly
05ad79
+mess up existing programs on the system.  The new proc filesystem is explicitly
05ad79
 mounted as private (by MS_PRIVATE|MS_REC).
05ad79
+.TP
05ad79
+.BR \-r , " \-\-map\-root\-user"
05ad79
+Run the program only after the current effective user and group IDs have been mapped to
05ad79
+the superuser UID and GID in the newly created user namespace.  This makes it possible to
05ad79
+conveniently gain capabilities needed to manage various aspects of the newly created
05ad79
+namespaces (such as configuring interfaces in the network namespace or mounting filesystems in
05ad79
+the mount namespace) even when run unprivileged.  As a mere convenience feature, it does not support
05ad79
+more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs.
05ad79
+This option implies --setgroups=deny.
05ad79
+.TP
05ad79
+.BR "\-\-propagation \fIprivate|shared|slave|unchanged\fP"
05ad79
+Recursively set the mount propagation flag in the new mount namespace.  The default
05ad79
+is to set the propagation to \fIprivate\fP; this feature can be disabled
05ad79
+with the \fIunchanged\fP argument.  The option is silently ignored when the mount namespace (\fB\-\-mount\fP)
05ad79
+is not requested.
05ad79
+.TP
05ad79
+.BR "\-\-setgroups \fIallow|deny\fP"
05ad79
+Allow or deny
05ad79
+.BR setgroups (2)
05ad79
+syscall in user namespaces.
05ad79
+.sp
05ad79
+.BR setgroups (2)
05ad79
+is only callable with CAP_SETGID, and CAP_SETGID in a user
05ad79
+namespace (since Linux 3.19) does not give you permission to call setgroups(2)
05ad79
+until after the GID map has been set.  The GID map is writable by root when
05ad79
+.BR setgroups (2)
05ad79
+is enabled, and the GID map becomes writable by unprivileged processes when
05ad79
+.BR setgroups (2)
05ad79
+is permanently disabled.
05ad79
+.TP
05ad79
+.BR \-V , " \-\-version"
05ad79
+Display version information and exit.
05ad79
+.TP
05ad79
+.BR \-h , " \-\-help"
05ad79
+Display help text and exit.
05ad79
+.SH EXAMPLES
05ad79
+.TP
05ad79
+.B # unshare --fork --pid --mount-proc readlink /proc/self
05ad79
+.TQ
05ad79
+1
05ad79
+.br
05ad79
+Establish a PID namespace, ensure we're PID 1 in it against a newly mounted
05ad79
+procfs instance.
05ad79
+.TP
05ad79
+.B $ unshare --map-root-user --user sh -c whoami
05ad79
+.TQ
05ad79
+root
05ad79
+.br
05ad79
+Establish a user namespace as an unprivileged user with a root user within it.
05ad79
 .SH SEE ALSO
05ad79
 .BR unshare (2),
05ad79
 .BR clone (2),
05ad79
diff -up util-linux-2.23.2/sys-utils/unshare.c.kzak util-linux-2.23.2/sys-utils/unshare.c
05ad79
--- util-linux-2.23.2/sys-utils/unshare.c.kzak	2015-06-26 09:58:39.484633521 +0200
05ad79
+++ util-linux-2.23.2/sys-utils/unshare.c	2015-06-26 09:58:51.673541033 +0200
05ad79
@@ -32,19 +32,117 @@
05ad79
 
05ad79
 #include "nls.h"
05ad79
 #include "c.h"
05ad79
+#include "closestream.h"
05ad79
 #include "namespace.h"
05ad79
 #include "exec_shell.h"
05ad79
 #include "xalloc.h"
05ad79
 #include "pathnames.h"
05ad79
+#include "all-io.h"
05ad79
 
05ad79
+/* 'private' is kernel default */
05ad79
+#define UNSHARE_PROPAGATION_DEFAULT	(MS_REC | MS_PRIVATE)
05ad79
+
05ad79
+enum {
05ad79
+	SETGROUPS_NONE = -1,
05ad79
+	SETGROUPS_DENY = 0,
05ad79
+	SETGROUPS_ALLOW = 1,
05ad79
+};
05ad79
+
05ad79
+static const char *setgroups_strings[] =
05ad79
+{
05ad79
+	[SETGROUPS_DENY] = "deny",
05ad79
+	[SETGROUPS_ALLOW] = "allow"
05ad79
+};
05ad79
+
05ad79
+static int setgroups_str2id(const char *str)
05ad79
+{
05ad79
+	size_t i;
05ad79
+
05ad79
+	for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++)
05ad79
+		if (strcmp(str, setgroups_strings[i]) == 0)
05ad79
+			return i;
05ad79
+
05ad79
+	errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
05ad79
+}
05ad79
+
05ad79
+static void setgroups_control(int action)
05ad79
+{
05ad79
+	const char *file = _PATH_PROC_SETGROUPS;
05ad79
+	const char *cmd;
05ad79
+	int fd;
05ad79
+
05ad79
+	if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings))
05ad79
+		return;
05ad79
+	cmd = setgroups_strings[action];
05ad79
+
05ad79
+	fd = open(file, O_WRONLY);
05ad79
+	if (fd < 0) {
05ad79
+		if (errno == ENOENT)
05ad79
+			return;
05ad79
+		 err(EXIT_FAILURE, _("cannot open %s"), file);
05ad79
+	}
05ad79
+
05ad79
+	if (write_all(fd, cmd, strlen(cmd)))
05ad79
+		err(EXIT_FAILURE, _("write failed %s"), file);
05ad79
+	close(fd);
05ad79
+}
05ad79
+
05ad79
+static void map_id(const char *file, uint32_t from, uint32_t to)
05ad79
+{
05ad79
+	char *buf;
05ad79
+	int fd;
05ad79
+
05ad79
+	fd = open(file, O_WRONLY);
05ad79
+	if (fd < 0)
05ad79
+		 err(EXIT_FAILURE, _("cannot open %s"), file);
05ad79
+
05ad79
+	xasprintf(&buf, "%u %u 1", from, to);
05ad79
+	if (write_all(fd, buf, strlen(buf)))
05ad79
+		err(EXIT_FAILURE, _("write failed %s"), file);
05ad79
+	free(buf);
05ad79
+	close(fd);
05ad79
+}
05ad79
+
05ad79
+static unsigned long parse_propagation(const char *str)
05ad79
+{
05ad79
+	size_t i;
05ad79
+	static const struct prop_opts {
05ad79
+		const char *name;
05ad79
+		unsigned long flag;
05ad79
+	} opts[] = {
05ad79
+		{ "slave",	MS_REC | MS_SLAVE },
05ad79
+		{ "private",	MS_REC | MS_PRIVATE },
05ad79
+		{ "shared",     MS_REC | MS_SHARED },
05ad79
+		{ "unchanged",        0 }
05ad79
+	};
05ad79
+
05ad79
+	for (i = 0; i < ARRAY_SIZE(opts); i++) {
05ad79
+		if (strcmp(opts[i].name, str) == 0)
05ad79
+			return opts[i].flag;
05ad79
+	}
05ad79
+
05ad79
+	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
05ad79
+}
05ad79
+
05ad79
+static void set_propagation(unsigned long flags)
05ad79
+{
05ad79
+	if (flags == 0)
05ad79
+		return;
05ad79
+
05ad79
+	if (mount("none", "/", NULL, flags, NULL) != 0)
05ad79
+		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
05ad79
+}
05ad79
 
05ad79
 static void usage(int status)
05ad79
 {
05ad79
 	FILE *out = status == EXIT_SUCCESS ? stdout : stderr;
05ad79
 
05ad79
 	fputs(USAGE_HEADER, out);
05ad79
-	fprintf(out,
05ad79
-	      _(" %s [options] <program> [args...]\n"),	program_invocation_short_name);
05ad79
+	fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
05ad79
+		program_invocation_short_name);
05ad79
+
05ad79
+	fputs(USAGE_SEPARATOR, out);
05ad79
+	fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
05ad79
 
05ad79
 	fputs(USAGE_OPTIONS, out);
05ad79
 	fputs(_(" -m, --mount               unshare mounts namespace\n"), out);
05ad79
@@ -52,8 +150,13 @@ static void usage(int status)
05ad79
 	fputs(_(" -i, --ipc                 unshare System V IPC namespace\n"), out);
05ad79
 	fputs(_(" -n, --net                 unshare network namespace\n"), out);
05ad79
 	fputs(_(" -p, --pid                 unshare pid namespace\n"), out);
05ad79
+	fputs(_(" -U, --user                unshare user namespace\n"), out);
05ad79
 	fputs(_(" -f, --fork                fork before launching <program>\n"), out);
05ad79
 	fputs(_("     --mount-proc[=<dir>]  mount proc filesystem first (implies --mount)\n"), out);
05ad79
+	fputs(_(" -r, --map-root-user       map current user to root (implies --user)\n"), out);
05ad79
+	fputs(_("     --propagation <slave|shared|private|unchanged>\n"
05ad79
+	        "                           modify mount propagation in mount namespace\n"), out);
05ad79
+	fputs(_("     --setgroups allow|deny  control the setgroups syscall in user namespaces\n"), out);
05ad79
 
05ad79
 	fputs(USAGE_SEPARATOR, out);
05ad79
 	fputs(USAGE_HELP, out);
05ad79
@@ -66,7 +169,9 @@ static void usage(int status)
05ad79
 int main(int argc, char *argv[])
05ad79
 {
05ad79
 	enum {
05ad79
-		OPT_MOUNTPROC = CHAR_MAX + 1
05ad79
+		OPT_MOUNTPROC = CHAR_MAX + 1,
05ad79
+		OPT_PROPAGATION,
05ad79
+		OPT_SETGROUPS
05ad79
 	};
05ad79
 	static const struct option longopts[] = {
05ad79
 		{ "help", no_argument, 0, 'h' },
05ad79
@@ -76,20 +181,29 @@ int main(int argc, char *argv[])
05ad79
 		{ "ipc", no_argument, 0, 'i' },
05ad79
 		{ "net", no_argument, 0, 'n' },
05ad79
 		{ "pid", no_argument, 0, 'p' },
05ad79
+		{ "user", no_argument, 0, 'U' },
05ad79
 		{ "fork", no_argument, 0, 'f' },
05ad79
 		{ "mount-proc", optional_argument, 0, OPT_MOUNTPROC },
05ad79
+		{ "map-root-user", no_argument, 0, 'r' },
05ad79
+		{ "propagation", required_argument, 0, OPT_PROPAGATION },
05ad79
+		{ "setgroups", required_argument, 0, OPT_SETGROUPS },
05ad79
 		{ NULL, 0, 0, 0 }
05ad79
 	};
05ad79
 
05ad79
+	int setgrpcmd = SETGROUPS_NONE;
05ad79
 	int unshare_flags = 0;
05ad79
-	int c, forkit = 0;
05ad79
+	int c, forkit = 0, maproot = 0;
05ad79
 	const char *procmnt = NULL;
05ad79
+	unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
05ad79
+	uid_t real_euid = geteuid();
05ad79
+	gid_t real_egid = getegid();;
05ad79
 
05ad79
 	setlocale(LC_ALL, "");
05ad79
 	bindtextdomain(PACKAGE, LOCALEDIR);
05ad79
 	textdomain(PACKAGE);
05ad79
+	atexit(close_stdout);
05ad79
 
05ad79
-	while ((c = getopt_long(argc, argv, "+fhVmuinp", longopts, NULL)) != -1) {
05ad79
+	while ((c = getopt_long(argc, argv, "+fhVmuinpUr", longopts, NULL)) != -1) {
05ad79
 		switch (c) {
05ad79
 		case 'f':
05ad79
 			forkit = 1;
05ad79
@@ -114,10 +228,23 @@ int main(int argc, char *argv[])
05ad79
 		case 'p':
05ad79
 			unshare_flags |= CLONE_NEWPID;
05ad79
 			break;
05ad79
+		case 'U':
05ad79
+			unshare_flags |= CLONE_NEWUSER;
05ad79
+			break;
05ad79
 		case OPT_MOUNTPROC:
05ad79
 			unshare_flags |= CLONE_NEWNS;
05ad79
 			procmnt = optarg ? optarg : "/proc";
05ad79
 			break;
05ad79
+		case 'r':
05ad79
+			unshare_flags |= CLONE_NEWUSER;
05ad79
+			maproot = 1;
05ad79
+			break;
05ad79
+		case OPT_SETGROUPS:
05ad79
+			setgrpcmd = setgroups_str2id(optarg);
05ad79
+			break;
05ad79
+		case OPT_PROPAGATION:
05ad79
+			propagation = parse_propagation(optarg);
05ad79
+			break;
05ad79
 		default:
05ad79
 			usage(EXIT_FAILURE);
05ad79
 		}
05ad79
@@ -146,6 +273,25 @@ int main(int argc, char *argv[])
05ad79
 		}
05ad79
 	}
05ad79
 
05ad79
+	if (maproot) {
05ad79
+		if (setgrpcmd == SETGROUPS_ALLOW)
05ad79
+			errx(EXIT_FAILURE, _("options --setgroups=allow and "
05ad79
+					"--map-root-user are mutually exclusive"));
05ad79
+
05ad79
+		/* since Linux 3.19 unprivileged writing of /proc/self/gid_map
05ad79
+		 * has been disabled unless /proc/self/setgroups is written
05ad79
+		 * first to permanently disable the ability to call setgroups
05ad79
+		 * in that user namespace. */
05ad79
+		setgroups_control(SETGROUPS_DENY);
05ad79
+		map_id(_PATH_PROC_UIDMAP, 0, real_euid);
05ad79
+		map_id(_PATH_PROC_GIDMAP, 0, real_egid);
05ad79
+
05ad79
+	} else if (setgrpcmd != SETGROUPS_NONE)
05ad79
+		setgroups_control(setgrpcmd);
05ad79
+
05ad79
+	if ((unshare_flags & CLONE_NEWNS) && propagation)
05ad79
+		set_propagation(propagation);
05ad79
+
05ad79
 	if (procmnt &&
05ad79
 	    (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 ||
05ad79
 	     mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))