linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] capabilities: add capability cgroup controller
@ 2016-06-18 19:31 Topi Miettinen
  2016-06-19 20:01 ` serge
  0 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-18 19:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, Topi Miettinen, Tejun Heo, Li Zefan, Johannes Weiner,
	Serge Hallyn, James Morris, Serge E. Hallyn, Andrew Morton,
	David Howells, David Woodhouse, Ingo Molnar, Petr Mladek,
	Ard Biesheuvel, Rusty Russell, open list:CONTROL GROUP (CGROUP),
	open list:CAPABILITIES

Add a new cgroup controller for enforcement of and monitoring of
capabilities in the cgroup.

Test case (boot to rdshell);
BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
Enter 'help' for a list of built-in commands.

(initramfs) cd /sys/fs
(initramfs) mount -t cgroup2 cgroup cgroup
(initramfs) cd cgroup
(initramfs) echo +capability > cgroup.subtree_control
(initramfs) mkdir test; cd test
(initramfs) ls
capability.bounding_set  cgroup.controllers       cgroup.procs
capability.used          cgroup.events            cgroup.subtree_control
(initramfs) sh

BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
Enter 'help' for a list of built-in commands.

(initramfs) echo $$ >cgroup.procs
(initramfs) cat capability.used
0000000000000000
(initramfs) mknod /dev/z1 c 1 2
(initramfs) cat capability.used
0000000008000000
(initramfs) exit
(initramfs) echo 0000000000000000 > capability.bounding_set
(initramfs) sh

BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
Enter 'help' for a list of built-in commands.

(initramfs) echo $$ >cgroup.procs
(initramfs) mknod /dev/z2 c 1 2
mknod: /dev/z2: Operation not permitted
(initramfs) exit

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 include/linux/capability_cgroup.h |   7 ++
 include/linux/cgroup_subsys.h     |   4 +
 init/Kconfig                      |   6 ++
 kernel/capability.c               |   2 +
 security/Makefile                 |   1 +
 security/capability_cgroup.c      | 216 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 236 insertions(+)
 create mode 100644 include/linux/capability_cgroup.h
 create mode 100644 security/capability_cgroup.c

diff --git a/include/linux/capability_cgroup.h b/include/linux/capability_cgroup.h
new file mode 100644
index 0000000..c03b58d
--- /dev/null
+++ b/include/linux/capability_cgroup.h
@@ -0,0 +1,7 @@
+#ifdef CONFIG_CGROUP_CAPABILITY
+void capability_cgroup_update_used(int cap);
+#else
+static inline void capability_cgroup_update_used(int cap)
+{
+}
+#endif
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0df0336a..a5161d0 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -56,6 +56,10 @@ SUBSYS(hugetlb)
 SUBSYS(pids)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_CAPABILITY)
+SUBSYS(capability)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/init/Kconfig b/init/Kconfig
index f755a60..098ce66 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1141,6 +1141,12 @@ config CGROUP_PERF
 
 	  Say N if unsure.
 
+config CGROUP_CAPABILITY
+	bool "Capability controller"
+	help
+	  Provides a simple controller for enforcement of and monitoring of
+	  capabilities in the cgroup.
+
 config CGROUP_DEBUG
 	bool "Example controller"
 	default n
diff --git a/kernel/capability.c b/kernel/capability.c
index 45432b5..b57d7f9 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -17,6 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/capability_cgroup.h>
 #include <asm/uaccess.h>
 
 /*
@@ -380,6 +381,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
 	}
 
 	if (security_capable(current_cred(), ns, cap) == 0) {
+		capability_cgroup_update_used(cap);
 		current->flags |= PF_SUPERPRIV;
 		return true;
 	}
diff --git a/security/Makefile b/security/Makefile
index f2d71cd..2bb04f1 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)		+= apparmor/
 obj-$(CONFIG_SECURITY_YAMA)		+= yama/
 obj-$(CONFIG_SECURITY_LOADPIN)		+= loadpin/
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
+obj-$(CONFIG_CGROUP_CAPABILITY)		+= capability_cgroup.o
 
 # Object integrity file lists
 subdir-$(CONFIG_INTEGRITY)		+= integrity
diff --git a/security/capability_cgroup.c b/security/capability_cgroup.c
new file mode 100644
index 0000000..6e03fce
--- /dev/null
+++ b/security/capability_cgroup.c
@@ -0,0 +1,216 @@
+/*
+ * Capability cgroup
+ *
+ * Copyright 2016 Topi Miettinen
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/capability.h>
+#include <linux/capability_cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/cred.h>
+#include <linux/security.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(capcg_mutex);
+
+struct capcg_cgroup {
+	struct cgroup_subsys_state css;
+	kernel_cap_t cap_bset; /* Capability bounding set */
+	kernel_cap_t cap_used; /* Capabilities actually used */
+};
+
+static inline struct capcg_cgroup *css_to_capcg(struct cgroup_subsys_state *s)
+{
+	return s ? container_of(s, struct capcg_cgroup, css) : NULL;
+}
+
+static inline struct capcg_cgroup *task_to_capcg(struct task_struct *task)
+{
+	return css_to_capcg(task_css(task, capability_cgrp_id));
+}
+
+static struct cgroup_subsys_state *capcg_css_alloc(struct cgroup_subsys_state
+						   *parent)
+{
+	struct capcg_cgroup *caps;
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return ERR_PTR(-ENOMEM);
+
+	caps->cap_bset = CAP_FULL_SET;
+	cap_clear(caps->cap_used);
+	return &caps->css;
+}
+
+static void capcg_css_free(struct cgroup_subsys_state *css)
+{
+	kfree(css_to_capcg(css));
+}
+
+/**
+ * capcg_apply_bset - apply cgroup bounding set to all task's capabilities
+ */
+static int capcg_task_apply_bset(struct task_struct *task, kernel_cap_t bset)
+{
+	struct cred *new;
+	const struct cred *old;
+	kernel_cap_t bounding, effective, inheritable, permitted;
+	int ret;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = security_capget(task, 
+			      &effective, &inheritable, &permitted);
+	if (ret < 0)
+		goto abort_cred;
+
+	old = get_task_cred(task);
+	bounding = cap_intersect(bset, old->cap_bset);
+	effective = cap_intersect(bset, effective);
+	inheritable = cap_intersect(bset, inheritable);
+	permitted = cap_intersect(bset, permitted);
+
+	/* security_capset() also updates ambient capabilities */
+	ret = security_capset(new, old,
+			      &effective, &inheritable, &permitted);
+	new->cap_bset = bounding;
+		
+	put_cred(old);
+	if (ret < 0)
+		goto abort_cred;
+
+	ret = commit_creds(new);
+	return ret;
+
+ abort_cred:
+	abort_creds(new);
+	return ret;
+}
+
+static void capcg_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+	cgroup_taskset_for_each(task, css, tset) {
+		struct capcg_cgroup *caps = css_to_capcg(css);
+		
+		capcg_task_apply_bset(task, caps->cap_bset);
+	}
+	rcu_read_unlock();
+}
+
+/** capcg_write_bset - update css tree and their tasks with new
+ *  bounding capability
+ */
+static ssize_t capcg_write_bset(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
+{
+	struct cgroup_subsys_state *css = of_css(of), *pos;
+	struct capcg_cgroup *caps = css_to_capcg(css);
+	u32 capi;
+	int err;
+	kernel_cap_t new_bset;
+
+	buf = strstrip(buf);
+
+	CAP_FOR_EACH_U32(capi) {
+		char buf2[9]; /* for each 32 bit block */
+		u32 capv;
+
+		memcpy(buf2, &buf[capi * 8], 8);
+		buf2[8] = '\0';
+		err = kstrtou32(buf2, 16, &capv);
+		if (err)
+			return err;
+		new_bset.cap[CAP_LAST_U32 - capi] = capv;
+	}
+
+	mutex_lock(&capcg_mutex);
+	caps->cap_bset = cap_intersect(caps->cap_bset, new_bset);
+	mutex_unlock(&capcg_mutex);
+
+	rcu_read_lock();
+	css_for_each_child(pos, css) {
+		struct css_task_iter it;
+		struct task_struct *task;
+
+		css_task_iter_start(pos, &it);
+		while ((task = css_task_iter_next(&it)))
+			capcg_task_apply_bset(task, new_bset);
+	}
+	rcu_read_unlock();
+
+	return nbytes;
+}
+
+static int capcg_seq_show_cap(struct seq_file *m, kernel_cap_t *cap)
+{
+	u32 capi;
+
+	rcu_read_lock();
+
+	CAP_FOR_EACH_U32(capi) {
+		seq_printf(m, "%08x",
+			   cap->cap[CAP_LAST_U32 - capi]);
+	}
+	seq_putc(m, '\n');
+
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int capcg_seq_show_bset(struct seq_file *m, void *v)
+{
+	struct capcg_cgroup *capcg = css_to_capcg(seq_css(m));
+
+	return capcg_seq_show_cap(m, &capcg->cap_bset);
+}
+
+static int capcg_seq_show_used(struct seq_file *m, void *v)
+{
+	struct capcg_cgroup *capcg = css_to_capcg(seq_css(m));
+
+	return capcg_seq_show_cap(m, &capcg->cap_used);
+}
+
+static struct cftype capcg_files[] = {
+	{
+		.name = "bounding_set",
+		.seq_show = capcg_seq_show_bset,
+		.write = capcg_write_bset,
+		.flags = CFTYPE_NOT_ON_ROOT,
+	},
+	{
+		.name = "used",
+		.seq_show = capcg_seq_show_used,
+		.flags = CFTYPE_NOT_ON_ROOT,
+	},
+	{ }	/* terminate */
+};
+
+struct cgroup_subsys capability_cgrp_subsys = {
+	.css_alloc = capcg_css_alloc,
+	.css_free = capcg_css_free,
+	.attach = capcg_attach,
+	.dfl_cftypes = capcg_files,
+};
+
+void capability_cgroup_update_used(int cap)
+{
+	struct capcg_cgroup *caps = task_to_capcg(current);
+
+	mutex_lock(&capcg_mutex);
+	cap_raise(caps->cap_used, cap);
+	mutex_unlock(&capcg_mutex);
+}
-- 
2.8.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-18 19:31 [RFC] capabilities: add capability cgroup controller Topi Miettinen
@ 2016-06-19 20:01 ` serge
  2016-06-20 18:46   ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: serge @ 2016-06-19 20:01 UTC (permalink / raw)
  To: Topi Miettinen, linux-kernel

apologies for top posting, this phone doesn't support inline)

Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?

Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
On 6/18/16 14:31 Topi Miettinen wrote:
Add a new cgroup controller for enforcement of and monitoring of
capabilities in the cgroup.

Test case (boot to rdshell);
BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
Enter 'help' for a list of built-in commands.

(initramfs) cd /sys/fs
(initramfs) mount -t cgroup2 cgroup cgroup
(initramfs) cd cgroup
(initramfs) echo +capability > cgroup.subtree_control
(initramfs) mkdir test; cd test
(initramfs) ls
capability.bounding_set  cgroup.controllers       cgroup.procs
capability.used          cgroup.events            cgroup.subtree_control
(initramfs) sh

BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
Enter 'help' for a list of built-in commands.

(initramfs) echo $$ >cgroup.procs
(initramfs) cat capability.used
0000000000000000
(initramfs) mknod /dev/z1 c 1 2
(initramfs) cat capability.used
0000000008000000
(initramfs) exit
(initramfs) echo 0000000000000000 > capability.bounding_set
(initramfs) sh

BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
Enter 'help' for a list of built-in commands.

(initramfs) echo $$ >cgroup.procs
(initramfs) mknod /dev/z2 c 1 2
mknod: /dev/z2: Operation not permitted
(initramfs) exit

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 include/linux/capability_cgroup.h |   7 ++
 include/linux/cgroup_subsys.h     |   4 +
 init/Kconfig                      |   6 ++
 kernel/capability.c               |   2 +
 security/Makefile                 |   1 +
 security/capability_cgroup.c      | 216 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 236 insertions(+)
 create mode 100644 include/linux/capability_cgroup.h
 create mode 100644 security/capability_cgroup.c

diff --git a/include/linux/capability_cgroup.h b/include/linux/capability_cgroup.h
new file mode 100644
index 0000000..c03b58d
--- /dev/null
+++ b/include/linux/capability_cgroup.h
@@ -0,0 +1,7 @@
+#ifdef CONFIG_CGROUP_CAPABILITY
+void capability_cgroup_update_used(int cap);
+#else
+static inline void capability_cgroup_update_used(int cap)
+{
+}
+#endif
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0df0336a..a5161d0 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -56,6 +56,10 @@ SUBSYS(hugetlb)
 SUBSYS(pids)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_CAPABILITY)
+SUBSYS(capability)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/init/Kconfig b/init/Kconfig
index f755a60..098ce66 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1141,6 +1141,12 @@ config CGROUP_PERF
 
 	  Say N if unsure.
 
+config CGROUP_CAPABILITY
+	bool "Capability controller"
+	help
+	  Provides a simple controller for enforcement of and monitoring of
+	  capabilities in the cgroup.
+
 config CGROUP_DEBUG
 	bool "Example controller"
 	default n
diff --git a/kernel/capability.c b/kernel/capability.c
index 45432b5..b57d7f9 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -17,6 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/capability_cgroup.h>
 #include <asm/uaccess.h>
 
 /*
@@ -380,6 +381,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
 	}
 
 	if (security_capable(current_cred(), ns, cap) == 0) {
+		capability_cgroup_update_used(cap);
 		current->flags |= PF_SUPERPRIV;
 		return true;
 	}
diff --git a/security/Makefile b/security/Makefile
index f2d71cd..2bb04f1 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)		+= apparmor/
 obj-$(CONFIG_SECURITY_YAMA)		+= yama/
 obj-$(CONFIG_SECURITY_LOADPIN)		+= loadpin/
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
+obj-$(CONFIG_CGROUP_CAPABILITY)		+= capability_cgroup.o
 
 # Object integrity file lists
 subdir-$(CONFIG_INTEGRITY)		+= integrity
diff --git a/security/capability_cgroup.c b/security/capability_cgroup.c
new file mode 100644
index 0000000..6e03fce
--- /dev/null
+++ b/security/capability_cgroup.c
@@ -0,0 +1,216 @@
+/*
+ * Capability cgroup
+ *
+ * Copyright 2016 Topi Miettinen
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/capability.h>
+#include <linux/capability_cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/cred.h>
+#include <linux/security.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(capcg_mutex);
+
+struct capcg_cgroup {
+	struct cgroup_subsys_state css;
+	kernel_cap_t cap_bset; /* Capability bounding set */
+	kernel_cap_t cap_used; /* Capabilities actually used */
+};
+
+static inline struct capcg_cgroup *css_to_capcg(struct cgroup_subsys_state *s)
+{
+	return s ? container_of(s, struct capcg_cgroup, css) : NULL;
+}
+
+static inline struct capcg_cgroup *task_to_capcg(struct task_struct *task)
+{
+	return css_to_capcg(task_css(task, capability_cgrp_id));
+}
+
+static struct cgroup_subsys_state *capcg_css_alloc(struct cgroup_subsys_state
+						   *parent)
+{
+	struct capcg_cgroup *caps;
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return ERR_PTR(-ENOMEM);
+
+	caps->cap_bset = CAP_FULL_SET;
+	cap_clear(caps->cap_used);
+	return &caps->css;
+}
+
+static void capcg_css_free(struct cgroup_subsys_state *css)
+{
+	kfree(css_to_capcg(css));
+}
+
+/**
+ * capcg_apply_bset - apply cgroup bounding set to all task's capabilities
+ */
+static int capcg_task_apply_bset(struct task_struct *task, kernel_cap_t bset)
+{
+	struct cred *new;
+	const struct cred *old;
+	kernel_cap_t bounding, effective, inheritable, permitted;
+	int ret;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = security_capget(task, 
+			      &effective, &inheritable, &permitted);
+	if (ret < 0)
+		goto abort_cred;
+
+	old = get_task_cred(task);
+	bounding = cap_intersect(bset, old->cap_bset);
+	effective = cap_intersect(bset, effective);
+	inheritable = cap_intersect(bset, inheritable);
+	permitted = cap_intersect(bset, permitted);
+
+	/* security_capset() also updates ambient capabilities */
+	ret = security_capset(new, old,
+			      &effective, &inheritable, &permitted);
+	new->cap_bset = bounding;
+		
+	put_cred(old);
+	if (ret < 0)
+		goto abort_cred;
+
+	ret = commit_creds(new);
+	return ret;
+
+ abort_cred:
+	abort_creds(new);
+	return ret;
+}
+
+static void capcg_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+	cgroup_taskset_for_each(task, css, tset) {
+		struct capcg_cgroup *caps = css_to_capcg(css);
+		
+		capcg_task_apply_bset(task, caps->cap_bset);
+	}
+	rcu_read_unlock();
+}
+
+/** capcg_write_bset - update css tree and their tasks with new
+ *  bounding capability
+ */
+static ssize_t capcg_write_bset(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
+{
+	struct cgroup_subsys_state *css = of_css(of), *pos;
+	struct capcg_cgroup *caps = css_to_capcg(css);
+	u32 capi;
+	int err;
+	kernel_cap_t new_bset;
+
+	buf = strstrip(buf);
+
+	CAP_FOR_EACH_U32(capi) {
+		char buf2[9]; /* for each 32 bit block */
+		u32 capv;
+
+		memcpy(buf2, &buf[capi * 8], 8);
+		buf2[8] = '\0';
+		err = kstrtou32(buf2, 16, &capv);
+		if (err)
+			return err;
+		new_bset.cap[CAP_LAST_U32 - capi] = capv;
+	}
+
+	mutex_lock(&capcg_mutex);
+	caps->cap_bset = cap_intersect(caps->cap_bset, new_bset);
+	mutex_unlock(&capcg_mutex);
+
+	rcu_read_lock();
+	css_for_each_child(pos, css) {
+		struct css_task_iter it;
+		struct task_struct *task;
+
+		css_task_iter_start(pos, &it);
+		while ((task = css_task_iter_next(&it)))
+			capcg_task_apply_bset(task, new_bset);
+	}
+	rcu_read_unlock();
+
+	return nbytes;
+}
+
+static int capcg_seq_show_cap(struct seq_file *m, kernel_cap_t *cap)
+{
+	u32 capi;
+
+	rcu_read_lock();
+
+	CAP_FOR_EACH_U32(capi) {
+		seq_printf(m, "%08x",
+			   cap->cap[CAP_LAST_U32 - capi]);
+	}
+	seq_putc(m, '\n');
+
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int capcg_seq_show_bset(struct seq_file *m, void *v)
+{
+	struct capcg_cgroup *capcg = css_to_capcg(seq_css(m));
+
+	return capcg_seq_show_cap(m, &capcg->cap_bset);
+}
+
+static int capcg_seq_show_used(struct seq_file *m, void *v)
+{
+	struct capcg_cgroup *capcg = css_to_capcg(seq_css(m));
+
+	return capcg_seq_show_cap(m, &capcg->cap_used);
+}
+
+static struct cftype capcg_files[] = {
+	{
+		.name = "bounding_set",
+		.seq_show = capcg_seq_show_bset,
+		.write = capcg_write_bset,
+		.flags = CFTYPE_NOT_ON_ROOT,
+	},
+	{
+		.name = "used",
+		.seq_show = capcg_seq_show_used,
+		.flags = CFTYPE_NOT_ON_ROOT,
+	},
+	{ }	/* terminate */
+};
+
+struct cgroup_subsys capability_cgrp_subsys = {
+	.css_alloc = capcg_css_alloc,
+	.css_free = capcg_css_free,
+	.attach = capcg_attach,
+	.dfl_cftypes = capcg_files,
+};
+
+void capability_cgroup_update_used(int cap)
+{
+	struct capcg_cgroup *caps = task_to_capcg(current);
+
+	mutex_lock(&capcg_mutex);
+	cap_raise(caps->cap_used, cap);
+	mutex_unlock(&capcg_mutex);
+}
-- 
2.8.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-19 20:01 ` serge
@ 2016-06-20 18:46   ` Topi Miettinen
  2016-06-21 15:45     ` Serge E. Hallyn
  0 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-20 18:46 UTC (permalink / raw)
  To: serge, linux-kernel

On 06/19/16 20:01, serge@hallyn.com wrote:
> apologies for top posting, this phone doesn't support inline)
> 
> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?

I didn't think that aspect. Some of that could be dealt with by
preventing tasks which don't have CAP_SETPCAP to make other tasks join
or set the bounding set. One problem is that the privileges would not be
checked at cgroup.procs open(2) time but only when writing. In general,
less privileged tasks should not be able to gain new capabilities even
if they were somehow able to join the cgroup and also your case must be
addressed in full.

> 
> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?

There are two.

1. Capability use tracking at cgroup level. There is no way to know
which capabilities have been used and which could be trimmed. With
cgroup approach, we can also keep track of how subprocesses use
capabilities. Thus the administrator can quickly get a reasonable
estimate of a bounding set just by reading the capability.used file.

2. cgroup approach to capability management. Currently the capabilities
are inherited with bounding set and ambient capabilities taking their
part. With cgroups, additional limits can be set which apply to the
whole group. I admit that the difference to the current model is small.

Could you list the several reasons you mentioned?

-Topi

> On 6/18/16 14:31 Topi Miettinen wrote:
> Add a new cgroup controller for enforcement of and monitoring of
> capabilities in the cgroup.
> 
> Test case (boot to rdshell);
> BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
> Enter 'help' for a list of built-in commands.
> 
> (initramfs) cd /sys/fs
> (initramfs) mount -t cgroup2 cgroup cgroup
> (initramfs) cd cgroup
> (initramfs) echo +capability > cgroup.subtree_control
> (initramfs) mkdir test; cd test
> (initramfs) ls
> capability.bounding_set  cgroup.controllers       cgroup.procs
> capability.used          cgroup.events            cgroup.subtree_control
> (initramfs) sh
> 
> BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
> Enter 'help' for a list of built-in commands.
> 
> (initramfs) echo $$ >cgroup.procs
> (initramfs) cat capability.used
> 0000000000000000
> (initramfs) mknod /dev/z1 c 1 2
> (initramfs) cat capability.used
> 0000000008000000
> (initramfs) exit
> (initramfs) echo 0000000000000000 > capability.bounding_set
> (initramfs) sh
> 
> BusyBox v1.22.1 (Debian 1:1.22.0-19) built-in shell (ash)
> Enter 'help' for a list of built-in commands.
> 
> (initramfs) echo $$ >cgroup.procs
> (initramfs) mknod /dev/z2 c 1 2
> mknod: /dev/z2: Operation not permitted
> (initramfs) exit
> 
> Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
> ---
>  include/linux/capability_cgroup.h |   7 ++
>  include/linux/cgroup_subsys.h     |   4 +
>  init/Kconfig                      |   6 ++
>  kernel/capability.c               |   2 +
>  security/Makefile                 |   1 +
>  security/capability_cgroup.c      | 216 ++++++++++++++++++++++++++++++++++++++
>  6 files changed, 236 insertions(+)
>  create mode 100644 include/linux/capability_cgroup.h
>  create mode 100644 security/capability_cgroup.c
> 
> diff --git a/include/linux/capability_cgroup.h b/include/linux/capability_cgroup.h
> new file mode 100644
> index 0000000..c03b58d
> --- /dev/null
> +++ b/include/linux/capability_cgroup.h
> @@ -0,0 +1,7 @@
> +#ifdef CONFIG_CGROUP_CAPABILITY
> +void capability_cgroup_update_used(int cap);
> +#else
> +static inline void capability_cgroup_update_used(int cap)
> +{
> +}
> +#endif
> diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
> index 0df0336a..a5161d0 100644
> --- a/include/linux/cgroup_subsys.h
> +++ b/include/linux/cgroup_subsys.h
> @@ -56,6 +56,10 @@ SUBSYS(hugetlb)
>  SUBSYS(pids)
>  #endif
>  
> +#if IS_ENABLED(CONFIG_CGROUP_CAPABILITY)
> +SUBSYS(capability)
> +#endif
> +
>  /*
>   * The following subsystems are not supported on the default hierarchy.
>   */
> diff --git a/init/Kconfig b/init/Kconfig
> index f755a60..098ce66 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1141,6 +1141,12 @@ config CGROUP_PERF
>  
>  	  Say N if unsure.
>  
> +config CGROUP_CAPABILITY
> +	bool "Capability controller"
> +	help
> +	  Provides a simple controller for enforcement of and monitoring of
> +	  capabilities in the cgroup.
> +
>  config CGROUP_DEBUG
>  	bool "Example controller"
>  	default n
> diff --git a/kernel/capability.c b/kernel/capability.c
> index 45432b5..b57d7f9 100644
> --- a/kernel/capability.c
> +++ b/kernel/capability.c
> @@ -17,6 +17,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/user_namespace.h>
> +#include <linux/capability_cgroup.h>
>  #include <asm/uaccess.h>
>  
>  /*
> @@ -380,6 +381,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
>  	}
>  
>  	if (security_capable(current_cred(), ns, cap) == 0) {
> +		capability_cgroup_update_used(cap);
>  		current->flags |= PF_SUPERPRIV;
>  		return true;
>  	}
> diff --git a/security/Makefile b/security/Makefile
> index f2d71cd..2bb04f1 100644
> --- a/security/Makefile
> +++ b/security/Makefile
> @@ -25,6 +25,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)		+= apparmor/
>  obj-$(CONFIG_SECURITY_YAMA)		+= yama/
>  obj-$(CONFIG_SECURITY_LOADPIN)		+= loadpin/
>  obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
> +obj-$(CONFIG_CGROUP_CAPABILITY)		+= capability_cgroup.o
>  
>  # Object integrity file lists
>  subdir-$(CONFIG_INTEGRITY)		+= integrity
> diff --git a/security/capability_cgroup.c b/security/capability_cgroup.c
> new file mode 100644
> index 0000000..6e03fce
> --- /dev/null
> +++ b/security/capability_cgroup.c
> @@ -0,0 +1,216 @@
> +/*
> + * Capability cgroup
> + *
> + * Copyright 2016 Topi Miettinen
> + *
> + * This file is subject to the terms and conditions of the GNU General
> + * Public License.  See the file COPYING in the main directory of the
> + * Linux distribution for more details.
> + */
> +
> +#include <linux/capability.h>
> +#include <linux/capability_cgroup.h>
> +#include <linux/cgroup.h>
> +#include <linux/cred.h>
> +#include <linux/security.h>
> +#include <linux/seq_file.h>
> +#include <linux/slab.h>
> +
> +static DEFINE_MUTEX(capcg_mutex);
> +
> +struct capcg_cgroup {
> +	struct cgroup_subsys_state css;
> +	kernel_cap_t cap_bset; /* Capability bounding set */
> +	kernel_cap_t cap_used; /* Capabilities actually used */
> +};
> +
> +static inline struct capcg_cgroup *css_to_capcg(struct cgroup_subsys_state *s)
> +{
> +	return s ? container_of(s, struct capcg_cgroup, css) : NULL;
> +}
> +
> +static inline struct capcg_cgroup *task_to_capcg(struct task_struct *task)
> +{
> +	return css_to_capcg(task_css(task, capability_cgrp_id));
> +}
> +
> +static struct cgroup_subsys_state *capcg_css_alloc(struct cgroup_subsys_state
> +						   *parent)
> +{
> +	struct capcg_cgroup *caps;
> +
> +	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
> +	if (!caps)
> +		return ERR_PTR(-ENOMEM);
> +
> +	caps->cap_bset = CAP_FULL_SET;
> +	cap_clear(caps->cap_used);
> +	return &caps->css;
> +}
> +
> +static void capcg_css_free(struct cgroup_subsys_state *css)
> +{
> +	kfree(css_to_capcg(css));
> +}
> +
> +/**
> + * capcg_apply_bset - apply cgroup bounding set to all task's capabilities
> + */
> +static int capcg_task_apply_bset(struct task_struct *task, kernel_cap_t bset)
> +{
> +	struct cred *new;
> +	const struct cred *old;
> +	kernel_cap_t bounding, effective, inheritable, permitted;
> +	int ret;
> +
> +	new = prepare_creds();
> +	if (!new)
> +		return -ENOMEM;
> +
> +	ret = security_capget(task, 
> +			      &effective, &inheritable, &permitted);
> +	if (ret < 0)
> +		goto abort_cred;
> +
> +	old = get_task_cred(task);
> +	bounding = cap_intersect(bset, old->cap_bset);
> +	effective = cap_intersect(bset, effective);
> +	inheritable = cap_intersect(bset, inheritable);
> +	permitted = cap_intersect(bset, permitted);
> +
> +	/* security_capset() also updates ambient capabilities */
> +	ret = security_capset(new, old,
> +			      &effective, &inheritable, &permitted);
> +	new->cap_bset = bounding;
> +		
> +	put_cred(old);
> +	if (ret < 0)
> +		goto abort_cred;
> +
> +	ret = commit_creds(new);
> +	return ret;
> +
> + abort_cred:
> +	abort_creds(new);
> +	return ret;
> +}
> +
> +static void capcg_attach(struct cgroup_taskset *tset)
> +{
> +	struct task_struct *task;
> +	struct cgroup_subsys_state *css;
> +
> +	rcu_read_lock();
> +	cgroup_taskset_for_each(task, css, tset) {
> +		struct capcg_cgroup *caps = css_to_capcg(css);
> +		
> +		capcg_task_apply_bset(task, caps->cap_bset);
> +	}
> +	rcu_read_unlock();
> +}
> +
> +/** capcg_write_bset - update css tree and their tasks with new
> + *  bounding capability
> + */
> +static ssize_t capcg_write_bset(struct kernfs_open_file *of, char *buf,
> +				size_t nbytes, loff_t off)
> +{
> +	struct cgroup_subsys_state *css = of_css(of), *pos;
> +	struct capcg_cgroup *caps = css_to_capcg(css);
> +	u32 capi;
> +	int err;
> +	kernel_cap_t new_bset;
> +
> +	buf = strstrip(buf);
> +
> +	CAP_FOR_EACH_U32(capi) {
> +		char buf2[9]; /* for each 32 bit block */
> +		u32 capv;
> +
> +		memcpy(buf2, &buf[capi * 8], 8);
> +		buf2[8] = '\0';
> +		err = kstrtou32(buf2, 16, &capv);
> +		if (err)
> +			return err;
> +		new_bset.cap[CAP_LAST_U32 - capi] = capv;
> +	}
> +
> +	mutex_lock(&capcg_mutex);
> +	caps->cap_bset = cap_intersect(caps->cap_bset, new_bset);
> +	mutex_unlock(&capcg_mutex);
> +
> +	rcu_read_lock();
> +	css_for_each_child(pos, css) {
> +		struct css_task_iter it;
> +		struct task_struct *task;
> +
> +		css_task_iter_start(pos, &it);
> +		while ((task = css_task_iter_next(&it)))
> +			capcg_task_apply_bset(task, new_bset);
> +	}
> +	rcu_read_unlock();
> +
> +	return nbytes;
> +}
> +
> +static int capcg_seq_show_cap(struct seq_file *m, kernel_cap_t *cap)
> +{
> +	u32 capi;
> +
> +	rcu_read_lock();
> +
> +	CAP_FOR_EACH_U32(capi) {
> +		seq_printf(m, "%08x",
> +			   cap->cap[CAP_LAST_U32 - capi]);
> +	}
> +	seq_putc(m, '\n');
> +
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +static int capcg_seq_show_bset(struct seq_file *m, void *v)
> +{
> +	struct capcg_cgroup *capcg = css_to_capcg(seq_css(m));
> +
> +	return capcg_seq_show_cap(m, &capcg->cap_bset);
> +}
> +
> +static int capcg_seq_show_used(struct seq_file *m, void *v)
> +{
> +	struct capcg_cgroup *capcg = css_to_capcg(seq_css(m));
> +
> +	return capcg_seq_show_cap(m, &capcg->cap_used);
> +}
> +
> +static struct cftype capcg_files[] = {
> +	{
> +		.name = "bounding_set",
> +		.seq_show = capcg_seq_show_bset,
> +		.write = capcg_write_bset,
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +	},
> +	{
> +		.name = "used",
> +		.seq_show = capcg_seq_show_used,
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +	},
> +	{ }	/* terminate */
> +};
> +
> +struct cgroup_subsys capability_cgrp_subsys = {
> +	.css_alloc = capcg_css_alloc,
> +	.css_free = capcg_css_free,
> +	.attach = capcg_attach,
> +	.dfl_cftypes = capcg_files,
> +};
> +
> +void capability_cgroup_update_used(int cap)
> +{
> +	struct capcg_cgroup *caps = task_to_capcg(current);
> +
> +	mutex_lock(&capcg_mutex);
> +	cap_raise(caps->cap_used, cap);
> +	mutex_unlock(&capcg_mutex);
> +}
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-20 18:46   ` Topi Miettinen
@ 2016-06-21 15:45     ` Serge E. Hallyn
  2016-06-21 21:33       ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: Serge E. Hallyn @ 2016-06-21 15:45 UTC (permalink / raw)
  To: Topi Miettinen; +Cc: serge, linux-kernel

Quoting Topi Miettinen (toiwoton@gmail.com):
> On 06/19/16 20:01, serge@hallyn.com wrote:
> > apologies for top posting, this phone doesn't support inline)
> > 
> > Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
> 
> I didn't think that aspect. Some of that could be dealt with by
> preventing tasks which don't have CAP_SETPCAP to make other tasks join
> or set the bounding set. One problem is that the privileges would not be
> checked at cgroup.procs open(2) time but only when writing. In general,
> less privileged tasks should not be able to gain new capabilities even
> if they were somehow able to join the cgroup and also your case must be
> addressed in full.
> 
> > 
> > Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
> 
> There are two.
> 
> 1. Capability use tracking at cgroup level. There is no way to know
> which capabilities have been used and which could be trimmed. With
> cgroup approach, we can also keep track of how subprocesses use
> capabilities. Thus the administrator can quickly get a reasonable
> estimate of a bounding set just by reading the capability.used file.

So to estimate the privileges needed by an application?  Note this
could also be done with something like systemtap, but that's not as
friendly of course.

Keeping the tracking part separate from enforcement might be worthwhile.
If you wanted to push that part of the patchset, we could keep
discussing the enforcement aspect separately.

> 2. cgroup approach to capability management. Currently the capabilities
> are inherited with bounding set and ambient capabilities taking their
> part. With cgroups, additional limits can be set which apply to the
> whole group. I admit that the difference to the current model is small.
> 
> Could you list the several reasons you mentioned?

Should have done it sunday while my mind was clear on it

The first is that while we normally think of preventing a less
privileged task from becoming more privileged, it can be just as
dangerous to allow a less privileged task from robbing a more privileged
task of some capability.  See in particular the sendmail capability
story.  By allowing an unprivileged task to run a setuid-root task in an
unexpected configuration - namely, denying it the ability to setuid(),
it was possible to get a root owned task doing your bidding.

So that's why I'm particularly concerned about allowing cgroupfs dac
permissions to dictate who gets to say what privileges other tasks on
the system can get.

Another reason is simply that the capability calculation scheme is
for historical reasons already quite complicated.  So if there is
something worthwhile to add we can discuss, but it'll take a compelling
otherwise-unsolvable use case to convince me we should complicate it
further.

In general, capabilites can be very cleanly predicted by looking at
the parent task and the file being executed.  Adding a cgroup into
the mix allows basically any random task to sneak in, change the
setting, and make a process unexpectedly not get a privileged on a
new execve when it did get it on the previous execve.

As amorgan will point out, posix caps are meant to be purely orthogonal
to dac.  We have hooks in place to make setuid work, but those can be
shut off to get a system where uid root is noone special (other than
owning system files).  So again, allowing a root user through cgroupfs
access to change the bounding set for other tasks flies in the face of
that.  (we're already smudging that picture with the user-namespaced
filecaps, though trying not to)

-serge

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-21 15:45     ` Serge E. Hallyn
@ 2016-06-21 21:33       ` Topi Miettinen
  2016-06-21 22:01         ` Serge E. Hallyn
  2016-06-22 17:14         ` Serge E. Hallyn
  0 siblings, 2 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-21 21:33 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: linux-kernel

On 06/21/16 15:45, Serge E. Hallyn wrote:
> Quoting Topi Miettinen (toiwoton@gmail.com):
>> On 06/19/16 20:01, serge@hallyn.com wrote:
>>> apologies for top posting, this phone doesn't support inline)
>>>
>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
>>
>> I didn't think that aspect. Some of that could be dealt with by
>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
>> or set the bounding set. One problem is that the privileges would not be
>> checked at cgroup.procs open(2) time but only when writing. In general,
>> less privileged tasks should not be able to gain new capabilities even
>> if they were somehow able to join the cgroup and also your case must be
>> addressed in full.
>>
>>>
>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
>>
>> There are two.
>>
>> 1. Capability use tracking at cgroup level. There is no way to know
>> which capabilities have been used and which could be trimmed. With
>> cgroup approach, we can also keep track of how subprocesses use
>> capabilities. Thus the administrator can quickly get a reasonable
>> estimate of a bounding set just by reading the capability.used file.
> 
> So to estimate the privileges needed by an application?  Note this
> could also be done with something like systemtap, but that's not as
> friendly of course.
> 

I've used systemtap to track how a single process uses capabilities, but
I can imagine that without the cgroup, using it to track several
subprocesses could be difficult.

> Keeping the tracking part separate from enforcement might be worthwhile.
> If you wanted to push that part of the patchset, we could keep
> discussing the enforcement aspect separately.
> 

OK, I'll prepare the tracking part first.

>> 2. cgroup approach to capability management. Currently the capabilities
>> are inherited with bounding set and ambient capabilities taking their
>> part. With cgroups, additional limits can be set which apply to the
>> whole group. I admit that the difference to the current model is small.
>>
>> Could you list the several reasons you mentioned?
> 
> Should have done it sunday while my mind was clear on it
> 
> The first is that while we normally think of preventing a less
> privileged task from becoming more privileged, it can be just as
> dangerous to allow a less privileged task from robbing a more privileged
> task of some capability.  See in particular the sendmail capability
> story.  By allowing an unprivileged task to run a setuid-root task in an
> unexpected configuration - namely, denying it the ability to setuid(),
> it was possible to get a root owned task doing your bidding.
> 
> So that's why I'm particularly concerned about allowing cgroupfs dac
> permissions to dictate who gets to say what privileges other tasks on
> the system can get.
> 

It could be especially tricky if the privileges are suddenly lost while
the processs is already executing.

> Another reason is simply that the capability calculation scheme is
> for historical reasons already quite complicated.  So if there is
> something worthwhile to add we can discuss, but it'll take a compelling
> otherwise-unsolvable use case to convince me we should complicate it
> further.
> 
> In general, capabilites can be very cleanly predicted by looking at
> the parent task and the file being executed.  Adding a cgroup into
> the mix allows basically any random task to sneak in, change the
> setting, and make a process unexpectedly not get a privileged on a
> new execve when it did get it on the previous execve.
> 
> As amorgan will point out, posix caps are meant to be purely orthogonal
> to dac.  We have hooks in place to make setuid work, but those can be
> shut off to get a system where uid root is noone special (other than
> owning system files).  So again, allowing a root user through cgroupfs
> access to change the bounding set for other tasks flies in the face of
> that.  (we're already smudging that picture with the user-namespaced
> filecaps, though trying not to)
> 
> -serge
> 

Right. I'm almost convinced that the capability management part doesn't
make much sense.

-Topi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-21 21:33       ` Topi Miettinen
@ 2016-06-21 22:01         ` Serge E. Hallyn
  2016-06-22 17:14         ` Serge E. Hallyn
  1 sibling, 0 replies; 13+ messages in thread
From: Serge E. Hallyn @ 2016-06-21 22:01 UTC (permalink / raw)
  To: Topi Miettinen; +Cc: Serge E. Hallyn, linux-kernel

Quoting Topi Miettinen (toiwoton@gmail.com):
> On 06/21/16 15:45, Serge E. Hallyn wrote:
> > Quoting Topi Miettinen (toiwoton@gmail.com):
> >> On 06/19/16 20:01, serge@hallyn.com wrote:
> >>> apologies for top posting, this phone doesn't support inline)
> >>>
> >>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
> >>
> >> I didn't think that aspect. Some of that could be dealt with by
> >> preventing tasks which don't have CAP_SETPCAP to make other tasks join
> >> or set the bounding set. One problem is that the privileges would not be
> >> checked at cgroup.procs open(2) time but only when writing. In general,
> >> less privileged tasks should not be able to gain new capabilities even
> >> if they were somehow able to join the cgroup and also your case must be
> >> addressed in full.
> >>
> >>>
> >>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
> >>
> >> There are two.
> >>
> >> 1. Capability use tracking at cgroup level. There is no way to know
> >> which capabilities have been used and which could be trimmed. With
> >> cgroup approach, we can also keep track of how subprocesses use
> >> capabilities. Thus the administrator can quickly get a reasonable
> >> estimate of a bounding set just by reading the capability.used file.
> > 
> > So to estimate the privileges needed by an application?  Note this
> > could also be done with something like systemtap, but that's not as
> > friendly of course.
> > 
> 
> I've used systemtap to track how a single process uses capabilities, but
> I can imagine that without the cgroup, using it to track several
> subprocesses could be difficult.
> 
> > Keeping the tracking part separate from enforcement might be worthwhile.
> > If you wanted to push that part of the patchset, we could keep
> > discussing the enforcement aspect separately.
> > 
> 
> OK, I'll prepare the tracking part first.

Awesome - thanks!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-21 21:33       ` Topi Miettinen
  2016-06-21 22:01         ` Serge E. Hallyn
@ 2016-06-22 17:14         ` Serge E. Hallyn
  2016-06-22 18:14           ` Topi Miettinen
  1 sibling, 1 reply; 13+ messages in thread
From: Serge E. Hallyn @ 2016-06-22 17:14 UTC (permalink / raw)
  To: Topi Miettinen; +Cc: Serge E. Hallyn, linux-kernel, Kees Cook

Quoting Topi Miettinen (toiwoton@gmail.com):
> On 06/21/16 15:45, Serge E. Hallyn wrote:
> > Quoting Topi Miettinen (toiwoton@gmail.com):
> >> On 06/19/16 20:01, serge@hallyn.com wrote:
> >>> apologies for top posting, this phone doesn't support inline)
> >>>
> >>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
> >>
> >> I didn't think that aspect. Some of that could be dealt with by
> >> preventing tasks which don't have CAP_SETPCAP to make other tasks join
> >> or set the bounding set. One problem is that the privileges would not be
> >> checked at cgroup.procs open(2) time but only when writing. In general,
> >> less privileged tasks should not be able to gain new capabilities even
> >> if they were somehow able to join the cgroup and also your case must be
> >> addressed in full.
> >>
> >>>
> >>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
> >>
> >> There are two.
> >>
> >> 1. Capability use tracking at cgroup level. There is no way to know
> >> which capabilities have been used and which could be trimmed. With
> >> cgroup approach, we can also keep track of how subprocesses use
> >> capabilities. Thus the administrator can quickly get a reasonable
> >> estimate of a bounding set just by reading the capability.used file.
> > 
> > So to estimate the privileges needed by an application?  Note this
> > could also be done with something like systemtap, but that's not as
> > friendly of course.
> > 
> 
> I've used systemtap to track how a single process uses capabilities, but
> I can imagine that without the cgroup, using it to track several
> subprocesses could be difficult.
> 
> > Keeping the tracking part separate from enforcement might be worthwhile.
> > If you wanted to push that part of the patchset, we could keep
> > discussing the enforcement aspect separately.
> > 
> 
> OK, I'll prepare the tracking part first.

So this does still have some security concerns, namely leaking information
to a less privileged process about what privs a root owned process used.
That's not on the same level as giving away details about memory mappings,
but could be an issue.  Kees (cc'd), do you see that as an issue ?

thanks,
-serge

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-22 17:14         ` Serge E. Hallyn
@ 2016-06-22 18:14           ` Topi Miettinen
  2016-06-22 18:17             ` Serge E. Hallyn
  0 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-22 18:14 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: linux-kernel, Kees Cook

On 06/22/16 17:14, Serge E. Hallyn wrote:
> Quoting Topi Miettinen (toiwoton@gmail.com):
>> On 06/21/16 15:45, Serge E. Hallyn wrote:
>>> Quoting Topi Miettinen (toiwoton@gmail.com):
>>>> On 06/19/16 20:01, serge@hallyn.com wrote:
>>>>> apologies for top posting, this phone doesn't support inline)
>>>>>
>>>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
>>>>
>>>> I didn't think that aspect. Some of that could be dealt with by
>>>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
>>>> or set the bounding set. One problem is that the privileges would not be
>>>> checked at cgroup.procs open(2) time but only when writing. In general,
>>>> less privileged tasks should not be able to gain new capabilities even
>>>> if they were somehow able to join the cgroup and also your case must be
>>>> addressed in full.
>>>>
>>>>>
>>>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
>>>>
>>>> There are two.
>>>>
>>>> 1. Capability use tracking at cgroup level. There is no way to know
>>>> which capabilities have been used and which could be trimmed. With
>>>> cgroup approach, we can also keep track of how subprocesses use
>>>> capabilities. Thus the administrator can quickly get a reasonable
>>>> estimate of a bounding set just by reading the capability.used file.
>>>
>>> So to estimate the privileges needed by an application?  Note this
>>> could also be done with something like systemtap, but that's not as
>>> friendly of course.
>>>
>>
>> I've used systemtap to track how a single process uses capabilities, but
>> I can imagine that without the cgroup, using it to track several
>> subprocesses could be difficult.
>>
>>> Keeping the tracking part separate from enforcement might be worthwhile.
>>> If you wanted to push that part of the patchset, we could keep
>>> discussing the enforcement aspect separately.
>>>
>>
>> OK, I'll prepare the tracking part first.
> 
> So this does still have some security concerns, namely leaking information
> to a less privileged process about what privs a root owned process used.
> That's not on the same level as giving away details about memory mappings,
> but could be an issue.  Kees (cc'd), do you see that as an issue ?
> 
> thanks,
> -serge
> 

Anyone can see the full set of capabilities available to each process
via /proc/pid/status. But should I for example add a new flag
CFTYPE_OWNER_ONLY to limit reading capability.used file to only owner
(root) and use it here?

-Topi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-22 18:14           ` Topi Miettinen
@ 2016-06-22 18:17             ` Serge E. Hallyn
  2016-06-22 23:06               ` Kees Cook
  0 siblings, 1 reply; 13+ messages in thread
From: Serge E. Hallyn @ 2016-06-22 18:17 UTC (permalink / raw)
  To: Topi Miettinen; +Cc: Serge E. Hallyn, linux-kernel, Kees Cook

Quoting Topi Miettinen (toiwoton@gmail.com):
> On 06/22/16 17:14, Serge E. Hallyn wrote:
> > Quoting Topi Miettinen (toiwoton@gmail.com):
> >> On 06/21/16 15:45, Serge E. Hallyn wrote:
> >>> Quoting Topi Miettinen (toiwoton@gmail.com):
> >>>> On 06/19/16 20:01, serge@hallyn.com wrote:
> >>>>> apologies for top posting, this phone doesn't support inline)
> >>>>>
> >>>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
> >>>>
> >>>> I didn't think that aspect. Some of that could be dealt with by
> >>>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
> >>>> or set the bounding set. One problem is that the privileges would not be
> >>>> checked at cgroup.procs open(2) time but only when writing. In general,
> >>>> less privileged tasks should not be able to gain new capabilities even
> >>>> if they were somehow able to join the cgroup and also your case must be
> >>>> addressed in full.
> >>>>
> >>>>>
> >>>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
> >>>>
> >>>> There are two.
> >>>>
> >>>> 1. Capability use tracking at cgroup level. There is no way to know
> >>>> which capabilities have been used and which could be trimmed. With
> >>>> cgroup approach, we can also keep track of how subprocesses use
> >>>> capabilities. Thus the administrator can quickly get a reasonable
> >>>> estimate of a bounding set just by reading the capability.used file.
> >>>
> >>> So to estimate the privileges needed by an application?  Note this
> >>> could also be done with something like systemtap, but that's not as
> >>> friendly of course.
> >>>
> >>
> >> I've used systemtap to track how a single process uses capabilities, but
> >> I can imagine that without the cgroup, using it to track several
> >> subprocesses could be difficult.
> >>
> >>> Keeping the tracking part separate from enforcement might be worthwhile.
> >>> If you wanted to push that part of the patchset, we could keep
> >>> discussing the enforcement aspect separately.
> >>>
> >>
> >> OK, I'll prepare the tracking part first.
> > 
> > So this does still have some security concerns, namely leaking information
> > to a less privileged process about what privs a root owned process used.
> > That's not on the same level as giving away details about memory mappings,
> > but could be an issue.  Kees (cc'd), do you see that as an issue ?
> > 
> > thanks,
> > -serge
> > 
> 
> Anyone can see the full set of capabilities available to each process

But not the capabilities used.  That's much more invasive.

> via /proc/pid/status. But should I for example add a new flag
> CFTYPE_OWNER_ONLY to limit reading capability.used file to only owner
> (root) and use it here?

Not sure that it's needed, let's see what Kees says.  However if it is,
then using owner would not suffice, since that's tangential to the
privilege level of the task.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-22 18:17             ` Serge E. Hallyn
@ 2016-06-22 23:06               ` Kees Cook
  2016-06-23  0:01                 ` Serge E. Hallyn
  0 siblings, 1 reply; 13+ messages in thread
From: Kees Cook @ 2016-06-22 23:06 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Topi Miettinen, LKML

On Wed, Jun 22, 2016 at 11:17 AM, Serge E. Hallyn <serge@hallyn.com> wrote:
> Quoting Topi Miettinen (toiwoton@gmail.com):
>> On 06/22/16 17:14, Serge E. Hallyn wrote:
>> > Quoting Topi Miettinen (toiwoton@gmail.com):
>> >> On 06/21/16 15:45, Serge E. Hallyn wrote:
>> >>> Quoting Topi Miettinen (toiwoton@gmail.com):
>> >>>> On 06/19/16 20:01, serge@hallyn.com wrote:
>> >>>>> apologies for top posting, this phone doesn't support inline)
>> >>>>>
>> >>>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
>> >>>>
>> >>>> I didn't think that aspect. Some of that could be dealt with by
>> >>>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
>> >>>> or set the bounding set. One problem is that the privileges would not be
>> >>>> checked at cgroup.procs open(2) time but only when writing. In general,
>> >>>> less privileged tasks should not be able to gain new capabilities even
>> >>>> if they were somehow able to join the cgroup and also your case must be
>> >>>> addressed in full.
>> >>>>
>> >>>>>
>> >>>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
>> >>>>
>> >>>> There are two.
>> >>>>
>> >>>> 1. Capability use tracking at cgroup level. There is no way to know
>> >>>> which capabilities have been used and which could be trimmed. With
>> >>>> cgroup approach, we can also keep track of how subprocesses use
>> >>>> capabilities. Thus the administrator can quickly get a reasonable
>> >>>> estimate of a bounding set just by reading the capability.used file.
>> >>>
>> >>> So to estimate the privileges needed by an application?  Note this
>> >>> could also be done with something like systemtap, but that's not as
>> >>> friendly of course.
>> >>>
>> >>
>> >> I've used systemtap to track how a single process uses capabilities, but
>> >> I can imagine that without the cgroup, using it to track several
>> >> subprocesses could be difficult.
>> >>
>> >>> Keeping the tracking part separate from enforcement might be worthwhile.
>> >>> If you wanted to push that part of the patchset, we could keep
>> >>> discussing the enforcement aspect separately.
>> >>>
>> >>
>> >> OK, I'll prepare the tracking part first.
>> >
>> > So this does still have some security concerns, namely leaking information
>> > to a less privileged process about what privs a root owned process used.
>> > That's not on the same level as giving away details about memory mappings,
>> > but could be an issue.  Kees (cc'd), do you see that as an issue ?
>> >
>> > thanks,
>> > -serge
>> >
>>
>> Anyone can see the full set of capabilities available to each process
>
> But not the capabilities used.  That's much more invasive.
>
>> via /proc/pid/status. But should I for example add a new flag
>> CFTYPE_OWNER_ONLY to limit reading capability.used file to only owner
>> (root) and use it here?
>
> Not sure that it's needed, let's see what Kees says.  However if it is,
> then using owner would not suffice, since that's tangential to the
> privilege level of the task.

I don't see a problem exposing the history of used capabilities to
less privileged processes. The only thing I could see that being used
for would be to improve some kind of race against a buggy process
where you know caps get used at a certain time in the code, so
spinning on reading /proc/pid/status might give you a better chance of
timing the race. That seems like a pretty far-out exposure, though. I
imagine instruction counters would give a way finer grained timing
too, so I wouldn't object to this being visible.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-22 23:06               ` Kees Cook
@ 2016-06-23  0:01                 ` Serge E. Hallyn
  2016-06-23  5:59                   ` Kees Cook
  0 siblings, 1 reply; 13+ messages in thread
From: Serge E. Hallyn @ 2016-06-23  0:01 UTC (permalink / raw)
  To: Kees Cook; +Cc: Serge E. Hallyn, Topi Miettinen, LKML

Quoting Kees Cook (keescook@chromium.org):
> On Wed, Jun 22, 2016 at 11:17 AM, Serge E. Hallyn <serge@hallyn.com> wrote:
> > Quoting Topi Miettinen (toiwoton@gmail.com):
> >> On 06/22/16 17:14, Serge E. Hallyn wrote:
> >> > Quoting Topi Miettinen (toiwoton@gmail.com):
> >> >> On 06/21/16 15:45, Serge E. Hallyn wrote:
> >> >>> Quoting Topi Miettinen (toiwoton@gmail.com):
> >> >>>> On 06/19/16 20:01, serge@hallyn.com wrote:
> >> >>>>> apologies for top posting, this phone doesn't support inline)
> >> >>>>>
> >> >>>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
> >> >>>>
> >> >>>> I didn't think that aspect. Some of that could be dealt with by
> >> >>>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
> >> >>>> or set the bounding set. One problem is that the privileges would not be
> >> >>>> checked at cgroup.procs open(2) time but only when writing. In general,
> >> >>>> less privileged tasks should not be able to gain new capabilities even
> >> >>>> if they were somehow able to join the cgroup and also your case must be
> >> >>>> addressed in full.
> >> >>>>
> >> >>>>>
> >> >>>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
> >> >>>>
> >> >>>> There are two.
> >> >>>>
> >> >>>> 1. Capability use tracking at cgroup level. There is no way to know
> >> >>>> which capabilities have been used and which could be trimmed. With
> >> >>>> cgroup approach, we can also keep track of how subprocesses use
> >> >>>> capabilities. Thus the administrator can quickly get a reasonable
> >> >>>> estimate of a bounding set just by reading the capability.used file.
> >> >>>
> >> >>> So to estimate the privileges needed by an application?  Note this
> >> >>> could also be done with something like systemtap, but that's not as
> >> >>> friendly of course.
> >> >>>
> >> >>
> >> >> I've used systemtap to track how a single process uses capabilities, but
> >> >> I can imagine that without the cgroup, using it to track several
> >> >> subprocesses could be difficult.
> >> >>
> >> >>> Keeping the tracking part separate from enforcement might be worthwhile.
> >> >>> If you wanted to push that part of the patchset, we could keep
> >> >>> discussing the enforcement aspect separately.
> >> >>>
> >> >>
> >> >> OK, I'll prepare the tracking part first.
> >> >
> >> > So this does still have some security concerns, namely leaking information
> >> > to a less privileged process about what privs a root owned process used.
> >> > That's not on the same level as giving away details about memory mappings,
> >> > but could be an issue.  Kees (cc'd), do you see that as an issue ?
> >> >
> >> > thanks,
> >> > -serge
> >> >
> >>
> >> Anyone can see the full set of capabilities available to each process
> >
> > But not the capabilities used.  That's much more invasive.
> >
> >> via /proc/pid/status. But should I for example add a new flag
> >> CFTYPE_OWNER_ONLY to limit reading capability.used file to only owner
> >> (root) and use it here?
> >
> > Not sure that it's needed, let's see what Kees says.  However if it is,
> > then using owner would not suffice, since that's tangential to the
> > privilege level of the task.
> 
> I don't see a problem exposing the history of used capabilities to

Thanks, Kees.

> less privileged processes. The only thing I could see that being used
> for would be to improve some kind of race against a buggy process
> where you know caps get used at a certain time in the code, so
> spinning on reading /proc/pid/status might give you a better chance of

It would actually be a cgroup file, I think someone else was suggesting
a /proc/pid/status enhancement to the same effect a few weeks ago.

> timing the race. That seems like a pretty far-out exposure, though. I
> imagine instruction counters would give a way finer grained timing
> too, so I wouldn't object to this being visible.
> 
> -Kees
> 
> -- 
> Kees Cook
> Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-23  0:01                 ` Serge E. Hallyn
@ 2016-06-23  5:59                   ` Kees Cook
  2016-06-23 13:55                     ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: Kees Cook @ 2016-06-23  5:59 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Topi Miettinen, LKML

On Wed, Jun 22, 2016 at 5:01 PM, Serge E. Hallyn <serge@hallyn.com> wrote:
> Quoting Kees Cook (keescook@chromium.org):
>> On Wed, Jun 22, 2016 at 11:17 AM, Serge E. Hallyn <serge@hallyn.com> wrote:
>> > Quoting Topi Miettinen (toiwoton@gmail.com):
>> >> On 06/22/16 17:14, Serge E. Hallyn wrote:
>> >> > Quoting Topi Miettinen (toiwoton@gmail.com):
>> >> >> On 06/21/16 15:45, Serge E. Hallyn wrote:
>> >> >>> Quoting Topi Miettinen (toiwoton@gmail.com):
>> >> >>>> On 06/19/16 20:01, serge@hallyn.com wrote:
>> >> >>>>> apologies for top posting, this phone doesn't support inline)
>> >> >>>>>
>> >> >>>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
>> >> >>>>
>> >> >>>> I didn't think that aspect. Some of that could be dealt with by
>> >> >>>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
>> >> >>>> or set the bounding set. One problem is that the privileges would not be
>> >> >>>> checked at cgroup.procs open(2) time but only when writing. In general,
>> >> >>>> less privileged tasks should not be able to gain new capabilities even
>> >> >>>> if they were somehow able to join the cgroup and also your case must be
>> >> >>>> addressed in full.
>> >> >>>>
>> >> >>>>>
>> >> >>>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
>> >> >>>>
>> >> >>>> There are two.
>> >> >>>>
>> >> >>>> 1. Capability use tracking at cgroup level. There is no way to know
>> >> >>>> which capabilities have been used and which could be trimmed. With
>> >> >>>> cgroup approach, we can also keep track of how subprocesses use
>> >> >>>> capabilities. Thus the administrator can quickly get a reasonable
>> >> >>>> estimate of a bounding set just by reading the capability.used file.
>> >> >>>
>> >> >>> So to estimate the privileges needed by an application?  Note this
>> >> >>> could also be done with something like systemtap, but that's not as
>> >> >>> friendly of course.
>> >> >>>
>> >> >>
>> >> >> I've used systemtap to track how a single process uses capabilities, but
>> >> >> I can imagine that without the cgroup, using it to track several
>> >> >> subprocesses could be difficult.
>> >> >>
>> >> >>> Keeping the tracking part separate from enforcement might be worthwhile.
>> >> >>> If you wanted to push that part of the patchset, we could keep
>> >> >>> discussing the enforcement aspect separately.
>> >> >>>
>> >> >>
>> >> >> OK, I'll prepare the tracking part first.
>> >> >
>> >> > So this does still have some security concerns, namely leaking information
>> >> > to a less privileged process about what privs a root owned process used.
>> >> > That's not on the same level as giving away details about memory mappings,
>> >> > but could be an issue.  Kees (cc'd), do you see that as an issue ?
>> >> >
>> >> > thanks,
>> >> > -serge
>> >> >
>> >>
>> >> Anyone can see the full set of capabilities available to each process
>> >
>> > But not the capabilities used.  That's much more invasive.
>> >
>> >> via /proc/pid/status. But should I for example add a new flag
>> >> CFTYPE_OWNER_ONLY to limit reading capability.used file to only owner
>> >> (root) and use it here?
>> >
>> > Not sure that it's needed, let's see what Kees says.  However if it is,
>> > then using owner would not suffice, since that's tangential to the
>> > privilege level of the task.
>>
>> I don't see a problem exposing the history of used capabilities to
>
> Thanks, Kees.
>
>> less privileged processes. The only thing I could see that being used
>> for would be to improve some kind of race against a buggy process
>> where you know caps get used at a certain time in the code, so
>> spinning on reading /proc/pid/status might give you a better chance of
>
> It would actually be a cgroup file, I think someone else was suggesting
> a /proc/pid/status enhancement to the same effect a few weeks ago.

Oh! Sorry, I misunderstood. What does the interface look like for the
new cgroup file? (I assume my evaluation remains the same, though.)

-Kees

>
>> timing the race. That seems like a pretty far-out exposure, though. I
>> imagine instruction counters would give a way finer grained timing
>> too, so I wouldn't object to this being visible.
>>
>> -Kees
>>
>> --
>> Kees Cook
>> Chrome OS & Brillo Security



-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] capabilities: add capability cgroup controller
  2016-06-23  5:59                   ` Kees Cook
@ 2016-06-23 13:55                     ` Topi Miettinen
  0 siblings, 0 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-23 13:55 UTC (permalink / raw)
  To: Kees Cook, Serge E. Hallyn; +Cc: LKML

On 06/23/16 05:59, Kees Cook wrote:
> On Wed, Jun 22, 2016 at 5:01 PM, Serge E. Hallyn <serge@hallyn.com> wrote:
>> Quoting Kees Cook (keescook@chromium.org):
>>> On Wed, Jun 22, 2016 at 11:17 AM, Serge E. Hallyn <serge@hallyn.com> wrote:
>>>> Quoting Topi Miettinen (toiwoton@gmail.com):
>>>>> On 06/22/16 17:14, Serge E. Hallyn wrote:
>>>>>> Quoting Topi Miettinen (toiwoton@gmail.com):
>>>>>>> On 06/21/16 15:45, Serge E. Hallyn wrote:
>>>>>>>> Quoting Topi Miettinen (toiwoton@gmail.com):
>>>>>>>>> On 06/19/16 20:01, serge@hallyn.com wrote:
>>>>>>>>>> apologies for top posting, this phone doesn't support inline)
>>>>>>>>>>
>>>>>>>>>> Where are you preventing less privileged tasks from limiting the caps of a more privileged task?  It looks like you are relying on the cgroupfs for that?
>>>>>>>>>
>>>>>>>>> I didn't think that aspect. Some of that could be dealt with by
>>>>>>>>> preventing tasks which don't have CAP_SETPCAP to make other tasks join
>>>>>>>>> or set the bounding set. One problem is that the privileges would not be
>>>>>>>>> checked at cgroup.procs open(2) time but only when writing. In general,
>>>>>>>>> less privileged tasks should not be able to gain new capabilities even
>>>>>>>>> if they were somehow able to join the cgroup and also your case must be
>>>>>>>>> addressed in full.
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Overall I'm not a fan of this for several reasons.  Can you tell us precisely what your use case is?
>>>>>>>>>
>>>>>>>>> There are two.
>>>>>>>>>
>>>>>>>>> 1. Capability use tracking at cgroup level. There is no way to know
>>>>>>>>> which capabilities have been used and which could be trimmed. With
>>>>>>>>> cgroup approach, we can also keep track of how subprocesses use
>>>>>>>>> capabilities. Thus the administrator can quickly get a reasonable
>>>>>>>>> estimate of a bounding set just by reading the capability.used file.
>>>>>>>>
>>>>>>>> So to estimate the privileges needed by an application?  Note this
>>>>>>>> could also be done with something like systemtap, but that's not as
>>>>>>>> friendly of course.
>>>>>>>>
>>>>>>>
>>>>>>> I've used systemtap to track how a single process uses capabilities, but
>>>>>>> I can imagine that without the cgroup, using it to track several
>>>>>>> subprocesses could be difficult.
>>>>>>>
>>>>>>>> Keeping the tracking part separate from enforcement might be worthwhile.
>>>>>>>> If you wanted to push that part of the patchset, we could keep
>>>>>>>> discussing the enforcement aspect separately.
>>>>>>>>
>>>>>>>
>>>>>>> OK, I'll prepare the tracking part first.
>>>>>>
>>>>>> So this does still have some security concerns, namely leaking information
>>>>>> to a less privileged process about what privs a root owned process used.
>>>>>> That's not on the same level as giving away details about memory mappings,
>>>>>> but could be an issue.  Kees (cc'd), do you see that as an issue ?
>>>>>>
>>>>>> thanks,
>>>>>> -serge
>>>>>>
>>>>>
>>>>> Anyone can see the full set of capabilities available to each process
>>>>
>>>> But not the capabilities used.  That's much more invasive.
>>>>
>>>>> via /proc/pid/status. But should I for example add a new flag
>>>>> CFTYPE_OWNER_ONLY to limit reading capability.used file to only owner
>>>>> (root) and use it here?
>>>>
>>>> Not sure that it's needed, let's see what Kees says.  However if it is,
>>>> then using owner would not suffice, since that's tangential to the
>>>> privilege level of the task.
>>>
>>> I don't see a problem exposing the history of used capabilities to
>>
>> Thanks, Kees.
>>
>>> less privileged processes. The only thing I could see that being used
>>> for would be to improve some kind of race against a buggy process
>>> where you know caps get used at a certain time in the code, so
>>> spinning on reading /proc/pid/status might give you a better chance of
>>
>> It would actually be a cgroup file, I think someone else was suggesting
>> a /proc/pid/status enhancement to the same effect a few weeks ago.

That was also me. I think cgroup level monitoring is much more flexible
than per process monitoring. Checking how individual processes use
capabilities could be complementary, but I suppose cgroup is enough.

The per process information could be delayed to avoid races. For
example, store a timestamp (jiffies) with the capability set and keep
two sets of these as LIFO. When we're going to update the capability set
with new usage, check if the last value is old enough, if so, move it to
first slot. Then update the last slot. The reading process would read
the first slot and then get somewhat delayed information. This could be
also done for cgroup level tracking.

> 
> Oh! Sorry, I misunderstood. What does the interface look like for the
> new cgroup file? (I assume my evaluation remains the same, though.)

I'll send a patch shortly which includes this addition to
Documentation/cgroup-v2.txt:
+5-4. Capabilities
+
+The "capability" controller is used to monitor capability use in the
+cgroup. This can be used to discover a starting point for capability
+bounding sets, even when running a shell script under ambient
+capabilities, with only short-lived helper processes exercising the
+capabilities.
+
+
+5-4-1. Capability Interface Files
+
+  capability.used
+
+	A read-only file which exists on all cgroups.
+
+	This reports the combined value of capability use in the
+	current cgroup and all its children.

-Topi

> 
> -Kees
> 
>>
>>> timing the race. That seems like a pretty far-out exposure, though. I
>>> imagine instruction counters would give a way finer grained timing
>>> too, so I wouldn't object to this being visible.
>>>
>>> -Kees
>>>
>>> --
>>> Kees Cook
>>> Chrome OS & Brillo Security
> 
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-23 13:56 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-18 19:31 [RFC] capabilities: add capability cgroup controller Topi Miettinen
2016-06-19 20:01 ` serge
2016-06-20 18:46   ` Topi Miettinen
2016-06-21 15:45     ` Serge E. Hallyn
2016-06-21 21:33       ` Topi Miettinen
2016-06-21 22:01         ` Serge E. Hallyn
2016-06-22 17:14         ` Serge E. Hallyn
2016-06-22 18:14           ` Topi Miettinen
2016-06-22 18:17             ` Serge E. Hallyn
2016-06-22 23:06               ` Kees Cook
2016-06-23  0:01                 ` Serge E. Hallyn
2016-06-23  5:59                   ` Kees Cook
2016-06-23 13:55                     ` Topi Miettinen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).