All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/10] cred: #include init.h in cred.h
@ 2009-06-10  1:44 Serge E. Hallyn
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:44 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

cred.h can't be included as first header because it uses __init and
doesn't include init.h which is enough to break compilation on at least
ia64.

Signed-off-by: Alexey Dobriyan <adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
 include/linux/cred.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 3282ee4..4fa9996 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -13,6 +13,7 @@
 #define _LINUX_CRED_H
 
 #include <linux/capability.h>
+#include <linux/init.h>
 #include <linux/key.h>
 #include <asm/atomic.h>
 
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 02/10] groups: move code to kernel/groups.c
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-06-10  1:44   ` Serge E. Hallyn
  2009-06-10  1:44   ` [PATCH 03/10] cr: break out new_user_ns() Serge E. Hallyn
                     ` (7 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:44 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

Move the supplementary groups implementation to kernel/groups.c.
kernel/sys.c has already accumulated quite a lot of random stuff.

Do strictly copy/paste + add required headers to compile.
Compile-tested on many configs and archs.

Signed-off-by: Alexey Dobriyan <adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 kernel/Makefile |    1 +
 kernel/groups.c |  288 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sys.c    |  283 ------------------------------------------------------
 3 files changed, 289 insertions(+), 283 deletions(-)
 create mode 100644 kernel/groups.c

diff --git a/kernel/Makefile b/kernel/Makefile
index 6bc638d..4d4f741 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
 	    async.o
+obj-y += groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
diff --git a/kernel/groups.c b/kernel/groups.c
new file mode 100644
index 0000000..2b45b2e
--- /dev/null
+++ b/kernel/groups.c
@@ -0,0 +1,288 @@
+/*
+ * Supplementary group IDs
+ */
+#include <linux/cred.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <asm/uaccess.h>
+
+/* init to 2 - one for init_task, one to ensure it is never freed */
+struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
+
+struct group_info *groups_alloc(int gidsetsize)
+{
+	struct group_info *group_info;
+	int nblocks;
+	int i;
+
+	nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
+	/* Make sure we always allocate at least one indirect block pointer */
+	nblocks = nblocks ? : 1;
+	group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
+	if (!group_info)
+		return NULL;
+	group_info->ngroups = gidsetsize;
+	group_info->nblocks = nblocks;
+	atomic_set(&group_info->usage, 1);
+
+	if (gidsetsize <= NGROUPS_SMALL)
+		group_info->blocks[0] = group_info->small_block;
+	else {
+		for (i = 0; i < nblocks; i++) {
+			gid_t *b;
+			b = (void *)__get_free_page(GFP_USER);
+			if (!b)
+				goto out_undo_partial_alloc;
+			group_info->blocks[i] = b;
+		}
+	}
+	return group_info;
+
+out_undo_partial_alloc:
+	while (--i >= 0) {
+		free_page((unsigned long)group_info->blocks[i]);
+	}
+	kfree(group_info);
+	return NULL;
+}
+
+EXPORT_SYMBOL(groups_alloc);
+
+void groups_free(struct group_info *group_info)
+{
+	if (group_info->blocks[0] != group_info->small_block) {
+		int i;
+		for (i = 0; i < group_info->nblocks; i++)
+			free_page((unsigned long)group_info->blocks[i]);
+	}
+	kfree(group_info);
+}
+
+EXPORT_SYMBOL(groups_free);
+
+/* export the group_info to a user-space array */
+static int groups_to_user(gid_t __user *grouplist,
+			  const struct group_info *group_info)
+{
+	int i;
+	unsigned int count = group_info->ngroups;
+
+	for (i = 0; i < group_info->nblocks; i++) {
+		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
+		unsigned int len = cp_count * sizeof(*grouplist);
+
+		if (copy_to_user(grouplist, group_info->blocks[i], len))
+			return -EFAULT;
+
+		grouplist += NGROUPS_PER_BLOCK;
+		count -= cp_count;
+	}
+	return 0;
+}
+
+/* fill a group_info from a user-space array - it must be allocated already */
+static int groups_from_user(struct group_info *group_info,
+    gid_t __user *grouplist)
+{
+	int i;
+	unsigned int count = group_info->ngroups;
+
+	for (i = 0; i < group_info->nblocks; i++) {
+		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
+		unsigned int len = cp_count * sizeof(*grouplist);
+
+		if (copy_from_user(group_info->blocks[i], grouplist, len))
+			return -EFAULT;
+
+		grouplist += NGROUPS_PER_BLOCK;
+		count -= cp_count;
+	}
+	return 0;
+}
+
+/* a simple Shell sort */
+static void groups_sort(struct group_info *group_info)
+{
+	int base, max, stride;
+	int gidsetsize = group_info->ngroups;
+
+	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
+		; /* nothing */
+	stride /= 3;
+
+	while (stride) {
+		max = gidsetsize - stride;
+		for (base = 0; base < max; base++) {
+			int left = base;
+			int right = left + stride;
+			gid_t tmp = GROUP_AT(group_info, right);
+
+			while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
+				GROUP_AT(group_info, right) =
+				    GROUP_AT(group_info, left);
+				right = left;
+				left -= stride;
+			}
+			GROUP_AT(group_info, right) = tmp;
+		}
+		stride /= 3;
+	}
+}
+
+/* a simple bsearch */
+int groups_search(const struct group_info *group_info, gid_t grp)
+{
+	unsigned int left, right;
+
+	if (!group_info)
+		return 0;
+
+	left = 0;
+	right = group_info->ngroups;
+	while (left < right) {
+		unsigned int mid = (left+right)/2;
+		int cmp = grp - GROUP_AT(group_info, mid);
+		if (cmp > 0)
+			left = mid + 1;
+		else if (cmp < 0)
+			right = mid;
+		else
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * set_groups - Change a group subscription in a set of credentials
+ * @new: The newly prepared set of credentials to alter
+ * @group_info: The group list to install
+ *
+ * Validate a group subscription and, if valid, insert it into a set
+ * of credentials.
+ */
+int set_groups(struct cred *new, struct group_info *group_info)
+{
+	int retval;
+
+	retval = security_task_setgroups(group_info);
+	if (retval)
+		return retval;
+
+	put_group_info(new->group_info);
+	groups_sort(group_info);
+	get_group_info(group_info);
+	new->group_info = group_info;
+	return 0;
+}
+
+EXPORT_SYMBOL(set_groups);
+
+/**
+ * set_current_groups - Change current's group subscription
+ * @group_info: The group list to impose
+ *
+ * Validate a group subscription and, if valid, impose it upon current's task
+ * security record.
+ */
+int set_current_groups(struct group_info *group_info)
+{
+	struct cred *new;
+	int ret;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = set_groups(new, group_info);
+	if (ret < 0) {
+		abort_creds(new);
+		return ret;
+	}
+
+	return commit_creds(new);
+}
+
+EXPORT_SYMBOL(set_current_groups);
+
+SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
+{
+	const struct cred *cred = current_cred();
+	int i;
+
+	if (gidsetsize < 0)
+		return -EINVAL;
+
+	/* no need to grab task_lock here; it cannot change */
+	i = cred->group_info->ngroups;
+	if (gidsetsize) {
+		if (i > gidsetsize) {
+			i = -EINVAL;
+			goto out;
+		}
+		if (groups_to_user(grouplist, cred->group_info)) {
+			i = -EFAULT;
+			goto out;
+		}
+	}
+out:
+	return i;
+}
+
+/*
+ *	SMP: Our groups are copy-on-write. We can set them safely
+ *	without another task interfering.
+ */
+
+SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
+{
+	struct group_info *group_info;
+	int retval;
+
+	if (!capable(CAP_SETGID))
+		return -EPERM;
+	if ((unsigned)gidsetsize > NGROUPS_MAX)
+		return -EINVAL;
+
+	group_info = groups_alloc(gidsetsize);
+	if (!group_info)
+		return -ENOMEM;
+	retval = groups_from_user(group_info, grouplist);
+	if (retval) {
+		put_group_info(group_info);
+		return retval;
+	}
+
+	retval = set_current_groups(group_info);
+	put_group_info(group_info);
+
+	return retval;
+}
+
+/*
+ * Check whether we're fsgid/egid or in the supplemental group..
+ */
+int in_group_p(gid_t grp)
+{
+	const struct cred *cred = current_cred();
+	int retval = 1;
+
+	if (grp != cred->fsgid)
+		retval = groups_search(cred->group_info, grp);
+	return retval;
+}
+
+EXPORT_SYMBOL(in_group_p);
+
+int in_egroup_p(gid_t grp)
+{
+	const struct cred *cred = current_cred();
+	int retval = 1;
+
+	if (grp != cred->egid)
+		retval = groups_search(cred->group_info, grp);
+	return retval;
+}
+
+EXPORT_SYMBOL(in_egroup_p);
diff --git a/kernel/sys.c b/kernel/sys.c
index e7998cf..4edcf51 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1112,289 +1112,6 @@ out:
 	return err;
 }
 
-/*
- * Supplementary group IDs
- */
-
-/* init to 2 - one for init_task, one to ensure it is never freed */
-struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
-
-struct group_info *groups_alloc(int gidsetsize)
-{
-	struct group_info *group_info;
-	int nblocks;
-	int i;
-
-	nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
-	/* Make sure we always allocate at least one indirect block pointer */
-	nblocks = nblocks ? : 1;
-	group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
-	if (!group_info)
-		return NULL;
-	group_info->ngroups = gidsetsize;
-	group_info->nblocks = nblocks;
-	atomic_set(&group_info->usage, 1);
-
-	if (gidsetsize <= NGROUPS_SMALL)
-		group_info->blocks[0] = group_info->small_block;
-	else {
-		for (i = 0; i < nblocks; i++) {
-			gid_t *b;
-			b = (void *)__get_free_page(GFP_USER);
-			if (!b)
-				goto out_undo_partial_alloc;
-			group_info->blocks[i] = b;
-		}
-	}
-	return group_info;
-
-out_undo_partial_alloc:
-	while (--i >= 0) {
-		free_page((unsigned long)group_info->blocks[i]);
-	}
-	kfree(group_info);
-	return NULL;
-}
-
-EXPORT_SYMBOL(groups_alloc);
-
-void groups_free(struct group_info *group_info)
-{
-	if (group_info->blocks[0] != group_info->small_block) {
-		int i;
-		for (i = 0; i < group_info->nblocks; i++)
-			free_page((unsigned long)group_info->blocks[i]);
-	}
-	kfree(group_info);
-}
-
-EXPORT_SYMBOL(groups_free);
-
-/* export the group_info to a user-space array */
-static int groups_to_user(gid_t __user *grouplist,
-			  const struct group_info *group_info)
-{
-	int i;
-	unsigned int count = group_info->ngroups;
-
-	for (i = 0; i < group_info->nblocks; i++) {
-		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
-		unsigned int len = cp_count * sizeof(*grouplist);
-
-		if (copy_to_user(grouplist, group_info->blocks[i], len))
-			return -EFAULT;
-
-		grouplist += NGROUPS_PER_BLOCK;
-		count -= cp_count;
-	}
-	return 0;
-}
-
-/* fill a group_info from a user-space array - it must be allocated already */
-static int groups_from_user(struct group_info *group_info,
-    gid_t __user *grouplist)
-{
-	int i;
-	unsigned int count = group_info->ngroups;
-
-	for (i = 0; i < group_info->nblocks; i++) {
-		unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
-		unsigned int len = cp_count * sizeof(*grouplist);
-
-		if (copy_from_user(group_info->blocks[i], grouplist, len))
-			return -EFAULT;
-
-		grouplist += NGROUPS_PER_BLOCK;
-		count -= cp_count;
-	}
-	return 0;
-}
-
-/* a simple Shell sort */
-static void groups_sort(struct group_info *group_info)
-{
-	int base, max, stride;
-	int gidsetsize = group_info->ngroups;
-
-	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
-		; /* nothing */
-	stride /= 3;
-
-	while (stride) {
-		max = gidsetsize - stride;
-		for (base = 0; base < max; base++) {
-			int left = base;
-			int right = left + stride;
-			gid_t tmp = GROUP_AT(group_info, right);
-
-			while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
-				GROUP_AT(group_info, right) =
-				    GROUP_AT(group_info, left);
-				right = left;
-				left -= stride;
-			}
-			GROUP_AT(group_info, right) = tmp;
-		}
-		stride /= 3;
-	}
-}
-
-/* a simple bsearch */
-int groups_search(const struct group_info *group_info, gid_t grp)
-{
-	unsigned int left, right;
-
-	if (!group_info)
-		return 0;
-
-	left = 0;
-	right = group_info->ngroups;
-	while (left < right) {
-		unsigned int mid = (left+right)/2;
-		int cmp = grp - GROUP_AT(group_info, mid);
-		if (cmp > 0)
-			left = mid + 1;
-		else if (cmp < 0)
-			right = mid;
-		else
-			return 1;
-	}
-	return 0;
-}
-
-/**
- * set_groups - Change a group subscription in a set of credentials
- * @new: The newly prepared set of credentials to alter
- * @group_info: The group list to install
- *
- * Validate a group subscription and, if valid, insert it into a set
- * of credentials.
- */
-int set_groups(struct cred *new, struct group_info *group_info)
-{
-	int retval;
-
-	retval = security_task_setgroups(group_info);
-	if (retval)
-		return retval;
-
-	put_group_info(new->group_info);
-	groups_sort(group_info);
-	get_group_info(group_info);
-	new->group_info = group_info;
-	return 0;
-}
-
-EXPORT_SYMBOL(set_groups);
-
-/**
- * set_current_groups - Change current's group subscription
- * @group_info: The group list to impose
- *
- * Validate a group subscription and, if valid, impose it upon current's task
- * security record.
- */
-int set_current_groups(struct group_info *group_info)
-{
-	struct cred *new;
-	int ret;
-
-	new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
-
-	ret = set_groups(new, group_info);
-	if (ret < 0) {
-		abort_creds(new);
-		return ret;
-	}
-
-	return commit_creds(new);
-}
-
-EXPORT_SYMBOL(set_current_groups);
-
-SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
-{
-	const struct cred *cred = current_cred();
-	int i;
-
-	if (gidsetsize < 0)
-		return -EINVAL;
-
-	/* no need to grab task_lock here; it cannot change */
-	i = cred->group_info->ngroups;
-	if (gidsetsize) {
-		if (i > gidsetsize) {
-			i = -EINVAL;
-			goto out;
-		}
-		if (groups_to_user(grouplist, cred->group_info)) {
-			i = -EFAULT;
-			goto out;
-		}
-	}
-out:
-	return i;
-}
-
-/*
- *	SMP: Our groups are copy-on-write. We can set them safely
- *	without another task interfering.
- */
- 
-SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
-{
-	struct group_info *group_info;
-	int retval;
-
-	if (!capable(CAP_SETGID))
-		return -EPERM;
-	if ((unsigned)gidsetsize > NGROUPS_MAX)
-		return -EINVAL;
-
-	group_info = groups_alloc(gidsetsize);
-	if (!group_info)
-		return -ENOMEM;
-	retval = groups_from_user(group_info, grouplist);
-	if (retval) {
-		put_group_info(group_info);
-		return retval;
-	}
-
-	retval = set_current_groups(group_info);
-	put_group_info(group_info);
-
-	return retval;
-}
-
-/*
- * Check whether we're fsgid/egid or in the supplemental group..
- */
-int in_group_p(gid_t grp)
-{
-	const struct cred *cred = current_cred();
-	int retval = 1;
-
-	if (grp != cred->fsgid)
-		retval = groups_search(cred->group_info, grp);
-	return retval;
-}
-
-EXPORT_SYMBOL(in_group_p);
-
-int in_egroup_p(gid_t grp)
-{
-	const struct cred *cred = current_cred();
-	int retval = 1;
-
-	if (grp != cred->egid)
-		retval = groups_search(cred->group_info, grp);
-	return retval;
-}
-
-EXPORT_SYMBOL(in_egroup_p);
-
 DECLARE_RWSEM(uts_sem);
 
 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 03/10] cr: break out new_user_ns()
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-06-10  1:44   ` [PATCH 02/10] groups: move code to kernel/groups.c Serge E. Hallyn
@ 2009-06-10  1:44   ` Serge E. Hallyn
  2009-06-10  1:44   ` [PATCH 04/10] cr: split core function out of some set*{u,g}id functions Serge E. Hallyn
                     ` (6 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:44 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

Break out the core function which checks privilege and (if
allowed) creates a new user namespace, with the passed-in
user_struct as its creator.  Note that a user_namespace, unlike
other namespace pointers, is not stored in the nsproxy.
Rather it is purely a property of user_structs.

This will let us keep the task restore code simpler.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/user_namespace.h |    8 ++++++
 kernel/user_namespace.c        |   53 ++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index cc4f453..a2b82d5 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -20,6 +20,8 @@ extern struct user_namespace init_user_ns;
 
 #ifdef CONFIG_USER_NS
 
+struct user_namespace *new_user_ns(struct user_struct *creator,
+				   struct user_struct **newroot);
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	if (ns)
@@ -38,6 +40,12 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #else
 
+static inline struct user_namespace *new_user_ns(struct user_struct *creator,
+				   struct user_struct **newroot)
+{
+	return ERR_PTR(-EINVAL);	/* pointer return: callers test it with IS_ERR() */
+}
+
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	return &init_user_ns;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 076c7c8..e624b0f 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -11,15 +11,8 @@
 #include <linux/user_namespace.h>
 #include <linux/cred.h>
 
-/*
- * Create a new user namespace, deriving the creator from the user in the
- * passed credentials, and replacing that user with the new root user for the
- * new namespace.
- *
- * This is called by copy_creds(), which will finish setting the target task's
- * credentials.
- */
-int create_user_ns(struct cred *new)
+static struct user_namespace *_new_user_ns(struct user_struct *creator,
+				   struct user_struct **newroot)
 {
 	struct user_namespace *ns;
 	struct user_struct *root_user;
@@ -27,7 +20,7 @@ int create_user_ns(struct cred *new)
 
 	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
 	if (!ns)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ns->kref);
 
@@ -38,12 +31,43 @@ int create_user_ns(struct cred *new)
 	root_user = alloc_uid(ns, 0);
 	if (!root_user) {
 		kfree(ns);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	/* set the new root user in the credentials under preparation */
-	ns->creator = new->user;
-	new->user = root_user;
+	ns->creator = creator;
+
+	/* alloc_uid() incremented the userns refcount.  Just set it to 1 */
+	kref_set(&ns->kref, 1);
+
+	*newroot = root_user;
+	return ns;
+}
+
+struct user_namespace *new_user_ns(struct user_struct *creator,
+				   struct user_struct **newroot)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+	return _new_user_ns(creator, newroot);
+}
+
+/*
+ * Create a new user namespace, deriving the creator from the user in the
+ * passed credentials, and replacing that user with the new root user for the
+ * new namespace.
+ *
+ * This is called by copy_creds(), which will finish setting the target task's
+ * credentials.
+ */
+int create_user_ns(struct cred *new)
+{
+	struct user_namespace *ns;
+
+	ns = new_user_ns(new->user, &new->user);
+	if (IS_ERR(ns))
+		return PTR_ERR(ns);
+
 	new->uid = new->euid = new->suid = new->fsuid = 0;
 	new->gid = new->egid = new->sgid = new->fsgid = 0;
 	put_group_info(new->group_info);
@@ -54,9 +78,6 @@ int create_user_ns(struct cred *new)
 #endif
 	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
 
-	/* alloc_uid() incremented the userns refcount.  Just set it to 1 */
-	kref_set(&ns->kref, 1);
-
 	return 0;
 }
 
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 04/10] cr: split core function out of some set*{u,g}id functions
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-06-10  1:44   ` [PATCH 02/10] groups: move code to kernel/groups.c Serge E. Hallyn
  2009-06-10  1:44   ` [PATCH 03/10] cr: break out new_user_ns() Serge E. Hallyn
@ 2009-06-10  1:44   ` Serge E. Hallyn
       [not found]     ` <20090610014456.GC5658-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-06-10  1:45   ` [PATCH 05/10] cr: ipc: reset kern_ipc_perms Serge E. Hallyn
                     ` (5 subsequent siblings)
  8 siblings, 1 reply; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:44 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

When restarting tasks, we want to be able to change xuid and
xgid in a struct cred, and do so with security checks.  Break
the core functionality of set{fs,res}{u,g}id into cred_setX
which performs the access checks based on current_cred(),
but performs the requested change on a passed-in cred.

This will allow us to securely construct struct creds based
on a checkpoint image, constrained by the caller's permissions,
and apply them to the caller at the end of sys_restart().

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/cred.h |    8 +++
 kernel/cred.c        |  114 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sys.c         |  134 ++++++++------------------------------------------
 3 files changed, 143 insertions(+), 113 deletions(-)

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4fa9996..2ffffbe 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -21,6 +21,9 @@ struct user_struct;
 struct cred;
 struct inode;
 
+/* defined in sys.c, used in cred_setresuid */
+extern int set_user(struct cred *new);
+
 /*
  * COW Supplementary groups list
  */
@@ -344,4 +347,9 @@ do {						\
 	*(_fsgid) = __cred->fsgid;		\
 } while(0)
 
+int cred_setresuid(struct cred *new, uid_t ruid, uid_t euid, uid_t suid);
+int cred_setresgid(struct cred *new, gid_t rgid, gid_t egid, gid_t sgid);
+int cred_setfsuid(struct cred *new, uid_t uid, uid_t *old_fsuid);
+int cred_setfsgid(struct cred *new, gid_t gid, gid_t *old_fsgid);
+
 #endif /* _LINUX_CRED_H */
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a03918..a017399 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -589,3 +589,117 @@ int set_create_files_as(struct cred *new, struct inode *inode)
 	return security_kernel_create_files_as(new, inode);
 }
 EXPORT_SYMBOL(set_create_files_as);
+
+int cred_setresuid(struct cred *new, uid_t ruid, uid_t euid, uid_t suid)
+{
+	int retval;
+	const struct cred *old;
+
+	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
+	if (retval)
+		return retval;
+	old = current_cred();
+
+	if (!capable(CAP_SETUID)) {
+		if (ruid != (uid_t) -1 && ruid != old->uid &&
+		    ruid != old->euid  && ruid != old->suid)
+			return -EPERM;
+		if (euid != (uid_t) -1 && euid != old->uid &&
+		    euid != old->euid  && euid != old->suid)
+			return -EPERM;
+		if (suid != (uid_t) -1 && suid != old->uid &&
+		    suid != old->euid  && suid != old->suid)
+			return -EPERM;
+	}
+
+	if (ruid != (uid_t) -1) {
+		new->uid = ruid;
+		if (ruid != old->uid) {
+			retval = set_user(new);
+			if (retval < 0)
+				return retval;
+		}
+	}
+	if (euid != (uid_t) -1)
+		new->euid = euid;
+	if (suid != (uid_t) -1)
+		new->suid = suid;
+	new->fsuid = new->euid;
+
+	return security_task_fix_setuid(new, old, LSM_SETID_RES);
+}
+
+int cred_setresgid(struct cred *new, gid_t rgid, gid_t egid,
+			gid_t sgid)
+{
+	const struct cred *old = current_cred();
+	int retval;
+
+	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
+	if (retval)
+		return retval;
+
+	if (!capable(CAP_SETGID)) {
+		if (rgid != (gid_t) -1 && rgid != old->gid &&
+		    rgid != old->egid  && rgid != old->sgid)
+			return -EPERM;
+		if (egid != (gid_t) -1 && egid != old->gid &&
+		    egid != old->egid  && egid != old->sgid)
+			return -EPERM;
+		if (sgid != (gid_t) -1 && sgid != old->gid &&
+		    sgid != old->egid  && sgid != old->sgid)
+			return -EPERM;
+	}
+
+	if (rgid != (gid_t) -1)
+		new->gid = rgid;
+	if (egid != (gid_t) -1)
+		new->egid = egid;
+	if (sgid != (gid_t) -1)
+		new->sgid = sgid;
+	new->fsgid = new->egid;
+	return 0;
+}
+
+int cred_setfsuid(struct cred *new, uid_t uid, uid_t *old_fsuid)
+{
+	const struct cred *old;
+
+	old = current_cred();
+	*old_fsuid = old->fsuid;
+
+	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
+		return -EPERM;
+
+	if (uid == old->uid  || uid == old->euid  ||
+	    uid == old->suid || uid == old->fsuid ||
+	    capable(CAP_SETUID)) {
+		if (uid != *old_fsuid) {
+			new->fsuid = uid;
+			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
+				return 0;
+		}
+	}
+	return -EPERM;
+}
+
+int cred_setfsgid(struct cred *new, gid_t gid, gid_t *old_fsgid)
+{
+	const struct cred *old;
+
+	old = current_cred();
+	*old_fsgid = old->fsgid;
+
+	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
+		return -EPERM;
+
+	if (gid == old->gid  || gid == old->egid  ||
+	    gid == old->sgid || gid == old->fsgid ||
+	    capable(CAP_SETGID)) {
+		if (gid != *old_fsgid) {
+			new->fsgid = gid;
+			return 0;
+		}
+	}
+	return -EPERM;
+}
diff --git a/kernel/sys.c b/kernel/sys.c
index 4edcf51..0cedec0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -558,11 +558,12 @@ error:
 /*
  * change the user struct in a credentials set to match the new UID
  */
-static int set_user(struct cred *new)
+int set_user(struct cred *new)
 {
 	struct user_struct *new_user;
 
-	new_user = alloc_uid(current_user_ns(), new->uid);
+	/* is this ok? */
+	new_user = alloc_uid(new->user->user_ns, new->uid);
 	if (!new_user)
 		return -EAGAIN;
 
@@ -703,14 +704,12 @@ error:
 	return retval;
 }
 
-
 /*
  * This function implements a generic ability to update ruid, euid,
  * and suid.  This allows you to implement the 4.4 compatible seteuid().
  */
 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 {
-	const struct cred *old;
 	struct cred *new;
 	int retval;
 
@@ -718,45 +717,10 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 	if (!new)
 		return -ENOMEM;
 
-	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
-	if (retval)
-		goto error;
-	old = current_cred();
-
-	retval = -EPERM;
-	if (!capable(CAP_SETUID)) {
-		if (ruid != (uid_t) -1 && ruid != old->uid &&
-		    ruid != old->euid  && ruid != old->suid)
-			goto error;
-		if (euid != (uid_t) -1 && euid != old->uid &&
-		    euid != old->euid  && euid != old->suid)
-			goto error;
-		if (suid != (uid_t) -1 && suid != old->uid &&
-		    suid != old->euid  && suid != old->suid)
-			goto error;
-	}
-
-	if (ruid != (uid_t) -1) {
-		new->uid = ruid;
-		if (ruid != old->uid) {
-			retval = set_user(new);
-			if (retval < 0)
-				goto error;
-		}
-	}
-	if (euid != (uid_t) -1)
-		new->euid = euid;
-	if (suid != (uid_t) -1)
-		new->suid = suid;
-	new->fsuid = new->euid;
-
-	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
-	if (retval < 0)
-		goto error;
-
-	return commit_creds(new);
+	retval = cred_setresuid(new, ruid, euid, suid);
+	if (retval == 0)
+		return commit_creds(new);
 
-error:
 	abort_creds(new);
 	return retval;
 }
@@ -778,43 +742,17 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __u
  */
 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
 {
-	const struct cred *old;
 	struct cred *new;
 	int retval;
 
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
-	old = current_cred();
 
-	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
-	if (retval)
-		goto error;
+	retval = cred_setresgid(new, rgid, egid, sgid);
+	if (retval == 0)
+		return commit_creds(new);
 
-	retval = -EPERM;
-	if (!capable(CAP_SETGID)) {
-		if (rgid != (gid_t) -1 && rgid != old->gid &&
-		    rgid != old->egid  && rgid != old->sgid)
-			goto error;
-		if (egid != (gid_t) -1 && egid != old->gid &&
-		    egid != old->egid  && egid != old->sgid)
-			goto error;
-		if (sgid != (gid_t) -1 && sgid != old->gid &&
-		    sgid != old->egid  && sgid != old->sgid)
-			goto error;
-	}
-
-	if (rgid != (gid_t) -1)
-		new->gid = rgid;
-	if (egid != (gid_t) -1)
-		new->egid = egid;
-	if (sgid != (gid_t) -1)
-		new->sgid = sgid;
-	new->fsgid = new->egid;
-
-	return commit_creds(new);
-
-error:
 	abort_creds(new);
 	return retval;
 }
@@ -831,7 +769,6 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __u
 	return retval;
 }
 
-
 /*
  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
  * is used for "access()" and for the NFS daemon (letting nfsd stay at
@@ -840,35 +777,20 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __u
  */
 SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 {
-	const struct cred *old;
 	struct cred *new;
 	uid_t old_fsuid;
+	int retval;
 
 	new = prepare_creds();
 	if (!new)
 		return current_fsuid();
-	old = current_cred();
-	old_fsuid = old->fsuid;
-
-	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
-		goto error;
-
-	if (uid == old->uid  || uid == old->euid  ||
-	    uid == old->suid || uid == old->fsuid ||
-	    capable(CAP_SETUID)) {
-		if (uid != old_fsuid) {
-			new->fsuid = uid;
-			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
-				goto change_okay;
-		}
-	}
 
-error:
-	abort_creds(new);
-	return old_fsuid;
+	retval = cred_setfsuid(new, uid, &old_fsuid);
+	if (retval == 0)
+		commit_creds(new);
+	else
+		abort_creds(new);
 
-change_okay:
-	commit_creds(new);
 	return old_fsuid;
 }
 
@@ -877,34 +799,20 @@ change_okay:
  */
 SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 {
-	const struct cred *old;
 	struct cred *new;
 	gid_t old_fsgid;
+	int retval;
 
 	new = prepare_creds();
 	if (!new)
 		return current_fsgid();
-	old = current_cred();
-	old_fsgid = old->fsgid;
-
-	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
-		goto error;
-
-	if (gid == old->gid  || gid == old->egid  ||
-	    gid == old->sgid || gid == old->fsgid ||
-	    capable(CAP_SETGID)) {
-		if (gid != old_fsgid) {
-			new->fsgid = gid;
-			goto change_okay;
-		}
-	}
 
-error:
-	abort_creds(new);
-	return old_fsgid;
+	retval = cred_setfsgid(new, gid, &old_fsgid);
+	if (retval == 0)
+		commit_creds(new);
+	else
+		abort_creds(new);
 
-change_okay:
-	commit_creds(new);
 	return old_fsgid;
 }
 
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 05/10] cr: ipc: reset kern_ipc_perms
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (2 preceding siblings ...)
  2009-06-10  1:44   ` [PATCH 04/10] cr: split core function out of some set*{u,g}id functions Serge E. Hallyn
@ 2009-06-10  1:45   ` Serge E. Hallyn
  2009-06-10  1:45   ` [PATCH 06/10] cr: capabilities: define checkpoint and restore fns Serge E. Hallyn
                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:45 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

Reset the checkpointed uid and gid info on ipc objects.

Right now, we return -EPERM if the user calling sys_restart() isn't
allowed to create an object with the checkpointed uid.  We may prefer
to simply use the caller's uid in that case - but that could lead to
subtle userspace bugs?  Unsure, so going for the stricter behavior.

TODO: restore kern_ipc_perms->security.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 ipc/checkpoint.c |   33 +++++++++++++++++++++++++++++++--
 1 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
index f621226..bc77743 100644
--- a/ipc/checkpoint.c
+++ b/ipc/checkpoint.c
@@ -119,6 +119,26 @@ int checkpoint_ipc_ns(struct ckpt_ctx *ctx, struct ipc_namespace *ipc_ns)
  * Restart
  */
 
+/*
+ * Check whether the current task may create an ipc object with the
+ * checkpointed uids and gids; CAP_IPC_OWNER overrides every check.
+ * Return 1 if ok, 0 if not.
+ */
+static int validate_created_perms(struct ckpt_hdr_ipc_perms *h)
+{
+	const struct cred *cred = current_cred();
+	uid_t uid = cred->uid, euid = cred->euid;
+
+	/* actually I don't know - is CAP_IPC_OWNER the right one? */
+	if (((h->uid != uid && h->uid != euid) ||
+			(h->cuid != uid && h->cuid != euid) ||
+			!in_group_p(h->cgid) ||
+			!in_group_p(h->gid)) &&
+			!capable(CAP_IPC_OWNER))
+		return 0;
+	return 1;
+}
+
 int restore_load_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 			   struct kern_ipc_perm *perm)
 {
@@ -139,14 +159,23 @@ int restore_load_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 
 	perm->id = h->id;
 	perm->key = h->key;
-#if 0 /* FIX: requires security checks */
+
+	if (!validate_created_perms(h))
+		return -EPERM;
 	perm->uid = h->uid;
 	perm->gid = h->gid;
 	perm->cuid = h->cuid;
 	perm->cgid = h->cgid;
-#endif
 	perm->mode = h->mode;
 	perm->seq = h->seq;
+	/*
+	 * Todo: restore perm->security.
+	 * At the moment it gets set by security_x_alloc() called through
+	 * ipcget()->ipcget_public()->ops->getnew (->newque for instance)
+	 * We will want to ask the LSM to consider resetting the
+	 * checkpointed ->security, based on current_security(),
+	 * the checkpointed ->security, and the checkpoint file context.
+	 */
 
 	return 0;
 }
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 06/10] cr: capabilities: define checkpoint and restore fns
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (3 preceding siblings ...)
  2009-06-10  1:45   ` [PATCH 05/10] cr: ipc: reset kern_ipc_perms Serge E. Hallyn
@ 2009-06-10  1:45   ` Serge E. Hallyn
  2009-06-10  1:46   ` [PATCH 07/10] cr: checkpoint and restore task credentials Serge E. Hallyn
                     ` (3 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:45 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

[ Andrew: I am punting on dealing with the subsystem cooperation
issues in this version, in favor of trying to get LSM issues
straightened out ]

An application checkpoint image will store capability sets
(and the bounding set) as pairs of __u32s.  Define checkpoint and
restart functions to translate between those and kernel_cap_t's.

Define a common function do_capset_tocred() which applies capability
set changes to a passed-in struct cred.

The restore function uses do_capset_tocred() to apply the restored
capabilities to the struct cred being crafted, subject to the
current task's (task executing sys_restart()) permissions.

Changelog:
	Jun 09: Can't choose securebits or drop bounding set if
		file capabilities aren't compiled into the kernel.
		Also just store caps in __u32s (looks cleaner).
	Jun 01: Made the checkpoint and restore functions and the
		ckpt_hdr_capabilities struct more opaque to the
		rest of the c/r code, as suggested by Andrew Morgan,
		and using naming suggested by Oren.
	Jun 01: Add commented BUILD_BUG_ON() to point out that the
		current implementation depends on 64-bit capabilities.
		(Andrew Morgan and Alexey Dobriyan).
	May 28: add helpers to c/r securebits

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/capability.h     |    6 ++
 include/linux/checkpoint_hdr.h |   11 +++
 kernel/capability.c            |  162 +++++++++++++++++++++++++++++++++++++---
 security/commoncap.c           |   19 +----
 4 files changed, 170 insertions(+), 28 deletions(-)

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c302110..3a74655 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -568,6 +568,12 @@ extern int capable(int cap);
 struct dentry;
 extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
 
+struct cred;
+int apply_securebits(unsigned securebits, struct cred *new);
+struct ckpt_capabilities;
+int restore_capabilities(struct ckpt_capabilities *h, struct cred *new);
+void checkpoint_capabilities(struct ckpt_capabilities *h, struct cred * cred);
+
 #endif /* __KERNEL__ */
 
 #endif /* !_LINUX_CAPABILITY_H */
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 874518c..37f2d4d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -56,6 +56,7 @@ enum {
 	CKPT_HDR_NS,
 	CKPT_HDR_UTS_NS,
 	CKPT_HDR_IPC_NS,
+	CKPT_HDR_CAPABILITIES,
 
 	/* 201-299: reserved for arch-dependent */
 
@@ -166,6 +167,16 @@ struct ckpt_hdr_task {
 	__u32 task_comm_len;
 } __attribute__((aligned(8)));
 
+/* Posix capabilities */
+struct ckpt_capabilities {
+	__u32 cap_i_0, cap_i_1; /* inheritable set */
+	__u32 cap_p_0, cap_p_1; /* permitted set */
+	__u32 cap_e_0, cap_e_1; /* effective set */
+	__u32 cap_b_0, cap_b_1; /* bounding set */
+	__u32 securebits;
+	__u32 padding;
+} __attribute__((aligned(8)));
+
 /* namespaces */
 struct ckpt_hdr_task_ns {
 	struct ckpt_hdr h;
diff --git a/kernel/capability.c b/kernel/capability.c
index 4e17041..90cc7b4 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,8 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
+#include <linux/securebits.h>
+#include <linux/checkpoint_hdr.h>
 #include <asm/uaccess.h>
 #include "cred-internals.h"
 
@@ -217,6 +219,45 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 	return ret;
 }
 
+static int do_capset_tocred(kernel_cap_t *effective, kernel_cap_t *inheritable,
+			kernel_cap_t *permitted, struct cred *new)
+{
+	int ret;
+
+	ret = security_capset(new, current_cred(),
+			      effective, inheritable, permitted);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * for checkpoint-restart, do we want to wait until end of restart?
+	 * not sure we care */
+	audit_log_capset(current->pid, new, current_cred());
+
+	return 0;
+}
+
+static int do_capset(kernel_cap_t *effective, kernel_cap_t *inheritable,
+			kernel_cap_t *permitted)
+{
+	struct cred *new;
+	int ret;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = do_capset_tocred(effective, inheritable, permitted, new);
+	if (ret < 0)
+		goto error;
+
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
+	return ret;
+}
+
 /**
  * sys_capset - set capabilities for a process or (*) a group of processes
  * @header: pointer to struct that contains capability version and
@@ -240,7 +281,6 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
 	kernel_cap_t inheritable, permitted, effective;
-	struct cred *new;
 	int ret;
 	pid_t pid;
 
@@ -271,21 +311,121 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 		i++;
 	}
 
-	new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
+	return do_capset(&effective, &inheritable, &permitted);
 
-	ret = security_capset(new, current_cred(),
-			      &effective, &inheritable, &permitted);
+}
+
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+int apply_securebits(unsigned securebits, struct cred *new)
+{
+	if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
+	     & (new->securebits ^ securebits))				/*[1]*/
+	    || ((new->securebits & SECURE_ALL_LOCKS & ~securebits))	/*[2]*/
+	    || (securebits & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
+	    || (cap_capable(current, current_cred(), CAP_SETPCAP,
+			    SECURITY_CAP_AUDIT) != 0)			/*[4]*/
+		/*
+		 * [1] no changing of bits that are locked
+		 * [2] no unlocking of locks
+		 * [3] no setting of unsupported bits
+		 * [4] doing anything requires privilege (go read about
+		 *     the "sendmail capabilities bug")
+		 */
+	    )
+		/* cannot change a locked bit */
+		return -EPERM;
+	new->securebits = securebits;
+	return 0;
+}
+
+static void do_capbset_drop(struct cred *cred, int cap)
+{
+	cap_lower(cred->cap_bset, cap);
+}
+
+/* Drop caps missing from @bset; CAP_LAST_CAP itself is a valid cap. */
+static inline int restore_cap_bset(kernel_cap_t bset, struct cred *cred)
+{
+	int i, may_dropbcap = capable(CAP_SETPCAP);
+
+	for (i = 0; i <= CAP_LAST_CAP; i++) {
+		if (cap_raised(bset, i) ||
+		    !cap_raised(current_cred()->cap_bset, i))
+			continue;
+		if (!may_dropbcap)
+			return -EPERM;
+		do_capbset_drop(cred, i);
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_SECURITY_FILE_CAPABILITIES */
+
+int apply_securebits(unsigned securebits, struct cred *new)
+{
+	/* settable securebits not supported */
+	return 0;
+}
+
+static inline int restore_cap_bset(kernel_cap_t bset, struct cred *cred)
+{
+	/* bounding sets not supported */
+	return 0;
+}
+#endif /* CONFIG_SECURITY_FILE_CAPABILITIES */
+
+static int do_restore_caps(struct ckpt_capabilities *h, struct cred *cred)
+{
+	kernel_cap_t effective, inheritable, permitted, bset;
+	int ret;
+
+	effective.cap[0] = h->cap_e_0;
+	effective.cap[1] = h->cap_e_1;
+	inheritable.cap[0] = h->cap_i_0;
+	inheritable.cap[1] = h->cap_i_1;
+	permitted.cap[0] = h->cap_p_0;
+	permitted.cap[1] = h->cap_p_1;
+	bset.cap[0] = h->cap_b_0;
+	bset.cap[1] = h->cap_b_1;
+
+	ret = do_capset_tocred(&effective, &inheritable, &permitted, cred);
 	if (ret < 0)
-		goto error;
+		return ret;
+
+	ret = restore_cap_bset(bset, cred);
+	return ret;
+}
 
-	audit_log_capset(pid, new, current_cred());
+void checkpoint_capabilities(struct ckpt_capabilities *h, struct cred * cred)
+{
+	BUILD_BUG_ON(CAP_LAST_CAP >= 64);
+	h->securebits = cred->securebits;
+	h->cap_i_0 = cred->cap_inheritable.cap[0];
+	h->cap_i_1 = cred->cap_inheritable.cap[1];
+	h->cap_p_0 = cred->cap_permitted.cap[0];
+	h->cap_p_1 = cred->cap_permitted.cap[1];
+	h->cap_e_0 = cred->cap_effective.cap[0];
+	h->cap_e_1 = cred->cap_effective.cap[1];
+	h->cap_b_0 = cred->cap_bset.cap[0];
+	h->cap_b_1 = cred->cap_bset.cap[1];
+}
 
-	return commit_creds(new);
+/*
+ * restore_capabilities: called by restore_creds() to set the
+ * restored capabilities (if permitted) in a new struct cred which
+ * will be attached at the end of the sys_restart().
+ * struct cred *new is prepared by caller (using prepare_creds())
+ * (and aborted by caller on error)
+ * return 0 on success, < 0 on error
+ */
+int restore_capabilities(struct ckpt_capabilities *h, struct cred *new)
+{
+	int ret = do_restore_caps(h, new);
+
+	if (!ret)
+		ret = apply_securebits(h->securebits, new);
 
-error:
-	abort_creds(new);
 	return ret;
 }
 
diff --git a/security/commoncap.c b/security/commoncap.c
index beac025..528da3b 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -861,24 +861,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 	 * capability-based-privilege environment.
 	 */
 	case PR_SET_SECUREBITS:
-		error = -EPERM;
-		if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
-		     & (new->securebits ^ arg2))			/*[1]*/
-		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
-		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, current_cred(), CAP_SETPCAP,
-				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
-			/*
-			 * [1] no changing of bits that are locked
-			 * [2] no unlocking of locks
-			 * [3] no setting of unsupported bits
-			 * [4] doing anything requires privilege (go read about
-			 *     the "sendmail capabilities bug")
-			 */
-		    )
-			/* cannot change a locked bit */
+		error = apply_securebits(arg2, new);
+		if (error)
 			goto error;
-		new->securebits = arg2;
 		goto changed;
 
 	case PR_GET_SECUREBITS:
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 07/10] cr: checkpoint and restore task credentials
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (4 preceding siblings ...)
  2009-06-10  1:45   ` [PATCH 06/10] cr: capabilities: define checkpoint and restore fns Serge E. Hallyn
@ 2009-06-10  1:46   ` Serge E. Hallyn
  2009-06-10  1:46   ` [PATCH 08/10] cr: restore file->f_cred Serge E. Hallyn
                     ` (2 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:46 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

This patch adds the checkpointing and restart of credentials
(uids, gids, and capabilities) to Oren's c/r patchset (on top
of v14).  It goes to great pains to re-use (and define when
needed) common helpers, in order to make sure that as security
code is modified, the cr code will be updated.  Some of the
helpers should still be moved (i.e. _creds() functions should
be in kernel/cred.c).

When building the credentials for the restarted process, I
1. create a new struct cred as a copy of the running task's
cred (using prepare_creds())
2. always authorize any changes to the new struct cred
based on the permissions of current_cred() (not the current
transient state of the new cred).

While this may mean that certain transient_cred1->transient_cred2
states are allowed which otherwise wouldn't be allowed, the
fact remains that current_cred() is allowed to transition to
transient_cred2.

The reconstructed creds are applied to the task at the very
end of the sys_restart call.  This ensures that any objects which
need to be re-created (file, socket, etc) are re-created using
the creds of the task calling sys_restart - preventing an unpriv
user from creating a privileged object, and ensuring that a
root task can restart a process which had started out privileged,
created some privileged objects, then dropped its privilege.

With these patches, the root user can restart checkpoint images
(created by either hallyn or root) of user hallyn's tasks,
resulting in a program owned by hallyn.

Changelog:
	Jun 01: Don't check ordering of groups in group_info, bc
		set_groups() will sort it for us.
	May 28: 1. Restore securebits
		2. Address Alexey's comments: move prototypes out of
		   sched.h, validate ngroups < NGROUPS_MAX, validate
		   groups are sorted, and get rid of ckpt_hdr_cred->version.
		3. remove bogus unused flag RESTORE_CREATE_USERNS
	May 26: Move group, user, userns, creds c/r functions out
		of checkpoint/process.c and into the appropriate files.
	May 26: Define struct ckpt_hdr_task_creds and move task cred
		objref c/r into {checkpoint_restore}_task_shared().
	May 26: Take cred refs around checkpoint_write_creds()
	May 20: Remove the limit on number of groups in groupinfo
		at checkpoint time
	May 20: Remove the depth limit on empty user namespaces
	May 20: Better document checkpoint_user
	May 18: fix more refcounting: if (userns 5, uid 0) had
		no active tasks or child user_namespaces, then
		it shouldn't exist at restart or it, its namespace,
		and its whole chain of creators will be leaked.
	May 14: fix some refcounting:
		1. a new user_ns needs a ref to remain pinned
		   by its root user
		2. current_user_ns needs an extra ref bc objhash
		   drops two on restart
		3. cred needs a ref for the real credentials bc
		   commit_creds eats one ref.
	May 13: folded in fix to userns refcounting.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/objhash.c             |  146 +++++++++++++++++++++++++++++++++----
 checkpoint/process.c             |  152 +++++++++++++++++++++++++++++++++++++-
 include/linux/checkpoint.h       |   11 +++
 include/linux/checkpoint_hdr.h   |   60 +++++++++++++++
 include/linux/checkpoint_types.h |    1 +
 include/linux/cred.h             |   13 +++
 include/linux/user_namespace.h   |    6 ++
 kernel/cred.c                    |  113 ++++++++++++++++++++++++++++
 kernel/groups.c                  |   59 +++++++++++++++
 kernel/user.c                    |  147 ++++++++++++++++++++++++++++++++++++
 kernel/user_namespace.c          |   86 +++++++++++++++++++++
 11 files changed, 778 insertions(+), 16 deletions(-)

diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 50a124b..84f003d 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -17,6 +17,7 @@
 #include <linux/fdtable.h>
 #include <linux/sched.h>
 #include <linux/ipc_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
@@ -181,6 +182,71 @@ static int obj_ipc_ns_users(void *ptr)
 	return atomic_read(&((struct ipc_namespace *) ptr)->count);
 }
 
+static int obj_cred_grab(void *ptr)
+{
+	get_cred((struct cred *) ptr);
+	return 0;
+}
+
+static void obj_cred_drop(void *ptr)
+{
+	put_cred((struct cred *) ptr);
+}
+
+static int obj_cred_users(void *ptr)
+{
+	return atomic_read(&((struct cred *) ptr)->usage);
+}
+
+static int obj_user_grab(void *ptr)
+{
+	struct user_struct *u = ptr;
+	(void) get_uid(u);
+	return 0;
+}
+
+static void obj_user_drop(void *ptr)
+{
+	free_uid((struct user_struct *) ptr);
+}
+
+static int obj_user_users(void *ptr)
+{
+	return atomic_read(&((struct user_struct *) ptr)->__count);
+}
+
+static int obj_userns_grab(void *ptr)
+{
+	get_user_ns((struct user_namespace *) ptr);
+	return 0;
+}
+
+static void obj_userns_drop(void *ptr)
+{
+	put_user_ns((struct user_namespace *) ptr);
+}
+
+static int obj_user_ns_users(void *ptr)
+{
+	return atomic_read(&((struct user_namespace *) ptr)->kref.refcount);
+}
+
+static int obj_groupinfo_grab(void *ptr)
+{
+	get_group_info((struct group_info *) ptr);
+	return 0;
+}
+
+static void obj_groupinfo_drop(void *ptr)
+{
+	put_group_info((struct group_info *) ptr);
+}
+
+static int obj_groupinfo_users(void *ptr)
+{
+	return atomic_read(&((struct group_info *) ptr)->usage);
+}
+
 static struct ckpt_obj_ops ckpt_obj_ops[] = {
 	/* ignored object */
 	{
@@ -258,6 +324,46 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
 		.checkpoint = checkpoint_bad,
 		.restore = restore_bad,
 	},
+	/* user_ns object */
+	{
+		.obj_name = "USER_NS",
+		.obj_type = CKPT_OBJ_USER_NS,
+		.ref_drop = obj_userns_drop,
+		.ref_grab = obj_userns_grab,
+		.ref_users = obj_user_ns_users,
+		.checkpoint = checkpoint_userns,
+		.restore = restore_userns,
+	},
+	/* struct cred */
+	{
+		.obj_name = "CRED",
+		.obj_type = CKPT_OBJ_CRED,
+		.ref_drop = obj_cred_drop,
+		.ref_grab = obj_cred_grab,
+		.ref_users = obj_cred_users,
+		.checkpoint = checkpoint_cred,
+		.restore = restore_cred,
+	},
+	/* user object */
+	{
+		.obj_name = "USER",
+		.obj_type = CKPT_OBJ_USER,
+		.ref_drop = obj_user_drop,
+		.ref_grab = obj_user_grab,
+		.ref_users = obj_user_users,
+		.checkpoint = checkpoint_user,
+		.restore = restore_user,
+	},
+	/* struct groupinfo */
+	{
+		.obj_name = "GROUPINFO",
+		.obj_type = CKPT_OBJ_GROUPINFO,
+		.ref_drop = obj_groupinfo_drop,
+		.ref_grab = obj_groupinfo_grab,
+		.ref_users = obj_groupinfo_users,
+		.checkpoint = checkpoint_groupinfo,
+		.restore = restore_groupinfo,
+	},
 };
 
 
@@ -313,6 +419,32 @@ int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx)
 	return 0;
 }
 
+static struct ckpt_obj *obj_find_by_ptr(struct ckpt_ctx *ctx, void *ptr)
+{
+	struct hlist_head *h;
+	struct hlist_node *n;
+	struct ckpt_obj *obj;
+
+	h = &ctx->obj_hash->head[hash_long((unsigned long) ptr,
+					   CKPT_OBJ_HASH_NBITS)];
+	hlist_for_each_entry(obj, n, h, hash)
+		if (obj->ptr == ptr)
+			return obj;
+	return NULL;
+}
+
+/*
+ * look up an obj and return objref if in hash, else
+ * return 0.  Used during checkpoint.
+ */
+int obj_lookup(struct ckpt_ctx *ctx, void *ptr)
+{
+	struct ckpt_obj *obj = obj_find_by_ptr(ctx, ptr);
+	if (obj)
+		return obj->objref;
+	return 0;
+}
+
 /**
  * ckpt_obj_new - add an object to the obj_hash
  * @ctx: checkpoint context
@@ -370,20 +502,6 @@ static struct ckpt_obj *obj_new(struct ckpt_ctx *ctx, void *ptr,
  * Checkpoint
  */
 
-static struct ckpt_obj *obj_find_by_ptr(struct ckpt_ctx *ctx, void *ptr)
-{
-	struct hlist_head *h;
-	struct hlist_node *n;
-	struct ckpt_obj *obj;
-
-	h = &ctx->obj_hash->head[hash_long((unsigned long) ptr,
-					   CKPT_OBJ_HASH_NBITS)];
-	hlist_for_each_entry(obj, n, h, hash)
-		if (obj->ptr == ptr)
-			return obj;
-	return NULL;
-}
-
 /**
  * obj_lookup_add - lookup object and add if not in objhash
  * @ctx: checkpoint context
diff --git a/checkpoint/process.c b/checkpoint/process.c
index e7f43d4..fbf2bd5 100644
--- a/checkpoint/process.c
+++ b/checkpoint/process.c
@@ -17,6 +17,7 @@
 #include <linux/futex.h>
 #include <linux/poll.h>
 #include <linux/utsname.h>
+#include <linux/user_namespace.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 #include <linux/syscalls.h>
@@ -25,6 +26,26 @@
  * Checkpoint
  */
 
+int checkpoint_groupinfo(struct ckpt_ctx *ctx, void *ptr)
+{
+	return checkpoint_write_groupinfo(ctx, (struct group_info *)ptr);
+}
+
+int checkpoint_userns(struct ckpt_ctx *ctx, void *ptr)
+{
+	return checkpoint_write_userns(ctx, (struct user_namespace *) ptr);
+}
+
+int checkpoint_user(struct ckpt_ctx *ctx, void *ptr)
+{
+	return checkpoint_write_user(ctx, (struct user_struct *)ptr);
+}
+
+int checkpoint_cred(struct ckpt_ctx *ctx, void *ptr)
+{
+	return checkpoint_write_cred(ctx, (struct cred *) ptr);
+}
+
 /* dump the task_struct of a given task */
 static int checkpoint_task_struct(struct ckpt_ctx *ctx, struct task_struct *t)
 {
@@ -161,6 +182,46 @@ static int checkpoint_task_ns(struct ckpt_ctx *ctx, struct task_struct *t)
 	return ret;
 }
 
+static int checkpoint_task_creds(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	int realcred_ref, ecred_ref;
+	struct cred *rcred, *ecred;
+	struct ckpt_hdr_task_creds *h;
+	int ret;
+
+	rcred = get_cred(t->real_cred);
+	ecred = get_cred(t->cred);
+
+	realcred_ref = checkpoint_obj(ctx, rcred, CKPT_OBJ_CRED);
+	if (realcred_ref < 0) {
+		ret = realcred_ref;
+		goto error;
+	}
+
+	ecred_ref = checkpoint_obj(ctx, ecred, CKPT_OBJ_CRED);
+	if (ecred_ref < 0) {
+		ret = ecred_ref;
+		goto error;
+	}
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_CREDS);
+	if (!h) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	h->cred_ref = realcred_ref;
+	h->ecred_ref = ecred_ref;
+	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
+	ckpt_hdr_put(ctx, h);
+
+error:
+	put_cred(rcred);
+	put_cred(ecred);
+	return ret;
+
+}
+
 static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 {
 	struct ckpt_hdr_task_objs *h;
@@ -176,7 +237,9 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 	 * restored when it gets to restore, e.g. its memory.
 	 */
 
-	ret = checkpoint_task_ns(ctx, t);
+	ret = checkpoint_task_creds(ctx, t);
+	if (!ret)
+		ret = checkpoint_task_ns(ctx, t);
 	if (ret < 0)
 		return ret;
 
@@ -387,6 +450,26 @@ int ckpt_collect_task(struct ckpt_ctx *ctx, struct task_struct *t)
  * Restart
  */
 
+void *restore_groupinfo(struct ckpt_ctx *ctx)
+{
+	return (void *) restore_read_groupinfo(ctx);
+}
+
+void *restore_userns(struct ckpt_ctx *ctx)
+{
+	return (void *) restore_read_userns(ctx);
+}
+
+void *restore_user(struct ckpt_ctx *ctx)
+{
+	return (void *) restore_read_user(ctx);
+}
+
+void *restore_cred(struct ckpt_ctx *ctx)
+{
+	return (void *) restore_read_cred(ctx);
+}
+
 /* read the task_struct into the current task */
 static int restore_task_struct(struct ckpt_ctx *ctx)
 {
@@ -404,8 +487,12 @@ static int restore_task_struct(struct ckpt_ctx *ctx)
 
 	memset(t->comm, 0, TASK_COMM_LEN);
 	ret = _ckpt_read_string(ctx, t->comm, h->task_comm_len);
+	if (ret < 0)
+		goto out;
 
 	/* FIXME: restore remaining relevant task_struct fields */
+
+	ret = 0;
  out:
 	ckpt_hdr_put(ctx, h);
 	return ret;
@@ -553,6 +640,34 @@ static int restore_task_ns(struct ckpt_ctx *ctx)
 	return ret;
 }
 
+static int restore_task_creds(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_task_creds *h;
+	struct cred *realcred, *ecred;
+	int ret = 0;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_CREDS);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	realcred = ckpt_obj_fetch(ctx, h->cred_ref, CKPT_OBJ_CRED);
+	if (IS_ERR(realcred)) {
+		ret = PTR_ERR(realcred);
+		goto out;
+	}
+	ecred = ckpt_obj_fetch(ctx, h->ecred_ref, CKPT_OBJ_CRED);
+	if (IS_ERR(ecred)) {
+		ret = PTR_ERR(ecred);
+		goto out;
+	}
+	ctx->realcred = realcred;
+	ctx->ecred = ecred;
+
+out:
+	ckpt_hdr_put(ctx, h);
+	return ret;
+}
+
 static int restore_task_objs(struct ckpt_ctx *ctx)
 {
 	struct ckpt_hdr_task_objs *h;
@@ -563,7 +678,9 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
 	 * and because shared objects are restored before they are
 	 * referenced. See comment in checkpoint_task_objs.
 	 */
-	ret = restore_task_ns(ctx);
+	ret = restore_task_creds(ctx);
+	if (!ret)
+		ret = restore_task_ns(ctx);
 	if (ret < 0)
 		return ret;
 
@@ -581,6 +698,33 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
 	return ret;
 }
 
+static int restore_creds(struct ckpt_ctx *ctx)
+{
+	int ret;
+	const struct cred *old;
+	struct cred *rcred, *ecred;
+
+	rcred = ctx->realcred;
+	ecred = ctx->ecred;
+
+	/* commit_creds will take one ref for the eff creds, but
+	 * expects us to hold a ref for the obj creds, so take a
+	 * ref here */
+	get_cred(rcred);
+	ret = commit_creds(rcred);
+	if (ret)
+		return ret;
+
+	if (ecred == rcred)
+		return 0;
+
+	old =  override_creds(ecred); /* override_creds otoh takes new ref */
+	put_cred(old);
+
+	ctx->realcred = ctx->ecred = NULL;
+	return 0;
+}
+
 int restore_restart_block(struct ckpt_ctx *ctx)
 {
 	struct ckpt_hdr_restart_block *h;
@@ -709,6 +853,10 @@ int restore_task(struct ckpt_ctx *ctx)
 		goto out;
 	ret = restore_cpu(ctx);
 	ckpt_debug("cpu %d\n", ret);
+	if (ret < 0)
+		goto out;
+	ret = restore_creds(ctx);
+	ckpt_debug("creds: ret %d\n", ret);
  out:
 	return ret;
 }
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 7005227..db47d47 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -97,6 +97,7 @@ extern void *ckpt_obj_fetch(struct ckpt_ctx *ctx, int objref,
 			    enum obj_type type);
 extern int ckpt_obj_lookup_add(struct ckpt_ctx *ctx, void *ptr,
 			       enum obj_type type, int *first);
+extern int obj_lookup(struct ckpt_ctx *ctx, void *ptr);
 extern int ckpt_obj_insert(struct ckpt_ctx *ctx, void *ptr, int objref,
 			   enum obj_type type);
 
@@ -163,6 +164,16 @@ extern int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 extern int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 			       struct ckpt_hdr_file *h);
 
+/* credentials */
+int checkpoint_groupinfo(struct ckpt_ctx *ctx, void *ptr);
+int checkpoint_userns(struct ckpt_ctx *ctx, void *ptr);
+int checkpoint_user(struct ckpt_ctx *ctx, void *ptr);
+int checkpoint_cred(struct ckpt_ctx *ctx, void *ptr);
+void *restore_groupinfo(struct ckpt_ctx *ctx);
+void *restore_userns(struct ckpt_ctx *ctx);
+void *restore_user(struct ckpt_ctx *ctx);
+void *restore_cred(struct ckpt_ctx *ctx);
+
 /* memory */
 extern void ckpt_pgarr_free(struct ckpt_ctx *ctx);
 
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 37f2d4d..02f874e 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -57,6 +57,11 @@ enum {
 	CKPT_HDR_UTS_NS,
 	CKPT_HDR_IPC_NS,
 	CKPT_HDR_CAPABILITIES,
+	CKPT_HDR_USER_NS,
+	CKPT_HDR_CRED,
+	CKPT_HDR_USER,
+	CKPT_HDR_GROUPINFO,
+	CKPT_HDR_TASK_CREDS,
 
 	/* 201-299: reserved for arch-dependent */
 
@@ -105,6 +110,10 @@ enum obj_type {
 	CKPT_OBJ_NS,
 	CKPT_OBJ_UTS_NS,
 	CKPT_OBJ_IPC_NS,
+	CKPT_OBJ_USER_NS,
+	CKPT_OBJ_CRED,
+	CKPT_OBJ_USER,
+	CKPT_OBJ_GROUPINFO,
 	CKPT_OBJ_MAX
 };
 
@@ -164,6 +173,11 @@ struct ckpt_hdr_task {
 	__u32 exit_code;
 	__u32 exit_signal;
 
+#ifdef CONFIG_AUDITSYSCALL
+	/* would audit want to track the checkpointed ids,
+	   or (more likely) who actually restarted? */
+#endif
+
 	__u32 task_comm_len;
 } __attribute__((aligned(8)));
 
@@ -177,6 +191,52 @@ struct ckpt_capabilities {
 	__u32 padding;
 } __attribute__((aligned(8)));
 
+struct ckpt_hdr_task_creds {
+	struct ckpt_hdr h;
+	__s32 cred_ref;
+	__s32 ecred_ref;
+} __attribute__((aligned(8)));
+
+struct ckpt_hdr_cred {
+	struct ckpt_hdr h;
+	__u32 uid, suid, euid, fsuid;
+	__u32 gid, sgid, egid, fsgid;
+	__s32 user_ref;
+	__s32 groupinfo_ref;
+	struct ckpt_capabilities cap_s;
+} __attribute__((aligned(8)));
+
+struct ckpt_hdr_groupinfo {
+	struct ckpt_hdr h;
+	__u32 ngroups;
+	/*
+	 * This is followed by ngroups __u32s
+	 */
+	__u32 groups[0];
+} __attribute__((aligned(8)));
+
+/*
+ * todo - keyrings and LSM
+ * These may be better done with userspace help though
+ */
+struct ckpt_hdr_user_struct {
+	struct ckpt_hdr h;
+	__u32 uid;
+	__s32 userns_ref;
+} __attribute__((aligned(8)));
+
+/*
+ * The user-struct mostly tracks system resource usage.
+ * Most of its contents therefore will simply be set
+ * correctly as restart opens resources
+ */
+#define CKPT_USERNS_INIT 1
+struct ckpt_hdr_user_ns {
+	struct ckpt_hdr h;
+	__u32 flags;
+	__s32 creator_ref;
+} __attribute__((aligned(8)));
+
 /* namespaces */
 struct ckpt_hdr_task_ns {
 	struct ckpt_hdr h;
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index df88c55..452cd57 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -65,6 +65,7 @@ struct ckpt_ctx {
 	atomic_t tasks_count;		/* sync of tasks: used to coordinate */
 	struct completion complete;	/* container root and other tasks on */
 	wait_queue_head_t waitq;	/* start, end, and restart ordering */
+	struct cred *realcred, *ecred;	/* tmp storage for cred at restart */
 };
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 2ffffbe..e3269d5 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -77,6 +77,14 @@ extern int groups_search(const struct group_info *, gid_t);
 extern int in_group_p(gid_t);
 extern int in_egroup_p(gid_t);
 
+#ifdef CONFIG_CHECKPOINT
+struct ckpt_ctx;
+int checkpoint_write_groupinfo(struct ckpt_ctx *, struct group_info *);
+struct group_info *restore_read_groupinfo(struct ckpt_ctx *);
+int checkpoint_write_user(struct ckpt_ctx *, struct user_struct *);
+struct user_struct *restore_read_user(struct ckpt_ctx *);
+#endif
+
 /*
  * The common credentials for a thread group
  * - shared by CLONE_THREAD
@@ -352,4 +360,9 @@ int cred_setresgid(struct cred *new, gid_t rgid, gid_t egid, gid_t sgid);
 int cred_setfsuid(struct cred *new, uid_t uid, uid_t *old_fsuid);
 int cred_setfsgid(struct cred *new, gid_t gid, gid_t *old_fsgid);
 
+#ifdef CONFIG_CHECKPOINT
+int checkpoint_write_cred(struct ckpt_ctx *, const struct cred *);
+struct cred *restore_read_cred(struct ckpt_ctx *);
+#endif
+
 #endif /* _LINUX_CRED_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index a2b82d5..3eeee40 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -62,4 +62,10 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #endif
 
+#ifdef CONFIG_CHECKPOINT
+struct ckpt_ctx;
+int checkpoint_write_userns(struct ckpt_ctx *, struct user_namespace *);
+struct user_namespace *restore_read_userns(struct ckpt_ctx *);
+#endif
+
 #endif /* _LINUX_USER_H */
diff --git a/kernel/cred.c b/kernel/cred.c
index a017399..6ef75a1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -16,6 +16,7 @@
 #include <linux/init_task.h>
 #include <linux/security.h>
 #include <linux/cn_proc.h>
+#include <linux/checkpoint.h>
 #include "cred-internals.h"
 
 static struct kmem_cache *cred_jar;
@@ -703,3 +704,115 @@ int cred_setfsgid(struct cred *new, gid_t gid, gid_t *old_fsgid)
 	}
 	return -EPERM;
 }
+
+#ifdef CONFIG_CHECKPOINT
+int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
+{
+	int ret;
+	int groupinfo_ref, user_ref;
+	struct ckpt_hdr_cred *h;
+
+	groupinfo_ref = checkpoint_obj(ctx, cred->group_info,
+					CKPT_OBJ_GROUPINFO);
+	if (groupinfo_ref < 0)
+		return groupinfo_ref;
+	user_ref = checkpoint_obj(ctx, cred->user, CKPT_OBJ_USER);
+	if (user_ref < 0)
+		return user_ref;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_CRED);
+	if (!h)
+		return -ENOMEM;
+
+	h->uid = cred->uid;
+	h->suid = cred->suid;
+	h->euid = cred->euid;
+	h->fsuid = cred->fsuid;
+
+	h->gid = cred->gid;
+	h->sgid = cred->sgid;
+	h->egid = cred->egid;
+	h->fsgid = cred->fsgid;
+
+	checkpoint_capabilities(&h->cap_s, cred);
+
+	h->user_ref = user_ref;
+	h->groupinfo_ref = groupinfo_ref;
+
+	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
+}
+
+struct cred *restore_read_cred(struct ckpt_ctx *ctx)
+{
+	struct cred *cred;
+	struct ckpt_hdr_cred *h;
+	struct user_struct *user;
+	struct group_info *groupinfo;
+	int ret = -EINVAL;
+	uid_t olduid;
+	gid_t oldgid;
+	int i;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_CRED);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+
+	cred = prepare_creds();
+	if (!cred)
+		goto error;
+
+
+	/* Do we care if the target user and target group were compatible?
+	 * Probably.  But then, we can't do any setuid without CAP_SETUID,
+	 * so we must have been privileged to abuse it... */
+	groupinfo = ckpt_obj_fetch(ctx, h->groupinfo_ref, CKPT_OBJ_GROUPINFO);
+	if (IS_ERR(groupinfo))
+		goto err_putcred;
+	user = ckpt_obj_fetch(ctx, h->user_ref, CKPT_OBJ_USER);
+	if (IS_ERR(user))
+		goto err_putcred;
+
+	/*
+	 * TODO: this check should go into the common helper in
+	 * kernel/sys.c, and should account for user namespaces
+	 */
+	if (!capable(CAP_SETGID))
+		for (i = 0; i < groupinfo->ngroups; i++) {
+			if (!in_egroup_p(GROUP_AT(groupinfo, i)))
+				goto err_putcred;
+		}
+	ret = set_groups(cred, groupinfo);
+	if (ret < 0)
+		goto err_putcred;
+	free_uid(cred->user);
+	cred->user = get_uid(user);
+	ret = cred_setresuid(cred, h->uid, h->euid, h->suid);
+	if (ret < 0)
+		goto err_putcred;
+	ret = cred_setfsuid(cred, h->fsuid, &olduid);
+	if (olduid != h->fsuid && ret < 0)
+		goto err_putcred;
+	ret = cred_setresgid(cred, h->gid, h->egid, h->sgid);
+	if (ret < 0)
+		goto err_putcred;
+	ret = cred_setfsgid(cred, h->fsgid, &oldgid);
+	if (oldgid != h->fsgid && ret < 0)
+		goto err_putcred;
+	ret = restore_capabilities(&h->cap_s, cred);
+	if (ret)
+		goto err_putcred;
+
+	ckpt_hdr_put(ctx, h);
+	return cred;
+
+err_putcred:
+	abort_creds(cred);
+error:
+	ckpt_hdr_put(ctx, h);
+	return ERR_PTR(ret);
+}
+
+#endif
diff --git a/kernel/groups.c b/kernel/groups.c
index 2b45b2e..74db0ae 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -6,6 +6,7 @@
 #include <linux/slab.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/checkpoint.h>
 #include <asm/uaccess.h>
 
 /* init to 2 - one for init_task, one to ensure it is never freed */
@@ -286,3 +287,61 @@ int in_egroup_p(gid_t grp)
 }
 
 EXPORT_SYMBOL(in_egroup_p);
+
+#ifdef CONFIG_CHECKPOINT
+int checkpoint_write_groupinfo(struct ckpt_ctx *ctx, struct group_info *g)
+{
+	int ret, i, size;
+	struct ckpt_hdr_groupinfo *h;
+
+	size = sizeof(*h) + g->ngroups * sizeof(__u32);
+	h = ckpt_hdr_get_type(ctx, size, CKPT_HDR_GROUPINFO);
+	if (!h)
+		return -ENOMEM;
+
+	h->ngroups = g->ngroups;
+	for (i = 0; i < g->ngroups; i++)
+		h->groups[i] = GROUP_AT(g, i);
+
+	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
+}
+
+/*
+ * TODO - switch to reading in smaller blocks?
+ */
+#define MAX_GROUPINFO_SIZE (sizeof(*h)+NGROUPS_MAX*sizeof(gid_t))
+struct group_info *restore_read_groupinfo(struct ckpt_ctx *ctx)
+{
+	struct group_info *g;
+	struct ckpt_hdr_groupinfo *h;
+	int i;
+
+	h = ckpt_read_buf_type(ctx, MAX_GROUPINFO_SIZE, CKPT_HDR_GROUPINFO);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+
+	g = ERR_PTR(-EINVAL);
+	if (h->ngroups > NGROUPS_MAX)
+		goto out;
+
+	for (i = 1; i < h->ngroups; i++)
+		if (h->groups[i-1] >= h->groups[i])
+			goto out;
+
+	g = groups_alloc(h->ngroups);
+	if (!g) {
+		g = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	for (i = 0; i < h->ngroups; i++)
+		GROUP_AT(g, i) = h->groups[i];
+
+out:
+	ckpt_hdr_put(ctx, h);
+	return g;
+}
+
+#endif
diff --git a/kernel/user.c b/kernel/user.c
index 850e0ba..97f13e2 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/user_namespace.h>
+#include <linux/checkpoint.h>
 #include "cred-internals.h"
 
 struct user_namespace init_user_ns = {
@@ -497,3 +498,149 @@ static int __init uid_cache_init(void)
 }
 
 module_init(uid_cache_init);
+
+#ifdef CONFIG_CHECKPOINT
+/*
+ * write the user struct
+ * TODO keyring will need to be dumped
+ *
+ * Here is what we're doing.  Remember a task can do clone(CLONE_NEWUSER)
+ * resulting in a cloned task in a new user namespace, with uid 0 in that
+ * new user_ns.  In that case, the parent's user (uid+user_ns) is the
+ * 'creator' of the new user_ns.
+ * Here, we call the user_ns of the ctx->root_task the 'root_ns'.  When we
+ * checkpoint a user-struct, we must store the chain of creators.  We
+ * must not do so recursively, this being the kernel.  In
+ * checkpoint_write_user() we walk and record in memory the list of creators up
+ * to either the latest user_struct which has already been saved, or the
+ * root_ns.  Then we walk that chain backward, writing out the user_ns and
+ * user_struct to the checkpoint image.
+ */
+#define UNSAVED_STRIDE 50
+int checkpoint_write_user(struct ckpt_ctx *ctx, struct user_struct *u)
+{
+	struct user_namespace *ns, *root_ns;
+	struct ckpt_hdr_user_struct *h;
+	int ns_objref;
+	int ret, i, unsaved_ns_nr = 0;
+	struct user_struct *save_u;
+	struct user_struct **unsaved_creators;
+	int step = 1, size;
+
+	/* if we've already saved the userns, then life is good */
+	ns_objref = obj_lookup(ctx, u->user_ns);
+	if (ns_objref)
+		goto write_user;
+
+	root_ns = task_cred_xxx(ctx->root_task, user)->user_ns;
+
+	if (u->user_ns == root_ns)
+		goto save_last_ns;
+
+	size = UNSAVED_STRIDE*sizeof(struct user_struct *);
+	unsaved_creators = kmalloc(size, GFP_KERNEL);
+	if (!unsaved_creators)
+		return -ENOMEM;
+	save_u = u;
+	do {
+		ns = save_u->user_ns;
+		save_u = ns->creator;
+		if (obj_lookup(ctx, save_u))
+			goto found;
+		unsaved_creators[unsaved_ns_nr++] = save_u;
+		if (unsaved_ns_nr == step * UNSAVED_STRIDE) {
+			step++;
+			size = step*UNSAVED_STRIDE*sizeof(struct user_struct *);
+			unsaved_creators = krealloc(unsaved_creators, size,
+							GFP_KERNEL);
+			if (!unsaved_creators)
+				return -ENOMEM;
+		}
+	} while (ns != root_ns);
+
+found:
+	for (i = unsaved_ns_nr-1; i >= 0; i--) {
+		ret = checkpoint_obj(ctx, unsaved_creators[i], CKPT_OBJ_USER);
+		if (ret < 0) {
+			kfree(unsaved_creators);
+			return ret;
+		}
+	}
+	kfree(unsaved_creators);
+
+save_last_ns:
+	ns_objref = checkpoint_obj(ctx, u->user_ns, CKPT_OBJ_USER_NS);
+	if (ns_objref < 0)
+		return ns_objref;
+
+write_user:
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_USER);
+	if (!h)
+		return -ENOMEM;
+
+	h->uid = u->uid;
+	h->userns_ref = ns_objref;
+
+	/* write out the user_struct */
+	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
+}
+
+static int may_setuid(struct user_namespace *ns, uid_t uid)
+{
+	/*
+	 * this next check will one day become
+	 * if capable(CAP_SETUID, ns) return 1;
+	 * followed by uid_equiv(current_userns, current_uid, ns, uid)
+	 * instead of just uids.
+	 */
+	if (capable(CAP_SETUID))
+		return 1;
+
+	/*
+	 * this may be overly strict, but since we might end up
+	 * restarting a privileged program here, we do not want
+	 * someone with only CAP_SYS_ADMIN but no CAP_SETUID to
+	 * be able to create random userids even in a userns he
+	 * created.
+	 */
+	if (current_user()->user_ns != ns)
+		return 0;
+	if (current_uid() == uid ||
+		current_euid() == uid ||
+		current_suid() == uid)
+		return 1;
+	return 0;
+}
+
+struct user_struct *restore_read_user(struct ckpt_ctx *ctx)
+{
+	struct user_struct *u;
+	struct user_namespace *ns;
+	struct ckpt_hdr_user_struct *h;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_USER);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+
+	ns = ckpt_obj_fetch(ctx, h->userns_ref, CKPT_OBJ_USER_NS);
+	if (IS_ERR(ns)) {
+		u = ERR_PTR(PTR_ERR(ns));
+		goto out;
+	}
+
+	if (!may_setuid(ns, h->uid)) {
+		u = ERR_PTR(-EPERM);
+		goto out;
+	}
+	u = alloc_uid(ns, h->uid);
+	if (!u)
+		u = ERR_PTR(-EINVAL);
+
+out:
+	ckpt_hdr_put(ctx, h);
+	return u;
+}
+#endif
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e624b0f..857cb3d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/checkpoint.h>
 #include <linux/cred.h>
 
 static struct user_namespace *_new_user_ns(struct user_struct *creator,
@@ -103,3 +104,88 @@ void free_user_ns(struct kref *kref)
 	schedule_work(&ns->destroyer);
 }
 EXPORT_SYMBOL(free_user_ns);
+
+#ifdef CONFIG_CHECKPOINT
+/*
+ * checkpoint_write_userns() is only called from
+ * checkpoint_write_user().  When called, we always know that
+ * either:
+ *   1. This is the root_ns (user_ns of the ctx->root_task),
+ *	in which case we don't store a creator, but rather
+ *	set the CKPT_USERNS_INIT flag.
+ * or
+ *   2. The creator has already been written out to the
+ *	checkpoint image (and saved in the objhash)
+ */
+int checkpoint_write_userns(struct ckpt_ctx *ctx,
+				   struct user_namespace *ns)
+{
+	struct ckpt_hdr_user_ns *h;
+	int creator_ref = 0;
+	unsigned int flags = 0;
+	struct user_namespace *root_ns;
+	int ret;
+
+	root_ns = task_cred_xxx(ctx->root_task, user)->user_ns;
+	if (ns == root_ns)
+		flags = CKPT_USERNS_INIT;
+	else
+		creator_ref = obj_lookup(ctx, ns->creator);
+	if (!flags && !creator_ref)
+		return -EINVAL;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_USER_NS);
+	if (!h)
+		return -ENOMEM;
+	h->creator_ref = creator_ref;
+	h->flags = flags;
+	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
+}
+
+struct user_namespace *restore_read_userns(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_user_ns *h;
+	struct user_namespace *ns;
+	struct user_struct *new_root, *creator;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_USER_NS);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+	if (h->flags & ~CKPT_USERNS_INIT)  /* only 1 valid flag */
+		return ERR_PTR(-EINVAL);
+	if (h->flags & CKPT_USERNS_INIT) {
+		ckpt_hdr_put(ctx, h);
+		/* grab an extra ref because objhash will drop one */
+		return get_user_ns(current_user_ns());
+	}
+	creator = ckpt_obj_fetch(ctx, h->creator_ref, CKPT_OBJ_USER);
+	ckpt_hdr_put(ctx, h);
+
+	if (IS_ERR(creator))
+		return ERR_PTR(-EINVAL);
+	ns = new_user_ns(creator, &new_root);
+
+	if (IS_ERR(ns))
+		return ns;
+
+	/* new_user_ns() doesn't bump creator's refcount */
+	get_uid(creator);
+
+	/* objhash will drop new_ns refcount, but new_root
+	 * should hold a ref */
+	get_user_ns(ns);
+
+	/*
+	 * Free the new root user.  If we actually needed it,
+	 * then it will show up later in the checkpoint image
+	 * The objhash will keep the userns pinned until then.
+	 */
+	free_uid(new_root);
+
+	return ns;
+}
+
+#endif
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 08/10] cr: restore file->f_cred
       [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (5 preceding siblings ...)
  2009-06-10  1:46   ` [PATCH 07/10] cr: checkpoint and restore task credentials Serge E. Hallyn
@ 2009-06-10  1:46   ` Serge E. Hallyn
  2009-06-10  1:46     ` Serge E. Hallyn
  2009-06-10  1:47     ` Serge E. Hallyn
  8 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:46 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, Alexey Dobriyan, Andrew Morgan

Restore a file's f_cred.  This is set to the cred of the task doing
the open, so often it will be the same as that of the restarted task.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/files.c             |   16 ++++++++++++++--
 include/linux/checkpoint_hdr.h |    2 +-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/checkpoint/files.c b/checkpoint/files.c
index 5ca2e6c..5be7d1b 100644
--- a/checkpoint/files.c
+++ b/checkpoint/files.c
@@ -151,7 +151,11 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 	h->f_pos = file->f_pos;
 	h->f_version = file->f_version;
 
-	/* FIX: need also file->uid, file->gid, file->f_owner, etc */
+	h->f_credref = checkpoint_obj(ctx, file->f_cred, CKPT_OBJ_CRED);
+	if (h->f_credref < 0)
+		return h->f_credref;
+
+	/* FIX: need also file->f_owner, etc */
 
 	return 0;
 }
@@ -440,8 +444,16 @@ int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 			struct ckpt_hdr_file *h)
 {
 	int ret;
+	struct cred *cred;
+
+	/* FIX: need to restore owner etc */
 
-	/* FIX: need to restore uid, gid, owner etc */
+	/* restore the cred */
+	cred = ckpt_obj_fetch(ctx, h->f_credref, CKPT_OBJ_CRED);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	put_cred(file->f_cred);
+	file->f_cred = get_cred(cred);
 
 	/* safe to set 1st arg (fd) to 0, as command is F_SETFL */
 	ret = vfs_fcntl(0, F_SETFL, h->f_flags & CKPT_SETFL_MASK, file);
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 02f874e..1a4033c 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -306,7 +306,7 @@ struct ckpt_hdr_file {
 	__u32 f_type;
 	__u32 f_mode;
 	__u32 f_flags;
-	__u32 _padding;
+	__s32 f_credref;
 	__u64 f_pos;
 	__u64 f_version;
 } __attribute__((aligned(8)));
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/10] cr: restore LSM credentials
  2009-06-10  1:44 [PATCH 01/10] cred: #include init.h in cred.h Serge E. Hallyn
@ 2009-06-10  1:46     ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:46 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, SELinux, Alexey Dobriyan, Andrew Morgan

Checkpoint and restore task and ipc struct ->security info.
(files->f_security yet to be done).

LSM contexts (string representations of obj->security) are
checkpointed as shared objects before any object referencing
them.  The object's checkpoint header struct has a reference
(h->sec_ref) to the shared object.  A NULL ->security is indicated
by h->sec_ref = -1.

At checkpoint time, for each obj->security to be checkpointed,
the LSM will be asked (once) to convert it to a string, in memory
which the checkpoint subsystem will kfree.  At restart time,
the LSM will first return some meaningful token given the
checkpointed string.  That token will be passed to per-object-type
restore functions (task_restore_context(), shm_restore_security(),
etc) where the LSM can determine based on the object type, the
caller, and the token, whether to allow the object restore, and
what value to actually assign to ->security.  In smack, the
token is the actual imported label.  In SELinux, it is a temporary
pointer to the sid which the checkpointed context referred to.

In smack, the checkpointed labels are used for both tasks and
ipc objects so long as the task calling sys_restart() has
CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
from current_security(), -EPERM is returned.

The basics of SELinux support are there (enough to demonstrate working
c/r with SELinux enforcing), but there will need to be new object
permissions for restore, so the precise nature of those needs to be
discussed.  For instance, do we want to define process:restore
and ipc_msg_msg:restore, in which case
        allow root_t user_t:process restore
would mean that root_t may restart a task and label it user_t?

Since we are potentially skipping several allowed domain transitions
(resulting in an illegal short-cut domain transition or type creation),
I have a fear that the only sane way to proceed would be to have
one all-powerful domain, checkpoint_restore_t, which can effectively
transition to any domain it wants to by (ab)using the checkpoint
image.

Or, perhaps we can define intermediate domains...  So if we want
user_t to be able to restart a server of type X_t, then we create
a X_restore_t type, allow user_t to transition to it using a
program which does sys_restart(), which in turn may transition to
X_t?

Obviously this needs discussion.

Tomoyo has not been updated or tested.  Given its path-based
domain name model, I'm not sure what the tomoyo maintainers
would prefer - that the restart program be reflected in the
domain name, or that the original domain name be restored.

This is the first posting of this patch.  There are testcases
in git://git.sr71.net/~hallyn/cr_tests.git , in particular
under (the slightly mis-named) cr_tests/userns/ directory.
All pass fine with all LSMs (except Tomoyo, not tested).

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/objhash.c           |   56 ++++++++++++
 include/linux/checkpoint_hdr.h |   15 +++
 include/linux/security.h       |  105 +++++++++++++++++++++
 ipc/checkpoint.c               |   19 +++--
 ipc/checkpoint_msg.c           |   30 ++++++-
 ipc/checkpoint_sem.c           |   12 +++-
 ipc/checkpoint_shm.c           |   12 +++-
 ipc/util.h                     |    3 +-
 kernel/cred.c                  |   29 ++++++-
 security/capability.c          |   47 ++++++++++
 security/security.c            |   39 ++++++++
 security/selinux/hooks.c       |  196 ++++++++++++++++++++++++++++++++++++++++
 security/smack/smack_lsm.c     |  135 +++++++++++++++++++++++++++
 13 files changed, 686 insertions(+), 12 deletions(-)

diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 84f003d..674b9b3 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -20,6 +20,7 @@
 #include <linux/user_namespace.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
+#include <linux/security.h>
 
 struct ckpt_obj;
 struct ckpt_obj_ops;
@@ -247,6 +248,52 @@ static int obj_groupinfo_users(void *ptr)
 	return atomic_read(&((struct group_info *) ptr)->usage);
 }
 
+/*
+ * checkpoint_security - checkpoint an LSM security context as a string
+ * @ctx: checkpoint context
+ * @ptr: the ->security value to checkpoint
+ *
+ * Ask the LSM for a string form of @ptr and write it to the checkpoint
+ * image.  Return 0 if the LSM returns no string, else the result of the
+ * write.  On error, return <0.
+ */
+#define MAX_STR_LEN 200
+static int checkpoint_security(struct ckpt_ctx *ctx, void *ptr)
+{
+	char *str;
+	int ret;
+	int len = 0;
+
+	str = security_context_to_str(ptr);
+	if (!str)
+		return 0;
+	if (IS_ERR(str))
+		return PTR_ERR(str);
+	len = strlen(str) + 1;
+	if (len > MAX_STR_LEN) {
+		printk(KERN_NOTICE "%s: security context too long\n",
+			__func__);
+		return -EINVAL;
+	}
+	ret = ckpt_write_obj_type(ctx, str, len, CKPT_HDR_SEC);
+	/* the LSM created a new string for us, now free it */
+	kfree(str);
+	return ret;
+}
+
+static void *restore_security(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_lsm *h;
+	void *security;
+
+	h = ckpt_read_buf_type(ctx, MAX_STR_LEN, CKPT_HDR_SEC);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+	security = security_context_from_str(h->str);
+	ckpt_hdr_put(ctx, h);
+	return security;
+}
+
 static struct ckpt_obj_ops ckpt_obj_ops[] = {
 	/* ignored object */
 	{
@@ -364,6 +411,15 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
 		.checkpoint = checkpoint_groupinfo,
 		.restore = restore_groupinfo,
 	},
+	/* struct ckpt_sec */
+	{
+		.obj_name = "SECURITY",
+		.obj_type = CKPT_OBJ_SEC,
+		.ref_drop = obj_no_drop,
+		.ref_grab = obj_no_grab,
+		.checkpoint = checkpoint_security,
+		.restore = restore_security,
+	},
 };
 
 
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 1a4033c..a447b5a 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -45,6 +45,7 @@ enum {
 	CKPT_HDR_BUFFER,
 	CKPT_HDR_STRING,
 	CKPT_HDR_OBJREF,
+	CKPT_HDR_SEC,
 
 	CKPT_HDR_TREE = 101,
 	CKPT_HDR_TASK,
@@ -114,6 +115,7 @@ enum obj_type {
 	CKPT_OBJ_CRED,
 	CKPT_OBJ_USER,
 	CKPT_OBJ_GROUPINFO,
+	CKPT_OBJ_SEC,
 	CKPT_OBJ_MAX
 };
 
@@ -181,6 +183,13 @@ struct ckpt_hdr_task {
 	__u32 task_comm_len;
 } __attribute__((aligned(8)));
 
+/* LSM security contexts (shared) */
+struct ckpt_hdr_lsm {
+	struct ckpt_hdr h;
+	/* followed by `len' characters */
+	char str[];
+} __attribute__((aligned(8)));
+
 /* Posix capabilities */
 struct ckpt_capabilities {
 	__u32 cap_i_0, cap_i_1; /* inheritable set */
@@ -204,6 +213,8 @@ struct ckpt_hdr_cred {
 	__s32 user_ref;
 	__s32 groupinfo_ref;
 	struct ckpt_capabilities cap_s;
+	__s32 sec_ref;
+	__u32 padding;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_groupinfo {
@@ -418,6 +429,8 @@ struct ckpt_hdr_ipc_perms {
 	__u32 mode;
 	__u32 _padding;
 	__u64 seq;
+	__s32 sec_ref;
+	__u32 padding;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_ipc_shm {
@@ -451,6 +464,8 @@ struct ckpt_hdr_ipc_msg_msg {
 	struct ckpt_hdr h;
 	__s32 m_type;
 	__u32 m_ts;
+	__s32 sec_ref;
+	__u32 padding;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_ipc_sem {
diff --git a/include/linux/security.h b/include/linux/security.h
index d5fd616..5625553 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1089,6 +1089,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	created.
  *	@msg contains the message structure to be modified.
  *	Return 0 if operation was successful and permission is granted.
+ * @msg_msg_restore_security:
+ *	Allocate and attach a security structure to the msg->security field
+ *	during sys_restart().
+ *	@msg contains the message structure to be modified.
+ *	@stored contains a string representing the checkpointed context
+ *	Return 0 if operation was successful and permission is granted.
  * @msg_msg_free_security:
  *	Deallocate the security structure for this message.
  *	@msg contains the message structure to be modified.
@@ -1101,6 +1107,14 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	NULL when the structure is first created.
  *	@msq contains the message queue structure to be modified.
  *	Return 0 if operation was successful and permission is granted.
+ * @msg_queue_restore_security:
+ *	Allocate and attach a security structure to the
+ *	msq->q_perm.security field when a msgq is being restored, based on the
+ *	checkpointed context.
+ *	@msq contains the message queue structure to be modified.
+ *	@stored contains a string representation of the checkpointed
+ *	context.
+ *	Return 0 if operation was successful and permission is granted.
  * @msg_queue_free_security:
  *	Deallocate security structure for this message queue.
  *	@msq contains the message queue structure to be modified.
@@ -1146,6 +1160,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	first created.
  *	@shp contains the shared memory structure to be modified.
  *	Return 0 if operation was successful and permission is granted.
+ * @shm_restore_security:
+ *	Allocate and attach a security structure to the shp->shm_perm.security
+ *	field during sys_restart().
+ *	@shp contains the shared memory structure to be modified.
+ *	@stored contains the checkpoint security context string.
+ *	Return 0 if operation was successful and permission is granted.
  * @shm_free_security:
  *	Deallocate the security struct for this memory segment.
  *	@shp contains the shared memory structure to be modified.
@@ -1181,6 +1201,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	first created.
  *	@sma contains the semaphore structure
  *	Return 0 if operation was successful and permission is granted.
+ * @sem_restore_security:
+ *	Allocate and attach a security structure to the sma->sem_perm.security
+ *	field during sys_restart().
+ *	@sma contains the semaphore structure
+ *	@stored contains the string representation of checkpointed ->security.
+ *	Return 0 if operation was successful and permission is granted.
  * @sem_free_security:
  *	deallocate security struct for this semaphore
  *	@sma contains the semaphore structure.
@@ -1331,6 +1357,31 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	audit_rule_init.
  *	@rule contains the allocated rule
  *
+ * Security hooks for checkpoint/restore of security contexts
+ *
+ * @context_to_str:
+ *	Given a void *->security, return a char* which will be meaningful
+ *	at the restart.  For most LSMs, this will likely be
+ *	"(char *)ptr"
+ *	@ptr: The ->security field to convert to a string.
+ *	@str: The string representation of @ptr.
+ *	Return a valid char*, or < 0 if error.
+ *
+ * @context_from_str:
+ *	Given a char *, return a valid ->security which can be assigned
+ *	to an object.
+ *	@str: The string representation of a context
+ *	Return < 0 if error.  For instance -EINVAL if the current policy
+ *	has no such context.
+ *
+ * @task_restore_context:
+ *	Choose a valid context for a task being restored from checkpoint
+ *	image.
+ *	@orig: The struct cred of the task which called sys_restart()
+ *	@stored: The context stored for the checkpointed task.
+ *	@f_security: The context of the checkpoint file.
+ *	Return 0 if cred was updated, < 0 if restart should be denied
+ *
  * This is the main security structure.
  */
 struct security_operations {
@@ -1498,9 +1549,11 @@ struct security_operations {
 	void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid);
 
 	int (*msg_msg_alloc_security) (struct msg_msg *msg);
+	int (*msg_msg_restore_security) (struct msg_msg *msg, void *stored);
 	void (*msg_msg_free_security) (struct msg_msg *msg);
 
 	int (*msg_queue_alloc_security) (struct msg_queue *msq);
+	int (*msg_queue_restore_security) (struct msg_queue *msq, void *stored);
 	void (*msg_queue_free_security) (struct msg_queue *msq);
 	int (*msg_queue_associate) (struct msg_queue *msq, int msqflg);
 	int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd);
@@ -1512,6 +1565,7 @@ struct security_operations {
 				 long type, int mode);
 
 	int (*shm_alloc_security) (struct shmid_kernel *shp);
+	int (*shm_restore_security) (struct shmid_kernel *shp, void *stored);
 	void (*shm_free_security) (struct shmid_kernel *shp);
 	int (*shm_associate) (struct shmid_kernel *shp, int shmflg);
 	int (*shm_shmctl) (struct shmid_kernel *shp, int cmd);
@@ -1519,6 +1573,7 @@ struct security_operations {
 			  char __user *shmaddr, int shmflg);
 
 	int (*sem_alloc_security) (struct sem_array *sma);
+	int (*sem_restore_security) (struct sem_array *sma, void *stored);
 	void (*sem_free_security) (struct sem_array *sma);
 	int (*sem_associate) (struct sem_array *sma, int semflg);
 	int (*sem_semctl) (struct sem_array *sma, int cmd);
@@ -1609,6 +1664,11 @@ struct security_operations {
 				 struct audit_context *actx);
 	void (*audit_rule_free) (void *lsmrule);
 #endif /* CONFIG_AUDIT */
+
+	char *(*context_to_str) (void *security);
+	void *(*context_from_str) (char *str);
+	int (*task_restore_context) (struct cred *cred, void *stored,
+				       void *f_security);
 };
 
 /* prototypes */
@@ -1747,8 +1807,10 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode);
 int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
 void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
 int security_msg_msg_alloc(struct msg_msg *msg);
+int security_msg_msg_restore(struct msg_msg *msg, void *stored);
 void security_msg_msg_free(struct msg_msg *msg);
 int security_msg_queue_alloc(struct msg_queue *msq);
+int security_msg_queue_restore(struct msg_queue *msq, void *stored);
 void security_msg_queue_free(struct msg_queue *msq);
 int security_msg_queue_associate(struct msg_queue *msq, int msqflg);
 int security_msg_queue_msgctl(struct msg_queue *msq, int cmd);
@@ -1757,11 +1819,13 @@ int security_msg_queue_msgsnd(struct msg_queue *msq,
 int security_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 			      struct task_struct *target, long type, int mode);
 int security_shm_alloc(struct shmid_kernel *shp);
+int security_shm_restore(struct shmid_kernel *shp, void *stored);
 void security_shm_free(struct shmid_kernel *shp);
 int security_shm_associate(struct shmid_kernel *shp, int shmflg);
 int security_shm_shmctl(struct shmid_kernel *shp, int cmd);
 int security_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg);
 int security_sem_alloc(struct sem_array *sma);
+int security_sem_restore(struct sem_array *sma, void *stored);
 void security_sem_free(struct sem_array *sma);
 int security_sem_associate(struct sem_array *sma, int semflg);
 int security_sem_semctl(struct sem_array *sma, int cmd);
@@ -1775,6 +1839,10 @@ int security_netlink_recv(struct sk_buff *skb, int cap);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
+char *security_context_to_str(void *security);
+void *security_context_from_str(char *str);
+int security_task_restore_context(struct cred *cred, void *stored,
+				void *f_security);
 
 #else /* CONFIG_SECURITY */
 struct security_mnt_opts {
@@ -2393,6 +2461,11 @@ static inline int security_msg_msg_alloc(struct msg_msg *msg)
 	return 0;
 }
 
+static inline int security_msg_msg_restore(struct msg_msg *msg, void *stored)
+{
+	return 0;
+}
+
 static inline void security_msg_msg_free(struct msg_msg *msg)
 { }
 
@@ -2401,6 +2474,12 @@ static inline int security_msg_queue_alloc(struct msg_queue *msq)
 	return 0;
 }
 
+static inline int security_msg_queue_restore(struct msg_queue *msq,
+						void *stored)
+{
+	return 0;
+}
+
 static inline void security_msg_queue_free(struct msg_queue *msq)
 { }
 
@@ -2434,6 +2513,11 @@ static inline int security_shm_alloc(struct shmid_kernel *shp)
 	return 0;
 }
 
+static inline int security_shm_restore(struct shmid_kernel *shp, void *stored)
+{
+	return 0;
+}
+
 static inline void security_shm_free(struct shmid_kernel *shp)
 { }
 
@@ -2459,6 +2543,11 @@ static inline int security_sem_alloc(struct sem_array *sma)
 	return 0;
 }
 
+static inline int security_sem_restore(struct sem_array *sma, void *stored)
+{
+	return 0;
+}
+
 static inline void security_sem_free(struct sem_array *sma)
 { }
 
@@ -2517,6 +2606,22 @@ static inline int security_secctx_to_secid(const char *secdata,
 static inline void security_release_secctx(char *secdata, u32 seclen)
 {
 }
+
+static inline char *security_context_to_str(void *security)
+{
+	return NULL;
+}
+
+static inline void *security_context_from_str(char *str)
+{
+	return NULL;
+}
+
+static inline int security_task_restore_context(struct cred *cred,
+				void *stored, void *f_security)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
index bc77743..6da8ac8 100644
--- a/ipc/checkpoint.c
+++ b/ipc/checkpoint.c
@@ -27,7 +27,8 @@ static char *ipc_ind_to_str[] = { "sem", "msg", "shm" };
  * Checkpoint
  */
 
-int checkpoint_fill_ipc_perms(struct ckpt_hdr_ipc_perms *h,
+int checkpoint_fill_ipc_perms(struct ckpt_ctx *ctx,
+			      struct ckpt_hdr_ipc_perms *h,
 			      struct kern_ipc_perm *perm)
 {
 	if (ipcperms(perm, S_IROTH))
@@ -42,6 +43,13 @@ int checkpoint_fill_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 	h->mode = perm->mode & S_IRWXUGO;
 	h->seq = perm->seq;
 
+	if (perm->security) {
+		h->sec_ref = checkpoint_obj(ctx, perm->security, CKPT_OBJ_SEC);
+		if (h->sec_ref < 0)
+			return h->sec_ref;
+	} else
+		h->sec_ref = -1;
+
 	return 0;
 }
 
@@ -169,13 +177,10 @@ int restore_load_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 	perm->mode = h->mode;
 	perm->seq = h->seq;
 	/*
-	 * Todo: restore perm->security.
-	 * At the moment it gets set by security_x_alloc() called through
-	 * ipcget()->ipcget_public()->ops-.getnew (->nequeue for instance)
-	 * We will want to ask the LSM to consider resetting the
-	 * checkpointed ->security, based on current_security(),
-	 * the checkpointed ->security, and the checkpoint file context.
+	 * The checkpointed ->security value will be restored
+	 * (and verified) by our caller.
 	 */
+	perm->security = NULL;
 
 	return 0;
 }
diff --git a/ipc/checkpoint_msg.c b/ipc/checkpoint_msg.c
index fb1a61e..025e33f 100644
--- a/ipc/checkpoint_msg.c
+++ b/ipc/checkpoint_msg.c
@@ -18,6 +18,7 @@
 #include <linux/syscalls.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/security.h>
 
 #include "util.h"
 
@@ -36,7 +37,7 @@ static int fill_ipc_msg_hdr(struct ckpt_ctx *ctx,
 
 	ipc_lock_by_ptr(&msq->q_perm);
 
-	ret = checkpoint_fill_ipc_perms(&h->perms, &msq->q_perm);
+	ret = checkpoint_fill_ipc_perms(ctx, &h->perms, &msq->q_perm);
 	if (ret < 0)
 		goto unlock;
 
@@ -63,13 +64,20 @@ static int checkpoint_msg_contents(struct ckpt_ctx *ctx, struct msg_msg *msg)
 	struct msg_msgseg *seg;
 	int total, len;
 	int ret;
+	int sec_ref = -1;
 
+	if (msg->security) {
+		sec_ref = checkpoint_obj(ctx, msg->security, CKPT_OBJ_SEC);
+		if (sec_ref < 0)
+			return sec_ref;
+	}
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_IPC_MSG_MSG);
 	if (!h)
 		return -ENOMEM;
 
 	h->m_type = msg->m_type;
 	h->m_ts = msg->m_ts;
+	h->sec_ref = sec_ref;
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
@@ -175,10 +183,19 @@ static int load_ipc_msg_hdr(struct ckpt_ctx *ctx,
 			    struct msg_queue *msq)
 {
 	int ret = 0;
+	void *security = NULL;;
 
 	ret = restore_load_ipc_perms(&h->perms, &msq->q_perm);
 	if (ret < 0)
 		return ret;
+	if (h->perms.sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->perms.sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security))
+			return PTR_ERR(security);
+	}
+	ret = security_msg_queue_restore(msq, security);
+	if (ret)
+		return ret;
 
 	ckpt_debug("msq: lspid %d lrpid %d qnum %lld qbytes %lld\n",
 		 h->q_lspid, h->q_lrpid, h->q_qnum, h->q_qbytes);
@@ -200,6 +217,7 @@ static struct msg_msg *restore_msg_contents_one(struct ckpt_ctx *ctx, int *clen)
 	struct ckpt_hdr_ipc_msg_msg *h;
 	struct msg_msg *msg = NULL;
 	struct msg_msgseg *seg, **pseg;
+	void *security = NULL;
 	int total, len;
 	int ret;
 
@@ -222,6 +240,16 @@ static struct msg_msg *restore_msg_contents_one(struct ckpt_ctx *ctx, int *clen)
 	}
 	msg->next = NULL;
 	pseg = &msg->next;
+	if (h->sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security)) {
+			ret = PTR_ERR(security);
+			goto out;
+		}
+	}
+	ret = security_msg_msg_restore(msg, security);
+	if (ret)
+		goto out;
 
 	ret = _ckpt_read_buffer(ctx, (msg + 1), len);
 	if (ret < 0)
diff --git a/ipc/checkpoint_sem.c b/ipc/checkpoint_sem.c
index e6934dc..cd76bcc 100644
--- a/ipc/checkpoint_sem.c
+++ b/ipc/checkpoint_sem.c
@@ -18,6 +18,7 @@
 #include <linux/syscalls.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/security.h>
 
 #include <linux/msg.h>	/* needed for util.h that uses 'struct msg_msg' */
 #include "util.h"
@@ -37,7 +38,7 @@ static int fill_ipc_sem_hdr(struct ckpt_ctx *ctx,
 
 	ipc_lock_by_ptr(&sem->sem_perm);
 
-	ret = checkpoint_fill_ipc_perms(&h->perms, &sem->sem_perm);
+	ret = checkpoint_fill_ipc_perms(ctx, &h->perms, &sem->sem_perm);
 	if (ret < 0)
 		goto unlock;
 
@@ -112,10 +113,19 @@ static int load_ipc_sem_hdr(struct ckpt_ctx *ctx,
 			       struct sem_array *sem)
 {
 	int ret = 0;
+	void *security = NULL;
 
 	ret = restore_load_ipc_perms(&h->perms, &sem->sem_perm);
 	if (ret < 0)
 		return ret;
+	if (h->perms.sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->perms.sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security))
+			return PTR_ERR(security);
+	}
+	ret = security_sem_restore(sem, security);
+	if (ret)
+		return ret;
 
 	ckpt_debug("sem: nsems %u\n", h->sem_nsems);
 
diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
index 0d8eb14..cf100fe 100644
--- a/ipc/checkpoint_shm.c
+++ b/ipc/checkpoint_shm.c
@@ -22,6 +22,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/deferqueue.h>
+#include <linux/security.h>
 
 #include <linux/msg.h>	/* needed for util.h that uses 'struct msg_msg' */
 #include "util.h"
@@ -41,7 +42,7 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
 
 	ipc_lock_by_ptr(&shp->shm_perm);
 
-	ret = checkpoint_fill_ipc_perms(&h->perms, &shp->shm_perm);
+	ret = checkpoint_fill_ipc_perms(ctx, &h->perms, &shp->shm_perm);
 	if (ret < 0)
 		goto unlock;
 
@@ -148,10 +149,19 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
 			    struct shmid_kernel *shp)
 {
 	int ret;
+	void *security = NULL;
 
 	ret = restore_load_ipc_perms(&h->perms, &shp->shm_perm);
 	if (ret < 0)
 		return ret;
+	if (h->perms.sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->perms.sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security))
+			return PTR_ERR(security);
+	}
+	ret = security_shm_restore(shp, security);
+	if (ret)
+		return ret;
 
 	ckpt_debug("shm: cprid %d lprid %d segsz %lld mlock %d\n",
 		 h->shm_cprid, h->shm_lprid, h->shm_segsz, h->mlock_uid);
diff --git a/ipc/util.h b/ipc/util.h
index 020de7b..5976308 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -192,7 +192,8 @@ extern void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
 extern void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp);
 
 #ifdef CONFIG_CHECKPOINT
-extern int checkpoint_fill_ipc_perms(struct ckpt_hdr_ipc_perms *h,
+extern int checkpoint_fill_ipc_perms(struct ckpt_ctx *ctx,
+				     struct ckpt_hdr_ipc_perms *h,
 				     struct kern_ipc_perm *perm);
 extern int restore_load_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 				  struct kern_ipc_perm *perm);
diff --git a/kernel/cred.c b/kernel/cred.c
index 6ef75a1..c47d175 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -709,8 +709,9 @@ int cred_setfsgid(struct cred *new, gid_t gid, gid_t *old_fsgid)
 int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
 {
 	int ret;
-	int groupinfo_ref, user_ref;
+	int groupinfo_ref, user_ref, sec_ref = -1;
 	struct ckpt_hdr_cred *h;
+	void *security = NULL;
 
 	groupinfo_ref = checkpoint_obj(ctx, cred->group_info,
 					CKPT_OBJ_GROUPINFO);
@@ -719,6 +720,16 @@ int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
 	user_ref = checkpoint_obj(ctx, cred->user, CKPT_OBJ_USER);
 	if (user_ref < 0)
 		return user_ref;
+#ifdef CONFIG_SECURITY
+	/* should we checkpoint a 'lsm_na' field when !security ? */
+	security = cred->security;
+#endif
+
+	if (security) {
+		sec_ref = checkpoint_obj(ctx, security, CKPT_OBJ_SEC);
+		if (sec_ref < 0)
+			return sec_ref;
+	}
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_CRED);
 	if (!h)
@@ -738,6 +749,7 @@ int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
 
 	h->user_ref = user_ref;
 	h->groupinfo_ref = groupinfo_ref;
+	h->sec_ref = sec_ref;
 
 	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
 	ckpt_hdr_put(ctx, h);
@@ -751,6 +763,7 @@ struct cred *restore_read_cred(struct ckpt_ctx *ctx)
 	struct ckpt_hdr_cred *h;
 	struct user_struct *user;
 	struct group_info *groupinfo;
+	void *security = NULL;
 	int ret = -EINVAL;
 	uid_t olduid;
 	gid_t oldgid;
@@ -774,6 +787,20 @@ struct cred *restore_read_cred(struct ckpt_ctx *ctx)
 	user = ckpt_obj_fetch(ctx, h->user_ref, CKPT_OBJ_USER);
 	if (IS_ERR(user))
 		goto err_putcred;
+	if (h->sec_ref != -1)
+		security = ckpt_obj_fetch(ctx, h->sec_ref, CKPT_OBJ_SEC);
+	if (IS_ERR(security))
+		goto err_putcred;
+
+#ifdef CONFIG_SECURITY
+	/*
+	 * Ask LSM to reset original task->security if allowed
+	 */
+	ret = security_task_restore_context(cred, security,
+					ctx->file->f_security);
+	if (ret)
+		goto err_putcred;
+#endif
 
 	/*
 	 * TODO: this check should  go into the common helper in
diff --git a/security/capability.c b/security/capability.c
index 21b6cea..4e586a7 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -490,6 +490,11 @@ static int cap_msg_msg_alloc_security(struct msg_msg *msg)
 	return 0;
 }
 
+static int cap_msg_msg_restore_security(struct msg_msg *msg, void *stored)
+{
+	return 0;
+}
+
 static void cap_msg_msg_free_security(struct msg_msg *msg)
 {
 }
@@ -499,6 +504,12 @@ static int cap_msg_queue_alloc_security(struct msg_queue *msq)
 	return 0;
 }
 
+static int cap_msg_queue_restore_security(struct msg_queue *msq,
+					void *stored)
+{
+	return 0;
+}
+
 static void cap_msg_queue_free_security(struct msg_queue *msq)
 {
 }
@@ -530,6 +541,11 @@ static int cap_shm_alloc_security(struct shmid_kernel *shp)
 	return 0;
 }
 
+static int cap_shm_restore_security(struct shmid_kernel *shp, void *stored)
+{
+	return 0;
+}
+
 static void cap_shm_free_security(struct shmid_kernel *shp)
 {
 }
@@ -555,6 +571,11 @@ static int cap_sem_alloc_security(struct sem_array *sma)
 	return 0;
 }
 
+static int cap_sem_restore_security(struct sem_array *sma, void *stored)
+{
+	return 0;
+}
+
 static void cap_sem_free_security(struct sem_array *sma)
 {
 }
@@ -848,6 +869,23 @@ static void cap_audit_rule_free(void *lsmrule)
 }
 #endif /* CONFIG_AUDIT */
 
+/* checkpoint/restore hooks */
+static char *cap_context_to_str(void *security)
+{
+	return NULL;
+}
+
+static void *cap_context_from_str(char *str)
+{
+	return NULL;
+}
+
+static int cap_task_restore_context(struct cred *cred, void *stored,
+				      void *f_security)
+{
+	return 0;
+}
+
 struct security_operations default_security_ops = {
 	.name	= "default",
 };
@@ -976,19 +1014,23 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, ipc_permission);
 	set_to_cap_if_null(ops, ipc_getsecid);
 	set_to_cap_if_null(ops, msg_msg_alloc_security);
+	set_to_cap_if_null(ops, msg_msg_restore_security);
 	set_to_cap_if_null(ops, msg_msg_free_security);
 	set_to_cap_if_null(ops, msg_queue_alloc_security);
+	set_to_cap_if_null(ops, msg_queue_restore_security);
 	set_to_cap_if_null(ops, msg_queue_free_security);
 	set_to_cap_if_null(ops, msg_queue_associate);
 	set_to_cap_if_null(ops, msg_queue_msgctl);
 	set_to_cap_if_null(ops, msg_queue_msgsnd);
 	set_to_cap_if_null(ops, msg_queue_msgrcv);
 	set_to_cap_if_null(ops, shm_alloc_security);
+	set_to_cap_if_null(ops, shm_restore_security);
 	set_to_cap_if_null(ops, shm_free_security);
 	set_to_cap_if_null(ops, shm_associate);
 	set_to_cap_if_null(ops, shm_shmctl);
 	set_to_cap_if_null(ops, shm_shmat);
 	set_to_cap_if_null(ops, sem_alloc_security);
+	set_to_cap_if_null(ops, sem_restore_security);
 	set_to_cap_if_null(ops, sem_free_security);
 	set_to_cap_if_null(ops, sem_associate);
 	set_to_cap_if_null(ops, sem_semctl);
@@ -1054,4 +1096,9 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, audit_rule_match);
 	set_to_cap_if_null(ops, audit_rule_free);
 #endif
+
+/* checkpoint/restore hooks */
+	set_to_cap_if_null(ops, context_from_str);
+	set_to_cap_if_null(ops, context_to_str);
+	set_to_cap_if_null(ops, task_restore_context);
 }
diff --git a/security/security.c b/security/security.c
index 5284255..26e7989 100644
--- a/security/security.c
+++ b/security/security.c
@@ -827,6 +827,11 @@ int security_msg_msg_alloc(struct msg_msg *msg)
 	return security_ops->msg_msg_alloc_security(msg);
 }
 
+int security_msg_msg_restore(struct msg_msg *msg, void *stored)
+{
+	return security_ops->msg_msg_restore_security(msg, stored);
+}
+
 void security_msg_msg_free(struct msg_msg *msg)
 {
 	security_ops->msg_msg_free_security(msg);
@@ -837,6 +842,11 @@ int security_msg_queue_alloc(struct msg_queue *msq)
 	return security_ops->msg_queue_alloc_security(msq);
 }
 
+int security_msg_queue_restore(struct msg_queue *msq, void *stored)
+{
+	return security_ops->msg_queue_restore_security(msq, stored);
+}
+
 void security_msg_queue_free(struct msg_queue *msq)
 {
 	security_ops->msg_queue_free_security(msq);
@@ -869,6 +879,11 @@ int security_shm_alloc(struct shmid_kernel *shp)
 	return security_ops->shm_alloc_security(shp);
 }
 
+int security_shm_restore(struct shmid_kernel *shp, void *stored)
+{
+	return security_ops->shm_restore_security(shp, stored);
+}
+
 void security_shm_free(struct shmid_kernel *shp)
 {
 	security_ops->shm_free_security(shp);
@@ -894,6 +909,11 @@ int security_sem_alloc(struct sem_array *sma)
 	return security_ops->sem_alloc_security(sma);
 }
 
+int security_sem_restore(struct sem_array *sma, void *stored)
+{
+	return security_ops->sem_restore_security(sma, stored);
+}
+
 void security_sem_free(struct sem_array *sma)
 {
 	security_ops->sem_free_security(sma);
@@ -962,6 +982,25 @@ void security_release_secctx(char *secdata, u32 seclen)
 }
 EXPORT_SYMBOL(security_release_secctx);
 
+char *security_context_to_str(void *security)
+{
+	return security_ops->context_to_str(security);
+}
+EXPORT_SYMBOL(security_context_to_str);
+
+void *security_context_from_str(char *str)
+{
+	return security_ops->context_from_str(str);
+}
+EXPORT_SYMBOL(security_context_from_str);
+
+int security_task_restore_context(struct cred *cred, void *stored,
+			void *f_security)
+{
+	return security_ops->task_restore_context(cred, stored, f_security);
+}
+EXPORT_SYMBOL(security_task_restore_context);
+
 #ifdef CONFIG_SECURITY_NETWORK
 
 int security_unix_stream_connect(struct socket *sock, struct socket *other,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2fcad7c..ba24808 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4695,6 +4695,15 @@ static int selinux_msg_msg_alloc_security(struct msg_msg *msg)
 	return msg_msg_alloc_security(msg);
 }
 
+static int selinux_msg_msg_restore_security(struct msg_msg *msg, void *stored)
+{
+	/*
+	 * TODO - actually restore from 'stored' subject to authorization
+	 */
+	kfree(stored);
+	return msg_msg_alloc_security(msg);
+}
+
 static void selinux_msg_msg_free_security(struct msg_msg *msg)
 {
 	msg_msg_free_security(msg);
@@ -4726,6 +4735,38 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
 	return 0;
 }
 
+static int selinux_msg_queue_restore_security(struct msg_queue *msq,
+					void *stored)
+{
+	struct ipc_security_struct *isec;
+	struct avc_audit_data ad;
+	u32 sid = current_sid();
+	int rc;
+
+	/*
+	 * TODO restore based on 'stored'.  Since I don't know how we
+	 * want to decide on that yet, I'm for now just treating it as
+	 * a brand new message queue
+	 */
+	kfree(stored);
+	rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ);
+	if (rc)
+		return rc;
+
+	isec = msq->q_perm.security;
+
+	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	ad.u.ipc_id = msq->q_perm.key;
+
+	rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
+			  MSGQ__CREATE, &ad);
+	if (rc) {
+		ipc_free_security(&msq->q_perm);
+		return rc;
+	}
+	return 0;
+}
+
 static void selinux_msg_queue_free_security(struct msg_queue *msq)
 {
 	ipc_free_security(&msq->q_perm);
@@ -4867,6 +4908,38 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp)
 	return 0;
 }
 
+static int selinux_shm_restore_security(struct shmid_kernel *shp,
+					void *stored)
+{
+	struct ipc_security_struct *isec;
+	struct avc_audit_data ad;
+	u32 sid = current_sid();
+	int rc;
+
+	/*
+	 * TODO restore based on 'stored'.  Since I don't know how we
+	 * want to decide on that yet, I'm for now just treating it as
+	 * a brand new shared memory segment
+	 */
+	kfree(stored);
+	rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM);
+	if (rc)
+		return rc;
+
+	isec = shp->shm_perm.security;
+
+	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	ad.u.ipc_id = shp->shm_perm.key;
+
+	rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
+			  SHM__CREATE, &ad);
+	if (rc) {
+		ipc_free_security(&shp->shm_perm);
+		return rc;
+	}
+	return 0;
+}
+
 static void selinux_shm_free_security(struct shmid_kernel *shp)
 {
 	ipc_free_security(&shp->shm_perm);
@@ -4959,6 +5032,37 @@ static int selinux_sem_alloc_security(struct sem_array *sma)
 	return 0;
 }
 
+static int selinux_sem_restore_security(struct sem_array *sma, void *stored)
+{
+	struct ipc_security_struct *isec;
+	struct avc_audit_data ad;
+	u32 sid = current_sid();
+	int rc;
+
+	/*
+	 * TODO restore based on 'stored'.  Since I don't know how we
+	 * want to decide on that yet, I'm for now just treating it as
+	 * a brand new semaphore array
+	 */
+	kfree(stored);
+	rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM);
+	if (rc)
+		return rc;
+
+	isec = sma->sem_perm.security;
+
+	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	ad.u.ipc_id = sma->sem_perm.key;
+
+	rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
+			  SEM__CREATE, &ad);
+	if (rc) {
+		ipc_free_security(&sma->sem_perm);
+		return rc;
+	}
+	return 0;
+}
+
 static void selinux_sem_free_security(struct sem_array *sma)
 {
 	ipc_free_security(&sma->sem_perm);
@@ -5315,6 +5419,90 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer)
 
 #endif
 
+/* checkpoint/restore hooks */
+
+/*
+ * the c/r code will free the char* we return
+ */
+static char *selinux_context_to_str(void *security)
+{
+	struct task_security_struct *s = security;
+	int len = 0;
+	char *v = NULL;
+	int ret;
+
+	if (!s)
+		return ERR_PTR(-EINVAL);
+	ret = security_sid_to_context(s->sid, &v, &len);
+	if (ret)
+		return ERR_PTR(ret);
+	return v;
+}
+
+struct sidp {
+	u32 sid;
+};
+
+/*
+ * we just return error if the context is not valid.  We let the
+ * domain transition/type creation code construct a valid
+ * task_security (or whatever) struct with the sid.
+ */
+static void *selinux_context_from_str(char *str)
+{
+	int error;
+	u32 sid;
+	struct sidp *retv;
+
+	error = security_context_to_sid(str, strlen(str), &sid);
+	if (error)
+		return ERR_PTR(error);
+	retv = kmalloc(sizeof(*retv), GFP_KERNEL);
+	if (!retv)
+		return ERR_PTR(-ENOMEM);
+	retv->sid = sid;
+	return retv;
+}
+
+/*
+ * stored is the struct sidp token (holding the checkpointed sid) that
+ * selinux_context_from_str() returned; it is freed here once read.
+ */
+static int selinux_task_restore_context(struct cred *cred, void *stored,
+					void *f_security)
+{
+	struct task_security_struct *s = cred->security;
+	struct file_security_struct *fsec = f_security;
+	struct sidp *sidp = stored;
+	u32 newsid;
+	int error;
+
+	if (!sidp)
+		return -EINVAL;
+
+	newsid = sidp->sid;
+	kfree(sidp);
+
+	if (s->sid == newsid)
+		return 0;
+
+	/* these are not the right checks.  Will we want a
+	 * process:PROCESS_RESTORE_TRANSITION permission?
+	 */
+	error = avc_has_perm(s->sid, newsid,
+			  SECCLASS_PROCESS, PROCESS__TRANSITION, NULL);
+	if (error)
+		return error;
+
+	error = avc_has_perm(newsid, fsec->sid,
+			  SECCLASS_FILE, FILE__ENTRYPOINT, NULL);
+	if (error)
+		return error;
+	
+	s->sid = s->osid = newsid;
+	return 0;
+}
+
 static struct security_operations selinux_ops = {
 	.name =				"selinux",
 
@@ -5414,9 +5602,11 @@ static struct security_operations selinux_ops = {
 	.ipc_getsecid =			selinux_ipc_getsecid,
 
 	.msg_msg_alloc_security =	selinux_msg_msg_alloc_security,
+	.msg_msg_restore_security =	selinux_msg_msg_restore_security,
 	.msg_msg_free_security =	selinux_msg_msg_free_security,
 
 	.msg_queue_alloc_security =	selinux_msg_queue_alloc_security,
+	.msg_queue_restore_security =	selinux_msg_queue_restore_security,
 	.msg_queue_free_security =	selinux_msg_queue_free_security,
 	.msg_queue_associate =		selinux_msg_queue_associate,
 	.msg_queue_msgctl =		selinux_msg_queue_msgctl,
@@ -5424,12 +5614,14 @@ static struct security_operations selinux_ops = {
 	.msg_queue_msgrcv =		selinux_msg_queue_msgrcv,
 
 	.shm_alloc_security =		selinux_shm_alloc_security,
+	.shm_restore_security =		selinux_shm_restore_security,
 	.shm_free_security =		selinux_shm_free_security,
 	.shm_associate =		selinux_shm_associate,
 	.shm_shmctl =			selinux_shm_shmctl,
 	.shm_shmat =			selinux_shm_shmat,
 
 	.sem_alloc_security =		selinux_sem_alloc_security,
+	.sem_restore_security =		selinux_sem_restore_security,
 	.sem_free_security =		selinux_sem_free_security,
 	.sem_associate =		selinux_sem_associate,
 	.sem_semctl =			selinux_sem_semctl,
@@ -5499,6 +5691,10 @@ static struct security_operations selinux_ops = {
 	.audit_rule_match =		selinux_audit_rule_match,
 	.audit_rule_free =		selinux_audit_rule_free,
 #endif
+
+	.context_to_str =		selinux_context_to_str,
+	.context_from_str =		selinux_context_from_str,
+	.task_restore_context =		selinux_task_restore_context,
 };
 
 static __init int selinux_init(void)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 98b3195..dfc0f7a 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1608,6 +1608,32 @@ static int smack_msg_msg_alloc_security(struct msg_msg *msg)
 }
 
 /**
+ * smack_msg_msg_restore_security - Set the security blob for msg_msg
+ * @msg: the object
+ * @stored: the checkpointed label
+ *
+ * Returns 0
+ */
+static int smack_msg_msg_restore_security(struct msg_msg *msg,
+					void *stored)
+{
+	struct kern_ipc_perm *isp = &sma->sem_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	msg->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		msg->security = str;
+	}
+	return 0;
+	return 0;
+}
+
+/**
  * smack_msg_msg_free_security - Clear the security blob for msg_msg
  * @msg: the object
  *
@@ -1644,6 +1670,30 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
 }
 
 /**
+ * smack_shm_restore_security - restore the security blob for shm
+ * @shp: the object
+ * @stored: the checkpointed label
+ *
+ * Returns 0
+ */
+static int smack_shm_restore_security(struct shmid_kernel *shp, void *stored)
+{
+	struct kern_ipc_perm *isp = &shp->shm_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	isp->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		isp->security = str;
+	}
+	return 0;
+}
+
+/**
  * smack_shm_free_security - Clear the security blob for shm
  * @shp: the object
  *
@@ -1753,6 +1803,31 @@ static int smack_sem_alloc_security(struct sem_array *sma)
 }
 
 /**
+ * smack_sem_restore_security - Set the security blob for sem
+ * @sma: the object
+ * @stored: the label stored in checkpoint image
+ *
+ * Returns 0
+ */
+static int smack_sem_restore_security(struct sem_array *sma,
+				void *stored)
+{
+	struct kern_ipc_perm *isp = &sma->sem_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	isp->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		isp->security = str;
+	}
+	return 0;
+}
+
+/**
  * smack_sem_free_security - Clear the security blob for sem
  * @sma: the object
  *
@@ -1857,6 +1932,31 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
 }
 
 /**
+ * smack_msg_restore_security - Set the security blob for msg
+ * @msq: the object
+ * @stored: the stored label
+ *
+ * Returns 0
+ */
+static int smack_msg_queue_restore_security(struct msg_queue *msq,
+					void *stored)
+{
+	struct kern_ipc_perm *kisp = &msq->q_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	kisp->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		kisp->security = str;
+	}
+	return 0;
+}
+
+/**
  * smack_msg_free_security - Clear the security blob for msg
  * @msq: the object
  *
@@ -2823,6 +2923,33 @@ static void smack_release_secctx(char *secdata, u32 seclen)
 {
 }
 
+/* checkpoint/restore hooks */
+static char *smack_context_to_str(void *security)
+{
+	return kstrdup((char *)security, GFP_KERNEL);
+}
+
+static void *smack_context_from_str(char *str)
+{
+	char *newsmack = smk_import(str, 0);
+
+	if (newsmack == NULL)
+		return ERR_PTR(-EINVAL);
+
+	return newsmack;
+}
+
+static int smack_task_restore_context(struct cred *cred, void *stored,
+					void *f_security)
+{
+	if (cred->security != stored) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		cred->security = stored;
+	}
+	return 0;
+}
+
 struct security_operations smack_ops = {
 	.name =				"smack",
 
@@ -2902,9 +3029,11 @@ struct security_operations smack_ops = {
 	.ipc_getsecid =			smack_ipc_getsecid,
 
 	.msg_msg_alloc_security = 	smack_msg_msg_alloc_security,
+	.msg_msg_restore_security = 	smack_msg_msg_restore_security,
 	.msg_msg_free_security = 	smack_msg_msg_free_security,
 
 	.msg_queue_alloc_security = 	smack_msg_queue_alloc_security,
+	.msg_queue_restore_security = 	smack_msg_queue_restore_security,
 	.msg_queue_free_security = 	smack_msg_queue_free_security,
 	.msg_queue_associate = 		smack_msg_queue_associate,
 	.msg_queue_msgctl = 		smack_msg_queue_msgctl,
@@ -2912,12 +3041,14 @@ struct security_operations smack_ops = {
 	.msg_queue_msgrcv = 		smack_msg_queue_msgrcv,
 
 	.shm_alloc_security = 		smack_shm_alloc_security,
+	.shm_restore_security = 	smack_shm_restore_security,
 	.shm_free_security = 		smack_shm_free_security,
 	.shm_associate = 		smack_shm_associate,
 	.shm_shmctl = 			smack_shm_shmctl,
 	.shm_shmat = 			smack_shm_shmat,
 
 	.sem_alloc_security = 		smack_sem_alloc_security,
+	.sem_restore_security = 	smack_sem_restore_security,
 	.sem_free_security = 		smack_sem_free_security,
 	.sem_associate = 		smack_sem_associate,
 	.sem_semctl = 			smack_sem_semctl,
@@ -2964,6 +3095,10 @@ struct security_operations smack_ops = {
 	.secid_to_secctx = 		smack_secid_to_secctx,
 	.secctx_to_secid = 		smack_secctx_to_secid,
 	.release_secctx = 		smack_release_secctx,
+
+	.context_to_str =		smack_context_to_str,
+	.context_from_str =		smack_context_from_str,
+	.task_restore_context =		smack_task_restore_context,
 };
 
 
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/10] cr: restore LSM credentials
@ 2009-06-10  1:46     ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:46 UTC (permalink / raw)
  To: Linux Containers
  Cc: Oren Laadan, David Howells, Alexey Dobriyan, Andrew Morgan, SELinux

Checkpoint and restore task and ipc struct ->security info.
(files->f_security yet to be done).

LSM contexts (a string representation of obj->security) are
checkpointed as shared objects before any object referencing
them.  The object's checkpoint header struct has a reference
(h->sec_ref) to the shared object.  A NULL ->security is indicated
by h->sec_ref = -1.

At checkpoint time, for each obj->security to be checkpointed,
the LSM will be asked (once) to convert it to a string, in memory
which the checkpoint subsystem will kfree.  At restart time,
the LSM will first return some meaningful token given the
checkpointed string.  That token will be passed to per-object-type
restore functions (task_restore_context(), shm_restore_security(),
etc) where the LSM can determine based on the object type, the
caller, and the token, whether to allow the object restore, and
what value to actually assign to ->security.  In smack, the
token is the actual imported label.  In SELinux, it is a temporary
pointer to the sid which the checkpointed context referred to.

In smack, the checkpointed labels are used for both tasks and
ipc objects so long as the task calling sys_restart() has
CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
from current_security(), -EPERM is returned.

The basics of SELinux support are there (enough to demonstrate working
c/r with SELinux enforcing), but there will need to be new object
permissions for restore, so the precise nature of those needs to be
discussed.  For instance, do we want to define process:restore
and ipc_msg_msg:restore, in which case
        allow root_t user_t:process restore
would mean that root_t may restart a task and label it user_t?

Since we are potentially skipping several allowed domain transitions
(resulting in an illegal short-cut domain transition or type creation),
I have a fear that the only sane way to proceed would be to have
one all-powerful domain, checkpoint_restore_t, which can effectively
transition to any domain it wants to by (ab)using the checkpoint
image.

Or, perhaps we can define intermediate domains...  So if we want
user_t to be able to restart a server of type X_t, then we create
an X_restore_t type, allow user_t to transition to it using a
program which does sys_restart(), which in turn may transition to
X_t?

Obviously this needs discussion.

Tomoyo has not been updated or tested.  Given its path-based
domain name model, I'm not sure what the tomoyo maintainers
would prefer - that the restart program be reflected in the
domain name, or that the original domain name be restored.

This is the first posting of this patch.  There are testcases
in git://git.sr71.net/~hallyn/cr_tests.git , in particular
under (the slightly mis-named) cr_tests/userns/ directory.
All pass fine with all LSMs (except Tomoyo, not tested).

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
 checkpoint/objhash.c           |   56 ++++++++++++
 include/linux/checkpoint_hdr.h |   15 +++
 include/linux/security.h       |  105 +++++++++++++++++++++
 ipc/checkpoint.c               |   19 +++--
 ipc/checkpoint_msg.c           |   30 ++++++-
 ipc/checkpoint_sem.c           |   12 +++-
 ipc/checkpoint_shm.c           |   12 +++-
 ipc/util.h                     |    3 +-
 kernel/cred.c                  |   29 ++++++-
 security/capability.c          |   47 ++++++++++
 security/security.c            |   39 ++++++++
 security/selinux/hooks.c       |  196 ++++++++++++++++++++++++++++++++++++++++
 security/smack/smack_lsm.c     |  135 +++++++++++++++++++++++++++
 13 files changed, 686 insertions(+), 12 deletions(-)

diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 84f003d..674b9b3 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -20,6 +20,7 @@
 #include <linux/user_namespace.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
+#include <linux/security.h>
 
 struct ckpt_obj;
 struct ckpt_obj_ops;
@@ -247,6 +248,52 @@ static int obj_groupinfo_users(void *ptr)
 	return atomic_read(&((struct group_info *) ptr)->usage);
 }
 
+/*
+ * checkpoint_security - checkpoint a shared LSM security context
+ * @ctx: checkpoint context
+ * @ptr: the ->security blob to convert to a string and checkpoint
+ *
+ * If ptr has already been checkpointed, return the objref
+ * Else write it to checkpoint image and return the objref
+ * On error, return <0.
+ */
+#define MAX_STR_LEN 200
+static int checkpoint_security(struct ckpt_ctx *ctx, void *ptr)
+{
+	char *str;
+	int ret;
+	int len = 0;
+
+	str = security_context_to_str(ptr);
+	if (!str)
+		return 0;
+	if (IS_ERR(str))
+		return PTR_ERR(str);
+	len = strlen(str) + 1;
+	if (len > MAX_STR_LEN) {
+		printk(KERN_NOTICE "%s: security context too long\n",
+			__func__);
+		return -EINVAL;
+	}
+	ret = ckpt_write_obj_type(ctx, str, len, CKPT_HDR_SEC);
+	/* the LSM created a new string for us, now free it */
+	kfree(str);
+	return ret;
+}
+
+static void *restore_security(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_lsm *h;
+	void *security;
+
+	h = ckpt_read_buf_type(ctx, MAX_STR_LEN, CKPT_HDR_SEC);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+	security = security_context_from_str(h->str);
+	ckpt_hdr_put(ctx, h);
+	return security;
+}
+
 static struct ckpt_obj_ops ckpt_obj_ops[] = {
 	/* ignored object */
 	{
@@ -364,6 +411,15 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
 		.checkpoint = checkpoint_groupinfo,
 		.restore = restore_groupinfo,
 	},
+	/* struct ckpt_sec */
+	{
+		.obj_name = "SECURITY",
+		.obj_type = CKPT_OBJ_SEC,
+		.ref_drop = obj_no_drop,
+		.ref_grab = obj_no_grab,
+		.checkpoint = checkpoint_security,
+		.restore = restore_security,
+	},
 };
 
 
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 1a4033c..a447b5a 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -45,6 +45,7 @@ enum {
 	CKPT_HDR_BUFFER,
 	CKPT_HDR_STRING,
 	CKPT_HDR_OBJREF,
+	CKPT_HDR_SEC,
 
 	CKPT_HDR_TREE = 101,
 	CKPT_HDR_TASK,
@@ -114,6 +115,7 @@ enum obj_type {
 	CKPT_OBJ_CRED,
 	CKPT_OBJ_USER,
 	CKPT_OBJ_GROUPINFO,
+	CKPT_OBJ_SEC,
 	CKPT_OBJ_MAX
 };
 
@@ -181,6 +183,13 @@ struct ckpt_hdr_task {
 	__u32 task_comm_len;
 } __attribute__((aligned(8)));
 
+/* LSM security contexts (shared) */
+struct ckpt_hdr_lsm {
+	struct ckpt_hdr h;
+	/* followed by `len' characters */
+	char str[];
+} __attribute__((aligned(8)));
+
 /* Posix capabilities */
 struct ckpt_capabilities {
 	__u32 cap_i_0, cap_i_1; /* inheritable set */
@@ -204,6 +213,8 @@ struct ckpt_hdr_cred {
 	__s32 user_ref;
 	__s32 groupinfo_ref;
 	struct ckpt_capabilities cap_s;
+	__s32 sec_ref;
+	__u32 padding;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_groupinfo {
@@ -418,6 +429,8 @@ struct ckpt_hdr_ipc_perms {
 	__u32 mode;
 	__u32 _padding;
 	__u64 seq;
+	__s32 sec_ref;
+	__u32 padding;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_ipc_shm {
@@ -451,6 +464,8 @@ struct ckpt_hdr_ipc_msg_msg {
 	struct ckpt_hdr h;
 	__s32 m_type;
 	__u32 m_ts;
+	__s32 sec_ref;
+	__u32 padding;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_ipc_sem {
diff --git a/include/linux/security.h b/include/linux/security.h
index d5fd616..5625553 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1089,6 +1089,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	created.
  *	@msg contains the message structure to be modified.
  *	Return 0 if operation was successful and permission is granted.
+ * @msg_msg_restore_security:
+ *	Allocate and attach a security structure to the msg->security field
+ *	during sys_restart().
+ *	@msg contains the message structure to be modified.
+ *	@stored contains a string representing the checkpointed context
+ *	Return 0 if operation was successful and permission is granted.
  * @msg_msg_free_security:
  *	Deallocate the security structure for this message.
  *	@msg contains the message structure to be modified.
@@ -1101,6 +1107,14 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	NULL when the structure is first created.
  *	@msq contains the message queue structure to be modified.
  *	Return 0 if operation was successful and permission is granted.
+ * @msg_queue_restore_security:
+ *	Allocate and attach a security structure to the
+ *	msq->q_perm.security field when a msgq is being restored, based on the
+ *	checkpointed context.
+ *	@msq contains the message queue structure to be modified.
+ *	@stored contains a string representation of the checkpointed
+ *	context.
+ *	Return 0 if operation was successful and permission is granted.
  * @msg_queue_free_security:
  *	Deallocate security structure for this message queue.
  *	@msq contains the message queue structure to be modified.
@@ -1146,6 +1160,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	first created.
  *	@shp contains the shared memory structure to be modified.
  *	Return 0 if operation was successful and permission is granted.
+ * @shm_restore_security:
+ *	Allocate and attach a security structure to the shp->shm_perm.security
+ *	field during sys_restart().
+ *	@shp contains the shared memory structure to be modified.
+ *	@stored contains the checkpointed security context string.
+ *	Return 0 if operation was successful and permission is granted.
  * @shm_free_security:
  *	Deallocate the security struct for this memory segment.
  *	@shp contains the shared memory structure to be modified.
@@ -1181,6 +1201,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	first created.
  *	@sma contains the semaphore structure
  *	Return 0 if operation was successful and permission is granted.
+ * @sem_restore_security:
+ *	Allocate and attach a security structure to the sma->sem_perm.security
+ *	field during sys_restart().
+ *	@sma contains the semaphore structure
+ *	@stored contains the string representation of checkpointed ->security.
+ *	Return 0 if operation was successful and permission is granted.
  * @sem_free_security:
  *	deallocate security struct for this semaphore
  *	@sma contains the semaphore structure.
@@ -1331,6 +1357,31 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	audit_rule_init.
  *	@rule contains the allocated rule
  *
+ * Security hooks for checkpoint/restore of security contexts
+ *
+ * @context_to_str:
+ *	Given a void *->security, return a char* which will be meaningful
+ *	at the restart.  For most LSMs, this will likely be
+ *	"(char *)ptr"
+ *	@ptr: The ->security field to convert to a string.
+ *	@str: The string representation of @ptr.
+ *	Return a valid char*, or < 0 if error.
+ *
+ * @context_from_str:
+ *	Given a char *, return a valid ->security which can be assigned
+ *	to an object.
+ *	@str: The string representation of a context
+ *	Return < 0 if error.  For instance -EINVAL if the current policy
+ *	has no such context.
+ *
+ * @task_restore_context:
+ *	Choose a valid context for a task being restored from checkpoint
+ *	image.
+ *	@cred: The struct cred of the task which called sys_restart()
+ *	@stored: The context stored for the checkpointed task.
+ *	@f_security: The context of the checkpoint file.
+ *	Return 0 if cred was updated, < 0 if restart should be denied
+ *
  * This is the main security structure.
  */
 struct security_operations {
@@ -1498,9 +1549,11 @@ struct security_operations {
 	void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid);
 
 	int (*msg_msg_alloc_security) (struct msg_msg *msg);
+	int (*msg_msg_restore_security) (struct msg_msg *msg, void *stored);
 	void (*msg_msg_free_security) (struct msg_msg *msg);
 
 	int (*msg_queue_alloc_security) (struct msg_queue *msq);
+	int (*msg_queue_restore_security) (struct msg_queue *msq, void *stored);
 	void (*msg_queue_free_security) (struct msg_queue *msq);
 	int (*msg_queue_associate) (struct msg_queue *msq, int msqflg);
 	int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd);
@@ -1512,6 +1565,7 @@ struct security_operations {
 				 long type, int mode);
 
 	int (*shm_alloc_security) (struct shmid_kernel *shp);
+	int (*shm_restore_security) (struct shmid_kernel *shp, void *stored);
 	void (*shm_free_security) (struct shmid_kernel *shp);
 	int (*shm_associate) (struct shmid_kernel *shp, int shmflg);
 	int (*shm_shmctl) (struct shmid_kernel *shp, int cmd);
@@ -1519,6 +1573,7 @@ struct security_operations {
 			  char __user *shmaddr, int shmflg);
 
 	int (*sem_alloc_security) (struct sem_array *sma);
+	int (*sem_restore_security) (struct sem_array *sma, void *stored);
 	void (*sem_free_security) (struct sem_array *sma);
 	int (*sem_associate) (struct sem_array *sma, int semflg);
 	int (*sem_semctl) (struct sem_array *sma, int cmd);
@@ -1609,6 +1664,11 @@ struct security_operations {
 				 struct audit_context *actx);
 	void (*audit_rule_free) (void *lsmrule);
 #endif /* CONFIG_AUDIT */
+
+	char *(*context_to_str) (void *security);
+	void *(*context_from_str) (char *str);
+	int (*task_restore_context) (struct cred *cred, void *stored,
+				       void *f_security);
 };
 
 /* prototypes */
@@ -1747,8 +1807,10 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode);
 int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
 void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
 int security_msg_msg_alloc(struct msg_msg *msg);
+int security_msg_msg_restore(struct msg_msg *msg, void *stored);
 void security_msg_msg_free(struct msg_msg *msg);
 int security_msg_queue_alloc(struct msg_queue *msq);
+int security_msg_queue_restore(struct msg_queue *msq, void *stored);
 void security_msg_queue_free(struct msg_queue *msq);
 int security_msg_queue_associate(struct msg_queue *msq, int msqflg);
 int security_msg_queue_msgctl(struct msg_queue *msq, int cmd);
@@ -1757,11 +1819,13 @@ int security_msg_queue_msgsnd(struct msg_queue *msq,
 int security_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 			      struct task_struct *target, long type, int mode);
 int security_shm_alloc(struct shmid_kernel *shp);
+int security_shm_restore(struct shmid_kernel *shp, void *stored);
 void security_shm_free(struct shmid_kernel *shp);
 int security_shm_associate(struct shmid_kernel *shp, int shmflg);
 int security_shm_shmctl(struct shmid_kernel *shp, int cmd);
 int security_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg);
 int security_sem_alloc(struct sem_array *sma);
+int security_sem_restore(struct sem_array *sma, void *stored);
 void security_sem_free(struct sem_array *sma);
 int security_sem_associate(struct sem_array *sma, int semflg);
 int security_sem_semctl(struct sem_array *sma, int cmd);
@@ -1775,6 +1839,10 @@ int security_netlink_recv(struct sk_buff *skb, int cap);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
+char *security_context_to_str(void *security);
+void *security_context_from_str(char *str);
+int security_task_restore_context(struct cred *cred, void *stored,
+				void *f_security);
 
 #else /* CONFIG_SECURITY */
 struct security_mnt_opts {
@@ -2393,6 +2461,11 @@ static inline int security_msg_msg_alloc(struct msg_msg *msg)
 	return 0;
 }
 
+static inline int security_msg_msg_restore(struct msg_msg *msg, void *stored)
+{
+	return 0;
+}
+
 static inline void security_msg_msg_free(struct msg_msg *msg)
 { }
 
@@ -2401,6 +2474,12 @@ static inline int security_msg_queue_alloc(struct msg_queue *msq)
 	return 0;
 }
 
+static inline int security_msg_queue_restore(struct msg_queue *msq,
+						void *stored)
+{
+	return 0;
+}
+
 static inline void security_msg_queue_free(struct msg_queue *msq)
 { }
 
@@ -2434,6 +2513,11 @@ static inline int security_shm_alloc(struct shmid_kernel *shp)
 	return 0;
 }
 
+static inline int security_shm_restore(struct shmid_kernel *shp, void *stored)
+{
+	return 0;
+}
+
 static inline void security_shm_free(struct shmid_kernel *shp)
 { }
 
@@ -2459,6 +2543,11 @@ static inline int security_sem_alloc(struct sem_array *sma)
 	return 0;
 }
 
+static inline int security_sem_restore(struct sem_array *sma, void *stored)
+{
+	return 0;
+}
+
 static inline void security_sem_free(struct sem_array *sma)
 { }
 
@@ -2517,6 +2606,22 @@ static inline int security_secctx_to_secid(const char *secdata,
 static inline void security_release_secctx(char *secdata, u32 seclen)
 {
 }
+
+static inline char *security_context_to_str(void *security)
+{
+	return NULL;
+}
+
+static inline void *security_context_from_str(char *str)
+{
+	return NULL;
+}
+
+static inline int security_task_restore_context(struct cred *cred,
+				void *stored, void *f_security)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
index bc77743..6da8ac8 100644
--- a/ipc/checkpoint.c
+++ b/ipc/checkpoint.c
@@ -27,7 +27,8 @@ static char *ipc_ind_to_str[] = { "sem", "msg", "shm" };
  * Checkpoint
  */
 
-int checkpoint_fill_ipc_perms(struct ckpt_hdr_ipc_perms *h,
+int checkpoint_fill_ipc_perms(struct ckpt_ctx *ctx,
+			      struct ckpt_hdr_ipc_perms *h,
 			      struct kern_ipc_perm *perm)
 {
 	if (ipcperms(perm, S_IROTH))
@@ -42,6 +43,13 @@ int checkpoint_fill_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 	h->mode = perm->mode & S_IRWXUGO;
 	h->seq = perm->seq;
 
+	if (perm->security) {
+		h->sec_ref = checkpoint_obj(ctx, perm->security, CKPT_OBJ_SEC);
+		if (h->sec_ref < 0)
+			return h->sec_ref;
+	} else
+		h->sec_ref = -1;
+
 	return 0;
 }
 
@@ -169,13 +177,10 @@ int restore_load_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 	perm->mode = h->mode;
 	perm->seq = h->seq;
 	/*
-	 * Todo: restore perm->security.
-	 * At the moment it gets set by security_x_alloc() called through
-	 * ipcget()->ipcget_public()->ops-.getnew (->nequeue for instance)
-	 * We will want to ask the LSM to consider resetting the
-	 * checkpointed ->security, based on current_security(),
-	 * the checkpointed ->security, and the checkpoint file context.
+	 * The checkpointed ->security value will be restored
+	 * (and verified) by our caller.
 	 */
+	perm->security = NULL;
 
 	return 0;
 }
diff --git a/ipc/checkpoint_msg.c b/ipc/checkpoint_msg.c
index fb1a61e..025e33f 100644
--- a/ipc/checkpoint_msg.c
+++ b/ipc/checkpoint_msg.c
@@ -18,6 +18,7 @@
 #include <linux/syscalls.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/security.h>
 
 #include "util.h"
 
@@ -36,7 +37,7 @@ static int fill_ipc_msg_hdr(struct ckpt_ctx *ctx,
 
 	ipc_lock_by_ptr(&msq->q_perm);
 
-	ret = checkpoint_fill_ipc_perms(&h->perms, &msq->q_perm);
+	ret = checkpoint_fill_ipc_perms(ctx, &h->perms, &msq->q_perm);
 	if (ret < 0)
 		goto unlock;
 
@@ -63,13 +64,20 @@ static int checkpoint_msg_contents(struct ckpt_ctx *ctx, struct msg_msg *msg)
 	struct msg_msgseg *seg;
 	int total, len;
 	int ret;
+	int sec_ref = -1;
 
+	if (msg->security) {
+		sec_ref = checkpoint_obj(ctx, msg->security, CKPT_OBJ_SEC);
+		if (sec_ref < 0)
+			return sec_ref;
+	}
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_IPC_MSG_MSG);
 	if (!h)
 		return -ENOMEM;
 
 	h->m_type = msg->m_type;
 	h->m_ts = msg->m_ts;
+	h->sec_ref = sec_ref;
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
@@ -175,10 +183,19 @@ static int load_ipc_msg_hdr(struct ckpt_ctx *ctx,
 			    struct msg_queue *msq)
 {
 	int ret = 0;
+	void *security = NULL;;
 
 	ret = restore_load_ipc_perms(&h->perms, &msq->q_perm);
 	if (ret < 0)
 		return ret;
+	if (h->perms.sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->perms.sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security))
+			return PTR_ERR(security);
+	}
+	ret = security_msg_queue_restore(msq, security);
+	if (ret)
+		return ret;
 
 	ckpt_debug("msq: lspid %d lrpid %d qnum %lld qbytes %lld\n",
 		 h->q_lspid, h->q_lrpid, h->q_qnum, h->q_qbytes);
@@ -200,6 +217,7 @@ static struct msg_msg *restore_msg_contents_one(struct ckpt_ctx *ctx, int *clen)
 	struct ckpt_hdr_ipc_msg_msg *h;
 	struct msg_msg *msg = NULL;
 	struct msg_msgseg *seg, **pseg;
+	void *security = NULL;
 	int total, len;
 	int ret;
 
@@ -222,6 +240,16 @@ static struct msg_msg *restore_msg_contents_one(struct ckpt_ctx *ctx, int *clen)
 	}
 	msg->next = NULL;
 	pseg = &msg->next;
+	if (h->sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security)) {
+			ret = PTR_ERR(security);
+			goto out;
+		}
+	}
+	ret = security_msg_msg_restore(msg, security);
+	if (ret)
+		goto out;
 
 	ret = _ckpt_read_buffer(ctx, (msg + 1), len);
 	if (ret < 0)
diff --git a/ipc/checkpoint_sem.c b/ipc/checkpoint_sem.c
index e6934dc..cd76bcc 100644
--- a/ipc/checkpoint_sem.c
+++ b/ipc/checkpoint_sem.c
@@ -18,6 +18,7 @@
 #include <linux/syscalls.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/security.h>
 
 #include <linux/msg.h>	/* needed for util.h that uses 'struct msg_msg' */
 #include "util.h"
@@ -37,7 +38,7 @@ static int fill_ipc_sem_hdr(struct ckpt_ctx *ctx,
 
 	ipc_lock_by_ptr(&sem->sem_perm);
 
-	ret = checkpoint_fill_ipc_perms(&h->perms, &sem->sem_perm);
+	ret = checkpoint_fill_ipc_perms(ctx, &h->perms, &sem->sem_perm);
 	if (ret < 0)
 		goto unlock;
 
@@ -112,10 +113,19 @@ static int load_ipc_sem_hdr(struct ckpt_ctx *ctx,
 			       struct sem_array *sem)
 {
 	int ret = 0;
+	void *security = NULL;
 
 	ret = restore_load_ipc_perms(&h->perms, &sem->sem_perm);
 	if (ret < 0)
 		return ret;
+	if (h->perms.sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->perms.sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security))
+			return PTR_ERR(security);
+	}
+	ret = security_sem_restore(sem, security);
+	if (ret)
+		return ret;
 
 	ckpt_debug("sem: nsems %u\n", h->sem_nsems);
 
diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
index 0d8eb14..cf100fe 100644
--- a/ipc/checkpoint_shm.c
+++ b/ipc/checkpoint_shm.c
@@ -22,6 +22,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/deferqueue.h>
+#include <linux/security.h>
 
 #include <linux/msg.h>	/* needed for util.h that uses 'struct msg_msg' */
 #include "util.h"
@@ -41,7 +42,7 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
 
 	ipc_lock_by_ptr(&shp->shm_perm);
 
-	ret = checkpoint_fill_ipc_perms(&h->perms, &shp->shm_perm);
+	ret = checkpoint_fill_ipc_perms(ctx, &h->perms, &shp->shm_perm);
 	if (ret < 0)
 		goto unlock;
 
@@ -148,10 +149,19 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
 			    struct shmid_kernel *shp)
 {
 	int ret;
+	void *security = NULL;
 
 	ret = restore_load_ipc_perms(&h->perms, &shp->shm_perm);
 	if (ret < 0)
 		return ret;
+	if (h->perms.sec_ref != -1) {
+		security = ckpt_obj_fetch(ctx, h->perms.sec_ref, CKPT_OBJ_SEC);
+		if (IS_ERR(security))
+			return PTR_ERR(security);
+	}
+	ret = security_shm_restore(shp, security);
+	if (ret)
+		return ret;
 
 	ckpt_debug("shm: cprid %d lprid %d segsz %lld mlock %d\n",
 		 h->shm_cprid, h->shm_lprid, h->shm_segsz, h->mlock_uid);
diff --git a/ipc/util.h b/ipc/util.h
index 020de7b..5976308 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -192,7 +192,8 @@ extern void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
 extern void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp);
 
 #ifdef CONFIG_CHECKPOINT
-extern int checkpoint_fill_ipc_perms(struct ckpt_hdr_ipc_perms *h,
+extern int checkpoint_fill_ipc_perms(struct ckpt_ctx *ctx,
+				     struct ckpt_hdr_ipc_perms *h,
 				     struct kern_ipc_perm *perm);
 extern int restore_load_ipc_perms(struct ckpt_hdr_ipc_perms *h,
 				  struct kern_ipc_perm *perm);
diff --git a/kernel/cred.c b/kernel/cred.c
index 6ef75a1..c47d175 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -709,8 +709,9 @@ int cred_setfsgid(struct cred *new, gid_t gid, gid_t *old_fsgid)
 int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
 {
 	int ret;
-	int groupinfo_ref, user_ref;
+	int groupinfo_ref, user_ref, sec_ref = -1;
 	struct ckpt_hdr_cred *h;
+	void *security = NULL;
 
 	groupinfo_ref = checkpoint_obj(ctx, cred->group_info,
 					CKPT_OBJ_GROUPINFO);
@@ -719,6 +720,16 @@ int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
 	user_ref = checkpoint_obj(ctx, cred->user, CKPT_OBJ_USER);
 	if (user_ref < 0)
 		return user_ref;
+#ifdef CONFIG_SECURITY
+	/* should we checkpoint a 'lsm_na' field when !security ? */
+	security = cred->security;
+#endif
+
+	if (security) {
+		sec_ref = checkpoint_obj(ctx, security, CKPT_OBJ_SEC);
+		if (sec_ref < 0)
+			return sec_ref;
+	}
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_CRED);
 	if (!h)
@@ -738,6 +749,7 @@ int checkpoint_write_cred(struct ckpt_ctx *ctx, const struct cred *cred)
 
 	h->user_ref = user_ref;
 	h->groupinfo_ref = groupinfo_ref;
+	h->sec_ref = sec_ref;
 
 	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
 	ckpt_hdr_put(ctx, h);
@@ -751,6 +763,7 @@ struct cred *restore_read_cred(struct ckpt_ctx *ctx)
 	struct ckpt_hdr_cred *h;
 	struct user_struct *user;
 	struct group_info *groupinfo;
+	void *security = NULL;
 	int ret = -EINVAL;
 	uid_t olduid;
 	gid_t oldgid;
@@ -774,6 +787,20 @@ struct cred *restore_read_cred(struct ckpt_ctx *ctx)
 	user = ckpt_obj_fetch(ctx, h->user_ref, CKPT_OBJ_USER);
 	if (IS_ERR(user))
 		goto err_putcred;
+	if (h->sec_ref != -1)
+		security = ckpt_obj_fetch(ctx, h->sec_ref, CKPT_OBJ_SEC);
+	if (IS_ERR(security))
+		goto err_putcred;
+
+#ifdef CONFIG_SECURITY
+	/*
+	 * Ask LSM to reset original task->security if allowed
+	 */
+	ret = security_task_restore_context(cred, security,
+					ctx->file->f_security);
+	if (ret)
+		goto err_putcred;
+#endif
 
 	/*
 	 * TODO: this check should  go into the common helper in
diff --git a/security/capability.c b/security/capability.c
index 21b6cea..4e586a7 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -490,6 +490,11 @@ static int cap_msg_msg_alloc_security(struct msg_msg *msg)
 	return 0;
 }
 
+static int cap_msg_msg_restore_security(struct msg_msg *msg, void *stored)
+{
+	return 0;
+}
+
 static void cap_msg_msg_free_security(struct msg_msg *msg)
 {
 }
@@ -499,6 +504,12 @@ static int cap_msg_queue_alloc_security(struct msg_queue *msq)
 	return 0;
 }
 
+static int cap_msg_queue_restore_security(struct msg_queue *msq,
+					void *stored)
+{
+	return 0;
+}
+
 static void cap_msg_queue_free_security(struct msg_queue *msq)
 {
 }
@@ -530,6 +541,11 @@ static int cap_shm_alloc_security(struct shmid_kernel *shp)
 	return 0;
 }
 
+static int cap_shm_restore_security(struct shmid_kernel *shp, void *stored)
+{
+	return 0;
+}
+
 static void cap_shm_free_security(struct shmid_kernel *shp)
 {
 }
@@ -555,6 +571,11 @@ static int cap_sem_alloc_security(struct sem_array *sma)
 	return 0;
 }
 
+static int cap_sem_restore_security(struct sem_array *sma, void *stored)
+{
+	return 0;
+}
+
 static void cap_sem_free_security(struct sem_array *sma)
 {
 }
@@ -848,6 +869,23 @@ static void cap_audit_rule_free(void *lsmrule)
 }
 #endif /* CONFIG_AUDIT */
 
+/* checkpoint/restore hooks */
+static char *cap_context_to_str(void *security)
+{
+	return NULL;
+}
+
+static void *cap_context_from_str(char *str)
+{
+	return NULL;
+}
+
+static int cap_task_restore_context(struct cred *cred, void *stored,
+				      void *f_security)
+{
+	return 0;
+}
+
 struct security_operations default_security_ops = {
 	.name	= "default",
 };
@@ -976,19 +1014,23 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, ipc_permission);
 	set_to_cap_if_null(ops, ipc_getsecid);
 	set_to_cap_if_null(ops, msg_msg_alloc_security);
+	set_to_cap_if_null(ops, msg_msg_restore_security);
 	set_to_cap_if_null(ops, msg_msg_free_security);
 	set_to_cap_if_null(ops, msg_queue_alloc_security);
+	set_to_cap_if_null(ops, msg_queue_restore_security);
 	set_to_cap_if_null(ops, msg_queue_free_security);
 	set_to_cap_if_null(ops, msg_queue_associate);
 	set_to_cap_if_null(ops, msg_queue_msgctl);
 	set_to_cap_if_null(ops, msg_queue_msgsnd);
 	set_to_cap_if_null(ops, msg_queue_msgrcv);
 	set_to_cap_if_null(ops, shm_alloc_security);
+	set_to_cap_if_null(ops, shm_restore_security);
 	set_to_cap_if_null(ops, shm_free_security);
 	set_to_cap_if_null(ops, shm_associate);
 	set_to_cap_if_null(ops, shm_shmctl);
 	set_to_cap_if_null(ops, shm_shmat);
 	set_to_cap_if_null(ops, sem_alloc_security);
+	set_to_cap_if_null(ops, sem_restore_security);
 	set_to_cap_if_null(ops, sem_free_security);
 	set_to_cap_if_null(ops, sem_associate);
 	set_to_cap_if_null(ops, sem_semctl);
@@ -1054,4 +1096,9 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, audit_rule_match);
 	set_to_cap_if_null(ops, audit_rule_free);
 #endif
+
+/* checkpoint/restore hooks */
+	set_to_cap_if_null(ops, context_from_str);
+	set_to_cap_if_null(ops, context_to_str);
+	set_to_cap_if_null(ops, task_restore_context);
 }
diff --git a/security/security.c b/security/security.c
index 5284255..26e7989 100644
--- a/security/security.c
+++ b/security/security.c
@@ -827,6 +827,11 @@ int security_msg_msg_alloc(struct msg_msg *msg)
 	return security_ops->msg_msg_alloc_security(msg);
 }
 
+int security_msg_msg_restore(struct msg_msg *msg, void *stored)
+{
+	return security_ops->msg_msg_restore_security(msg, stored);
+}
+
 void security_msg_msg_free(struct msg_msg *msg)
 {
 	security_ops->msg_msg_free_security(msg);
@@ -837,6 +842,11 @@ int security_msg_queue_alloc(struct msg_queue *msq)
 	return security_ops->msg_queue_alloc_security(msq);
 }
 
+int security_msg_queue_restore(struct msg_queue *msq, void *stored)
+{
+	return security_ops->msg_queue_restore_security(msq, stored);
+}
+
 void security_msg_queue_free(struct msg_queue *msq)
 {
 	security_ops->msg_queue_free_security(msq);
@@ -869,6 +879,11 @@ int security_shm_alloc(struct shmid_kernel *shp)
 	return security_ops->shm_alloc_security(shp);
 }
 
+int security_shm_restore(struct shmid_kernel *shp, void *stored)
+{
+	return security_ops->shm_restore_security(shp, stored);
+}
+
 void security_shm_free(struct shmid_kernel *shp)
 {
 	security_ops->shm_free_security(shp);
@@ -894,6 +909,11 @@ int security_sem_alloc(struct sem_array *sma)
 	return security_ops->sem_alloc_security(sma);
 }
 
+int security_sem_restore(struct sem_array *sma, void *stored)
+{
+	return security_ops->sem_restore_security(sma, stored);
+}
+
 void security_sem_free(struct sem_array *sma)
 {
 	security_ops->sem_free_security(sma);
@@ -962,6 +982,25 @@ void security_release_secctx(char *secdata, u32 seclen)
 }
 EXPORT_SYMBOL(security_release_secctx);
 
+char *security_context_to_str(void *security)
+{
+	return security_ops->context_to_str(security);
+}
+EXPORT_SYMBOL(security_context_to_str);
+
+void *security_context_from_str(char *str)
+{
+	return security_ops->context_from_str(str);
+}
+EXPORT_SYMBOL(security_context_from_str);
+
+int security_task_restore_context(struct cred *cred, void *stored,
+			void *f_security)
+{
+	return security_ops->task_restore_context(cred, stored, f_security);
+}
+EXPORT_SYMBOL(security_task_restore_context);
+
 #ifdef CONFIG_SECURITY_NETWORK
 
 int security_unix_stream_connect(struct socket *sock, struct socket *other,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2fcad7c..ba24808 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4695,6 +4695,15 @@ static int selinux_msg_msg_alloc_security(struct msg_msg *msg)
 	return msg_msg_alloc_security(msg);
 }
 
+static int selinux_msg_msg_restore_security(struct msg_msg *msg, void *stored)
+{
+	/*
+	 * TODO - actually restore from 'stored' subject to authorization
+	 */
+	kfree(stored);
+	return msg_msg_alloc_security(msg);
+}
+
 static void selinux_msg_msg_free_security(struct msg_msg *msg)
 {
 	msg_msg_free_security(msg);
@@ -4726,6 +4735,38 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
 	return 0;
 }
 
+static int selinux_msg_queue_restore_security(struct msg_queue *msq,
+					void *stored)
+{
+	struct ipc_security_struct *isec;
+	struct avc_audit_data ad;
+	u32 sid = current_sid();
+	int rc;
+
+	/*
+	 * TODO restore based on 'stored'.  Since I don't know how we
+	 * want to decide on that yet, I'm for now just treating it as
+	 * a brand new message queue
+	 */
+	kfree(stored);
+	rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ);
+	if (rc)
+		return rc;
+
+	isec = msq->q_perm.security;
+
+	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	ad.u.ipc_id = msq->q_perm.key;
+
+	rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
+			  MSGQ__CREATE, &ad);
+	if (rc) {
+		ipc_free_security(&msq->q_perm);
+		return rc;
+	}
+	return 0;
+}
+
 static void selinux_msg_queue_free_security(struct msg_queue *msq)
 {
 	ipc_free_security(&msq->q_perm);
@@ -4867,6 +4908,38 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp)
 	return 0;
 }
 
+static int selinux_shm_restore_security(struct shmid_kernel *shp,
+					void *stored)
+{
+	struct ipc_security_struct *isec;
+	struct avc_audit_data ad;
+	u32 sid = current_sid();
+	int rc;
+
+	/*
+	 * TODO restore based on 'stored'.  Since I don't know how we
+	 * want to decide on that yet, I'm for now just treating it as
+	 * a brand new shared memory segment
+	 */
+	kfree(stored);
+	rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM);
+	if (rc)
+		return rc;
+
+	isec = shp->shm_perm.security;
+
+	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	ad.u.ipc_id = shp->shm_perm.key;
+
+	rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
+			  SHM__CREATE, &ad);
+	if (rc) {
+		ipc_free_security(&shp->shm_perm);
+		return rc;
+	}
+	return 0;
+}
+
 static void selinux_shm_free_security(struct shmid_kernel *shp)
 {
 	ipc_free_security(&shp->shm_perm);
@@ -4959,6 +5032,37 @@ static int selinux_sem_alloc_security(struct sem_array *sma)
 	return 0;
 }
 
+static int selinux_sem_restore_security(struct sem_array *sma, void *stored)
+{
+	struct ipc_security_struct *isec;
+	struct avc_audit_data ad;
+	u32 sid = current_sid();
+	int rc;
+
+	/*
+	 * TODO restore based on 'stored'.  Since I don't know how we
+	 * want to decide on that yet, I'm for now just treating it as
+	 * a brand new semaphore array
+	 */
+	kfree(stored);
+	rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM);
+	if (rc)
+		return rc;
+
+	isec = sma->sem_perm.security;
+
+	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	ad.u.ipc_id = sma->sem_perm.key;
+
+	rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
+			  SEM__CREATE, &ad);
+	if (rc) {
+		ipc_free_security(&sma->sem_perm);
+		return rc;
+	}
+	return 0;
+}
+
 static void selinux_sem_free_security(struct sem_array *sma)
 {
 	ipc_free_security(&sma->sem_perm);
@@ -5315,6 +5419,90 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer)
 
 #endif
 
+/* checkpoint/restore hooks */
+
+/*
+ * the c/r code will free the char* we return
+ */
+static char *selinux_context_to_str(void *security)
+{
+	struct task_security_struct *s = security;
+	int len = 0;
+	char *v = NULL;
+	int ret;
+
+	if (!s)
+		return ERR_PTR(-EINVAL);
+	ret = security_sid_to_context(s->sid, &v, &len);
+	if (ret)
+		return ERR_PTR(ret);
+	return v;
+}
+
+struct sidp {
+	u32 sid;
+};
+
+/*
+ * we just return error if the context is not valid.  We let the
+ * domain transition/type creation code construct a valid
+ * task_security (or whatever) struct with the sid.
+ */
+static void *selinux_context_from_str(char *str)
+{
+	int error;
+	u32 sid;
+	struct sidp *retv;
+
+	error = security_context_to_sid(str, strlen(str), &sid);
+	if (error)
+		return ERR_PTR(error);
+	retv = kmalloc(sizeof(*retv), GFP_KERNEL);
+	if (!retv)
+		return ERR_PTR(-ENOMEM);
+	retv->sid = sid;
+	return retv;
+}
+
+/*
+ * stored is the struct sidp allocated by selinux_context_from_str(),
+ * not the char* representation of the context; it is freed here.
+ */
+static int selinux_task_restore_context(struct cred *cred, void *stored,
+					void *f_security)
+{
+	struct task_security_struct *s = cred->security;
+	struct file_security_struct *fsec = f_security;
+	struct sidp *sidp = stored;
+	u32 newsid;
+	int error;
+
+	if (!sidp)
+		return -EINVAL;
+
+	newsid = sidp->sid;
+	kfree(sidp);
+
+	if (s->sid == newsid)
+		return 0;
+
+	/* these are not the right checks.  Will we want a
+	 * process:PROCESS_RESTORE_TRANSITION permission?
+	 */
+	error = avc_has_perm(s->sid, newsid,
+			  SECCLASS_PROCESS, PROCESS__TRANSITION, NULL);
+	if (error)
+		return error;
+
+	error = avc_has_perm(newsid, fsec->sid,
+			  SECCLASS_FILE, FILE__ENTRYPOINT, NULL);
+	if (error)
+		return error;
+	
+	s->sid = s->osid = newsid;
+	return 0;
+}
+
 static struct security_operations selinux_ops = {
 	.name =				"selinux",
 
@@ -5414,9 +5602,11 @@ static struct security_operations selinux_ops = {
 	.ipc_getsecid =			selinux_ipc_getsecid,
 
 	.msg_msg_alloc_security =	selinux_msg_msg_alloc_security,
+	.msg_msg_restore_security =	selinux_msg_msg_restore_security,
 	.msg_msg_free_security =	selinux_msg_msg_free_security,
 
 	.msg_queue_alloc_security =	selinux_msg_queue_alloc_security,
+	.msg_queue_restore_security =	selinux_msg_queue_restore_security,
 	.msg_queue_free_security =	selinux_msg_queue_free_security,
 	.msg_queue_associate =		selinux_msg_queue_associate,
 	.msg_queue_msgctl =		selinux_msg_queue_msgctl,
@@ -5424,12 +5614,14 @@ static struct security_operations selinux_ops = {
 	.msg_queue_msgrcv =		selinux_msg_queue_msgrcv,
 
 	.shm_alloc_security =		selinux_shm_alloc_security,
+	.shm_restore_security =		selinux_shm_restore_security,
 	.shm_free_security =		selinux_shm_free_security,
 	.shm_associate =		selinux_shm_associate,
 	.shm_shmctl =			selinux_shm_shmctl,
 	.shm_shmat =			selinux_shm_shmat,
 
 	.sem_alloc_security =		selinux_sem_alloc_security,
+	.sem_restore_security =		selinux_sem_restore_security,
 	.sem_free_security =		selinux_sem_free_security,
 	.sem_associate =		selinux_sem_associate,
 	.sem_semctl =			selinux_sem_semctl,
@@ -5499,6 +5691,10 @@ static struct security_operations selinux_ops = {
 	.audit_rule_match =		selinux_audit_rule_match,
 	.audit_rule_free =		selinux_audit_rule_free,
 #endif
+
+	.context_to_str =		selinux_context_to_str,
+	.context_from_str =		selinux_context_from_str,
+	.task_restore_context =		selinux_task_restore_context,
 };
 
 static __init int selinux_init(void)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 98b3195..dfc0f7a 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1608,6 +1608,32 @@ static int smack_msg_msg_alloc_security(struct msg_msg *msg)
 }
 
 /**
+ * smack_msg_msg_restore_security - Set the security blob for msg_msg
+ * @msg: the object
+ * @stored: the checkpointed label
+ *
+ * Returns 0 on success, -EINVAL if the import fails, -EPERM if not permitted
+ */
+static int smack_msg_msg_restore_security(struct msg_msg *msg,
+					void *stored)
+{
+	struct kern_ipc_perm *isp = &sma->sem_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	msg->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		msg->security = str;
+	}
+	return 0;
+	return 0;
+}
+
+/**
  * smack_msg_msg_free_security - Clear the security blob for msg_msg
  * @msg: the object
  *
@@ -1644,6 +1670,30 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
 }
 
 /**
+ * smack_shm_restore_security - restore the security blob for shm
+ * @shp: the object
+ * @stored: the checkpointed label
+ *
+ * Returns 0 on success, -EINVAL if the import fails, -EPERM if not permitted
+ */
+static int smack_shm_restore_security(struct shmid_kernel *shp, void *stored)
+{
+	struct kern_ipc_perm *isp = &shp->shm_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	isp->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		isp->security = str;
+	}
+	return 0;
+}
+
+/**
  * smack_shm_free_security - Clear the security blob for shm
  * @shp: the object
  *
@@ -1753,6 +1803,31 @@ static int smack_sem_alloc_security(struct sem_array *sma)
 }
 
 /**
+ * smack_sem_restore_security - Set the security blob for sem
+ * @sma: the object
+ * @stored: the label stored in checkpoint image
+ *
+ * Returns 0 on success, -EINVAL if the import fails, -EPERM if not permitted
+ */
+static int smack_sem_restore_security(struct sem_array *sma,
+				void *stored)
+{
+	struct kern_ipc_perm *isp = &sma->sem_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	isp->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		isp->security = str;
+	}
+	return 0;
+}
+
+/**
  * smack_sem_free_security - Clear the security blob for sem
  * @sma: the object
  *
@@ -1857,6 +1932,31 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
 }
 
 /**
+ * smack_msg_queue_restore_security - Set the security blob for msg
+ * @msq: the object
+ * @stored: the stored label
+ *
+ * Returns 0 on success, -EINVAL if the import fails, -EPERM if not permitted
+ */
+static int smack_msg_queue_restore_security(struct msg_queue *msq,
+					void *stored)
+{
+	struct kern_ipc_perm *kisp = &msq->q_perm;
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	kisp->security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		kisp->security = str;
+	}
+	return 0;
+}
+
+/**
  * smack_msg_free_security - Clear the security blob for msg
  * @msq: the object
  *
@@ -2823,6 +2923,33 @@ static void smack_release_secctx(char *secdata, u32 seclen)
 {
 }
 
+/* checkpoint/restore hooks */
+static char *smack_context_to_str(void *security)
+{
+	return kstrdup((char *)security, GFP_KERNEL);
+}
+
+static void *smack_context_from_str(char *str)
+{
+	char *newsmack = smk_import(str, 0);
+
+	if (newsmack == NULL)
+		return ERR_PTR(-EINVAL);
+
+	return newsmack;
+}
+
+static int smack_task_restore_context(struct cred *cred, void *stored,
+					void *f_security)
+{
+	if (cred->security != stored) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		cred->security = stored;
+	}
+	return 0;
+}
+
 struct security_operations smack_ops = {
 	.name =				"smack",
 
@@ -2902,9 +3029,11 @@ struct security_operations smack_ops = {
 	.ipc_getsecid =			smack_ipc_getsecid,
 
 	.msg_msg_alloc_security = 	smack_msg_msg_alloc_security,
+	.msg_msg_restore_security = 	smack_msg_msg_restore_security,
 	.msg_msg_free_security = 	smack_msg_msg_free_security,
 
 	.msg_queue_alloc_security = 	smack_msg_queue_alloc_security,
+	.msg_queue_restore_security = 	smack_msg_queue_restore_security,
 	.msg_queue_free_security = 	smack_msg_queue_free_security,
 	.msg_queue_associate = 		smack_msg_queue_associate,
 	.msg_queue_msgctl = 		smack_msg_queue_msgctl,
@@ -2912,12 +3041,14 @@ struct security_operations smack_ops = {
 	.msg_queue_msgrcv = 		smack_msg_queue_msgrcv,
 
 	.shm_alloc_security = 		smack_shm_alloc_security,
+	.shm_restore_security = 	smack_shm_restore_security,
 	.shm_free_security = 		smack_shm_free_security,
 	.shm_associate = 		smack_shm_associate,
 	.shm_shmctl = 			smack_shm_shmctl,
 	.shm_shmat = 			smack_shm_shmat,
 
 	.sem_alloc_security = 		smack_sem_alloc_security,
+	.sem_restore_security = 	smack_sem_restore_security,
 	.sem_free_security = 		smack_sem_free_security,
 	.sem_associate = 		smack_sem_associate,
 	.sem_semctl = 			smack_sem_semctl,
@@ -2964,6 +3095,10 @@ struct security_operations smack_ops = {
 	.secid_to_secctx = 		smack_secid_to_secctx,
 	.secctx_to_secid = 		smack_secctx_to_secid,
 	.release_secctx = 		smack_release_secctx,
+
+	.context_to_str =		smack_context_to_str,
+	.context_from_str =		smack_context_from_str,
+	.task_restore_context =		smack_task_restore_context,
 };
 
 
-- 
1.6.1


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 10/10] cr: lsm: restore file->f_security
  2009-06-10  1:44 [PATCH 01/10] cred: #include init.h in cred.h Serge E. Hallyn
@ 2009-06-10  1:47     ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:47 UTC (permalink / raw)
  To: Linux Containers; +Cc: David Howells, SELinux, Alexey Dobriyan, Andrew Morgan

file->f_cred is the cred of the task which opened it.  file->f_security
can be separately set by the LSM.  Checkpoint the file->f_security,
and at restart ask the LSM, using security_file_restore, based on the current
task's context and the checkpointed f_security, which f_security to apply (or
whether to refuse the restart altogether).

For Smack, accept the checkpointed label if the restarting task has
CAP_MAC_ADMIN.

For SELinux, I currently ignore the checkpointed label and call
file_alloc_security().  Do we want to have 'restore' permission for
class file?

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/files.c             |   33 +++++++++++++++++++++++++++++++++
 include/linux/checkpoint_hdr.h |    1 +
 include/linux/security.h       |   13 +++++++++++++
 security/capability.c          |    6 ++++++
 security/security.c            |    5 +++++
 security/selinux/hooks.c       |   11 +++++++++++
 security/smack/smack_lsm.c     |   26 +++++++++++++++++++++++++-
 7 files changed, 94 insertions(+), 1 deletions(-)

diff --git a/checkpoint/files.c b/checkpoint/files.c
index 5be7d1b..36c7f35 100644
--- a/checkpoint/files.c
+++ b/checkpoint/files.c
@@ -19,6 +19,7 @@
 #include <linux/fsnotify.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/syscalls.h>
+#include <linux/security.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
@@ -155,6 +156,14 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 	if (h->f_credref < 0)
 		return h->f_credref;
 
+#ifdef CONFIG_SECURITY
+	if (file->f_security) {
+		h->f_secref = checkpoint_obj(ctx, file->f_security, CKPT_OBJ_SEC);
+		if (h->f_secref < 0)
+			return h->f_secref;
+	} else
+		h->f_secref = -1;
+#endif
 	/* FIX: need also file->f_owner, etc */
 
 	return 0;
@@ -437,6 +446,26 @@ static int attach_file(struct file *file)
 	return fd;
 }
 
+#ifdef CONFIG_SECURITY
+int restore_file_security(struct ckpt_ctx *ctx, struct file *file,
+			   struct ckpt_hdr_file *h)
+{
+	void *security = NULL;
+
+	if (h->f_secref != -1)
+		security = ckpt_obj_fetch(ctx, h->f_secref, CKPT_OBJ_SEC);
+	if (IS_ERR(security))
+		return PTR_ERR(security);
+	return security_file_restore(file, security);
+}
+#else
+static inline int restore_file_security(struct ckpt_ctx *ctx,
+		struct file *file, struct ckpt_hdr_file *h)
+{
+	return 0;
+}
+#endif
+
 #define CKPT_SETFL_MASK  \
 	(O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
 
@@ -455,6 +484,10 @@ int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 	put_cred(file->f_cred);
 	file->f_cred = get_cred(cred);
 
+	ret = restore_file_security(ctx, file, h);
+	if (ret < 0)
+		goto out;
+
 	/* safe to set 1st arg (fd) to 0, as command is F_SETFL */
 	ret = vfs_fcntl(0, F_SETFL, h->f_flags & CKPT_SETFL_MASK, file);
 	if (ret < 0)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index a447b5a..1722826 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -320,6 +320,7 @@ struct ckpt_hdr_file {
 	__s32 f_credref;
 	__u64 f_pos;
 	__u64 f_version;
+	__s32 f_secref;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_file_generic {
diff --git a/include/linux/security.h b/include/linux/security.h
index 5625553..936a2a1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -554,6 +554,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	created.
  *	@file contains the file structure to secure.
  *	Return 0 if the hook is successful and permission is granted.
+ * @file_restore_security:
+ *	Allocate and attach a security structure to the file->f_security field
+ *	during sys_restart().
+ *	@file contains the file structure to secure.
+ *	@stored contains the checkpointed context.
+ *	Return 0 if the hook is successful and permission is granted.
  * @file_free_security:
  *	Deallocate and free any security structures stored in file->f_security.
  *	@file contains the file structure being modified.
@@ -1494,6 +1500,7 @@ struct security_operations {
 
 	int (*file_permission) (struct file *file, int mask);
 	int (*file_alloc_security) (struct file *file);
+	int (*file_restore_security) (struct file *file, void *stored);
 	void (*file_free_security) (struct file *file);
 	int (*file_ioctl) (struct file *file, unsigned int cmd,
 			   unsigned long arg);
@@ -1761,6 +1768,7 @@ int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer
 void security_inode_getsecid(const struct inode *inode, u32 *secid);
 int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
+int security_file_restore(struct file *file, void *stored);
 void security_file_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int security_file_mmap(struct file *file, unsigned long reqprot,
@@ -2250,6 +2258,11 @@ static inline int security_file_alloc(struct file *file)
 	return 0;
 }
 
+static inline int security_file_restore(struct file *file, void *stored)
+{
+	return 0;
+}
+
 static inline void security_file_free(struct file *file)
 { }
 
diff --git a/security/capability.c b/security/capability.c
index 4e586a7..f0a3c65 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -320,6 +320,11 @@ static int cap_file_alloc_security(struct file *file)
 	return 0;
 }
 
+static int cap_file_restore_security(struct file *file, void *stored)
+{
+	return 0;
+}
+
 static void cap_file_free_security(struct file *file)
 {
 }
@@ -976,6 +981,7 @@ void security_fixup_ops(struct security_operations *ops)
 #endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
+	set_to_cap_if_null(ops, file_restore_security);
 	set_to_cap_if_null(ops, file_free_security);
 	set_to_cap_if_null(ops, file_ioctl);
 	set_to_cap_if_null(ops, file_mmap);
diff --git a/security/security.c b/security/security.c
index 26e7989..0006e9c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -628,6 +628,11 @@ int security_file_alloc(struct file *file)
 	return security_ops->file_alloc_security(file);
 }
 
+int security_file_restore(struct file *file, void *stored)
+{
+	return security_ops->file_restore_security(file, stored);
+}
+
 void security_file_free(struct file *file)
 {
 	security_ops->file_free_security(file);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ba24808..4b8c636 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2973,6 +2973,16 @@ static int selinux_file_alloc_security(struct file *file)
 	return file_alloc_security(file);
 }
 
+static int selinux_file_restore_security(struct file *file,
+				void *stored)
+{
+	/*
+	 * TODO - actually restore from 'stored' subject to authorization
+	 */
+	kfree(stored);
+	return file_alloc_security(file);
+}
+
 static void selinux_file_free_security(struct file *file)
 {
 	file_free_security(file);
@@ -5566,6 +5576,7 @@ static struct security_operations selinux_ops = {
 
 	.file_permission =		selinux_file_permission,
 	.file_alloc_security =		selinux_file_alloc_security,
+	.file_restore_security =	selinux_file_restore_security,
 	.file_free_security =		selinux_file_free_security,
 	.file_ioctl =			selinux_file_ioctl,
 	.file_mmap =			selinux_file_mmap,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index dfc0f7a..7bcdfde 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -831,6 +831,30 @@ static int smack_file_alloc_security(struct file *file)
 }
 
 /**
+ * smack_file_restore_security - assign a file security blob
+ * @file: the object
+ * @stored: the label stored in the checkpoint file
+ *
+ * Returns 0 on success, -EINVAL if the import fails, -EPERM if not permitted
+ */
+static int smack_file_restore_security(struct file *file, void *stored)
+{
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	file->f_security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		file->f_security = str;
+	}
+
+	return 0;
+}
+
+/**
  * smack_file_free_security - clear a file security blob
  * @file: the object
  *
@@ -1630,7 +1654,6 @@ static int smack_msg_msg_restore_security(struct msg_msg *msg,
 		msg->security = str;
 	}
 	return 0;
-	return 0;
 }
 
 /**
@@ -2996,6 +3019,7 @@ struct security_operations smack_ops = {
 
 	.file_permission = 		smack_file_permission,
 	.file_alloc_security = 		smack_file_alloc_security,
+	.file_restore_security =	smack_file_restore_security,
 	.file_free_security = 		smack_file_free_security,
 	.file_ioctl = 			smack_file_ioctl,
 	.file_lock = 			smack_file_lock,
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 10/10] cr: lsm: restore file->f_security
@ 2009-06-10  1:47     ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10  1:47 UTC (permalink / raw)
  To: Linux Containers
  Cc: Oren Laadan, David Howells, Alexey Dobriyan, Andrew Morgan, SELinux

file->f_cred is the cred of the task which opened it.  file->f_security
can be separately set by the LSM.  Checkpoint the file->f_security,
and at restart ask the LSM, using security_file_restore, based on the current
task's context and the checkpointed f_security, which f_security to apply (or
whether to refuse the restart altogether).

For Smack, accept the checkpointed label if the restarting task has
CAP_MAC_ADMIN.

For SELinux, I currently ignore the checkpointed label and call
file_alloc_security().  Do we want to have 'restore' permission for
class file?

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
 checkpoint/files.c             |   33 +++++++++++++++++++++++++++++++++
 include/linux/checkpoint_hdr.h |    1 +
 include/linux/security.h       |   13 +++++++++++++
 security/capability.c          |    6 ++++++
 security/security.c            |    5 +++++
 security/selinux/hooks.c       |   11 +++++++++++
 security/smack/smack_lsm.c     |   26 +++++++++++++++++++++++++-
 7 files changed, 94 insertions(+), 1 deletions(-)

diff --git a/checkpoint/files.c b/checkpoint/files.c
index 5be7d1b..36c7f35 100644
--- a/checkpoint/files.c
+++ b/checkpoint/files.c
@@ -19,6 +19,7 @@
 #include <linux/fsnotify.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/syscalls.h>
+#include <linux/security.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
@@ -155,6 +156,14 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 	if (h->f_credref < 0)
 		return h->f_credref;
 
+#ifdef CONFIG_SECURITY
+	if (file->f_security) {
+		h->f_secref = checkpoint_obj(ctx, file->f_security, CKPT_OBJ_SEC);
+		if (h->f_secref < 0)
+			return h->f_secref;
+	} else
+		h->f_secref = -1;
+#endif
 	/* FIX: need also file->f_owner, etc */
 
 	return 0;
@@ -437,6 +446,26 @@ static int attach_file(struct file *file)
 	return fd;
 }
 
+#ifdef CONFIG_SECURITY
+int restore_file_security(struct ckpt_ctx *ctx, struct file *file,
+			   struct ckpt_hdr_file *h)
+{
+	void *security = NULL;
+
+	if (h->f_secref != -1)
+		security = ckpt_obj_fetch(ctx, h->f_secref, CKPT_OBJ_SEC);
+	if (IS_ERR(security))
+		return PTR_ERR(security);
+	return security_file_restore(file, security);
+}
+#else
+static inline int restore_file_security(struct ckpt_ctx *ctx,
+		struct file *file, struct ckpt_hdr_file *h)
+{
+	return 0;
+}
+#endif
+
 #define CKPT_SETFL_MASK  \
 	(O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
 
@@ -455,6 +484,10 @@ int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 	put_cred(file->f_cred);
 	file->f_cred = get_cred(cred);
 
+	ret = restore_file_security(ctx, file, h);
+	if (ret < 0)
+		goto out;
+
 	/* safe to set 1st arg (fd) to 0, as command is F_SETFL */
 	ret = vfs_fcntl(0, F_SETFL, h->f_flags & CKPT_SETFL_MASK, file);
 	if (ret < 0)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index a447b5a..1722826 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -320,6 +320,7 @@ struct ckpt_hdr_file {
 	__s32 f_credref;
 	__u64 f_pos;
 	__u64 f_version;
+	__s32 f_secref;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_file_generic {
diff --git a/include/linux/security.h b/include/linux/security.h
index 5625553..936a2a1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -554,6 +554,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	created.
  *	@file contains the file structure to secure.
  *	Return 0 if the hook is successful and permission is granted.
+ * @file_restore_security:
+ *	Allocate and attach a security structure to the file->f_security field
+ *	during sys_restart().
+ *	@file contains the file structure to secure.
+ *	@stored contains the checkpointed context.
+ *	Return 0 if the hook is successful and permission is granted.
  * @file_free_security:
  *	Deallocate and free any security structures stored in file->f_security.
  *	@file contains the file structure being modified.
@@ -1494,6 +1500,7 @@ struct security_operations {
 
 	int (*file_permission) (struct file *file, int mask);
 	int (*file_alloc_security) (struct file *file);
+	int (*file_restore_security) (struct file *file, void *stored);
 	void (*file_free_security) (struct file *file);
 	int (*file_ioctl) (struct file *file, unsigned int cmd,
 			   unsigned long arg);
@@ -1761,6 +1768,7 @@ int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer
 void security_inode_getsecid(const struct inode *inode, u32 *secid);
 int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
+int security_file_restore(struct file *file, void *stored);
 void security_file_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int security_file_mmap(struct file *file, unsigned long reqprot,
@@ -2250,6 +2258,11 @@ static inline int security_file_alloc(struct file *file)
 	return 0;
 }
 
+static inline int security_file_restore(struct file *file, void *stored)
+{
+	return 0;
+}
+
 static inline void security_file_free(struct file *file)
 { }
 
diff --git a/security/capability.c b/security/capability.c
index 4e586a7..f0a3c65 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -320,6 +320,11 @@ static int cap_file_alloc_security(struct file *file)
 	return 0;
 }
 
+static int cap_file_restore_security(struct file *file, void *stored)
+{
+	return 0;
+}
+
 static void cap_file_free_security(struct file *file)
 {
 }
@@ -976,6 +981,7 @@ void security_fixup_ops(struct security_operations *ops)
 #endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
+	set_to_cap_if_null(ops, file_restore_security);
 	set_to_cap_if_null(ops, file_free_security);
 	set_to_cap_if_null(ops, file_ioctl);
 	set_to_cap_if_null(ops, file_mmap);
diff --git a/security/security.c b/security/security.c
index 26e7989..0006e9c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -628,6 +628,11 @@ int security_file_alloc(struct file *file)
 	return security_ops->file_alloc_security(file);
 }
 
+int security_file_restore(struct file *file, void *stored)
+{
+	return security_ops->file_restore_security(file, stored);
+}
+
 void security_file_free(struct file *file)
 {
 	security_ops->file_free_security(file);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ba24808..4b8c636 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2973,6 +2973,16 @@ static int selinux_file_alloc_security(struct file *file)
 	return file_alloc_security(file);
 }
 
+static int selinux_file_restore_security(struct file *file,
+				void *stored)
+{
+	/*
+	 * TODO - actually restore from 'stored' subject to authorization
+	 */
+	kfree(stored);
+	return file_alloc_security(file);
+}
+
 static void selinux_file_free_security(struct file *file)
 {
 	file_free_security(file);
@@ -5566,6 +5576,7 @@ static struct security_operations selinux_ops = {
 
 	.file_permission =		selinux_file_permission,
 	.file_alloc_security =		selinux_file_alloc_security,
+	.file_restore_security =	selinux_file_restore_security,
 	.file_free_security =		selinux_file_free_security,
 	.file_ioctl =			selinux_file_ioctl,
 	.file_mmap =			selinux_file_mmap,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index dfc0f7a..7bcdfde 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -831,6 +831,30 @@ static int smack_file_alloc_security(struct file *file)
 }
 
 /**
+ * smack_file_restore_security - assign a file security blob
+ * @file: the object
+ * @stored: the label stored in the checkpoint file
+ *
+ * Returns 0 on success, -EINVAL if the import fails, -EPERM if not permitted
+ */
+static int smack_file_restore_security(struct file *file, void *stored)
+{
+	char *str = smk_import(stored, 0);
+
+	if (str == NULL)
+		return -EINVAL;
+
+	file->f_security = current_security();
+	if (current_security() != str) {
+		if (!capable(CAP_MAC_ADMIN))
+			return -EPERM;
+		file->f_security = str;
+	}
+
+	return 0;
+}
+
+/**
  * smack_file_free_security - clear a file security blob
  * @file: the object
  *
@@ -1630,7 +1654,6 @@ static int smack_msg_msg_restore_security(struct msg_msg *msg,
 		msg->security = str;
 	}
 	return 0;
-	return 0;
 }
 
 /**
@@ -2996,6 +3019,7 @@ struct security_operations smack_ops = {
 
 	.file_permission = 		smack_file_permission,
 	.file_alloc_security = 		smack_file_alloc_security,
+	.file_restore_security =	smack_file_restore_security,
 	.file_free_security = 		smack_file_free_security,
 	.file_ioctl = 			smack_file_ioctl,
 	.file_lock = 			smack_file_lock,
-- 
1.6.1


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] cr: restore LSM credentials
  2009-06-10  1:46     ` Serge E. Hallyn
@ 2009-06-10  3:24         ` Casey Schaufler
  -1 siblings, 0 replies; 26+ messages in thread
From: Casey Schaufler @ 2009-06-10  3:24 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: David Howells, SELinux, Linux Containers, Alexey Dobriyan, Andrew Morgan

Serge E. Hallyn wrote:
> Checkpoint and restore task and ipc struct ->security info.
> (files->f_security yet to be done).
>
> LSM contexts (a string representation of obj->security) are
> checkpointed as shared objects before any object referencing
> it.  The object's checkpoint header struct has a reference
> (h->sec_ref) to the shared object.  A NULL ->security is indicated
> by h->sec_ref = -1.
>
> At checkpoint time, for each obj->security to be checkpointed,
> the LSM will be asked (once) to convert it to a string, in memory
> which the checkpoint subsystem will kfree.  At restart time,
> the LSM will first return some meaningful token given the
> checkpointed string.  That token will be passed to per-object-type
> restore functions (task_restore_context(), shm_restore_security(),
> etc) where the LSM can determine based on the object type, the
> caller, and the token, whether to allow the object restore, and
> what value to actually assign to ->security.  In smack, the
> token is the actual imported label.  In SELinux, it is a temporary
> pointer to the sid which the checkpointed context referred to.
>
> In smack, the checkpointed labels are used for both tasks and
> ipc objects so long as the task calling sys_restart() has
> CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
> from current_security(), -EPERM is returned.
>
> The basics of SELinux support are there (enough to demonstrate working
> c/r with SELinux enforcing), but there will need to be new object
> permissions for restore, so the precise nature of those needs to be
> discussed.  For instance, do we want to define process:restore
> and ipc_msg_msg:restore, in which case
>         allow root_t user_t:process restore
> would mean that root_t may restart a task and label it user_t?
>
> Since we are potentially skipping several allowed domain transitions
> (resulting in an illegal short-cut domain transition or type creation),
> I have a fear that the only sane way to proceed would be to have
> one all-powerful domain, checkpoint_restore_t, which can effectively
> transition to any domain it wants to by (ab)using the checkpoint
> image.
>
> Or, perhaps we can define intermediate domains...  So if we want
> user_t to be able to restart a server of type X_t, then we create
> a X_restore_t type, allow user_t to transition to it using a
> program which does sys_restart(), which in turn may transition to
> X_t?
>
> Obviously this needs discussion.
>
> Tomoyo has not been updated or tested.  Given its path-based
> domain name model, I'm not sure what the tomoyo maintainers
> would prefer - that the restart program be reflected in the
> domain name, or that the original domain name be restored.
>
> This is the first posting of this patch.  There are testcases
> in git://git.sr71.net/~hallyn/cr_tests.git , in particular
> under (the slightly mis-named) cr_tests/userns/ directory.
> All pass fine with all LSMs (except Tomoyo, not tested).
>
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  checkpoint/objhash.c           |   56 ++++++++++++
>  include/linux/checkpoint_hdr.h |   15 +++
>  include/linux/security.h       |  105 +++++++++++++++++++++
>  ipc/checkpoint.c               |   19 +++--
>  ipc/checkpoint_msg.c           |   30 ++++++-
>  ipc/checkpoint_sem.c           |   12 +++-
>  ipc/checkpoint_shm.c           |   12 +++-
>  ipc/util.h                     |    3 +-
>  kernel/cred.c                  |   29 ++++++-
>  security/capability.c          |   47 ++++++++++
>  security/security.c            |   39 ++++++++
>  security/selinux/hooks.c       |  196 ++++++++++++++++++++++++++++++++++++++++
>  security/smack/smack_lsm.c     |  135 +++++++++++++++++++++++++++
>  13 files changed, 686 insertions(+), 12 deletions(-)
>   

> ...

> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index 98b3195..dfc0f7a 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -1608,6 +1608,32 @@ static int smack_msg_msg_alloc_security(struct msg_msg *msg)
>  }
>  
>  /**
> + * smack_msg_msg_restore_security - Set the security blob for msg_msg
> + * @msg: the object
> + * @stored: the checkpointed label
> + *
> + * Returns 0
>   

Comment ought to reflect the actual behavior.

> + */
> +static int smack_msg_msg_restore_security(struct msg_msg *msg,
> +					void *stored)
> +{
> +	struct kern_ipc_perm *isp = &sma->sem_perm;
> +	char *str = smk_import(stored, 0);
>   

To be really nit-picky, I'd prefer a variable name that indicates
it's a Smack label rather than just any old character string. Perhaps

    char *smack = smk_import(stored, 0);

> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	msg->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		msg->security = str;
> +	}
> +	return 0;
> +	return 0;
>   

Returning once is sufficient.

> +}
> +
> +/**
>   * smack_msg_msg_free_security - Clear the security blob for msg_msg
>   * @msg: the object
>   *
> @@ -1644,6 +1670,30 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
>  }
>  
>  /**
> + * smack_shm_restore_security - retore the security blob for shm
> + * @shp: the object
> + * @stored: the checkpointed label
> + *
> + * Returns 0
>   

Previous comment applies here as well.

> + */
> +static int smack_shm_restore_security(struct shmid_kernel *shp, void *stored)
> +{
> +	struct kern_ipc_perm *isp = &shp->shm_perm;
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	isp->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		isp->security = str;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * smack_shm_free_security - Clear the security blob for shm
>   * @shp: the object
>   *
> @@ -1753,6 +1803,31 @@ static int smack_sem_alloc_security(struct sem_array *sma)
>  }
>  
>  /**
> + * smack_sem_restore_security - Set the security blob for sem
> + * @sma: the object
> + * @stored: the label stored in checkpoint image
> + *
> + * Returns 0
>   

Or an error if the label is bad, as above.

> + */
> +static int smack_sem_restore_security(struct sem_array *sma,
> +				void *stored)
> +{
> +	struct kern_ipc_perm *isp = &sma->sem_perm;
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	isp->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		isp->security = str;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * smack_sem_free_security - Clear the security blob for sem
>   * @sma: the object
>   *
> @@ -1857,6 +1932,31 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
>  }
>  
>  /**
> + * smack_msg_restore_security - Set the security blob for msg
> + * @msq: the object
> + * @stored: the stored label
> + *
> + * Returns 0
>   

And again ...

> + */
> +static int smack_msg_queue_restore_security(struct msg_queue *msq,
> +					void *stored)
> +{
> +	struct kern_ipc_perm *kisp = &msq->q_perm;
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	kisp->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		kisp->security = str;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * smack_msg_free_security - Clear the security blob for msg
>   * @msq: the object
>   *
> @@ -2823,6 +2923,33 @@ static void smack_release_secctx(char *secdata, u32 seclen)
>  {
>  }
>  
> +/* checkpoint/restore hooks */
> +static char *smack_context_to_str(void *security)
> +{
> +	return kstrdup((char *)security, GFP_KERNEL);
> +}
> +
> +static void *smack_context_from_str(char *str)
> +{
> +	char *newsmack = smk_import(str, 0);
> +
> +	if (newsmack == NULL)
> +		return ERR_PTR(-EINVAL);
> +
> +	return newsmack;
> +}
> +
> +static int smack_task_restore_context(struct cred *cred, void *stored,
> +					void *f_security)
> +{
> +	if (cred->security != stored) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		cred->security = stored;
> +	}
> +	return 0;
> +}
> +
>  struct security_operations smack_ops = {
>  	.name =				"smack",
>  
> @@ -2902,9 +3029,11 @@ struct security_operations smack_ops = {
>  	.ipc_getsecid =			smack_ipc_getsecid,
>  
>  	.msg_msg_alloc_security = 	smack_msg_msg_alloc_security,
> +	.msg_msg_restore_security = 	smack_msg_msg_restore_security,
>  	.msg_msg_free_security = 	smack_msg_msg_free_security,
>  
>  	.msg_queue_alloc_security = 	smack_msg_queue_alloc_security,
> +	.msg_queue_restore_security = 	smack_msg_queue_restore_security,
>  	.msg_queue_free_security = 	smack_msg_queue_free_security,
>  	.msg_queue_associate = 		smack_msg_queue_associate,
>  	.msg_queue_msgctl = 		smack_msg_queue_msgctl,
> @@ -2912,12 +3041,14 @@ struct security_operations smack_ops = {
>  	.msg_queue_msgrcv = 		smack_msg_queue_msgrcv,
>  
>  	.shm_alloc_security = 		smack_shm_alloc_security,
> +	.shm_restore_security = 	smack_shm_restore_security,
>  	.shm_free_security = 		smack_shm_free_security,
>  	.shm_associate = 		smack_shm_associate,
>  	.shm_shmctl = 			smack_shm_shmctl,
>  	.shm_shmat = 			smack_shm_shmat,
>  
>  	.sem_alloc_security = 		smack_sem_alloc_security,
> +	.sem_restore_security = 	smack_sem_restore_security,
>  	.sem_free_security = 		smack_sem_free_security,
>  	.sem_associate = 		smack_sem_associate,
>  	.sem_semctl = 			smack_sem_semctl,
> @@ -2964,6 +3095,10 @@ struct security_operations smack_ops = {
>  	.secid_to_secctx = 		smack_secid_to_secctx,
>  	.secctx_to_secid = 		smack_secctx_to_secid,
>  	.release_secctx = 		smack_release_secctx,
> +
> +	.context_to_str =		smack_context_to_str,
> +	.context_from_str =		smack_context_from_str,
> +	.task_restore_context =		smack_task_restore_context,
>  };
>  
>  
>   

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] cr: restore LSM credentials
@ 2009-06-10  3:24         ` Casey Schaufler
  0 siblings, 0 replies; 26+ messages in thread
From: Casey Schaufler @ 2009-06-10  3:24 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Linux Containers, Oren Laadan, David Howells, Alexey Dobriyan,
	Andrew Morgan, SELinux

Serge E. Hallyn wrote:
> Checkpoint and restore task and ipc struct ->security info.
> (files->f_security yet to be done).
>
> LSM contexts (a string representation of obj->security) are
> checkpointed as shared objects before any object referencing
> it.  The object's checkpoint header struct has a reference
> (h->sec_ref) to the shared object.  A NULL ->security is indicated
> by h->sec_ref = -1.
>
> At checkpoint time, for each obj->security to be checkpointed,
> the LSM will be asked (once) to convert it to a string, in memory
> which the checkpoint subsystem will kfree.  At restart time,
> the LSM will first return some meaningful token given the
> checkpointed string.  That token will be passed to per-object-type
> restore functions (task_restore_context(), shm_restore_security(),
> etc) where the LSM can determine based on the object type, the
> caller, and the token, whether to allow the object restore, and
> what value to actually assign to ->security.  In smack, the
> token is the actual imported label.  In SELinux, it is a temporary
> pointer to the sid which the checkpointed context referred to.
>
> In smack, the checkpointed labels are used for both tasks and
> ipc objects so long as the task calling sys_restart() has
> CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
> from current_security(), -EPERM is returned.
>
> The basics of SELinux support are there (enough to demonstrate working
> c/r with SELinux enforcing), but there will need to be new object
> permissions for restore, so the precise nature of those needs to be
> discussed.  For instance, do we want to define process:restore
> and ipc_msg_msg:restore, in which case
>         allow root_t user_t:process restore
> would mean that root_t may restart a task and label it user_t?
>
> Since we are potentially skipping several allowed domain transitions
> (resulting in an illegal short-cut domain transition or type creation),
> I have a fear that the only sane way to proceed would be to have
> one all-powerful domain, checkpoint_restore_t, which can effectively
> transition to any domain it wants to by (ab)using the checkpoint
> image.
>
> Or, perhaps we can define intermediate domains...  So if we want
> user_t to be able to restart a server of type X_t, then we create
> a X_restore_t type, allow user_t to transition to it using a
> program which does sys_restart(), which in turn may transition to
> X_t?
>
> Obviously this needs discussion.
>
> Tomoyo has not been updated or tested.  Given its path-based
> domain name model, I'm not sure what the tomoyo maintainers
> would prefer - that the restart program be reflected in the
> domain name, or that the original domain name be restored.
>
> This is the first posting of this patch.  There are testcases
> in git://git.sr71.net/~hallyn/cr_tests.git , in particular
> under (the slightly mis-named) cr_tests/userns/ directory.
> All pass fine with all LSMS (except Tomoyo, not tested).
>
> Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
> ---
>  checkpoint/objhash.c           |   56 ++++++++++++
>  include/linux/checkpoint_hdr.h |   15 +++
>  include/linux/security.h       |  105 +++++++++++++++++++++
>  ipc/checkpoint.c               |   19 +++--
>  ipc/checkpoint_msg.c           |   30 ++++++-
>  ipc/checkpoint_sem.c           |   12 +++-
>  ipc/checkpoint_shm.c           |   12 +++-
>  ipc/util.h                     |    3 +-
>  kernel/cred.c                  |   29 ++++++-
>  security/capability.c          |   47 ++++++++++
>  security/security.c            |   39 ++++++++
>  security/selinux/hooks.c       |  196 ++++++++++++++++++++++++++++++++++++++++
>  security/smack/smack_lsm.c     |  135 +++++++++++++++++++++++++++
>  13 files changed, 686 insertions(+), 12 deletions(-)
>   

> ...

> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index 98b3195..dfc0f7a 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -1608,6 +1608,32 @@ static int smack_msg_msg_alloc_security(struct msg_msg *msg)
>  }
>  
>  /**
> + * smack_msg_msg_restore_security - Set the security blob for msg_msg
> + * @msg: the object
> + * @stored: the checkpointed label
> + *
> + * Returns 0
>   

Comment ought to reflect the actual behavior.

> + */
> +static int smack_msg_msg_restore_security(struct msg_msg *msg,
> +					void *stored)
> +{
> +	struct kern_ipc_perm *isp = &sma->sem_perm;
> +	char *str = smk_import(stored, 0);
>   

To be really nit-picky, I'd prefer a variable name that indicates
it's a Smack label rather than just any old character string. Perhaps

    char *smack = smk_import(stored, 0);

> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	msg->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		msg->security = str;
> +	}
> +	return 0;
> +	return 0;
>   

Returning once is sufficient.

> +}
> +
> +/**
>   * smack_msg_msg_free_security - Clear the security blob for msg_msg
>   * @msg: the object
>   *
> @@ -1644,6 +1670,30 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
>  }
>  
>  /**
> + * smack_shm_restore_security - restore the security blob for shm
> + * @shp: the object
> + * @stored: the checkpointed label
> + *
> + * Returns 0
>   

Previous comment applies here as well.

> + */
> +static int smack_shm_restore_security(struct shmid_kernel *shp, void *stored)
> +{
> +	struct kern_ipc_perm *isp = &shp->shm_perm;
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	isp->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		isp->security = str;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * smack_shm_free_security - Clear the security blob for shm
>   * @shp: the object
>   *
> @@ -1753,6 +1803,31 @@ static int smack_sem_alloc_security(struct sem_array *sma)
>  }
>  
>  /**
> + * smack_sem_restore_security - Set the security blob for sem
> + * @sma: the object
> + * @stored: the label stored in checkpoint image
> + *
> + * Returns 0
>   

Or an error if the label is bad, as above.

> + */
> +static int smack_sem_restore_security(struct sem_array *sma,
> +				void *stored)
> +{
> +	struct kern_ipc_perm *isp = &sma->sem_perm;
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	isp->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		isp->security = str;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * smack_sem_free_security - Clear the security blob for sem
>   * @sma: the object
>   *
> @@ -1857,6 +1932,31 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
>  }
>  
>  /**
> + * smack_msg_restore_security - Set the security blob for msg
> + * @msq: the object
> + * @stored: the stored label
> + *
> + * Returns 0
>   

And again ...

> + */
> +static int smack_msg_queue_restore_security(struct msg_queue *msq,
> +					void *stored)
> +{
> +	struct kern_ipc_perm *kisp = &msq->q_perm;
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	kisp->security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		kisp->security = str;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * smack_msg_free_security - Clear the security blob for msg
>   * @msq: the object
>   *
> @@ -2823,6 +2923,33 @@ static void smack_release_secctx(char *secdata, u32 seclen)
>  {
>  }
>  
> +/* checkpoint/restore hooks */
> +static char *smack_context_to_str(void *security)
> +{
> +	return kstrdup((char *)security, GFP_KERNEL);
> +}
> +
> +static void *smack_context_from_str(char *str)
> +{
> +	char *newsmack = smk_import(str, 0);
> +
> +	if (newsmack == NULL)
> +		return ERR_PTR(-EINVAL);
> +
> +	return newsmack;
> +}
> +
> +static int smack_task_restore_context(struct cred *cred, void *stored,
> +					void *f_security)
> +{
> +	if (cred->security != stored) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		cred->security = stored;
> +	}
> +	return 0;
> +}
> +
>  struct security_operations smack_ops = {
>  	.name =				"smack",
>  
> @@ -2902,9 +3029,11 @@ struct security_operations smack_ops = {
>  	.ipc_getsecid =			smack_ipc_getsecid,
>  
>  	.msg_msg_alloc_security = 	smack_msg_msg_alloc_security,
> +	.msg_msg_restore_security = 	smack_msg_msg_restore_security,
>  	.msg_msg_free_security = 	smack_msg_msg_free_security,
>  
>  	.msg_queue_alloc_security = 	smack_msg_queue_alloc_security,
> +	.msg_queue_restore_security = 	smack_msg_queue_restore_security,
>  	.msg_queue_free_security = 	smack_msg_queue_free_security,
>  	.msg_queue_associate = 		smack_msg_queue_associate,
>  	.msg_queue_msgctl = 		smack_msg_queue_msgctl,
> @@ -2912,12 +3041,14 @@ struct security_operations smack_ops = {
>  	.msg_queue_msgrcv = 		smack_msg_queue_msgrcv,
>  
>  	.shm_alloc_security = 		smack_shm_alloc_security,
> +	.shm_restore_security = 	smack_shm_restore_security,
>  	.shm_free_security = 		smack_shm_free_security,
>  	.shm_associate = 		smack_shm_associate,
>  	.shm_shmctl = 			smack_shm_shmctl,
>  	.shm_shmat = 			smack_shm_shmat,
>  
>  	.sem_alloc_security = 		smack_sem_alloc_security,
> +	.sem_restore_security = 	smack_sem_restore_security,
>  	.sem_free_security = 		smack_sem_free_security,
>  	.sem_associate = 		smack_sem_associate,
>  	.sem_semctl = 			smack_sem_semctl,
> @@ -2964,6 +3095,10 @@ struct security_operations smack_ops = {
>  	.secid_to_secctx = 		smack_secid_to_secctx,
>  	.secctx_to_secid = 		smack_secctx_to_secid,
>  	.release_secctx = 		smack_release_secctx,
> +
> +	.context_to_str =		smack_context_to_str,
> +	.context_from_str =		smack_context_from_str,
> +	.task_restore_context =		smack_task_restore_context,
>  };
>  
>  
>   

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] cr: lsm: restore file->f_security
  2009-06-10  1:47     ` Serge E. Hallyn
@ 2009-06-10  3:39         ` Casey Schaufler
  -1 siblings, 0 replies; 26+ messages in thread
From: Casey Schaufler @ 2009-06-10  3:39 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: David Howells, SELinux, Linux Containers, Alexey Dobriyan, Andrew Morgan

Serge E. Hallyn wrote:
> file->f_cred is the cred of the task which opened it.  file->f_security
> can be separately set by the LSM.  Checkpoint the file->f_security,
> and at restart ask the LSM, using security_file_restore, based on the current
> task's context and the checkpointed f_security, which f_security to apply (or
> whether to refuse the restart altogether).
>
> For Smack, accept the checkpointed label if the restarting task has
> CAP_MAC_ADMIN.
>
> For SELinux, I currently ignore the checkpointed label and call
> file_alloc_security().  Do we want to have 'restore' permission for
> class file?
>
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  checkpoint/files.c             |   33 +++++++++++++++++++++++++++++++++
>  include/linux/checkpoint_hdr.h |    1 +
>  include/linux/security.h       |   13 +++++++++++++
>  security/capability.c          |    6 ++++++
>  security/security.c            |    5 +++++
>  security/selinux/hooks.c       |   11 +++++++++++
>  security/smack/smack_lsm.c     |   26 +++++++++++++++++++++++++-
>  7 files changed, 94 insertions(+), 1 deletions(-)
>
>   

> ...

> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index dfc0f7a..7bcdfde 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -831,6 +831,30 @@ static int smack_file_alloc_security(struct file *file)
>  }
>  
>  /**
> + * smack_file_restore_security - assign a file security blob
> + * @file: the object
> + * @stored: the label stored in the checkpoint file
> + *
> + * Returns 0
>   

Or -EINVAL if the label is not acceptable

> + */
> +static int smack_file_restore_security(struct file *file, void *stored)
> +{
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	file->f_security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		file->f_security = str;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
>   * smack_file_free_security - clear a file security blob
>   * @file: the object
>   *
> @@ -1630,7 +1654,6 @@ static int smack_msg_msg_restore_security(struct msg_msg *msg,
>  		msg->security = str;
>  	}
>  	return 0;
> -	return 0;
>   

Looks like you caught that. Thank you

>  }
>  
>  /**
> @@ -2996,6 +3019,7 @@ struct security_operations smack_ops = {
>  
>  	.file_permission = 		smack_file_permission,
>  	.file_alloc_security = 		smack_file_alloc_security,
> +	.file_restore_security =	smack_file_restore_security,
>  	.file_free_security = 		smack_file_free_security,
>  	.file_ioctl = 			smack_file_ioctl,
>  	.file_lock = 			smack_file_lock,
>   

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] cr: lsm: restore file->f_security
@ 2009-06-10  3:39         ` Casey Schaufler
  0 siblings, 0 replies; 26+ messages in thread
From: Casey Schaufler @ 2009-06-10  3:39 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Linux Containers, Oren Laadan, David Howells, Alexey Dobriyan,
	Andrew Morgan, SELinux

Serge E. Hallyn wrote:
> file->f_cred is the cred of the task which opened it.  file->f_security
> can be separately set by the LSM.  Checkpoint the file->f_security,
> and at restart ask the LSM, using security_file_restore, based on the current
> task's context and the checkpointed f_security, which f_security to apply (or
> whether to refuse the restart altogether).
>
> For Smack, accept the checkpointed label if the restarting task has
> CAP_MAC_ADMIN.
>
> For SELinux, I currently ignore the checkpointed label and call
> file_alloc_security().  Do we want to have 'restore' permission for
> class file?
>
> Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
> ---
>  checkpoint/files.c             |   33 +++++++++++++++++++++++++++++++++
>  include/linux/checkpoint_hdr.h |    1 +
>  include/linux/security.h       |   13 +++++++++++++
>  security/capability.c          |    6 ++++++
>  security/security.c            |    5 +++++
>  security/selinux/hooks.c       |   11 +++++++++++
>  security/smack/smack_lsm.c     |   26 +++++++++++++++++++++++++-
>  7 files changed, 94 insertions(+), 1 deletions(-)
>
>   

> ...

> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index dfc0f7a..7bcdfde 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -831,6 +831,30 @@ static int smack_file_alloc_security(struct file *file)
>  }
>  
>  /**
> + * smack_file_restore_security - assign a file security blob
> + * @file: the object
> + * @stored: the label stored in the checkpoint file
> + *
> + * Returns 0
>   

Or -EINVAL if the label is not acceptable

> + */
> +static int smack_file_restore_security(struct file *file, void *stored)
> +{
> +	char *str = smk_import(stored, 0);
> +
> +	if (str == NULL)
> +		return -EINVAL;
> +
> +	file->f_security = current_security();
> +	if (current_security() != str) {
> +		if (!capable(CAP_MAC_ADMIN))
> +			return -EPERM;
> +		file->f_security = str;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
>   * smack_file_free_security - clear a file security blob
>   * @file: the object
>   *
> @@ -1630,7 +1654,6 @@ static int smack_msg_msg_restore_security(struct msg_msg *msg,
>  		msg->security = str;
>  	}
>  	return 0;
> -	return 0;
>   

Looks like you caught that. Thank you

>  }
>  
>  /**
> @@ -2996,6 +3019,7 @@ struct security_operations smack_ops = {
>  
>  	.file_permission = 		smack_file_permission,
>  	.file_alloc_security = 		smack_file_alloc_security,
> +	.file_restore_security =	smack_file_restore_security,
>  	.file_free_security = 		smack_file_free_security,
>  	.file_ioctl = 			smack_file_ioctl,
>  	.file_lock = 			smack_file_lock,
>   

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 04/10] cr: split core function out of some set*{u,g}id functions
       [not found]     ` <20090610014456.GC5658-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-06-10 12:20       ` James Morris
  2009-06-10 12:51         ` Serge E. Hallyn
  0 siblings, 1 reply; 26+ messages in thread
From: James Morris @ 2009-06-10 12:20 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Linux Containers, David Howells, Alexey Dobriyan, Andrew Morgan

On Tue, 9 Jun 2009, Serge E. Hallyn wrote:

> When restarting tasks, we want to be able to change xuid and
> xgid in a struct cred, and do so with security checks.  Break
> the core functionality of set{fs,res}{u,g}id into cred_setX
> which performs the access checks based on current_cred(),
> but performs the requested change on a passed-in cred.
> 

Please cc the lsm list when making changes to security.

-- 
James Morris
<jmorris-gx6/JNMH7DfYtjvyW6yDsg@public.gmane.org>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 04/10] cr: split core function out of some set*{u,g}id functions
  2009-06-10 12:20       ` James Morris
@ 2009-06-10 12:51         ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10 12:51 UTC (permalink / raw)
  To: James Morris
  Cc: Linux Containers, David Howells, Alexey Dobriyan, Andrew Morgan,
	linux-security-module

Quoting James Morris (jmorris@namei.org):
> On Tue, 9 Jun 2009, Serge E. Hallyn wrote:
> 
> > When restarting tasks, we want to be able to change xuid and
> > xgid in a struct cred, and do so with security checks.  Break
> > the core functionality of set{fs,res}{u,g}id into cred_setX
> > which performs the access checks based on current_cred(),
> > but performs the requested change on a passed-in cred.
> > 
> 
> Please cc the lsm list when making changes to security.

Argh, they were cc:d on my last version, and I was sure I'd
put them in the list of headers for this set.

FWIW, the thread can be seen here
https://lists.linux-foundation.org/pipermail/containers/2009-June/018509.html

thanks,
-serge

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] cr: restore LSM credentials
  2009-06-10  1:46     ` Serge E. Hallyn
@ 2009-06-10 13:54         ` Stephen Smalley
  -1 siblings, 0 replies; 26+ messages in thread
From: Stephen Smalley @ 2009-06-10 13:54 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: David Howells, SELinux, Linux Containers, Alexey Dobriyan, Andrew Morgan

On Tue, 2009-06-09 at 20:46 -0500, Serge E. Hallyn wrote:
> Checkpoint and restore task and ipc struct ->security info.
> (files->f_security yet to be done).
> 
> LSM contexts (a string representation of obj->security) are
> checkpointed as shared objects before any object referencing
> it.  The object's checkpoint header struct has a reference
> (h->sec_ref) to the shared object.  A NULL ->security is indicated
> by h->sec_ref = -1.
> 
> At checkpoint time, for each obj->security to be checkpointed,
> the LSM will be asked (once) to convert it to a string, in memory
> which the checkpoint subsystem will kfree.  At restart time,
> the LSM will first return some meaningful token given the
> checkpointed string.  That token will be passed to per-object-type
> restore functions (task_restore_context(), shm_restore_security(),
> etc) where the LSM can determine based on the object type, the
> caller, and the token, whether to allow the object restore, and
> what value to actually assign to ->security.  In smack, the
> token is the actual imported label.  In SELinux, it is a temporary
> pointer to the sid which the checkpointed context referred to.

Possibly I misunderstand, but it appears that you have a single
security_context_to_str() hook that tries to take an arbitrary
->security pointer for any object type.  I don't believe that is safe,
as each object type may have its own security structure.

There are already LSM hooks to obtain secids for objects (task, ipc,
inode, sock), and to convert between secid and secctx strings for use by
the audit subsystem and networking subsystem.  Why can't you just use
those hooks for getting the secids and then converting them to secctx
strings later?

> In smack, the checkpointed labels are used for both tasks and
> ipc objects so long as the task calling sys_restart() has
> CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
> from current_security(), -EPERM is returned.
> 
> The basics of SELinux support are there (enough to demonstrate working
> c/r with SELinux enforcing), but there will need to be new object
> permissions for restore, so the precise nature of those needs to be
> discussed.  For instance, do we want to define process:restore
> and ipc_msg_msg:restore, in which case
>         allow root_t user_t:process restore
> would mean that root_t may restart a task and label it user_t?

I think so, yes.

> Since we are potentially skipping several allowed domain transitions
> (resulting in an illegal short-cut domain transition or type creation),
> I have a fear that the only sane way to proceed would be to have
> one all-powerful domain, checkpoint_restore_t, which can effectively
> transition to any domain it wants to by (ab)using the checkpoint
> image.
> 
> Or, perhaps we can define intermediate domains...  So if we want
> user_t to be able to restart a server of type X_t, then we create
> a X_restore_t type, allow user_t to transition to it using a
> program which does sys_restart(), which in turn may transition to
> X_t?

Different domains will make sense for different use cases.  As long as
the mechanism doesn't prevent us from crafting more limited privilege
restore domains in policy, it shouldn't be a problem.

-- 
Stephen Smalley
National Security Agency

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] cr: restore LSM credentials
@ 2009-06-10 13:54         ` Stephen Smalley
  0 siblings, 0 replies; 26+ messages in thread
From: Stephen Smalley @ 2009-06-10 13:54 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Linux Containers, Oren Laadan, David Howells, Alexey Dobriyan,
	Andrew Morgan, SELinux

On Tue, 2009-06-09 at 20:46 -0500, Serge E. Hallyn wrote:
> Checkpoint and restore task and ipc struct ->security info.
> (files->f_security yet to be done).
> 
> LSM contexts (a string representation of obj->security) are
> checkpointed as shared objects before any object referencing
> it.  The object's checkpoint header struct has a reference
> (h->sec_ref) to the shared object.  A NULL ->security is indicated
> by h->sec_ref = -1.
> 
> At checkpoint time, for each obj->security to be checkpointed,
> the LSM will be asked (once) to convert it to a string, in memory
> which the checkpoint subsystem will kfree.  At restart time,
> the LSM will first return some meaningful token given the
> checkpointed string.  That token will be passed to per-object-type
> restore functions (task_restore_context(), shm_restore_security(),
> etc) where the LSM can determine based on the object type, the
> caller, and the token, whether to allow the object restore, and
> what value to actually assign to ->security.  In smack, the
> token is the actual imported label.  In SELinux, it is a temporary
> pointer to the sid which the checkpointed context referred to.

Possibly I misunderstand, but it appears that you have a single
security_context_to_str() hook that tries to take an arbitrary
->security pointer for any object type.  I don't believe that is safe,
as each object type may have its own security structure.

There are already LSM hooks to obtain secids for objects (task, ipc,
inode, sock), and to convert between secid and secctx strings for use by
the audit subsystem and networking subsystem.  Why can't you just use
those hooks for getting the secids and then converting them to secctx
strings later?

> In smack, the checkpointed labels are used for both tasks and
> ipc objects so long as the task calling sys_restart() has
> CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
> from current_security(), -EPERM is returned.
> 
> The basics of SELinux support are there (enough to demonstrate working
> c/r with SELinux enforcing), but there will need to be new object
> permissions for restore, so the precise nature of those needs to be
> discussed.  For instance, do we want to define process:restore
> and ipc_msg_msg:restore, in which case
>         allow root_t user_t:process restore
> would mean that root_t may restart a task and label it user_t?

I think so, yes.

> Since we are potentially skipping several allowed domain transitions
> (resulting in an illegal short-cut domain transition or type creation),
> I have a fear that the only sane way to proceed would be to have
> one all-powerful domain, checkpoint_restore_t, which can effectively
> transition to any domain it wants to by (ab)using the checkpoint
> image.
> 
> Or, perhaps we can define intermediate domains...  So if we want
> user_t to be able to restart a server of type X_t, then we create
> a X_restore_t type, allow user_t to transition to it using a
> program which does sys_restart(), which in turn may transition to
> X_t?

Different domains will make sense for different use cases.  As long as
the mechanism doesn't prevent us from crafting more limited privilege
restore domains in policy, it shouldn't be a problem.

-- 
Stephen Smalley
National Security Agency


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] cr: lsm: restore file->f_security
  2009-06-10  1:47     ` Serge E. Hallyn
@ 2009-06-10 13:54         ` Stephen Smalley
  -1 siblings, 0 replies; 26+ messages in thread
From: Stephen Smalley @ 2009-06-10 13:54 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: David Howells, SELinux, Linux Containers, Alexey Dobriyan, Andrew Morgan

On Tue, 2009-06-09 at 20:47 -0500, Serge E. Hallyn wrote:
> file->f_cred is the cred of the task which opened it.  file->f_security
> can be separately set by the LSM.  Checkpoint the file->f_security,
> and at restart ask the LSM, using security_file_restore, based on the current
> task's context and the checkpointed f_security, which f_security to apply (or
> whether to refuse the restart altogether).
> 
> For Smack, accept the checkpointed label if the restarting task has
> CAP_MAC_ADMIN.
> 
> For SELinux, I currently ignore the checkpointed label and call
> file_alloc_security().  Do we want to have 'restore' permission for
> class file?

Yes - otherwise we'll end up with a restarted process that may not be
able to access its open files.

-- 
Stephen Smalley
National Security Agency

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] cr: lsm: restore file->f_security
@ 2009-06-10 13:54         ` Stephen Smalley
  0 siblings, 0 replies; 26+ messages in thread
From: Stephen Smalley @ 2009-06-10 13:54 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Linux Containers, Oren Laadan, David Howells, Alexey Dobriyan,
	Andrew Morgan, SELinux

On Tue, 2009-06-09 at 20:47 -0500, Serge E. Hallyn wrote:
> file->f_cred is the cred of the task which opened it.  file->f_security
> can be separately set by the LSM.  Checkpoint the file->f_security,
> and at restart ask the LSM, using security_file_restore, based on the current
> task's context and the checkpointed f_security, which f_security to apply (or
> whether to refuse the restart altogether).
> 
> For Smack, accept the checkpointed label if the restarting task has
> CAP_MAC_ADMIN.
> 
> For SELinux, I currently ignore the checkpointed label and call
> file_alloc_security().  Do we want to have 'restore' permission for
> class file?

Yes - otherwise we'll end up with a restarted process that may not be
able to access its open files.

-- 
Stephen Smalley
National Security Agency


--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] cr: lsm: restore file->f_security
  2009-06-10  3:39         ` Casey Schaufler
@ 2009-06-10 13:58             ` Serge E. Hallyn
  -1 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10 13:58 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: David Howells, SELinux, Linux Containers, Alexey Dobriyan, Andrew Morgan

Quoting Casey Schaufler (casey-iSGtlc1asvQWG2LlvL+J4A@public.gmane.org):
> > @@ -1630,7 +1654,6 @@ static int smack_msg_msg_restore_security(struct msg_msg *msg,
> >  		msg->security = str;
> >  	}
> >  	return 0;
> > -	return 0;
> >   
> 
> Looks like you caught that. Thank you

Huh.  No, I didn't :)  I'm guessing that both the original problem
and this accidental fix came about through my inexperience with
resolving conflicts in git-rebase...

Thanks for taking a look, I will address all of your comments in
the next posting.

For the next version I may split up the smack, selinux, and
tomoyo hooks into separate patches from the core LSM patch
(and nix the rather silly split of the security_file_restore
hook) for easier review...

thanks,
-serge

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 10/10] cr: lsm: restore file->f_security
@ 2009-06-10 13:58             ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10 13:58 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Linux Containers, Oren Laadan, David Howells, Alexey Dobriyan,
	Andrew Morgan, SELinux

Quoting Casey Schaufler (casey@schaufler-ca.com):
> > @@ -1630,7 +1654,6 @@ static int smack_msg_msg_restore_security(struct msg_msg *msg,
> >  		msg->security = str;
> >  	}
> >  	return 0;
> > -	return 0;
> >   
> 
> Looks like you caught that. Thank you

Huh.  No, I didn't :)  I'm guessing that both the original problem
and this accidental fix came about through my inexperience with
resolving conflicts in git-rebase...

Thanks for taking a look, I will address all of your comments in
the next posting.

For the next version I may split up the smack, selinux, and
tomoyo hooks into separate patches from the core LSM patch
(and nix the rather silly split of the security_file_restore
hook) for easier review...

thanks,
-serge

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] cr: restore LSM credentials
  2009-06-10 13:54         ` Stephen Smalley
@ 2009-06-10 14:59             ` Serge E. Hallyn
  -1 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10 14:59 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: David Howells, SELinux, Linux Containers, Alexey Dobriyan, Andrew Morgan

Quoting Stephen Smalley (sds-+05T5uksL2qpZYMLLGbcSA@public.gmane.org):
> On Tue, 2009-06-09 at 20:46 -0500, Serge E. Hallyn wrote:
> > Checkpoint and restore task and ipc struct ->security info.
> > (files->f_security yet to be done).
> > 
> > LSM contexts (a string representation of obj->security) are
> > checkpointed as shared objects before any object referencing
> > it.  The object's checkpoint header struct has a reference
> > (h->sec_ref) to the shared object.  A NULL ->security is indicated
> > by h->sec_ref = -1.
> > 
> > At checkpoint time, for each obj->security to be checkpointed,
> > the LSM will be asked (once) to convert it to a string, in memory
> > which the checkpoint subsystem will kfree.  At restart time,
> > the LSM will first return some meaningful token given the
> > checkpointed string.  That token will be passed to per-object-type
> > restore functions (task_restore_context(), shm_restore_security(),
> > etc) where the LSM can determine based on the object type, the
> > caller, and the token, whether to allow the object restore, and
> > what value to actually assign to ->security.  In smack, the
> > token is the actual imported label.  In SELinux, it is a temporary
> > pointer to the sid which the checkpointed context referred to.

Thanks for taking a look.

> Possibly I misunderstand, but it appears that you have a single
> security_context_to_str() hook that tries to take an arbitrary

Yikes, you're right.  And I remember telling myself that wouldn't
work - then apparently I ran into the next problem and forgot.

> ->security pointer for any object type.  I don't believe that is safe,
> as each object type may have its own security structure.

Absolutely.  Which raises the question of why it was working for me :)

> There are already LSM hooks to obtain secids for objects (task, ipc,
> inode, sock), and to convert between secid and secctx strings for use by
> the audit subsystem and networking subsystem.  Why can't you just use
> those hooks for getting the secids and then converting them to secctx
> strings later?

I had taken a look at the *_secid() hooks, and I think I misunderstood
them.  Using those makes sense; I'll make that change.

> > In smack, the checkpointed labels are used for both tasks and
> > ipc objects so long as the task calling sys_restart() has
> > CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
> > from current_security(), -EPERM is returned.
> > 
> > The basics of SELinux support are there (enough to demonstrate working
> > c/r with SELinux enforcing), but there will need to be new object
> > permissions for restore, so the precise nature of those needs to be
> > discussed.  For instance, do we want to define process:restore
> > and ipc_msg_msg:restore, in which case
> >         allow root_t user_t:process restore
> > would mean that root_t may restart a task and label it user_t?
> 
> I think so, yes.

Ok, I'll define those in the next set.

thanks,
-serge

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/10] cr: restore LSM credentials
@ 2009-06-10 14:59             ` Serge E. Hallyn
  0 siblings, 0 replies; 26+ messages in thread
From: Serge E. Hallyn @ 2009-06-10 14:59 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Linux Containers, Oren Laadan, David Howells, Alexey Dobriyan,
	Andrew Morgan, SELinux

Quoting Stephen Smalley (sds@tycho.nsa.gov):
> On Tue, 2009-06-09 at 20:46 -0500, Serge E. Hallyn wrote:
> > Checkpoint and restore task and ipc struct ->security info.
> > (files->f_security yet to be done).
> > 
> > LSM contexts (a string representation of obj->security) are
> > checkpointed as shared objects before any object referencing
> > it.  The object's checkpoint header struct has a reference
> > (h->sec_ref) to the shared object.  A NULL ->security is indicated
> > by h->sec_ref = -1.
> > 
> > At checkpoint time, for each obj->security to be checkpointed,
> > the LSM will be asked (once) to convert it to a string, in memory
> > which the checkpoint subsystem will kfree.  At restart time,
> > the LSM will first return some meaningful token given the
> > checkpointed string.  That token will be passed to per-object-type
> > restore functions (task_restore_context(), shm_restore_security(),
> > etc) where the LSM can determine based on the object type, the
> > caller, and the token, whether to allow the object restore, and
> > what value to actually assign to ->security.  In smack, the
> > token is the actual imported label.  In SELinux, it is a temporary
> > pointer to the sid which the checkpointed context referred to.

Thanks for taking a look.

> Possibly I misunderstand, but it appears that you have a single
> security_context_to_str() hook that tries to take an arbitrary

Yikes, you're right.  And I remember telling myself that wouldn't
work - then apparently I ran into the next problem and forgot.

> ->security pointer for any object type.  I don't believe that is safe,
> as each object type may have its own security structure.

Absolutely.  Which raises the question of why it was working for me :)

> There are already LSM hooks to obtain secids for objects (task, ipc,
> inode, sock), and to convert between secid and secctx strings for use by
> the audit subsystem and networking subsystem.  Why can't you just use
> those hooks for getting the secids and then converting them to secctx
> strings later?

I had taken a look at the *_secid() hooks, and I think I misunderstood
them.  Using those makes sense; I'll make that change.

> > In smack, the checkpointed labels are used for both tasks and
> > ipc objects so long as the task calling sys_restart() has
> > CAP_MAC_ADMIN.  Otherwise, if the checkpointed label is different
> > from current_security(), -EPERM is returned.
> > 
> > The basics of SELinux support are there (enough to demonstrate working
> > c/r with SELinux enforcing), but there will need to be new object
> > permissions for restore, so the precise nature of those needs to be
> > discussed.  For instance, do we want to define process:restore
> > and ipc_msg_msg:restore, in which case
> >         allow root_t user_t:process restore
> > would mean that root_t may restart a task and label it user_t?
> 
> I think so, yes.

Ok, I'll define those in the next set.

thanks,
-serge

--
This message was distributed to subscribers of the selinux mailing list.
If you no longer wish to subscribe, send mail to majordomo@tycho.nsa.gov with
the words "unsubscribe selinux" without quotes as the message.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2009-06-10 14:59 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-10  1:44 [PATCH 01/10] cred: #include init.h in cred.h Serge E. Hallyn
     [not found] ` <20090610014412.GA5628-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-10  1:44   ` [PATCH 02/10] groups: move code to kernel/groups.c Serge E. Hallyn
2009-06-10  1:44   ` [PATCH 03/10] cr: break out new_user_ns() Serge E. Hallyn
2009-06-10  1:44   ` [PATCH 04/10] cr: split core function out of some set*{u,g}id functions Serge E. Hallyn
     [not found]     ` <20090610014456.GC5658-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-10 12:20       ` James Morris
2009-06-10 12:51         ` Serge E. Hallyn
2009-06-10  1:45   ` [PATCH 05/10] cr: ipc: reset kern_ipc_perms Serge E. Hallyn
2009-06-10  1:45   ` [PATCH 06/10] cr: capabilities: define checkpoint and restore fns Serge E. Hallyn
2009-06-10  1:46   ` [PATCH 07/10] cr: checkpoint and restore task credentials Serge E. Hallyn
2009-06-10  1:46   ` [PATCH 08/10] cr: restore file->f_cred Serge E. Hallyn
2009-06-10  1:46   ` [PATCH 09/10] cr: restore LSM credentials Serge E. Hallyn
2009-06-10  1:46     ` Serge E. Hallyn
     [not found]     ` <20090610014637.GH5658-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-10  3:24       ` Casey Schaufler
2009-06-10  3:24         ` Casey Schaufler
2009-06-10 13:54       ` Stephen Smalley
2009-06-10 13:54         ` Stephen Smalley
     [not found]         ` <1244642042.20265.143.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2009-06-10 14:59           ` Serge E. Hallyn
2009-06-10 14:59             ` Serge E. Hallyn
2009-06-10  1:47   ` [PATCH 10/10] cr: lsm: restore file->f_security Serge E. Hallyn
2009-06-10  1:47     ` Serge E. Hallyn
     [not found]     ` <20090610014704.GI5658-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-10  3:39       ` Casey Schaufler
2009-06-10  3:39         ` Casey Schaufler
     [not found]         ` <4A2F2B08.40701-iSGtlc1asvQWG2LlvL+J4A@public.gmane.org>
2009-06-10 13:58           ` Serge E. Hallyn
2009-06-10 13:58             ` Serge E. Hallyn
2009-06-10 13:54       ` Stephen Smalley
2009-06-10 13:54         ` Stephen Smalley

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.