linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] capabilities: add a option PR_SET_CAPS for sys_prctl
@ 2023-10-18 12:20 Yunhui Cui
  2023-10-19 18:57 ` Paul Moore
  0 siblings, 1 reply; 2+ messages in thread
From: Yunhui Cui @ 2023-10-18 12:20 UTC (permalink / raw)
  To: serge, jmorris, peterz, cuiyunhui, chris.hyser,
	linux-security-module, linux-kernel

By infecting the container process, the already running container is
cloned, which means that each process of the container forks
independently. But the process in the container lacks some permissions
that cannot be completed.

For a container that is already running, we cannot modify the
configuration and restart it to complete the permission elevation.
Since capset() can only complete the setting of a subset of the
capabilities of the process, it cannot meet the requirements for
raising permissions. So an option is added to prctl() to complete it.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 include/linux/capability.h |  1 +
 include/uapi/linux/prctl.h |  1 +
 kernel/capability.c        | 82 +++++++++++++++++++++++++++-----------
 security/commoncap.c       |  7 ++++
 4 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/include/linux/capability.h b/include/linux/capability.h
index ecce0f43c73a..b656c40b281c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -211,6 +211,7 @@ extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
 extern bool ns_capable_noaudit(struct user_namespace *ns, int cap);
 extern bool ns_capable_setid(struct user_namespace *ns, int cap);
+extern int _capset(cap_user_header_t header, const cap_user_data_t data, bool prctl);
 #else
 static inline bool has_capability(struct task_struct *t, int cap)
 {
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 82cb4210ba50..9a8dae2be801 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -246,6 +246,7 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SHARE_FROM	3 /* pull core_sched cookie to pid */
 # define PR_SCHED_CORE_MAX		4
 
+#define PR_SET_CAPS			63
 /* Clone and personalize thread */
 #define PR_PERSONALIZED_CLONE		1000
 /* Isolation eventfd & epollfd during fork */
diff --git a/kernel/capability.c b/kernel/capability.c
index 1444f3954d75..968edd8b3564 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -201,25 +201,29 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 	return ret;
 }
 
-/**
- * sys_capset - set capabilities for a process or (*) a group of processes
- * @header: pointer to struct that contains capability version and
- *	target pid data
- * @data: pointer to struct that contains the effective, permitted,
- *	and inheritable capabilities
- *
- * Set capabilities for the current process only.  The ability to any other
- * process(es) has been deprecated and removed.
- *
- * The restrictions on setting capabilities are specified as:
- *
- * I: any raised capabilities must be a subset of the old permitted
- * P: any raised capabilities must be a subset of the old permitted
- * E: must be set to a subset of new permitted
- *
- * Returns 0 on success and < 0 on error.
- */
-SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
+static int __capset(struct cred *new,
+	       const struct cred *old,
+	       const kernel_cap_t *effective,
+	       const kernel_cap_t *inheritable,
+	       const kernel_cap_t *permitted)
+{
+	new->cap_effective   = *effective;
+	new->cap_inheritable = *inheritable;
+	new->cap_permitted   = *permitted;
+
+	/*
+	 * Mask off ambient bits that are no longer both permitted and
+	 * inheritable.
+	 */
+	new->cap_ambient = cap_intersect(new->cap_ambient,
+					 cap_intersect(*permitted,
+						       *inheritable));
+	if (WARN_ON(!cap_ambient_invariant_ok(new)))
+		return -EINVAL;
+	return 0;
+}
+
+int _capset(cap_user_header_t header, const cap_user_data_t data, bool prctl)
 {
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy, copybytes;
@@ -266,11 +270,17 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 	if (!new)
 		return -ENOMEM;
 
-	ret = security_capset(new, current_cred(),
-			      &effective, &inheritable, &permitted);
-	if (ret < 0)
-		goto error;
-
+	if (!prctl) {
+		ret = security_capset(new, current_cred(),
+				&effective, &inheritable, &permitted);
+		if (ret < 0)
+			goto error;
+	} else {
+		ret = __capset(new, current_cred(),
+				 &effective, &inheritable, &permitted);
+		if (ret < 0)
+			goto error;
+	}
 	audit_log_capset(new, current_cred());
 
 	return commit_creds(new);
@@ -279,6 +289,30 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 	abort_creds(new);
 	return ret;
 }
+EXPORT_SYMBOL(_capset);
+
+/**
+ * sys_capset - set capabilities for a process or (*) a group of processes
+ * @header: pointer to struct that contains capability version and
+ *	target pid data
+ * @data: pointer to struct that contains the effective, permitted,
+ *	and inheritable capabilities
+ *
+ * Set capabilities for the current process only.  The ability to any other
+ * process(es) has been deprecated and removed.
+ *
+ * The restrictions on setting capabilities are specified as:
+ *
+ * I: any raised capabilities must be a subset of the old permitted
+ * P: any raised capabilities must be a subset of the old permitted
+ * E: must be set to a subset of new permitted
+ *
+ * Returns 0 on success and < 0 on error.
+ */
+SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
+{
+	return _capset(header, data, false);
+}
 
 /**
  * has_ns_capability - Does a task have a capability in a specific user ns
diff --git a/security/commoncap.c b/security/commoncap.c
index 482807dec118..dd7f058e7d03 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1266,6 +1266,13 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
 		return commit_creds(new);
 
+	case PR_SET_CAPS:
+		if (unlikely(!access_ok((void __user *)arg2, sizeof(cap_user_header_t))))
+			return -EFAULT;
+		if (unlikely(!access_ok((void __user *)arg3, sizeof(cap_user_data_t))))
+			return -EFAULT;
+		return _capset((cap_user_header_t)arg2, (cap_user_data_t)arg3, true);
+
 	case PR_CAP_AMBIENT:
 		if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
 			if (arg3 | arg4 | arg5)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] capabilities: add a option PR_SET_CAPS for sys_prctl
  2023-10-18 12:20 [PATCH] capabilities: add a option PR_SET_CAPS for sys_prctl Yunhui Cui
@ 2023-10-19 18:57 ` Paul Moore
  0 siblings, 0 replies; 2+ messages in thread
From: Paul Moore @ 2023-10-19 18:57 UTC (permalink / raw)
  To: Yunhui Cui
  Cc: serge, jmorris, peterz, chris.hyser, linux-security-module, linux-kernel

On Wed, Oct 18, 2023 at 8:20 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote:
>
> By infecting the container process, the already running container is
> cloned, which means that each process of the container forks
> independently. But the process in the container lacks some permissions
> that cannot be completed.
>
> For a container that is already running, we cannot modify the
> configuration and restart it to complete the permission elevation.
> Since capset() can only complete the setting of a subset of the
> capabilities of the process, it cannot meet the requirements for
> raising permissions. So an option is added to prctl() to complete it.

I'm having a difficult time understanding the description above, would
it be possible for you to explain the problem differently?

> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 82cb4210ba50..9a8dae2be801 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -246,6 +246,7 @@ struct prctl_mm_map {
>  # define PR_SCHED_CORE_SHARE_FROM      3 /* pull core_sched cookie to pid */
>  # define PR_SCHED_CORE_MAX             4
>
> +#define PR_SET_CAPS                    63
>  /* Clone and personalize thread */
>  #define PR_PERSONALIZED_CLONE          1000
>  /* Isolation eventfd & epollfd during fork */
> diff --git a/kernel/capability.c b/kernel/capability.c
> index 1444f3954d75..968edd8b3564 100644
> --- a/kernel/capability.c
> +++ b/kernel/capability.c
> @@ -266,11 +270,17 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
>         if (!new)
>                 return -ENOMEM;
>
> -       ret = security_capset(new, current_cred(),
> -                             &effective, &inheritable, &permitted);
> -       if (ret < 0)
> -               goto error;
> -
> +       if (!prctl) {
> +               ret = security_capset(new, current_cred(),
> +                               &effective, &inheritable, &permitted);
> +               if (ret < 0)
> +                       goto error;
> +       } else {
> +               ret = __capset(new, current_cred(),
> +                                &effective, &inheritable, &permitted);
> +               if (ret < 0)
> +                       goto error;
> +       }

It isn't clear to me why we would want to avoid the security_capset()
hook in the prctl case; the change to the task's capabilities is the
same, yes?  If the goal of prctl(PR_SET_CAPS, ...) is to bypass the
security_capset() controls, you're going to need to do a much better
job of explaining why this is necessary.

>         audit_log_capset(new, current_cred());
>
>         return commit_creds(new);

-- 
paul-moore.com

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-10-19 18:57 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-18 12:20 [PATCH] capabilities: add a option PR_SET_CAPS for sys_prctl Yunhui Cui
2023-10-19 18:57 ` Paul Moore

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).