linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Gladkov <legion@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Christian Brauner <brauner@kernel.org>,
	Iurii Zaikin <yzaikin@google.com>,
	Kees Cook <keescook@chromium.org>,
	Linux Containers <containers@lists.linux.dev>,
	linux-fsdevel@vger.kernel.org,
	Luis Chamberlain <mcgrof@kernel.org>,
	Vasily Averin <vvs@virtuozzo.com>
Subject: [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory
Date: Wed,  1 Jun 2022 15:20:30 +0200	[thread overview]
Message-ID: <857cb160a981b5719d8ed6a3e5e7c456915c64fa.1654086665.git.legion@kernel.org> (raw)
In-Reply-To: <cover.1654086665.git.legion@kernel.org>

Dynamic memory allocation is currently needed to duplicate the sysctl
table and modify .data so that it points to the per-namespace parameter.
The new sysctl API makes it possible to get rid of such modification.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
---
 include/linux/ipc_namespace.h |  18 ---
 ipc/ipc_sysctl.c              | 236 +++++++++++++++++-----------------
 ipc/namespace.c               |   4 -
 3 files changed, 121 insertions(+), 137 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..51c2c247c447 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -191,22 +191,4 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
 }
 
 #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
-
-#ifdef CONFIG_SYSVIPC_SYSCTL
-
-bool setup_ipc_sysctls(struct ipc_namespace *ns);
-void retire_ipc_sysctls(struct ipc_namespace *ns);
-
-#else /* CONFIG_SYSVIPC_SYSCTL */
-
-static inline void retire_ipc_sysctls(struct ipc_namespace *ns)
-{
-}
-
-static inline bool setup_ipc_sysctls(struct ipc_namespace *ns)
-{
-	return true;
-}
-
-#endif /* CONFIG_SYSVIPC_SYSCTL */
 #endif
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index ef313ecfb53a..833b670c38f3 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -68,26 +68,94 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
 	return ret;
 }
 
+static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table);
+
+static int ipc_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file)
+{
+	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+
+	// For now, we only allow changes in init_user_ns.
+	if (ns->user_ns != &init_user_ns)
+		return -EPERM;
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	int index = (ctx->table - ipc_sysctls);
+
+	switch (index) {
+		case IPC_SYSCTL_SEM_NEXT_ID:
+		case IPC_SYSCTL_MSG_NEXT_ID:
+		case IPC_SYSCTL_SHM_NEXT_ID:
+			if (!checkpoint_restore_ns_capable(ns->user_ns))
+				return -EPERM;
+			break;
+	}
+#endif
+	ctx->ctl_data = ns;
+	return 0;
+}
+
+static ssize_t ipc_sys_read(struct ctl_context *ctx, struct file *file,
+		     char *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table table = *ctx->table;
+	table.data = data_from_ns(ctx, ctx->table);
+	return table.proc_handler(&table, 0, buffer, lenp, ppos);
+}
+
+static ssize_t ipc_sys_write(struct ctl_context *ctx, struct file *file,
+		      char *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table table = *ctx->table;
+	table.data = data_from_ns(ctx, ctx->table);
+	return table.proc_handler(&table, 1, buffer, lenp, ppos);
+}
+
+static struct ctl_fops ipc_sys_fops = {
+	.open	= ipc_sys_open,
+	.read	= ipc_sys_read,
+	.write	= ipc_sys_write,
+};
+
 int ipc_mni = IPCMNI;
 int ipc_mni_shift = IPCMNI_SHIFT;
 int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
 
+enum {
+	IPC_SYSCTL_SHMMAX,
+	IPC_SYSCTL_SHMALL,
+	IPC_SYSCTL_SHMMNI,
+	IPC_SYSCTL_SHM_RMID_FORCED,
+	IPC_SYSCTL_MSGMAX,
+	IPC_SYSCTL_MSGMNI,
+	IPC_SYSCTL_AUTO_MSGMNI,
+	IPC_SYSCTL_MSGMNB,
+	IPC_SYSCTL_SEM,
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	IPC_SYSCTL_SEM_NEXT_ID,
+	IPC_SYSCTL_MSG_NEXT_ID,
+	IPC_SYSCTL_SHM_NEXT_ID,
+#endif
+	IPC_SYSCTL_COUNTS
+};
+
 static struct ctl_table ipc_sysctls[] = {
-	{
+	[IPC_SYSCTL_SHMMAX] = {
 		.procname	= "shmmax",
 		.data		= &init_ipc_ns.shm_ctlmax,
 		.maxlen		= sizeof(init_ipc_ns.shm_ctlmax),
 		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
+		.proc_handler   = proc_doulongvec_minmax,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_SHMALL] = {
 		.procname	= "shmall",
 		.data		= &init_ipc_ns.shm_ctlall,
 		.maxlen		= sizeof(init_ipc_ns.shm_ctlall),
 		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
+		.proc_handler   = proc_doulongvec_minmax,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_SHMMNI] = {
 		.procname	= "shmmni",
 		.data		= &init_ipc_ns.shm_ctlmni,
 		.maxlen		= sizeof(init_ipc_ns.shm_ctlmni),
@@ -95,8 +163,9 @@ static struct ctl_table ipc_sysctls[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &ipc_mni,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_SHM_RMID_FORCED] = {
 		.procname	= "shm_rmid_forced",
 		.data		= &init_ipc_ns.shm_rmid_forced,
 		.maxlen		= sizeof(init_ipc_ns.shm_rmid_forced),
@@ -104,8 +173,9 @@ static struct ctl_table ipc_sysctls[] = {
 		.proc_handler	= proc_ipc_dointvec_minmax_orphans,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_MSGMAX] = {
 		.procname	= "msgmax",
 		.data		= &init_ipc_ns.msg_ctlmax,
 		.maxlen		= sizeof(init_ipc_ns.msg_ctlmax),
@@ -113,8 +183,9 @@ static struct ctl_table ipc_sysctls[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_MSGMNI] = {
 		.procname	= "msgmni",
 		.data		= &init_ipc_ns.msg_ctlmni,
 		.maxlen		= sizeof(init_ipc_ns.msg_ctlmni),
@@ -122,8 +193,9 @@ static struct ctl_table ipc_sysctls[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &ipc_mni,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_AUTO_MSGMNI] = {
 		.procname	= "auto_msgmni",
 		.data		= NULL,
 		.maxlen		= sizeof(int),
@@ -131,8 +203,9 @@ static struct ctl_table ipc_sysctls[] = {
 		.proc_handler	= proc_ipc_auto_msgmni,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_MSGMNB] = {
 		.procname	=  "msgmnb",
 		.data		= &init_ipc_ns.msg_ctlmnb,
 		.maxlen		= sizeof(init_ipc_ns.msg_ctlmnb),
@@ -140,152 +213,85 @@ static struct ctl_table ipc_sysctls[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_SEM] = {
 		.procname	= "sem",
 		.data		= &init_ipc_ns.sem_ctls,
 		.maxlen		= 4*sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_ipc_sem_dointvec,
+		.ctl_fops	= &ipc_sys_fops,
 	},
 #ifdef CONFIG_CHECKPOINT_RESTORE
-	{
+	[IPC_SYSCTL_SEM_NEXT_ID] = {
 		.procname	= "sem_next_id",
 		.data		= &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
 		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
-		.mode		= 0444,
+		.mode		= 0666,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_MSG_NEXT_ID] = {
 		.procname	= "msg_next_id",
 		.data		= &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
 		.maxlen		= sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
-		.mode		= 0444,
+		.mode		= 0666,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
+		.ctl_fops	= &ipc_sys_fops,
 	},
-	{
+	[IPC_SYSCTL_SHM_NEXT_ID] = {
 		.procname	= "shm_next_id",
 		.data		= &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
 		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
-		.mode		= 0444,
+		.mode		= 0666,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_INT_MAX,
+		.ctl_fops	= &ipc_sys_fops,
 	},
 #endif
-	{}
+	[IPC_SYSCTL_COUNTS] = {}
 };
 
-static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
+static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table)
 {
-	return &current->nsproxy->ipc_ns->ipc_set;
-}
-
-static int set_is_seen(struct ctl_table_set *set)
-{
-	return &current->nsproxy->ipc_ns->ipc_set == set;
-}
-
-static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
-{
-	int mode = table->mode;
-
+	struct ipc_namespace *ns = ctx->ctl_data;
+
+	switch (ctx->table - ipc_sysctls) {
+		case IPC_SYSCTL_SHMMAX:			return &ns->shm_ctlmax;
+		case IPC_SYSCTL_SHMALL:			return &ns->shm_ctlall;
+		case IPC_SYSCTL_SHMMNI:			return &ns->shm_ctlmni;
+		case IPC_SYSCTL_SHM_RMID_FORCED:	return &ns->shm_rmid_forced;
+		case IPC_SYSCTL_MSGMAX:			return &ns->msg_ctlmax;
+		case IPC_SYSCTL_MSGMNI:			return &ns->msg_ctlmni;
+		case IPC_SYSCTL_MSGMNB:			return &ns->msg_ctlmnb;
+		case IPC_SYSCTL_SEM:			return &ns->sem_ctls;
 #ifdef CONFIG_CHECKPOINT_RESTORE
-	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
-
-	if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
-	     (table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
-	     (table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
-	    checkpoint_restore_ns_capable(ns->user_ns))
-		mode = 0666;
+		case IPC_SYSCTL_SEM_NEXT_ID:		return &ns->ids[IPC_SEM_IDS].next_id;
+		case IPC_SYSCTL_MSG_NEXT_ID:		return &ns->ids[IPC_MSG_IDS].next_id;
+		case IPC_SYSCTL_SHM_NEXT_ID:		return &ns->ids[IPC_SHM_IDS].next_id;
 #endif
-	return mode;
-}
-
-static struct ctl_table_root set_root = {
-	.lookup = set_lookup,
-	.permissions = ipc_permissions,
-};
-
-bool setup_ipc_sysctls(struct ipc_namespace *ns)
-{
-	struct ctl_table *tbl;
-
-	setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen);
-
-	tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL);
-	if (tbl) {
-		int i;
-
-		for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) {
-			if (tbl[i].data == &init_ipc_ns.shm_ctlmax)
-				tbl[i].data = &ns->shm_ctlmax;
-
-			else if (tbl[i].data == &init_ipc_ns.shm_ctlall)
-				tbl[i].data = &ns->shm_ctlall;
-
-			else if (tbl[i].data == &init_ipc_ns.shm_ctlmni)
-				tbl[i].data = &ns->shm_ctlmni;
-
-			else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced)
-				tbl[i].data = &ns->shm_rmid_forced;
-
-			else if (tbl[i].data == &init_ipc_ns.msg_ctlmax)
-				tbl[i].data = &ns->msg_ctlmax;
-
-			else if (tbl[i].data == &init_ipc_ns.msg_ctlmni)
-				tbl[i].data = &ns->msg_ctlmni;
-
-			else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb)
-				tbl[i].data = &ns->msg_ctlmnb;
-
-			else if (tbl[i].data == &init_ipc_ns.sem_ctls)
-				tbl[i].data = &ns->sem_ctls;
-#ifdef CONFIG_CHECKPOINT_RESTORE
-			else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id)
-				tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;
-
-			else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id)
-				tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;
-
-			else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id)
-				tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
-#endif
-			else
-				tbl[i].data = NULL;
-		}
-
-		ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl);
-	}
-	if (!ns->ipc_sysctls) {
-		kfree(tbl);
-		retire_sysctl_set(&ns->ipc_set);
-		return false;
 	}
-
-	return true;
+	return NULL;
 }
 
-void retire_ipc_sysctls(struct ipc_namespace *ns)
-{
-	struct ctl_table *tbl;
-
-	tbl = ns->ipc_sysctls->ctl_table_arg;
-	unregister_sysctl_table(ns->ipc_sysctls);
-	retire_sysctl_set(&ns->ipc_set);
-	kfree(tbl);
-}
+static struct ctl_table ipc_root_table[] = {
+	{
+		.procname       = "kernel",
+		.mode           = 0555,
+		.child          = ipc_sysctls,
+	},
+	{}
+};
 
 static int __init ipc_sysctl_init(void)
 {
-	if (!setup_ipc_sysctls(&init_ipc_ns)) {
-		pr_warn("ipc sysctl registration failed\n");
-		return -ENOMEM;
-	}
+	register_sysctl_table(ipc_root_table);
 	return 0;
 }
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 754f3237194a..f760243ca685 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -63,9 +63,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_mq_sysctls(ns))
 		goto fail_put;
 
-	if (!setup_ipc_sysctls(ns))
-		goto fail_put;
-
 	sem_init_ns(ns);
 	msg_init_ns(ns);
 	shm_init_ns(ns);
@@ -133,7 +130,6 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	shm_exit_ns(ns);
 
 	retire_mq_sysctls(ns);
-	retire_ipc_sysctls(ns);
 
 	dec_ipc_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
-- 
2.33.3


  parent reply	other threads:[~2022-06-01 13:21 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-14 18:18 [PATCH v4 0/2] ipc: Store mq and ipc sysctls in the ipc namespace Alexey Gladkov
2022-02-14 18:18 ` [PATCH v4 1/2] ipc: Store mqueue " Alexey Gladkov
2022-02-14 18:18 ` [PATCH v4 2/2] ipc: Store ipc " Alexey Gladkov
2022-03-23 20:24 ` [GIT PULL] ipc: Bind to the ipc namespace at open time Eric W. Biederman
2022-03-24 18:12   ` Linus Torvalds
2022-03-24 21:48     ` Eric W. Biederman
2022-03-24 22:16       ` Linus Torvalds
2022-03-25 12:10     ` Alexey Gladkov
2022-04-22 12:53     ` [PATCH v1 0/4] ipc: Remove extra1 field abuse to pass ipc namespace Alexey Gladkov
2022-04-22 12:53       ` [PATCH v1 1/4] " Alexey Gladkov
2022-05-02 16:07         ` Eric W. Biederman
2022-04-22 12:53       ` [PATCH v1 2/4] ipc: Use proper " Alexey Gladkov
2022-05-02 16:09         ` Eric W. Biederman
2022-05-03 13:39           ` Alexey Gladkov
2022-05-03 13:39             ` [PATCH v2 1/4] ipc: Use the same namespace to modify and validate Alexey Gladkov
2022-05-03 13:39             ` [PATCH v2 2/4] ipc: Remove extra1 field abuse to pass ipc namespace Alexey Gladkov
2022-05-03 13:39             ` [PATCH v2 3/4] ipc: Check permissions for checkpoint_restart sysctls at open time Alexey Gladkov
2022-05-03 13:39             ` [PATCH v2 4/4] ipc: Remove extra braces Alexey Gladkov
2022-04-22 12:53       ` [PATCH v1 3/4] ipc: Check permissions for checkpoint_restart sysctls at open time Alexey Gladkov
2022-04-22 12:53       ` [PATCH v1 4/4] ipc: Remove extra braces Alexey Gladkov
2022-04-22 20:44       ` [PATCH v1 0/4] ipc: Remove extra1 field abuse to pass ipc namespace Linus Torvalds
2022-05-04  3:42         ` Philip Rhoades
2022-06-01 13:20         ` [RFC PATCH 0/4] API extension for handling sysctl Alexey Gladkov
2022-06-01 13:20           ` [RFC PATCH 1/4] sysctl: " Alexey Gladkov
2022-06-01 19:19             ` Matthew Wilcox
2022-06-01 19:23               ` Linus Torvalds
2022-06-01 19:25                 ` Matthew Wilcox
2022-06-01 19:31                   ` Linus Torvalds
2022-06-01 19:32               ` Alexey Gladkov
2022-06-01 13:20           ` Alexey Gladkov [this message]
2022-06-01 16:45             ` [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory Linus Torvalds
2022-06-01 18:24               ` Alexey Gladkov
2022-06-01 18:34                 ` Linus Torvalds
2022-06-01 19:05                   ` Alexey Gladkov
2022-06-09 18:51                   ` Luis Chamberlain
2022-06-01 13:20           ` [RFC PATCH 3/4] sysctl: userns: " Alexey Gladkov
2022-06-01 13:20           ` [RFC PATCH 4/4] sysctl: mqueue: " Alexey Gladkov
2022-06-09 16:45           ` [RFC PATCH 0/4] API extension for handling sysctl Luis Chamberlain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=857cb160a981b5719d8ed6a3e5e7c456915c64fa.1654086665.git.legion@kernel.org \
    --to=legion@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=brauner@kernel.org \
    --cc=containers@lists.linux.dev \
    --cc=ebiederm@xmission.com \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mcgrof@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vvs@virtuozzo.com \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).