From: Alexey Gladkov <legion@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>,
"Eric W . Biederman" <ebiederm@xmission.com>,
Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Christian Brauner <brauner@kernel.org>,
Iurii Zaikin <yzaikin@google.com>,
Kees Cook <keescook@chromium.org>,
Linux Containers <containers@lists.linux.dev>,
linux-fsdevel@vger.kernel.org,
Luis Chamberlain <mcgrof@kernel.org>,
Vasily Averin <vvs@virtuozzo.com>
Subject: [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory
Date: Wed, 1 Jun 2022 15:20:30 +0200 [thread overview]
Message-ID: <857cb160a981b5719d8ed6a3e5e7c456915c64fa.1654086665.git.legion@kernel.org> (raw)
In-Reply-To: <cover.1654086665.git.legion@kernel.org>
Dynamic memory allocation is needed to modify .data and specify the
per-namespace parameter. The new sysctl API makes it possible to get rid
of the need for such modification.
Signed-off-by: Alexey Gladkov <legion@kernel.org>
---
include/linux/ipc_namespace.h | 18 ---
ipc/ipc_sysctl.c | 236 +++++++++++++++++-----------------
ipc/namespace.c | 4 -
3 files changed, 121 insertions(+), 137 deletions(-)
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..51c2c247c447 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -191,22 +191,4 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
}
#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
-
-#ifdef CONFIG_SYSVIPC_SYSCTL
-
-bool setup_ipc_sysctls(struct ipc_namespace *ns);
-void retire_ipc_sysctls(struct ipc_namespace *ns);
-
-#else /* CONFIG_SYSVIPC_SYSCTL */
-
-static inline void retire_ipc_sysctls(struct ipc_namespace *ns)
-{
-}
-
-static inline bool setup_ipc_sysctls(struct ipc_namespace *ns)
-{
- return true;
-}
-
-#endif /* CONFIG_SYSVIPC_SYSCTL */
#endif
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index ef313ecfb53a..833b670c38f3 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -68,26 +68,94 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
return ret;
}
+static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table);
+
+static int ipc_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file)
+{
+ struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+
+ // For now, we only allow changes in init_user_ns.
+ if (ns->user_ns != &init_user_ns)
+ return -EPERM;
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ int index = (ctx->table - ipc_sysctls);
+
+ switch (index) {
+ case IPC_SYSCTL_SEM_NEXT_ID:
+ case IPC_SYSCTL_MSG_NEXT_ID:
+ case IPC_SYSCTL_SHM_NEXT_ID:
+ if (!checkpoint_restore_ns_capable(ns->user_ns))
+ return -EPERM;
+ break;
+ }
+#endif
+ ctx->ctl_data = ns;
+ return 0;
+}
+
+static ssize_t ipc_sys_read(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table table = *ctx->table;
+ table.data = data_from_ns(ctx, ctx->table);
+ return table.proc_handler(&table, 0, buffer, lenp, ppos);
+}
+
+static ssize_t ipc_sys_write(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table table = *ctx->table;
+ table.data = data_from_ns(ctx, ctx->table);
+ return table.proc_handler(&table, 1, buffer, lenp, ppos);
+}
+
+static struct ctl_fops ipc_sys_fops = {
+ .open = ipc_sys_open,
+ .read = ipc_sys_read,
+ .write = ipc_sys_write,
+};
+
int ipc_mni = IPCMNI;
int ipc_mni_shift = IPCMNI_SHIFT;
int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
+enum {
+ IPC_SYSCTL_SHMMAX,
+ IPC_SYSCTL_SHMALL,
+ IPC_SYSCTL_SHMMNI,
+ IPC_SYSCTL_SHM_RMID_FORCED,
+ IPC_SYSCTL_MSGMAX,
+ IPC_SYSCTL_MSGMNI,
+ IPC_SYSCTL_AUTO_MSGMNI,
+ IPC_SYSCTL_MSGMNB,
+ IPC_SYSCTL_SEM,
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ IPC_SYSCTL_SEM_NEXT_ID,
+ IPC_SYSCTL_MSG_NEXT_ID,
+ IPC_SYSCTL_SHM_NEXT_ID,
+#endif
+ IPC_SYSCTL_COUNTS
+};
+
static struct ctl_table ipc_sysctls[] = {
- {
+ [IPC_SYSCTL_SHMMAX] = {
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof(init_ipc_ns.shm_ctlmax),
.mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_SHMALL] = {
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof(init_ipc_ns.shm_ctlall),
.mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_SHMMNI] = {
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof(init_ipc_ns.shm_ctlmni),
@@ -95,8 +163,9 @@ static struct ctl_table ipc_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_SHM_RMID_FORCED] = {
.procname = "shm_rmid_forced",
.data = &init_ipc_ns.shm_rmid_forced,
.maxlen = sizeof(init_ipc_ns.shm_rmid_forced),
@@ -104,8 +173,9 @@ static struct ctl_table ipc_sysctls[] = {
.proc_handler = proc_ipc_dointvec_minmax_orphans,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_MSGMAX] = {
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof(init_ipc_ns.msg_ctlmax),
@@ -113,8 +183,9 @@ static struct ctl_table ipc_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_MSGMNI] = {
.procname = "msgmni",
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof(init_ipc_ns.msg_ctlmni),
@@ -122,8 +193,9 @@ static struct ctl_table ipc_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_AUTO_MSGMNI] = {
.procname = "auto_msgmni",
.data = NULL,
.maxlen = sizeof(int),
@@ -131,8 +203,9 @@ static struct ctl_table ipc_sysctls[] = {
.proc_handler = proc_ipc_auto_msgmni,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_MSGMNB] = {
.procname = "msgmnb",
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof(init_ipc_ns.msg_ctlmnb),
@@ -140,152 +213,85 @@ static struct ctl_table ipc_sysctls[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_SEM] = {
.procname = "sem",
.data = &init_ipc_ns.sem_ctls,
.maxlen = 4*sizeof(int),
.mode = 0644,
.proc_handler = proc_ipc_sem_dointvec,
+ .ctl_fops = &ipc_sys_fops,
},
#ifdef CONFIG_CHECKPOINT_RESTORE
- {
+ [IPC_SYSCTL_SEM_NEXT_ID] = {
.procname = "sem_next_id",
.data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
- .mode = 0444,
+ .mode = 0666,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_MSG_NEXT_ID] = {
.procname = "msg_next_id",
.data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
- .mode = 0444,
+ .mode = 0666,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
+ .ctl_fops = &ipc_sys_fops,
},
- {
+ [IPC_SYSCTL_SHM_NEXT_ID] = {
.procname = "shm_next_id",
.data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
- .mode = 0444,
+ .mode = 0666,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
+ .ctl_fops = &ipc_sys_fops,
},
#endif
- {}
+ [IPC_SYSCTL_COUNTS] = {}
};
-static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
+static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table *table)
{
- return &current->nsproxy->ipc_ns->ipc_set;
-}
-
-static int set_is_seen(struct ctl_table_set *set)
-{
- return &current->nsproxy->ipc_ns->ipc_set == set;
-}
-
-static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
-{
- int mode = table->mode;
-
+ struct ipc_namespace *ns = ctx->ctl_data;
+
+ switch (ctx->table - ipc_sysctls) {
+ case IPC_SYSCTL_SHMMAX: return &ns->shm_ctlmax;
+ case IPC_SYSCTL_SHMALL: return &ns->shm_ctlall;
+ case IPC_SYSCTL_SHMMNI: return &ns->shm_ctlmni;
+ case IPC_SYSCTL_SHM_RMID_FORCED: return &ns->shm_rmid_forced;
+ case IPC_SYSCTL_MSGMAX: return &ns->msg_ctlmax;
+ case IPC_SYSCTL_MSGMNI: return &ns->msg_ctlmni;
+ case IPC_SYSCTL_MSGMNB: return &ns->msg_ctlmnb;
+ case IPC_SYSCTL_SEM: return &ns->sem_ctls;
#ifdef CONFIG_CHECKPOINT_RESTORE
- struct ipc_namespace *ns = current->nsproxy->ipc_ns;
-
- if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
- (table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
- (table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
- checkpoint_restore_ns_capable(ns->user_ns))
- mode = 0666;
+ case IPC_SYSCTL_SEM_NEXT_ID: return &ns->ids[IPC_SEM_IDS].next_id;
+ case IPC_SYSCTL_MSG_NEXT_ID: return &ns->ids[IPC_MSG_IDS].next_id;
+ case IPC_SYSCTL_SHM_NEXT_ID: return &ns->ids[IPC_SHM_IDS].next_id;
#endif
- return mode;
-}
-
-static struct ctl_table_root set_root = {
- .lookup = set_lookup,
- .permissions = ipc_permissions,
-};
-
-bool setup_ipc_sysctls(struct ipc_namespace *ns)
-{
- struct ctl_table *tbl;
-
- setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen);
-
- tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL);
- if (tbl) {
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) {
- if (tbl[i].data == &init_ipc_ns.shm_ctlmax)
- tbl[i].data = &ns->shm_ctlmax;
-
- else if (tbl[i].data == &init_ipc_ns.shm_ctlall)
- tbl[i].data = &ns->shm_ctlall;
-
- else if (tbl[i].data == &init_ipc_ns.shm_ctlmni)
- tbl[i].data = &ns->shm_ctlmni;
-
- else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced)
- tbl[i].data = &ns->shm_rmid_forced;
-
- else if (tbl[i].data == &init_ipc_ns.msg_ctlmax)
- tbl[i].data = &ns->msg_ctlmax;
-
- else if (tbl[i].data == &init_ipc_ns.msg_ctlmni)
- tbl[i].data = &ns->msg_ctlmni;
-
- else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb)
- tbl[i].data = &ns->msg_ctlmnb;
-
- else if (tbl[i].data == &init_ipc_ns.sem_ctls)
- tbl[i].data = &ns->sem_ctls;
-#ifdef CONFIG_CHECKPOINT_RESTORE
- else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id)
- tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;
-
- else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id)
- tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;
-
- else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id)
- tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
-#endif
- else
- tbl[i].data = NULL;
- }
-
- ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl);
- }
- if (!ns->ipc_sysctls) {
- kfree(tbl);
- retire_sysctl_set(&ns->ipc_set);
- return false;
}
-
- return true;
+ return NULL;
}
-void retire_ipc_sysctls(struct ipc_namespace *ns)
-{
- struct ctl_table *tbl;
-
- tbl = ns->ipc_sysctls->ctl_table_arg;
- unregister_sysctl_table(ns->ipc_sysctls);
- retire_sysctl_set(&ns->ipc_set);
- kfree(tbl);
-}
+static struct ctl_table ipc_root_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = ipc_sysctls,
+ },
+ {}
+};
static int __init ipc_sysctl_init(void)
{
- if (!setup_ipc_sysctls(&init_ipc_ns)) {
- pr_warn("ipc sysctl registration failed\n");
- return -ENOMEM;
- }
+ register_sysctl_table(ipc_root_table);
return 0;
}
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 754f3237194a..f760243ca685 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -63,9 +63,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_mq_sysctls(ns))
goto fail_put;
- if (!setup_ipc_sysctls(ns))
- goto fail_put;
-
sem_init_ns(ns);
msg_init_ns(ns);
shm_init_ns(ns);
@@ -133,7 +130,6 @@ static void free_ipc_ns(struct ipc_namespace *ns)
shm_exit_ns(ns);
retire_mq_sysctls(ns);
- retire_ipc_sysctls(ns);
dec_ipc_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
--
2.33.3
next prev parent reply other threads:[~2022-06-01 13:21 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-14 18:18 [PATCH v4 0/2] ipc: Store mq and ipc sysctls in the ipc namespace Alexey Gladkov
2022-02-14 18:18 ` [PATCH v4 1/2] ipc: Store mqueue " Alexey Gladkov
2022-02-14 18:18 ` [PATCH v4 2/2] ipc: Store ipc " Alexey Gladkov
2022-03-23 20:24 ` [GIT PULL] ipc: Bind to the ipc namespace at open time Eric W. Biederman
2022-03-24 18:12 ` Linus Torvalds
2022-03-24 21:48 ` Eric W. Biederman
2022-03-24 22:16 ` Linus Torvalds
2022-03-25 12:10 ` Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 0/4] ipc: Remove extra1 field abuse to pass ipc namespace Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 1/4] " Alexey Gladkov
2022-05-02 16:07 ` Eric W. Biederman
2022-04-22 12:53 ` [PATCH v1 2/4] ipc: Use proper " Alexey Gladkov
2022-05-02 16:09 ` Eric W. Biederman
2022-05-03 13:39 ` Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 1/4] ipc: Use the same namespace to modify and validate Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 2/4] ipc: Remove extra1 field abuse to pass ipc namespace Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 3/4] ipc: Check permissions for checkpoint_restart sysctls at open time Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 4/4] ipc: Remove extra braces Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 3/4] ipc: Check permissions for checkpoint_restart sysctls at open time Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 4/4] ipc: Remove extra braces Alexey Gladkov
2022-04-22 20:44 ` [PATCH v1 0/4] ipc: Remove extra1 field abuse to pass ipc namespace Linus Torvalds
2022-05-04 3:42 ` Philip Rhoades
2022-06-01 13:20 ` [RFC PATCH 0/4] API extension for handling sysctl Alexey Gladkov
2022-06-01 13:20 ` [RFC PATCH 1/4] sysctl: " Alexey Gladkov
2022-06-01 19:19 ` Matthew Wilcox
2022-06-01 19:23 ` Linus Torvalds
2022-06-01 19:25 ` Matthew Wilcox
2022-06-01 19:31 ` Linus Torvalds
2022-06-01 19:32 ` Alexey Gladkov
2022-06-01 13:20 ` Alexey Gladkov [this message]
2022-06-01 16:45 ` [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory Linus Torvalds
2022-06-01 18:24 ` Alexey Gladkov
2022-06-01 18:34 ` Linus Torvalds
2022-06-01 19:05 ` Alexey Gladkov
2022-06-09 18:51 ` Luis Chamberlain
2022-06-01 13:20 ` [RFC PATCH 3/4] sysctl: userns: " Alexey Gladkov
2022-06-01 13:20 ` [RFC PATCH 4/4] sysctl: mqueue: " Alexey Gladkov
2022-06-09 16:45 ` [RFC PATCH 0/4] API extension for handling sysctl Luis Chamberlain
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=857cb160a981b5719d8ed6a3e5e7c456915c64fa.1654086665.git.legion@kernel.org \
--to=legion@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=brauner@kernel.org \
--cc=containers@lists.linux.dev \
--cc=ebiederm@xmission.com \
--cc=keescook@chromium.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mcgrof@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=vvs@virtuozzo.com \
--cc=yzaikin@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).