From: Alexey Gladkov <legion@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>,
"Eric W . Biederman" <ebiederm@xmission.com>,
Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Christian Brauner <brauner@kernel.org>,
Iurii Zaikin <yzaikin@google.com>,
Kees Cook <keescook@chromium.org>,
Linux Containers <containers@lists.linux.dev>,
linux-fsdevel@vger.kernel.org,
Luis Chamberlain <mcgrof@kernel.org>,
Vasily Averin <vvs@virtuozzo.com>
Subject: [RFC PATCH 3/4] sysctl: userns: Do not use dynamic memory
Date: Wed, 1 Jun 2022 15:20:31 +0200 [thread overview]
Message-ID: <81190e5e4879d53be2e1416bcad0b663421339d6.1654086665.git.legion@kernel.org> (raw)
In-Reply-To: <cover.1654086665.git.legion@kernel.org>
Dynamic memory allocation is needed to modify .data and point it at the
per-namespace parameter. The new sysctl API makes it possible to get rid
of the need for such modification.
Signed-off-by: Alexey Gladkov <legion@kernel.org>
---
include/linux/user_namespace.h | 6 --
kernel/ucount.c | 116 +++++++++++++--------------------
kernel/user_namespace.c | 10 +--
3 files changed, 46 insertions(+), 86 deletions(-)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c4..7b134516e5cb 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -95,10 +95,6 @@ struct user_namespace {
struct key *persistent_keyring_register;
#endif
struct work_struct work;
-#ifdef CONFIG_SYSCTL
- struct ctl_table_set set;
- struct ctl_table_header *sysctls;
-#endif
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
long rlimit_max[UCOUNT_RLIMIT_COUNTS];
@@ -116,8 +112,6 @@ struct ucounts {
extern struct user_namespace init_user_ns;
extern struct ucounts init_ucounts;
-bool setup_userns_sysctls(struct user_namespace *ns);
-void retire_userns_sysctls(struct user_namespace *ns);
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
diff --git a/kernel/ucount.c b/kernel/ucount.c
index ee8e57fd6f90..4a5072671847 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -7,6 +7,7 @@
#include <linux/hash.h>
#include <linux/kmemleak.h>
#include <linux/user_namespace.h>
+#include <linux/fs.h>
struct ucounts init_ucounts = {
.ns = &init_user_ns,
@@ -26,38 +27,20 @@ static DEFINE_SPINLOCK(ucounts_lock);
#ifdef CONFIG_SYSCTL
-static struct ctl_table_set *
-set_lookup(struct ctl_table_root *root)
-{
- return &current_user_ns()->set;
-}
-
-static int set_is_seen(struct ctl_table_set *set)
-{
- return &current_user_ns()->set == set;
-}
-
-static int set_permissions(struct ctl_table_header *head,
- struct ctl_table *table)
-{
- struct user_namespace *user_ns =
- container_of(head->set, struct user_namespace, set);
- int mode;
-
- /* Allow users with CAP_SYS_RESOURCE unrestrained access */
- if (ns_capable(user_ns, CAP_SYS_RESOURCE))
- mode = (table->mode & S_IRWXU) >> 6;
- else
- /* Allow all others at most read-only access */
- mode = table->mode & S_IROTH;
- return (mode << 6) | (mode << 3) | mode;
-}
-
-static struct ctl_table_root set_root = {
- .lookup = set_lookup,
- .permissions = set_permissions,
+static int user_sys_open(struct ctl_context *ctx, struct inode *inode,
+ struct file *file);
+static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos);
+static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos);
+
+static struct ctl_fops user_sys_fops = {
+ .open = user_sys_open,
+ .read = user_sys_read,
+ .write = user_sys_write,
};
+static long ue_dummy = 0;
static long ue_zero = 0;
static long ue_int_max = INT_MAX;
@@ -66,9 +49,11 @@ static long ue_int_max = INT_MAX;
.procname = name, \
.maxlen = sizeof(long), \
.mode = 0644, \
+ .data = &ue_dummy, \
.proc_handler = proc_doulongvec_minmax, \
.extra1 = &ue_zero, \
.extra2 = &ue_int_max, \
+ .ctl_fops = &user_sys_fops, \
}
static struct ctl_table user_table[] = {
UCOUNT_ENTRY("max_user_namespaces"),
@@ -89,44 +74,43 @@ static struct ctl_table user_table[] = {
#endif
{ }
};
-#endif /* CONFIG_SYSCTL */
-bool setup_userns_sysctls(struct user_namespace *ns)
+static int user_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file)
{
-#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
-
- BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
- setup_sysctl_set(&ns->set, &set_root, set_is_seen);
- tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
- if (tbl) {
- int i;
- for (i = 0; i < UCOUNT_COUNTS; i++) {
- tbl[i].data = &ns->ucount_max[i];
- }
- ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
- }
- if (!ns->sysctls) {
- kfree(tbl);
- retire_sysctl_set(&ns->set);
- return false;
- }
-#endif
- return true;
+ /* Allow users with CAP_SYS_RESOURCE unrestrained access */
+ if ((file->f_mode & FMODE_WRITE) &&
+ !ns_capable(file->f_cred->user_ns, CAP_SYS_RESOURCE))
+ return -EPERM;
+ return 0;
}
-void retire_userns_sysctls(struct user_namespace *ns)
+static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos)
{
-#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
+ struct ctl_table table = *ctx->table;
+ table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table];
+ return table.proc_handler(&table, 0, buffer, lenp, ppos);
+}
- tbl = ns->sysctls->ctl_table_arg;
- unregister_sysctl_table(ns->sysctls);
- retire_sysctl_set(&ns->set);
- kfree(tbl);
-#endif
+static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file,
+ char *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table table = *ctx->table;
+ table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table];
+ return table.proc_handler(&table, 1, buffer, lenp, ppos);
}
+static struct ctl_table user_root_table[] = {
+ {
+ .procname = "user",
+ .mode = 0555,
+ .child = user_table,
+ },
+ {}
+};
+
+#endif /* CONFIG_SYSCTL */
+
static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
{
struct ucounts *ucounts;
@@ -357,17 +341,7 @@ bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigne
static __init int user_namespace_sysctl_init(void)
{
#ifdef CONFIG_SYSCTL
- static struct ctl_table_header *user_header;
- static struct ctl_table empty[1];
- /*
- * It is necessary to register the user directory in the
- * default set so that registrations in the child sets work
- * properly.
- */
- user_header = register_sysctl("user", empty);
- kmemleak_ignore(user_header);
- BUG_ON(!user_header);
- BUG_ON(!setup_userns_sysctls(&init_user_ns));
+ register_sysctl_table(user_root_table);
#endif
hlist_add_ucounts(&init_ucounts);
inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 981bb2d10d83..c0e707bc9a31 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -149,17 +149,10 @@ int create_user_ns(struct cred *new)
INIT_LIST_HEAD(&ns->keyring_name_list);
init_rwsem(&ns->keyring_sem);
#endif
- ret = -ENOMEM;
- if (!setup_userns_sysctls(ns))
- goto fail_keyring;
set_cred_user_ns(new, ns);
return 0;
-fail_keyring:
-#ifdef CONFIG_PERSISTENT_KEYRINGS
- key_put(ns->persistent_keyring_register);
-#endif
- ns_free_inum(&ns->ns);
+
fail_free:
kmem_cache_free(user_ns_cachep, ns);
fail_dec:
@@ -208,7 +201,6 @@ static void free_user_ns(struct work_struct *work)
kfree(ns->projid_map.forward);
kfree(ns->projid_map.reverse);
}
- retire_userns_sysctls(ns);
key_free_user_ns(ns);
ns_free_inum(&ns->ns);
kmem_cache_free(user_ns_cachep, ns);
--
2.33.3
next prev parent reply other threads:[~2022-06-01 13:21 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-14 18:18 [PATCH v4 0/2] ipc: Store mq and ipc sysctls in the ipc namespace Alexey Gladkov
2022-02-14 18:18 ` [PATCH v4 1/2] ipc: Store mqueue " Alexey Gladkov
2022-02-14 18:18 ` [PATCH v4 2/2] ipc: Store ipc " Alexey Gladkov
2022-03-23 20:24 ` [GIT PULL] ipc: Bind to the ipc namespace at open time Eric W. Biederman
2022-03-24 18:12 ` Linus Torvalds
2022-03-24 21:48 ` Eric W. Biederman
2022-03-24 22:16 ` Linus Torvalds
2022-03-25 12:10 ` Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 0/4] ipc: Remove extra1 field abuse to pass ipc namespace Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 1/4] " Alexey Gladkov
2022-05-02 16:07 ` Eric W. Biederman
2022-04-22 12:53 ` [PATCH v1 2/4] ipc: Use proper " Alexey Gladkov
2022-05-02 16:09 ` Eric W. Biederman
2022-05-03 13:39 ` Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 1/4] ipc: Use the same namespace to modify and validate Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 2/4] ipc: Remove extra1 field abuse to pass ipc namespace Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 3/4] ipc: Check permissions for checkpoint_restart sysctls at open time Alexey Gladkov
2022-05-03 13:39 ` [PATCH v2 4/4] ipc: Remove extra braces Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 3/4] ipc: Check permissions for checkpoint_restart sysctls at open time Alexey Gladkov
2022-04-22 12:53 ` [PATCH v1 4/4] ipc: Remove extra braces Alexey Gladkov
2022-04-22 20:44 ` [PATCH v1 0/4] ipc: Remove extra1 field abuse to pass ipc namespace Linus Torvalds
2022-05-04 3:42 ` Philip Rhoades
2022-06-01 13:20 ` [RFC PATCH 0/4] API extension for handling sysctl Alexey Gladkov
2022-06-01 13:20 ` [RFC PATCH 1/4] sysctl: " Alexey Gladkov
2022-06-01 19:19 ` Matthew Wilcox
2022-06-01 19:23 ` Linus Torvalds
2022-06-01 19:25 ` Matthew Wilcox
2022-06-01 19:31 ` Linus Torvalds
2022-06-01 19:32 ` Alexey Gladkov
2022-06-01 13:20 ` [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory Alexey Gladkov
2022-06-01 16:45 ` Linus Torvalds
2022-06-01 18:24 ` Alexey Gladkov
2022-06-01 18:34 ` Linus Torvalds
2022-06-01 19:05 ` Alexey Gladkov
2022-06-09 18:51 ` Luis Chamberlain
2022-06-01 13:20 ` Alexey Gladkov [this message]
2022-06-01 13:20 ` [RFC PATCH 4/4] sysctl: mqueue: " Alexey Gladkov
2022-06-09 16:45 ` [RFC PATCH 0/4] API extension for handling sysctl Luis Chamberlain
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=81190e5e4879d53be2e1416bcad0b663421339d6.1654086665.git.legion@kernel.org \
--to=legion@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=brauner@kernel.org \
--cc=containers@lists.linux.dev \
--cc=ebiederm@xmission.com \
--cc=keescook@chromium.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mcgrof@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=vvs@virtuozzo.com \
--cc=yzaikin@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).