linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hao Luo <haoluo@google.com>
To: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>, KP Singh <kpsingh@kernel.org>,
	Shakeel Butt <shakeelb@google.com>,
	Joe Burton <jevburton.kernel@gmail.com>,
	Tejun Heo <tj@kernel.org>,
	joshdon@google.com, sdf@google.com, bpf@vger.kernel.org,
	linux-kernel@vger.kernel.org, Hao Luo <haoluo@google.com>
Subject: [PATCH bpf-next v1 1/9] bpf: Add mkdir, rmdir, unlink syscalls for prog_bpf_syscall
Date: Fri, 25 Feb 2022 15:43:31 -0800	[thread overview]
Message-ID: <20220225234339.2386398-2-haoluo@google.com> (raw)
In-Reply-To: <20220225234339.2386398-1-haoluo@google.com>

This patch allows bpf_syscall progs to perform some basic filesystem
operations: creating and removing directories, and unlinking files. Three
bpf helpers are added for this purpose. When combined with the following
patches that allow pinning and getting bpf objects from bpf progs,
this feature can be used to create a directory hierarchy in bpffs that
helps manage bpf objects purely using bpf progs.

The added helpers are subject to the same permission checks as their
syscall versions. For example, one cannot write to a read-only file system,
and the identity of the current process is checked to see whether it has
sufficient permission to perform the operations.

Only directories and files in bpffs can be created or removed by these
helpers. But it won't be too hard to allow these helpers to operate
on files in other filesystems, if we want.

Signed-off-by: Hao Luo <haoluo@google.com>
---
 include/linux/bpf.h            |   1 +
 include/uapi/linux/bpf.h       |  26 +++++
 kernel/bpf/inode.c             |   9 +-
 kernel/bpf/syscall.c           | 177 +++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  26 +++++
 5 files changed, 236 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f19abc59b6cd..fce5e26179f5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1584,6 +1584,7 @@ int bpf_link_new_fd(struct bpf_link *link);
 struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
 struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 
+bool bpf_path_is_bpf_dir(const struct path *path);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index afe3d0d7f5f2..a5dbc794403d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5086,6 +5086,29 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure. On error
  *		*dst* buffer is zeroed out.
+ *
+ * long bpf_mkdir(const char *pathname, int pathname_sz, u32 mode)
+ *	Description
+ *		Attempts to create a directory named *pathname*. The argument
+ *		*pathname_sz* is the size of *pathname* in bytes, including the
+ *		terminating NUL. The argument *mode* specifies the mode for the
+ *		new directory. It is modified by the process's umask. It has the
+ *		same semantics as the syscall mkdir(2).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_rmdir(const char *pathname, int pathname_sz)
+ *	Description
+ *		Deletes a directory, which must be empty.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_unlink(const char *pathname, int pathname_sz)
+ *	Description
+ *		Deletes a name and possibly the file it refers to. It has the
+ *		same semantics as the syscall unlink(2).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5280,6 +5303,9 @@ union bpf_attr {
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
+	FN(mkdir),			\
+	FN(rmdir),			\
+	FN(unlink),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 4f841e16779e..3aca00e9e950 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -414,6 +414,11 @@ static const struct inode_operations bpf_dir_iops = {
 	.unlink		= simple_unlink,
 };
 
+bool bpf_path_is_bpf_dir(const struct path *path)
+{
+	return d_inode(path->dentry)->i_op == &bpf_dir_iops;
+}
+
 /* pin iterator link into bpffs */
 static int bpf_iter_link_pin_kernel(struct dentry *parent,
 				    const char *name, struct bpf_link *link)
@@ -439,7 +444,6 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
 			  enum bpf_type type)
 {
 	struct dentry *dentry;
-	struct inode *dir;
 	struct path path;
 	umode_t mode;
 	int ret;
@@ -454,8 +458,7 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
 	if (ret)
 		goto out;
 
-	dir = d_inode(path.dentry);
-	if (dir->i_op != &bpf_dir_iops) {
+	if (!bpf_path_is_bpf_dir(&path)) {
 		ret = -EPERM;
 		goto out;
 	}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index db402ebc5570..07683b791733 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -12,6 +12,7 @@
 #include <linux/sched/signal.h>
 #include <linux/vmalloc.h>
 #include <linux/mmzone.h>
+#include <linux/namei.h>
 #include <linux/anon_inodes.h>
 #include <linux/fdtable.h>
 #include <linux/file.h>
@@ -4867,6 +4868,176 @@ const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
 	.arg4_type	= ARG_PTR_TO_LONG,
 };
 
+BPF_CALL_3(bpf_mkdir, const char *, pathname, int, pathname_sz, u32, raw_mode)
+{
+	struct user_namespace *mnt_userns;
+	struct dentry *dentry;
+	struct path path;
+	umode_t mode;
+	int err;
+
+	if (pathname_sz <= 1 || pathname[pathname_sz - 1])
+		return -EINVAL;
+
+	dentry = kern_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	if (!bpf_path_is_bpf_dir(&path)) {
+		err = -EPERM;
+		goto err_exit;
+	}
+
+	mode = raw_mode;
+	if (!IS_POSIXACL(path.dentry->d_inode))
+		mode &= ~current_umask();
+	err = security_path_mkdir(&path, dentry, mode);
+	if (err)
+		goto err_exit;
+
+	mnt_userns = mnt_user_ns(path.mnt);
+	err = vfs_mkdir(mnt_userns, d_inode(path.dentry), dentry, mode);
+
+err_exit:
+	done_path_create(&path, dentry);
+	return err;
+}
+
+const struct bpf_func_proto bpf_mkdir_proto = {
+	.func		= bpf_mkdir,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_rmdir, const char *, pathname, int, pathname_sz)
+{
+	struct user_namespace *mnt_userns;
+	struct path parent;
+	struct dentry *dentry;
+	int err;
+
+	if (pathname_sz <= 1 || pathname[pathname_sz - 1])
+		return -EINVAL;
+
+	err = kern_path(pathname, 0, &parent);
+	if (err)
+		return err;
+
+	if (!bpf_path_is_bpf_dir(&parent)) {
+		err = -EPERM;
+		goto exit1;
+	}
+
+	err = mnt_want_write(parent.mnt);
+	if (err)
+		goto exit1;
+
+	dentry = kern_path_locked(pathname, &parent);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto exit2;
+	}
+
+	if (d_really_is_negative(dentry)) {
+		err = -ENOENT;
+		goto exit3;
+	}
+
+	err = security_path_rmdir(&parent, dentry);
+	if (err)
+		goto exit3;
+
+	mnt_userns = mnt_user_ns(parent.mnt);
+	err = vfs_rmdir(mnt_userns, d_inode(parent.dentry), dentry);
+exit3:
+	dput(dentry);
+	inode_unlock(d_inode(parent.dentry));
+exit2:
+	mnt_drop_write(parent.mnt);
+exit1:
+	path_put(&parent);
+	return err;
+}
+
+const struct bpf_func_proto bpf_rmdir_proto = {
+	.func		= bpf_rmdir,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+BPF_CALL_2(bpf_unlink, const char *, pathname, int, pathname_sz)
+{
+	struct user_namespace *mnt_userns;
+	struct path parent;
+	struct dentry *dentry;
+	struct inode *inode = NULL;
+	int err;
+
+	if (pathname_sz <= 1 || pathname[pathname_sz - 1])
+		return -EINVAL;
+
+	err = kern_path(pathname, 0, &parent);
+	if (err)
+		return err;
+
+	err = mnt_want_write(parent.mnt);
+	if (err)
+		goto exit1;
+
+	dentry = kern_path_locked(pathname, &parent);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto exit2;
+	}
+
+	if (!bpf_path_is_bpf_dir(&parent)) {
+		err = -EPERM;
+		goto exit3;
+	}
+
+	if (d_is_negative(dentry)) {
+		err = -ENOENT;
+		goto exit3;
+	}
+
+	if (d_is_dir(dentry)) {
+		err = -EISDIR;
+		goto exit3;
+	}
+
+	inode = dentry->d_inode;
+	ihold(inode);
+	err = security_path_unlink(&parent, dentry);
+	if (err)
+		goto exit3;
+
+	mnt_userns = mnt_user_ns(parent.mnt);
+	err = vfs_unlink(mnt_userns, d_inode(parent.dentry), dentry, NULL);
+exit3:
+	dput(dentry);
+	inode_unlock(d_inode(parent.dentry));
+	if (inode)
+		iput(inode);
+exit2:
+	mnt_drop_write(parent.mnt);
+exit1:
+	path_put(&parent);
+	return err;
+}
+
+const struct bpf_func_proto bpf_unlink_proto = {
+	.func		= bpf_unlink,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
 static const struct bpf_func_proto *
 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4879,6 +5050,12 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sys_close_proto;
 	case BPF_FUNC_kallsyms_lookup_name:
 		return &bpf_kallsyms_lookup_name_proto;
+	case BPF_FUNC_mkdir:
+		return &bpf_mkdir_proto;
+	case BPF_FUNC_rmdir:
+		return &bpf_rmdir_proto;
+	case BPF_FUNC_unlink:
+		return &bpf_unlink_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index afe3d0d7f5f2..a5dbc794403d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5086,6 +5086,29 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure. On error
  *		*dst* buffer is zeroed out.
+ *
+ * long bpf_mkdir(const char *pathname, int pathname_sz, u32 mode)
+ *	Description
+ *		Attempts to create a directory named *pathname*. The argument
+ *		*pathname_sz* is the size of *pathname* in bytes, including the
+ *		terminating NUL. The argument *mode* specifies the mode for the
+ *		new directory. It is modified by the process's umask. It has the
+ *		same semantics as the syscall mkdir(2).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_rmdir(const char *pathname, int pathname_sz)
+ *	Description
+ *		Deletes a directory, which must be empty.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_unlink(const char *pathname, int pathname_sz)
+ *	Description
+ *		Deletes a name and possibly the file it refers to. It has the
+ *		same semantics as the syscall unlink(2).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5280,6 +5303,9 @@ union bpf_attr {
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
+	FN(mkdir),			\
+	FN(rmdir),			\
+	FN(unlink),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.35.1.574.g5d30c73bfb-goog


  reply	other threads:[~2022-02-25 23:43 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-25 23:43 [PATCH bpf-next v1 0/9] Extend cgroup interface with bpf Hao Luo
2022-02-25 23:43 ` Hao Luo [this message]
2022-02-27  5:18   ` [PATCH bpf-next v1 1/9] bpf: Add mkdir, rmdir, unlink syscalls for prog_bpf_syscall Kumar Kartikeya Dwivedi
2022-02-28 22:10     ` Hao Luo
2022-03-02 19:34       ` Alexei Starovoitov
2022-03-03 18:50         ` Hao Luo
2022-03-04 18:37           ` Hao Luo
2022-03-05 23:47             ` Alexei Starovoitov
2022-03-08 21:08               ` Hao Luo
2022-03-02 20:55   ` Yonghong Song
2022-03-03 18:56     ` Hao Luo
2022-03-03 19:13       ` Yonghong Song
2022-03-03 19:15         ` Hao Luo
2022-03-12  3:46   ` Al Viro
2022-03-14 17:07     ` Hao Luo
2022-03-14 23:10       ` Al Viro
2022-03-15 17:27         ` Hao Luo
2022-03-15 18:59           ` Alexei Starovoitov
2022-03-15 19:03             ` Alexei Starovoitov
2022-03-15 19:00           ` Al Viro
2022-03-15 19:47             ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 2/9] bpf: Add BPF_OBJ_PIN and BPF_OBJ_GET in the bpf_sys_bpf helper Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 3/9] selftests/bpf: tests mkdir, rmdir, unlink and pin in syscall Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 4/9] bpf: Introduce sleepable tracepoints Hao Luo
2022-03-02 19:41   ` Alexei Starovoitov
2022-03-03 19:37     ` Hao Luo
2022-03-03 19:59       ` Alexei Starovoitov
2022-03-02 21:23   ` Yonghong Song
2022-03-02 21:30     ` Alexei Starovoitov
2022-03-03  1:08       ` Yonghong Song
2022-03-03  2:29         ` Alexei Starovoitov
2022-03-03 19:43           ` Hao Luo
2022-03-03 20:02             ` Alexei Starovoitov
2022-03-03 20:04               ` Alexei Starovoitov
2022-03-03 22:06                 ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 5/9] cgroup: Sleepable cgroup tracepoints Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 6/9] libbpf: Add sleepable tp_btf Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 7/9] bpf: Lift permission check in __sys_bpf when called from kernel Hao Luo
2022-03-02 20:01   ` Alexei Starovoitov
2022-03-03 19:14     ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 8/9] bpf: Introduce cgroup iter Hao Luo
2022-02-26  2:32   ` kernel test robot
2022-02-26  2:32   ` kernel test robot
2022-02-26  2:53   ` kernel test robot
2022-03-02 21:59   ` Yonghong Song
2022-03-03 20:02     ` Hao Luo
2022-03-02 22:45   ` Kumar Kartikeya Dwivedi
2022-03-03  2:03     ` Yonghong Song
2022-03-03  3:03       ` Kumar Kartikeya Dwivedi
2022-03-03  4:00         ` Alexei Starovoitov
2022-03-03  7:33         ` Yonghong Song
2022-03-03  8:13           ` Kumar Kartikeya Dwivedi
2022-03-03 21:52           ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 9/9] selftests/bpf: Tests using sleepable tracepoints to monitor cgroup events Hao Luo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220225234339.2386398-2-haoluo@google.com \
    --to=haoluo@google.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jevburton.kernel@gmail.com \
    --cc=joshdon@google.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=tj@kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).