All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc.
@ 2018-03-06  8:29 Misono, Tomohiro
  2018-03-06  8:30 ` [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl Misono, Tomohiro
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-06  8:29 UTC (permalink / raw)
  To: linux-btrfs

This adds two new unprivileged ioctls:

1st patch: version of tree_search ioctl which only searches/returns subvolume related item.
2nd patch: user version of ino_lookup ioctl which also performs permission check.

They will be used to implement user version of "subvolume list/show" etc in user tools.
See each commit log for more detals.

The RFC implementation of btrfs-progs can be found in the ML titled as follows: 
  [RFC PATCH 0/7] btrfs-progs: Allow normal user to call "subvolume list/show"

Tomohiro Misono (2):
  btrfs: Add unprivileged subvolume search ioctl
  btrfs: Add unprivileged version of ino_lookup ioctl

 fs/btrfs/ioctl.c           | 472 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/btrfs.h |  18 ++
 2 files changed, 490 insertions(+)

-- 
2.14.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-06  8:29 [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc Misono, Tomohiro
@ 2018-03-06  8:30 ` Misono, Tomohiro
  2018-03-06 20:29   ` Goffredo Baroncelli
  2018-03-08  8:29   ` Nikolay Borisov
  2018-03-06  8:31 ` [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl Misono, Tomohiro
  2018-03-06 16:51 ` [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc David Sterba
  2 siblings, 2 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-06  8:30 UTC (permalink / raw)
  To: linux-btrfs

Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
from root tree. The arguments of this ioctl are the same as treesearch
ioctl and can be used like treesearch ioctl.

Since treesearch ioctl requires root privilege, this ioctl is needed
to allow normal users to call "btrfs subvolume list/show" etc.

Note that the subvolume name in ROOT_BACKREF/ROOT_REF will not be copied to
prevent potential name leak. The name can be obtained by calling
user version's ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER).

Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
---
 fs/btrfs/ioctl.c           | 254 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/btrfs.h |   2 +
 2 files changed, 256 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 111ee282b777..1dba309dce31 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1921,6 +1921,28 @@ static noinline int key_in_sk(struct btrfs_key *key,
 	return 1;
 }
 
+/*
+ * check if key is in sk and subvolume related range
+ */
+static noinline int key_in_sk_and_subvol(struct btrfs_key *key,
+			      struct btrfs_ioctl_search_key *sk)
+{
+	int ret;
+
+	ret = key_in_sk(key, sk);
+	if (!ret)
+		return 0;
+
+	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
+		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&
+	    key->type >= BTRFS_ROOT_ITEM_KEY &&
+	    key->type <= BTRFS_ROOT_BACKREF_KEY)
+		return 1;
+
+	return 0;
+}
+
 static noinline int copy_to_sk(struct btrfs_path *path,
 			       struct btrfs_key *key,
 			       struct btrfs_ioctl_search_key *sk,
@@ -2045,6 +2067,142 @@ static noinline int copy_to_sk(struct btrfs_path *path,
 	return ret;
 }
 
+/*
+ * Basically the same as copy_to_sk() but restricts the copied item
+ * within subvolume related one using key_in_sk_and_subvol().
+ * Also, the name of subvolume will be omitted from
+ * ROOT_BACKREF/ROOT_REF item.
+ */
+static noinline int copy_subvol_item(struct btrfs_path *path,
+			       struct btrfs_key *key,
+			       struct btrfs_ioctl_search_key *sk,
+			       size_t *buf_size,
+			       char __user *ubuf,
+			       unsigned long *sk_offset,
+			       int *num_found)
+{
+	u64 found_transid;
+	struct extent_buffer *leaf;
+	struct btrfs_ioctl_search_header sh;
+	struct btrfs_key test;
+	unsigned long item_off;
+	unsigned long item_len;
+	int nritems;
+	int i;
+	int slot;
+	int ret = 0;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	nritems = btrfs_header_nritems(leaf);
+
+	if (btrfs_header_generation(leaf) > sk->max_transid) {
+		i = nritems;
+		goto advance_key;
+	}
+	found_transid = btrfs_header_generation(leaf);
+
+	for (i = slot; i < nritems; i++) {
+		item_off = btrfs_item_ptr_offset(leaf, i);
+		item_len = btrfs_item_size_nr(leaf, i);
+
+		btrfs_item_key_to_cpu(leaf, key, i);
+		if (!key_in_sk_and_subvol(key, sk))
+			continue;
+
+		/* will not copy the name of subvolume */
+		if (key->type == BTRFS_ROOT_BACKREF_KEY ||
+		    key->type == BTRFS_ROOT_REF_KEY)
+			item_len = sizeof(struct btrfs_root_ref);
+
+		if (sizeof(sh) + item_len > *buf_size) {
+			if (*num_found) {
+				ret = 1;
+				goto out;
+			}
+
+			/*
+			 * return one empty item back for v1, which does not
+			 * handle -EOVERFLOW
+			 */
+
+			*buf_size = sizeof(sh) + item_len;
+			item_len = 0;
+			ret = -EOVERFLOW;
+		}
+
+		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
+			ret = 1;
+			goto out;
+		}
+
+		sh.objectid = key->objectid;
+		sh.offset = key->offset;
+		sh.type = key->type;
+		sh.len = item_len;
+		sh.transid = found_transid;
+
+		/* copy search result header */
+		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		*sk_offset += sizeof(sh);
+
+		if (item_len) {
+			char __user *up = ubuf + *sk_offset;
+
+			/* copy the item */
+			if (read_extent_buffer_to_user(leaf, up,
+						       item_off, item_len)) {
+				ret = -EFAULT;
+				goto out;
+			}
+
+			*sk_offset += item_len;
+		}
+		(*num_found)++;
+
+		if (ret) /* -EOVERFLOW from above */
+			goto out;
+
+		if (*num_found >= sk->nr_items) {
+			ret = 1;
+			goto out;
+		}
+	}
+advance_key:
+	ret = 0;
+	test.objectid = sk->max_objectid;
+	test.type = sk->max_type;
+	test.offset = sk->max_offset;
+	if (btrfs_comp_cpu_keys(key, &test) >= 0)
+		ret = 1;
+	else if (key->offset < (u64)-1)
+		key->offset++;
+	else if (key->type < (u8)-1) {
+		key->offset = 0;
+		key->type++;
+	} else if (key->objectid < (u64)-1) {
+		key->offset = 0;
+		key->type = 0;
+		key->objectid++;
+	} else
+		ret = 1;
+out:
+	/*
+	 *  0: all items from this leaf copied, continue with next
+	 *  1: * more items can be copied, but unused buffer is too small
+	 *     * all items were found
+	 *     Either way, it will stops the loop which iterates to the next
+	 *     leaf
+	 *  -EOVERFLOW: item was to large for buffer
+	 *  -EFAULT: could not copy extent buffer back to userspace
+	 */
+	return ret;
+}
+
 static noinline int search_ioctl(struct inode *inode,
 				 struct btrfs_ioctl_search_key *sk,
 				 size_t *buf_size,
@@ -2309,6 +2467,100 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 	return ret;
 }
 
+static noinline int search_subvol(struct inode *inode,
+				 struct btrfs_ioctl_search_key *sk,
+				 size_t *buf_size,
+				 char __user *ubuf)
+{
+	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	unsigned long sk_offset = 0;
+	int num_found = 0;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* search ROOT TREEE */
+	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_read_fs_root_no_name(info, &key);
+	if (IS_ERR(root)) {
+		btrfs_free_path(path);
+		return -ENOENT;
+	}
+
+	key.objectid = sk->min_objectid;
+	key.type = sk->min_type;
+	key.offset = sk->min_offset;
+
+	while (1) {
+		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
+		if (ret != 0) {
+			if (ret > 0)
+				ret = 0;
+			goto err;
+		}
+
+		ret = copy_subvol_item(path, &key, sk, buf_size, ubuf,
+				 &sk_offset, &num_found);
+		btrfs_release_path(path);
+		if (ret)
+			break;
+	}
+
+	if (ret > 0)
+		ret = 0;
+err:
+	sk->nr_items = num_found;
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Unprivileged ioctl for getting subvolume related item.
+ * The arguments are the same as btrfs_ioctl_tree_search()
+ * and can be used like tree search ioctl.
+ *
+ * Only returns ROOT_ITEM/ROOT_BACKREF/ROOT_REF key of subvolumes
+ * from root tree. Also, the name of subvolume will be omitted from
+ * ROOT_BACKREF/ROOT_REF item to prevent name leak.
+ */
+static noinline int btrfs_ioctl_get_subvol_info(struct file *file,
+					   void __user *argp)
+{
+	struct btrfs_ioctl_search_args __user *uargs;
+	struct btrfs_ioctl_search_key sk;
+	struct inode *inode;
+	size_t buf_size;
+	int ret;
+
+	uargs = (struct btrfs_ioctl_search_args __user *)argp;
+
+	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
+		return -EFAULT;
+
+	buf_size = sizeof(uargs->buf);
+
+	inode = file_inode(file);
+	ret = search_subvol(inode, &sk, &buf_size, uargs->buf);
+
+	/*
+	 * In the original implementation an overflow is handled by returning a
+	 * search header with a len of zero, so reset ret.
+	 */
+	if (ret == -EOVERFLOW)
+		ret = 0;
+
+	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
+		ret = -EFAULT;
+	return ret;
+}
+
 static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 					     void __user *arg)
 {
@@ -5663,6 +5915,8 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_get_features(file, argp);
 	case BTRFS_IOC_SET_FEATURES:
 		return btrfs_ioctl_set_features(file, argp);
+	case BTRFS_IOC_GET_SUBVOL_INFO:
+		return btrfs_ioctl_get_subvol_info(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index c8d99b9ca550..1e9361cf66d5 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -843,5 +843,7 @@ enum btrfs_err_code {
 				   struct btrfs_ioctl_vol_args_v2)
 #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
 					struct btrfs_ioctl_logical_ino_args)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
+					struct btrfs_ioctl_search_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
-- 
2.14.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl
  2018-03-06  8:29 [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc Misono, Tomohiro
  2018-03-06  8:30 ` [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl Misono, Tomohiro
@ 2018-03-06  8:31 ` Misono, Tomohiro
  2018-03-07  0:42   ` Misono, Tomohiro
  2018-03-08  9:47   ` Nikolay Borisov
  2018-03-06 16:51 ` [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc David Sterba
  2 siblings, 2 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-06  8:31 UTC (permalink / raw)
  To: linux-btrfs

Add unprivileged version of ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER)
to allow normal users to call "btrfs subvololume list/show" etc. in
combination with BTRFS_IOC_GET_SUBVOL_INFO.

This can be used like BTRFS_IOC_INO_LOOKUP but the argument is
different because it also returns the name of bottom subvolume,
which is ommited by BTRFS_IOC_GET_SUBVOL_INFO ioctl.
Also, this must be called with fd which exists under the tree of
args->treeid to prevent for user to search any tree.

The main differences from original ino_lookup ioctl are:
  1. Read permission will be checked using inode_permission()
     during path construction. -EACCES will be returned in case
     of failure.
  2. Path construction will be stopped at the inode number which
     corresponds to the fd with which this ioctl is called. If
     constructed path does not exist under fd's inode, -EACCES
     will be returned.
  3. The name of bottom subvolume is also searched and filled.

Note that the maximum length of path is shorter 272 bytes than
ino_lookup ioctl because of space of subvolume's id and name.

Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
---
 fs/btrfs/ioctl.c           | 218 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/btrfs.h |  16 ++++
 2 files changed, 234 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1dba309dce31..ac23da98b7e7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2425,6 +2425,179 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
 	return ret;
 }
 
+static noinline int btrfs_search_path_in_tree_user(struct btrfs_fs_info *info,
+				struct super_block *sb,
+				struct btrfs_key upper_limit,
+				struct btrfs_ioctl_ino_lookup_user_args *args)
+{
+	struct btrfs_root *root;
+	struct btrfs_key key, key2;
+	char *ptr;
+	int ret = -1;
+	int slot;
+	int len;
+	int total_len = 0;
+	u64 dirid = args->dirid;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_root_ref rref;
+
+	unsigned long item_off;
+	unsigned long item_len;
+
+	struct extent_buffer *l;
+	struct btrfs_path *path = NULL;
+
+	struct inode *inode;
+	int *new = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	if (dirid == upper_limit.objectid)
+		/*
+		 * If the bottom subvolume exists directly under upper limit,
+		 * there is no need to construct the path and just get the
+		 * subvolume's name
+		 */
+		goto get_subvol_name;
+	if (dirid == BTRFS_FIRST_FREE_OBJECTID)
+		/* The subvolume does not exist under upper_limit */
+		goto access_err;
+
+	ptr = &args->path[BTRFS_INO_LOOKUP_PATH_MAX - 1];
+
+	key.objectid = args->treeid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_read_fs_root_no_name(info, &key);
+	if (IS_ERR(root)) {
+		btrfs_err(info, "could not find root %llu", args->treeid);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	key.objectid = dirid;
+	key.type = BTRFS_INODE_REF_KEY;
+	key.offset = (u64)-1;
+
+	while (1) {
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+		else if (ret > 0) {
+			ret = btrfs_previous_item(root, path, dirid,
+						  BTRFS_INODE_REF_KEY);
+			if (ret < 0)
+				goto out;
+			else if (ret > 0) {
+				ret = -ENOENT;
+				goto out;
+			}
+		}
+
+		l = path->nodes[0];
+		slot = path->slots[0];
+		btrfs_item_key_to_cpu(l, &key, slot);
+
+		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
+		len = btrfs_inode_ref_name_len(l, iref);
+		ptr -= len + 1;
+		total_len += len + 1;
+		if (ptr < args->path) {
+			ret = -ENAMETOOLONG;
+			goto out;
+		}
+
+		*(ptr + len) = '/';
+		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
+
+		/* Check the read permission */
+		ret = btrfs_previous_item(root, path, dirid,
+					  BTRFS_INODE_ITEM_KEY);
+		if (ret < 0) {
+			goto out;
+		} else if (ret > 0) {
+			ret = -ENOENT;
+			goto out;
+		}
+		l = path->nodes[0];
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(l, &key2, slot);
+		inode = btrfs_iget(sb, &key2, root, new);
+		ret = inode_permission(inode, MAY_READ);
+		iput(inode);
+		if (ret)
+			goto access_err;
+
+		if (key.offset == BTRFS_FIRST_FREE_OBJECTID ||
+		    key.offset == upper_limit.objectid)
+			break;
+
+		btrfs_release_path(path);
+		key.objectid = key.offset;
+		key.offset = (u64)-1;
+		dirid = key.objectid;
+	}
+	 /* Check if the searched path exists under the upper_limit */
+	if (key.offset == BTRFS_FIRST_FREE_OBJECTID &&
+	    upper_limit.objectid != BTRFS_FIRST_FREE_OBJECTID)
+		goto access_err;
+
+	memmove(args->path, ptr, total_len);
+	args->path[total_len] = '\0';
+	btrfs_release_path(path);
+	ret = 0;
+
+get_subvol_name:
+	/* Get subolume's name from ROOT_REF */
+	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_read_fs_root_no_name(info, &key);
+	if (IS_ERR(root)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	key.objectid = args->treeid;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = args->subid;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret != 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+	l = path->nodes[0];
+	slot = path->slots[0];
+	btrfs_item_key_to_cpu(l, &key, slot);
+
+	item_off = btrfs_item_ptr_offset(l, slot);
+	item_len = btrfs_item_size_nr(l, slot);
+	/* Check if dirid in ROOT_REF corresponds to passed arg */
+	read_extent_buffer(l, &rref, item_off, sizeof(struct btrfs_root_ref));
+	if (rref.dirid != args->dirid) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Copy subvolume's name */
+	item_off += sizeof(struct btrfs_root_ref);
+	item_len -= sizeof(struct btrfs_root_ref);
+	read_extent_buffer(l, args->name, item_off, item_len);
+	args->name[item_len] = '\0';
+
+out:
+	btrfs_free_path(path);
+	return ret;
+
+access_err:
+	btrfs_free_path(path);
+	ret = -EACCES;
+	return ret;
+}
+
 static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 					   void __user *argp)
 {
@@ -2467,6 +2640,49 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 	return ret;
 }
 
+/*
+ * User version of ino_lookup ioctl (unprivileged)
+ *
+ * The main differences from original ino_lookup ioctl are:
+ *   1. Read permission will be checked using inode_permission()
+ *      during path construction. -EACCES will be returned in case
+ *      of failure.
+ *   2. Path construction will be stopped at the inode number which
+ *      corresponds to the fd with which this ioctl is called. If
+ *      constructed path does not exist under fd's inode, -EACCES
+ *      will be returned.
+ *   3. The name of bottom subvolume is also searched and filled.
+ */
+static noinline int btrfs_ioctl_ino_lookup_user(struct file *file,
+					   void __user *argp)
+{
+	struct btrfs_ioctl_ino_lookup_user_args *args;
+	struct inode *inode;
+	int ret = 0;
+
+	args = memdup_user(argp, sizeof(*args));
+	if (IS_ERR(args))
+		return PTR_ERR(args);
+
+	inode = file_inode(file);
+	/*
+	 * args->treeid must be matched with this inode's objectid
+	 * to previent for user to search any fs/file tree.
+	 */
+	if (args->treeid != BTRFS_I(inode)->root->root_key.objectid)
+		return -EINVAL;
+
+	ret = btrfs_search_path_in_tree_user(BTRFS_I(inode)->root->fs_info,
+					inode->i_sb, BTRFS_I(inode)->location,
+					args);
+
+	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+		ret = -EFAULT;
+
+	kfree(args);
+	return ret;
+}
+
 static noinline int search_subvol(struct inode *inode,
 				 struct btrfs_ioctl_search_key *sk,
 				 size_t *buf_size,
@@ -5917,6 +6133,8 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_set_features(file, argp);
 	case BTRFS_IOC_GET_SUBVOL_INFO:
 		return btrfs_ioctl_get_subvol_info(file, argp);
+	case BTRFS_IOC_INO_LOOKUP_USER:
+		return btrfs_ioctl_ino_lookup_user(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 1e9361cf66d5..22326d631417 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -422,6 +422,20 @@ struct btrfs_ioctl_ino_lookup_args {
 	char name[BTRFS_INO_LOOKUP_PATH_MAX];
 };
 
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4072-BTRFS_VOL_NAME_MAX)
+struct btrfs_ioctl_ino_lookup_user_args {
+	/* in */
+	__u64 treeid;
+	/* in, 'dirid' is the same as objectid in btrfs_ioctl_ino_lookup_args */
+	__u64 dirid;
+	/* in, subvolume id which exists under the directory of 'dirid' */
+	__u64 subid;
+	/* out, name of the subvolume of 'subid' */
+	char name[BTRFS_VOL_NAME_MAX];
+	/* out, 'path' is the same as 'name' in btrfs_ioctl_ino_lookup_args */
+	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+
 /* Search criteria for the btrfs SEARCH ioctl family. */
 struct btrfs_ioctl_search_key {
 	/*
@@ -845,5 +859,7 @@ enum btrfs_err_code {
 					struct btrfs_ioctl_logical_ino_args)
 #define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
 					struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+					struct btrfs_ioctl_ino_lookup_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
-- 
2.14.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc.
  2018-03-06  8:29 [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc Misono, Tomohiro
  2018-03-06  8:30 ` [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl Misono, Tomohiro
  2018-03-06  8:31 ` [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl Misono, Tomohiro
@ 2018-03-06 16:51 ` David Sterba
  2 siblings, 0 replies; 14+ messages in thread
From: David Sterba @ 2018-03-06 16:51 UTC (permalink / raw)
  To: Misono, Tomohiro; +Cc: linux-btrfs

On Tue, Mar 06, 2018 at 05:29:34PM +0900, Misono, Tomohiro wrote:
> This adds two new unprivileged ioctls:
> 
> 1st patch: version of tree_search ioctl which only searches/returns subvolume related item.
> 2nd patch: user version of ino_lookup ioctl which also performs permission check.
> 
> They will be used to implement user version of "subvolume list/show" etc in user tools.

The unprivileged separate listing ioctls are highly requested so I'm
looking forward to the feedback round.

The usecase coverage should be same what the current 'btrfs subvol list'
does, except the complex filtering.

The merging target for that shall be 4.18 which should give us enough
time to discuss and review the usecase or the ioctl capabilities and
data structure formats.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-06  8:30 ` [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl Misono, Tomohiro
@ 2018-03-06 20:29   ` Goffredo Baroncelli
  2018-03-07  0:40     ` Misono, Tomohiro
  2018-03-08  8:29   ` Nikolay Borisov
  1 sibling, 1 reply; 14+ messages in thread
From: Goffredo Baroncelli @ 2018-03-06 20:29 UTC (permalink / raw)
  To: Misono, Tomohiro, linux-btrfs

On 03/06/2018 09:30 AM, Misono, Tomohiro wrote:
> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
> from root tree. The arguments of this ioctl are the same as treesearch
> ioctl and can be used like treesearch ioctl.

Is it a pro ? The treesearch ioctl is tightly coupled to the btrfs internal structure, this means that if we would change the btrfs internal structure, we have to update all the clients of this api. For the treesearch it is an acceptable compromise between flexibility and speed of developing. But for a more specialized API, I think that it would make sense to provide a less coupled api to the internal structure.

Below some comments


> 
> Since treesearch ioctl requires root privilege, this ioctl is needed
> to allow normal users to call "btrfs subvolume list/show" etc.
> 
> Note that the subvolume name in ROOT_BACKREF/ROOT_REF will not be copied to
> prevent potential name leak. The name can be obtained by calling
> user version's ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER).
> 
> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
> ---
>  fs/btrfs/ioctl.c           | 254 +++++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/btrfs.h |   2 +
>  2 files changed, 256 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 111ee282b777..1dba309dce31 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1921,6 +1921,28 @@ static noinline int key_in_sk(struct btrfs_key *key,
>  	return 1;
>  }
>  
> +/*
> + * check if key is in sk and subvolume related range
> + */
> +static noinline int key_in_sk_and_subvol(struct btrfs_key *key,
> +			      struct btrfs_ioctl_search_key *sk)
> +{
> +	int ret;
> +
> +	ret = key_in_sk(key, sk);
> +	if (!ret)
> +		return 0;
> +
> +	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
> +		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
> +		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&
> +	    key->type >= BTRFS_ROOT_ITEM_KEY &&
> +	    key->type <= BTRFS_ROOT_BACKREF_KEY)

Why returning all the range [BTRFS_ROOT_ITEM_KEY...BTRFS_ROOT_BACKREF_KEY]. It would be sufficient to return only

  +	    (key->type == BTRFS_ROOT_ITEM_KEY ||
  +	     key->type == BTRFS_ROOT_BACKREF_KEY))


> +		return 1;
> +
> +	return 0;
> +}
> +
>  static noinline int copy_to_sk(struct btrfs_path *path,
>  			       struct btrfs_key *key,
>  			       struct btrfs_ioctl_search_key *sk,
> @@ -2045,6 +2067,142 @@ static noinline int copy_to_sk(struct btrfs_path *path,
>  	return ret;
>  }
>  
> +/*
> + * Basically the same as copy_to_sk() but restricts the copied item
> + * within subvolume related one using key_in_sk_and_subvol().
> + * Also, the name of subvolume will be omitted from
> + * ROOT_BACKREF/ROOT_REF item.
> + */
> +static noinline int copy_subvol_item(struct btrfs_path *path,
> +			       struct btrfs_key *key,
> +			       struct btrfs_ioctl_search_key *sk,
> +			       size_t *buf_size,
> +			       char __user *ubuf,
> +			       unsigned long *sk_offset,
> +			       int *num_found)
> +{
> +	u64 found_transid;
> +	struct extent_buffer *leaf;
> +	struct btrfs_ioctl_search_header sh;
> +	struct btrfs_key test;
> +	unsigned long item_off;
> +	unsigned long item_len;
> +	int nritems;
> +	int i;
> +	int slot;
> +	int ret = 0;
> +
> +	leaf = path->nodes[0];
> +	slot = path->slots[0];
> +	nritems = btrfs_header_nritems(leaf);
> +
> +	if (btrfs_header_generation(leaf) > sk->max_transid) {
> +		i = nritems;
> +		goto advance_key;
> +	}
> +	found_transid = btrfs_header_generation(leaf);
> +
> +	for (i = slot; i < nritems; i++) {
> +		item_off = btrfs_item_ptr_offset(leaf, i);
> +		item_len = btrfs_item_size_nr(leaf, i);
> +
> +		btrfs_item_key_to_cpu(leaf, key, i);
> +		if (!key_in_sk_and_subvol(key, sk))
> +			continue;
> +
> +		/* will not copy the name of subvolume */
> +		if (key->type == BTRFS_ROOT_BACKREF_KEY ||
> +		    key->type == BTRFS_ROOT_REF_KEY)
> +			item_len = sizeof(struct btrfs_root_ref);
> +
> +		if (sizeof(sh) + item_len > *buf_size) {
> +			if (*num_found) {
> +				ret = 1;
> +				goto out;
> +			}
> +
> +			/*
> +			 * return one empty item back for v1, which does not
> +			 * handle -EOVERFLOW
> +			 */
It is still applicable ?
> +
> +			*buf_size = sizeof(sh) + item_len;
> +			item_len = 0;
> +			ret = -EOVERFLOW;> +		}
> +
> +		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
> +			ret = 1;
> +			goto out;
> +		}
> +
> +		sh.objectid = key->objectid;
> +		sh.offset = key->offset;
> +		sh.type = key->type;
> +		sh.len = item_len;
> +		sh.transid = found_transid;
> +
> +		/* copy search result header */
> +		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		*sk_offset += sizeof(sh);
> +
> +		if (item_len) {
> +			char __user *up = ubuf + *sk_offset;
> +
> +			/* copy the item */
> +			if (read_extent_buffer_to_user(leaf, up,
> +						       item_off, item_len)) {
> +				ret = -EFAULT;
> +				goto out;
> +			}
> +
> +			*sk_offset += item_len;
> +		}
> +		(*num_found)++;
> +
> +		if (ret) /* -EOVERFLOW from above */
> +			goto out;
> +
> +		if (*num_found >= sk->nr_items) {
> +			ret = 1;
> +			goto out;
> +		}
> +	}
> +advance_key:
> +	ret = 0;
> +	test.objectid = sk->max_objectid;
> +	test.type = sk->max_type;
> +	test.offset = sk->max_offset;
> +	if (btrfs_comp_cpu_keys(key, &test) >= 0)
> +		ret = 1;
> +	else if (key->offset < (u64)-1)
> +		key->offset++;
> +	else if (key->type < (u8)-1) {
> +		key->offset = 0;
> +		key->type++;
> +	} else if (key->objectid < (u64)-1) {
> +		key->offset = 0;
> +		key->type = 0;
> +		key->objectid++;

It would be doable to check that type is in the range 
	[BTRFS_ROOT_ITEM_KEY...BTRFS_ROOT_BACKREF_KEY]

I.e. something like


 +	if (btrfs_comp_cpu_keys(key, &test) >= 0)
 +		ret = 1;
 +	else if (key->offset < (u64)-1)
 +		key->offset++;
 +	else if (key->type < BTRFS_ROOT_BACKREF_KEY) {
 +		key->offset = 0;
 +		key->type++;
 +	} else if (key->objectid < (u64)-1) {
 +		key->offset = 0;
 +		key->type = BTRFS_ROOT_ITEM_KEY;
 +		key->objectid++;



> +	} else
> +		ret = 1;
> +out:
> +	/*
> +	 *  0: all items from this leaf copied, continue with next
> +	 *  1: * more items can be copied, but unused buffer is too small
> +	 *     * all items were found
> +	 *     Either way, it will stops the loop which iterates to the next
> +	 *     leaf
> +	 *  -EOVERFLOW: item was to large for buffer
> +	 *  -EFAULT: could not copy extent buffer back to userspace
> +	 */
> +	return ret;
> +}
> +
>  static noinline int search_ioctl(struct inode *inode,
>  				 struct btrfs_ioctl_search_key *sk,
>  				 size_t *buf_size,
> @@ -2309,6 +2467,100 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>  	return ret;
>  }
>  
> +static noinline int search_subvol(struct inode *inode,
> +				 struct btrfs_ioctl_search_key *sk,
> +				 size_t *buf_size,
> +				 char __user *ubuf)
> +{
> +	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
> +	struct btrfs_root *root;
> +	struct btrfs_key key;
> +	struct btrfs_path *path;
> +	unsigned long sk_offset = 0;
> +	int num_found = 0;
> +	int ret;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	/* search ROOT TREEE */
> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_read_fs_root_no_name(info, &key);
> +	if (IS_ERR(root)) {
> +		btrfs_free_path(path);
> +		return -ENOENT;
> +	}
> +
> +	key.objectid = sk->min_objectid;
> +	key.type = sk->min_type;
> +	key.offset = sk->min_offset;
> +
> +	while (1) {
> +		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
> +		if (ret != 0) {
> +			if (ret > 0)
> +				ret = 0;
> +			goto err;
> +		}
> +
> +		ret = copy_subvol_item(path, &key, sk, buf_size, ubuf,
> +				 &sk_offset, &num_found);
> +		btrfs_release_path(path);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (ret > 0)
> +		ret = 0;
> +err:
> +	sk->nr_items = num_found;
> +	btrfs_free_path(path);
> +	return ret;
> +}
> +
> +/*
> + * Unprivileged ioctl for getting subvolume related item.
> + * The arguments are the same as btrfs_ioctl_tree_search()
> + * and can be used like tree search ioctl.
> + *
> + * Only returns ROOT_ITEM/ROOT_BACKREF/ROOT_REF key of subvolumes
> + * from root tree. Also, the name of subvolume will be omitted from
> + * ROOT_BACKREF/ROOT_REF item to prevent name leak.
> + */
> +static noinline int btrfs_ioctl_get_subvol_info(struct file *file,
> +					   void __user *argp)
> +{
> +	struct btrfs_ioctl_search_args __user *uargs;
> +	struct btrfs_ioctl_search_key sk;
> +	struct inode *inode;
> +	size_t buf_size;
> +	int ret;
> +
> +	uargs = (struct btrfs_ioctl_search_args __user *)argp;
> +
> +	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
> +		return -EFAULT;
> +
> +	buf_size = sizeof(uargs->buf);
> +
> +	inode = file_inode(file);
> +	ret = search_subvol(inode, &sk, &buf_size, uargs->buf);
> +
> +	/*
> +	 * In the original implementation an overflow is handled by returning a
> +	 * search header with a len of zero, so reset ret.
> +	 */
> +	if (ret == -EOVERFLOW)
> +		ret = 0;
> +
> +	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
> +		ret = -EFAULT;
> +	return ret;
> +}
> +
>  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  					     void __user *arg)
>  {
> @@ -5663,6 +5915,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_get_features(file, argp);
>  	case BTRFS_IOC_SET_FEATURES:
>  		return btrfs_ioctl_set_features(file, argp);
> +	case BTRFS_IOC_GET_SUBVOL_INFO:
> +		return btrfs_ioctl_get_subvol_info(file, argp);
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index c8d99b9ca550..1e9361cf66d5 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -843,5 +843,7 @@ enum btrfs_err_code {
>  				   struct btrfs_ioctl_vol_args_v2)
>  #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
>  					struct btrfs_ioctl_logical_ino_args)
> +#define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
> +					struct btrfs_ioctl_search_args)
>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
> 


-- 
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D  17B2 0EDA 9B37 8B82 E0B5

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-06 20:29   ` Goffredo Baroncelli
@ 2018-03-07  0:40     ` Misono, Tomohiro
  2018-03-07 16:36       ` David Sterba
  2018-03-07 19:00       ` Goffredo Baroncelli
  0 siblings, 2 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-07  0:40 UTC (permalink / raw)
  To: kreijack, linux-btrfs

On 2018/03/07 5:29, Goffredo Baroncelli wrote:
> On 03/06/2018 09:30 AM, Misono, Tomohiro wrote:
>> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
>> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
>> from root tree. The arguments of this ioctl are the same as treesearch
>> ioctl and can be used like treesearch ioctl.
> 
> Is it a pro ? The treesearch ioctl is tightly coupled to the btrfs internal structure, this means that if we would change the btrfs internal structure, we have to update all the clients of this api. For the treesearch it is an acceptable compromise between flexibility and speed of developing. But for a more specialized API, I think that it would make sense to provide a less coupled api to the internal structure.

Thanks for the comments.

The reason I choose the same api is just to minimize the code change in btrfs-progs.
For tree search part, it works just switching the ioctl number from BTRFS_IOC_TREE_SEARCH
to BTRFS_IOC_GET_SUBVOL_INFO in list_subvol_search()[1].

[1] https://marc.info/?l=linux-btrfs&m=152032537911218&w=2

> 
> Below some comments
> 
> 
>>
>> Since treesearch ioctl requires root privilege, this ioctl is needed
>> to allow normal users to call "btrfs subvolume list/show" etc.
>>
>> Note that the subvolume name in ROOT_BACKREF/ROOT_REF will not be copied to
>> prevent potential name leak. The name can be obtained by calling
>> user version's ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER).
>>
>> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
>> ---
>>  fs/btrfs/ioctl.c           | 254 +++++++++++++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/btrfs.h |   2 +
>>  2 files changed, 256 insertions(+)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 111ee282b777..1dba309dce31 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -1921,6 +1921,28 @@ static noinline int key_in_sk(struct btrfs_key *key,
>>  	return 1;
>>  }
>>  
>> +/*
>> + * check if key is in sk and subvolume related range
>> + */
>> +static noinline int key_in_sk_and_subvol(struct btrfs_key *key,
>> +			      struct btrfs_ioctl_search_key *sk)
>> +{
>> +	int ret;
>> +
>> +	ret = key_in_sk(key, sk);
>> +	if (!ret)
>> +		return 0;
>> +
>> +	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
>> +		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
>> +		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&
>> +	    key->type >= BTRFS_ROOT_ITEM_KEY &&
>> +	    key->type <= BTRFS_ROOT_BACKREF_KEY)
> 
> Why returning all the range [BTRFS_ROOT_ITEM_KEY...BTRFS_ROOT_BACKREF_KEY]. It would be sufficient to return only
> 
>   +	    (key->type == BTRFS_ROOT_ITEM_KEY ||
>   +	     key->type == BTRFS_ROOT_BACKREF_KEY))

Sorry, it is a mistake, I mean "key->type <= BTRFS_ROOTREF_KEY".
Although btrfs-progs only uses BTRFS_ROOT_BACKREF_KEY, I notice libbtrfs 
uses BTRFS_ROOT_REF_KEY instead. So, I think it is better to return both
ROOT_BACKREF_KEY and ROOT_REF_KEY. I will fix this in v2.

> 
> 
>> +		return 1;
>> +
>> +	return 0;
>> +}
>> +
>>  static noinline int copy_to_sk(struct btrfs_path *path,
>>  			       struct btrfs_key *key,
>>  			       struct btrfs_ioctl_search_key *sk,
>> @@ -2045,6 +2067,142 @@ static noinline int copy_to_sk(struct btrfs_path *path,
>>  	return ret;
>>  }
>>  
>> +/*
>> + * Basically the same as copy_to_sk() but restricts the copied item
>> + * within subvolume related one using key_in_sk_and_subvol().
>> + * Also, the name of subvolume will be omitted from
>> + * ROOT_BACKREF/ROOT_REF item.
>> + */
>> +static noinline int copy_subvol_item(struct btrfs_path *path,
>> +			       struct btrfs_key *key,
>> +			       struct btrfs_ioctl_search_key *sk,
>> +			       size_t *buf_size,
>> +			       char __user *ubuf,
>> +			       unsigned long *sk_offset,
>> +			       int *num_found)
>> +{
>> +	u64 found_transid;
>> +	struct extent_buffer *leaf;
>> +	struct btrfs_ioctl_search_header sh;
>> +	struct btrfs_key test;
>> +	unsigned long item_off;
>> +	unsigned long item_len;
>> +	int nritems;
>> +	int i;
>> +	int slot;
>> +	int ret = 0;
>> +
>> +	leaf = path->nodes[0];
>> +	slot = path->slots[0];
>> +	nritems = btrfs_header_nritems(leaf);
>> +
>> +	if (btrfs_header_generation(leaf) > sk->max_transid) {
>> +		i = nritems;
>> +		goto advance_key;
>> +	}
>> +	found_transid = btrfs_header_generation(leaf);
>> +
>> +	for (i = slot; i < nritems; i++) {
>> +		item_off = btrfs_item_ptr_offset(leaf, i);
>> +		item_len = btrfs_item_size_nr(leaf, i);
>> +
>> +		btrfs_item_key_to_cpu(leaf, key, i);
>> +		if (!key_in_sk_and_subvol(key, sk))
>> +			continue;
>> +
>> +		/* will not copy the name of subvolume */
>> +		if (key->type == BTRFS_ROOT_BACKREF_KEY ||
>> +		    key->type == BTRFS_ROOT_REF_KEY)
>> +			item_len = sizeof(struct btrfs_root_ref);
>> +
>> +		if (sizeof(sh) + item_len > *buf_size) {
>> +			if (*num_found) {
>> +				ret = 1;
>> +				goto out;
>> +			}
>> +
>> +			/*
>> +			 * return one empty item back for v1, which does not
>> +			 * handle -EOVERFLOW
>> +			 */
> It is still applicable ?
In order to just replace TREE_SEARCH (v1) in user space, I think it still needs
the same EOVERFLOW handling like TREE_SEARCH does (in copy_to_sk() and btrfs_ioctl_tree_search()).

>> +
>> +			*buf_size = sizeof(sh) + item_len;
>> +			item_len = 0;
>> +			ret = -EOVERFLOW;> +		}
>> +
>> +		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
>> +			ret = 1;
>> +			goto out;
>> +		}
>> +
>> +		sh.objectid = key->objectid;
>> +		sh.offset = key->offset;
>> +		sh.type = key->type;
>> +		sh.len = item_len;
>> +		sh.transid = found_transid;
>> +
>> +		/* copy search result header */
>> +		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
>> +			ret = -EFAULT;
>> +			goto out;
>> +		}
>> +
>> +		*sk_offset += sizeof(sh);
>> +
>> +		if (item_len) {
>> +			char __user *up = ubuf + *sk_offset;
>> +
>> +			/* copy the item */
>> +			if (read_extent_buffer_to_user(leaf, up,
>> +						       item_off, item_len)) {
>> +				ret = -EFAULT;
>> +				goto out;
>> +			}
>> +
>> +			*sk_offset += item_len;
>> +		}
>> +		(*num_found)++;
>> +
>> +		if (ret) /* -EOVERFLOW from above */
>> +			goto out;
>> +
>> +		if (*num_found >= sk->nr_items) {
>> +			ret = 1;
>> +			goto out;
>> +		}
>> +	}
>> +advance_key:
>> +	ret = 0;
>> +	test.objectid = sk->max_objectid;
>> +	test.type = sk->max_type;
>> +	test.offset = sk->max_offset;
>> +	if (btrfs_comp_cpu_keys(key, &test) >= 0)
>> +		ret = 1;
>> +	else if (key->offset < (u64)-1)
>> +		key->offset++;
>> +	else if (key->type < (u8)-1) {
>> +		key->offset = 0;
>> +		key->type++;
>> +	} else if (key->objectid < (u64)-1) {
>> +		key->offset = 0;
>> +		key->type = 0;
>> +		key->objectid++;
> 
> It would be doable to check that type is in the range 
> 	[BTRFS_ROOT_ITEM_KEY...BTRFS_ROOT_BACKREF_KEY]
> 
> I.e. something like
> 
> 
>  +	if (btrfs_comp_cpu_keys(key, &test) >= 0)
>  +		ret = 1;
>  +	else if (key->offset < (u64)-1)
>  +		key->offset++;
>  +	else if (key->type < BTRFS_ROOT_BACKREF_KEY) {
>  +		key->offset = 0;
>  +		key->type++;
>  +	} else if (key->objectid < (u64)-1) {
>  +		key->offset = 0;
>  +		key->type = BTRFS_ROOT_ITEM_KEY;
>  +		key->objectid++;

Thanks, I will fix this in v2.

> 
>> +	} else
>> +		ret = 1;
>> +out:
>> +	/*
>> +	 *  0: all items from this leaf copied, continue with next
>> +	 *  1: * more items can be copied, but unused buffer is too small
>> +	 *     * all items were found
>> +	 *     Either way, it will stops the loop which iterates to the next
>> +	 *     leaf
>> +	 *  -EOVERFLOW: item was to large for buffer
>> +	 *  -EFAULT: could not copy extent buffer back to userspace
>> +	 */
>> +	return ret;
>> +}
>> +
>>  static noinline int search_ioctl(struct inode *inode,
>>  				 struct btrfs_ioctl_search_key *sk,
>>  				 size_t *buf_size,
>> @@ -2309,6 +2467,100 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>>  	return ret;
>>  }
>>  
>> +static noinline int search_subvol(struct inode *inode,
>> +				 struct btrfs_ioctl_search_key *sk,
>> +				 size_t *buf_size,
>> +				 char __user *ubuf)
>> +{
>> +	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
>> +	struct btrfs_root *root;
>> +	struct btrfs_key key;
>> +	struct btrfs_path *path;
>> +	unsigned long sk_offset = 0;
>> +	int num_found = 0;
>> +	int ret;
>> +
>> +	path = btrfs_alloc_path();
>> +	if (!path)
>> +		return -ENOMEM;
>> +
>> +	/* search ROOT TREEE */
>> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
>> +	key.type = BTRFS_ROOT_ITEM_KEY;
>> +	key.offset = (u64)-1;
>> +	root = btrfs_read_fs_root_no_name(info, &key);
>> +	if (IS_ERR(root)) {
>> +		btrfs_free_path(path);
>> +		return -ENOENT;
>> +	}
>> +
>> +	key.objectid = sk->min_objectid;
>> +	key.type = sk->min_type;
>> +	key.offset = sk->min_offset;
>> +
>> +	while (1) {
>> +		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
>> +		if (ret != 0) {
>> +			if (ret > 0)
>> +				ret = 0;
>> +			goto err;
>> +		}
>> +
>> +		ret = copy_subvol_item(path, &key, sk, buf_size, ubuf,
>> +				 &sk_offset, &num_found);
>> +		btrfs_release_path(path);
>> +		if (ret)
>> +			break;
>> +	}
>> +
>> +	if (ret > 0)
>> +		ret = 0;
>> +err:
>> +	sk->nr_items = num_found;
>> +	btrfs_free_path(path);
>> +	return ret;
>> +}
>> +
>> +/*
>> + * Unprivileged ioctl for getting subvolume related item.
>> + * The arguments are the same as btrfs_ioctl_tree_search()
>> + * and can be used like tree search ioctl.
>> + *
>> + * Only returns ROOT_ITEM/ROOT_BACKREF/ROOT_REF key of subvolumes
>> + * from root tree. Also, the name of subvolume will be omitted from
>> + * ROOT_BACKREF/ROOT_REF item to prevent name leak.
>> + */
>> +static noinline int btrfs_ioctl_get_subvol_info(struct file *file,
>> +					   void __user *argp)
>> +{
>> +	struct btrfs_ioctl_search_args __user *uargs;
>> +	struct btrfs_ioctl_search_key sk;
>> +	struct inode *inode;
>> +	size_t buf_size;
>> +	int ret;
>> +
>> +	uargs = (struct btrfs_ioctl_search_args __user *)argp;
>> +
>> +	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
>> +		return -EFAULT;
>> +
>> +	buf_size = sizeof(uargs->buf);
>> +
>> +	inode = file_inode(file);
>> +	ret = search_subvol(inode, &sk, &buf_size, uargs->buf);
>> +
>> +	/*
>> +	 * In the original implementation an overflow is handled by returning a
>> +	 * search header with a len of zero, so reset ret.
>> +	 */
>> +	if (ret == -EOVERFLOW)
>> +		ret = 0;
>> +
>> +	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
>> +		ret = -EFAULT;
>> +	return ret;
>> +}
>> +
>>  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>>  					     void __user *arg)
>>  {
>> @@ -5663,6 +5915,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>>  		return btrfs_ioctl_get_features(file, argp);
>>  	case BTRFS_IOC_SET_FEATURES:
>>  		return btrfs_ioctl_set_features(file, argp);
>> +	case BTRFS_IOC_GET_SUBVOL_INFO:
>> +		return btrfs_ioctl_get_subvol_info(file, argp);
>>  	}
>>  
>>  	return -ENOTTY;
>> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
>> index c8d99b9ca550..1e9361cf66d5 100644
>> --- a/include/uapi/linux/btrfs.h
>> +++ b/include/uapi/linux/btrfs.h
>> @@ -843,5 +843,7 @@ enum btrfs_err_code {
>>  				   struct btrfs_ioctl_vol_args_v2)
>>  #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
>>  					struct btrfs_ioctl_logical_ino_args)
>> +#define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
>> +					struct btrfs_ioctl_search_args)
>>  
>>  #endif /* _UAPI_LINUX_BTRFS_H */
>>
> 
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl
  2018-03-06  8:31 ` [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl Misono, Tomohiro
@ 2018-03-07  0:42   ` Misono, Tomohiro
  2018-03-08  9:47   ` Nikolay Borisov
  1 sibling, 0 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-07  0:42 UTC (permalink / raw)
  To: linux-btrfs

On 2018/03/06 17:31, Misono, Tomohiro wrote:
> Add unprivileged version of ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER)
> to allow normal users to call "btrfs subvololume list/show" etc. in
> combination with BTRFS_IOC_GET_SUBVOL_INFO.
> 
> This can be used like BTRFS_IOC_INO_LOOKUP but the argument is
> different because it also returns the name of bottom subvolume,
> which is ommited by BTRFS_IOC_GET_SUBVOL_INFO ioctl.
> Also, this must be called with fd which exists under the tree of
> args->treeid to prevent for user to search any tree.
> 
> The main differences from original ino_lookup ioctl are:
>   1. Read permission will be checked using inode_permission()
>      during path construction. -EACCES will be returned in case
>      of failure.
>   2. Path construction will be stopped at the inode number which
>      corresponds to the fd with which this ioctl is called. If
>      constructed path does not exist under fd's inode, -EACCES
>      will be returned.
>   3. The name of bottom subvolume is also searched and filled.
> 
> Note that the maximum length of path is shorter 272 bytes than
> ino_lookup ioctl because of space of subvolume's id and name.
> 
> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
> ---
>  fs/btrfs/ioctl.c           | 218 +++++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/btrfs.h |  16 ++++
>  2 files changed, 234 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1dba309dce31..ac23da98b7e7 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2425,6 +2425,179 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
>  	return ret;
>  }
>  
> +static noinline int btrfs_search_path_in_tree_user(struct btrfs_fs_info *info,
> +				struct super_block *sb,
> +				struct btrfs_key upper_limit,
> +				struct btrfs_ioctl_ino_lookup_user_args *args)
> +{
> +	struct btrfs_root *root;
> +	struct btrfs_key key, key2;
> +	char *ptr;
> +	int ret = -1;
> +	int slot;
> +	int len;
> +	int total_len = 0;
> +	u64 dirid = args->dirid;
> +	struct btrfs_inode_ref *iref;
> +	struct btrfs_root_ref rref;
> +
> +	unsigned long item_off;
> +	unsigned long item_len;
> +
> +	struct extent_buffer *l;
> +	struct btrfs_path *path = NULL;
> +
> +	struct inode *inode;
> +	int *new = 0;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	if (dirid == upper_limit.objectid)
> +		/*
> +		 * If the bottom subvolume exists directly under upper limit,
> +		 * there is no need to construct the path and just get the
> +		 * subvolume's name
> +		 */
> +		goto get_subvol_name;
> +	if (dirid == BTRFS_FIRST_FREE_OBJECTID)
> +		/* The subvolume does not exist under upper_limit */
> +		goto access_err;
> +
> +	ptr = &args->path[BTRFS_INO_LOOKUP_PATH_MAX - 1];
> +
> +	key.objectid = args->treeid;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_read_fs_root_no_name(info, &key);
> +	if (IS_ERR(root)) {
> +		btrfs_err(info, "could not find root %llu", args->treeid);
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	key.objectid = dirid;
> +	key.type = BTRFS_INODE_REF_KEY;
> +	key.offset = (u64)-1;
> +
> +	while (1) {
> +		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +		if (ret < 0)
> +			goto out;
> +		else if (ret > 0) {
> +			ret = btrfs_previous_item(root, path, dirid,
> +						  BTRFS_INODE_REF_KEY);
> +			if (ret < 0)
> +				goto out;
> +			else if (ret > 0) {
> +				ret = -ENOENT;
> +				goto out;
> +			}
> +		}
> +
> +		l = path->nodes[0];
> +		slot = path->slots[0];
> +		btrfs_item_key_to_cpu(l, &key, slot);
> +
> +		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
> +		len = btrfs_inode_ref_name_len(l, iref);
> +		ptr -= len + 1;
> +		total_len += len + 1;
> +		if (ptr < args->path) {
> +			ret = -ENAMETOOLONG;
> +			goto out;
> +		}
> +
> +		*(ptr + len) = '/';
> +		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
> +
> +		/* Check the read permission */
> +		ret = btrfs_previous_item(root, path, dirid,
> +					  BTRFS_INODE_ITEM_KEY);
> +		if (ret < 0) {
> +			goto out;
> +		} else if (ret > 0) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +		l = path->nodes[0];
> +		slot = path->slots[0];
> +
> +		btrfs_item_key_to_cpu(l, &key2, slot);
> +		inode = btrfs_iget(sb, &key2, root, new);
> +		ret = inode_permission(inode, MAY_READ);
> +		iput(inode);
> +		if (ret)
> +			goto access_err;
> +
> +		if (key.offset == BTRFS_FIRST_FREE_OBJECTID ||
> +		    key.offset == upper_limit.objectid)
> +			break;
> +
> +		btrfs_release_path(path);
> +		key.objectid = key.offset;
> +		key.offset = (u64)-1;
> +		dirid = key.objectid;
> +	}
> +	 /* Check if the searched path exists under the upper_limit */
> +	if (key.offset == BTRFS_FIRST_FREE_OBJECTID &&
> +	    upper_limit.objectid != BTRFS_FIRST_FREE_OBJECTID)
> +		goto access_err;
> +
> +	memmove(args->path, ptr, total_len);
> +	args->path[total_len] = '\0';
> +	btrfs_release_path(path);
> +	ret = 0;
> +
> +get_subvol_name:
> +	/* Get subolume's name from ROOT_REF */
> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_read_fs_root_no_name(info, &key);
> +	if (IS_ERR(root)) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	key.objectid = args->treeid;
> +	key.type = BTRFS_ROOT_REF_KEY;
> +	key.offset = args->subid;
> +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +	if (ret != 0) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +	l = path->nodes[0];
> +	slot = path->slots[0];
> +	btrfs_item_key_to_cpu(l, &key, slot);
> +
> +	item_off = btrfs_item_ptr_offset(l, slot);
> +	item_len = btrfs_item_size_nr(l, slot);
> +	/* Check if dirid in ROOT_REF corresponds to passed arg */
> +	read_extent_buffer(l, &rref, item_off, sizeof(struct btrfs_root_ref));
> +	if (rref.dirid != args->dirid) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	/* Copy subvolume's name */
> +	item_off += sizeof(struct btrfs_root_ref);
> +	item_len -= sizeof(struct btrfs_root_ref);
> +	read_extent_buffer(l, args->name, item_off, item_len);
> +	args->name[item_len] = '\0';
> +
> +out:
> +	btrfs_free_path(path);
> +	return ret;
> +
> +access_err:
> +	btrfs_free_path(path);
> +	ret = -EACCES;
> +	return ret;
> +}
> +
>  static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>  					   void __user *argp)
>  {
> @@ -2467,6 +2640,49 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>  	return ret;
>  }
>  
> +/*
> + * User version of ino_lookup ioctl (unprivileged)
> + *
> + * The main differences from original ino_lookup ioctl are:
> + *   1. Read permission will be checked using inode_permission()
> + *      during path construction. -EACCES will be returned in case
> + *      of failure.
> + *   2. Path construction will be stopped at the inode number which
> + *      corresponds to the fd with which this ioctl is called. If
> + *      constructed path does not exist under fd's inode, -EACCES
> + *      will be returned.
> + *   3. The name of bottom subvolume is also searched and filled.
> + */
> +static noinline int btrfs_ioctl_ino_lookup_user(struct file *file,
> +					   void __user *argp)
> +{
> +	struct btrfs_ioctl_ino_lookup_user_args *args;
> +	struct inode *inode;
> +	int ret = 0;
> +
> +	args = memdup_user(argp, sizeof(*args));
> +	if (IS_ERR(args))
> +		return PTR_ERR(args);
> +
> +	inode = file_inode(file);
> +	/*
> +	 * args->treeid must be matched with this inode's objectid
> +	 * to previent for user to search any fs/file tree.
> +	 */
> +	if (args->treeid != BTRFS_I(inode)->root->root_key.objectid)
> +		return -EINVAL;
> +
> +	ret = btrfs_search_path_in_tree_user(BTRFS_I(inode)->root->fs_info,
> +					inode->i_sb, BTRFS_I(inode)->location,
> +					args);
> +
> +	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
> +		ret = -EFAULT;
> +
> +	kfree(args);
> +	return ret;
> +}
> +
>  static noinline int search_subvol(struct inode *inode,
>  				 struct btrfs_ioctl_search_key *sk,
>  				 size_t *buf_size,
> @@ -5917,6 +6133,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_set_features(file, argp);
>  	case BTRFS_IOC_GET_SUBVOL_INFO:
>  		return btrfs_ioctl_get_subvol_info(file, argp);
> +	case BTRFS_IOC_INO_LOOKUP_USER:
> +		return btrfs_ioctl_ino_lookup_user(file, argp);
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index 1e9361cf66d5..22326d631417 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -422,6 +422,20 @@ struct btrfs_ioctl_ino_lookup_args {
>  	char name[BTRFS_INO_LOOKUP_PATH_MAX];
>  };
>  
> +#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4072-BTRFS_VOL_NAME_MAX)
> +struct btrfs_ioctl_ino_lookup_user_args {
> +	/* in */
> +	__u64 treeid;
> +	/* in, 'dirid' is the same as objectid in btrfs_ioctl_ino_lookup_args */
> +	__u64 dirid;
> +	/* in, subvolume id which exists under the directory of 'dirid' */
> +	__u64 subid;
> +	/* out, name of the subvolume of 'subid' */
> +	char name[BTRFS_VOL_NAME_MAX];
> +	/* out, 'path' is the same as 'name' in btrfs_ioctl_ino_lookup_args */
> +	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
> +};
> +
>  /* Search criteria for the btrfs SEARCH ioctl family. */
>  struct btrfs_ioctl_search_key {
>  	/*
> @@ -845,5 +859,7 @@ enum btrfs_err_code {
>  					struct btrfs_ioctl_logical_ino_args)
>  #define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
>  					struct btrfs_ioctl_search_args)
> +#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 61, \
> +					struct btrfs_ioctl_ino_lookup_args)

The argument obviously should be "btrfs_ioctl_ino_lookup_user_args".
I will fix this in v2.

>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-07  0:40     ` Misono, Tomohiro
@ 2018-03-07 16:36       ` David Sterba
  2018-03-07 19:00       ` Goffredo Baroncelli
  1 sibling, 0 replies; 14+ messages in thread
From: David Sterba @ 2018-03-07 16:36 UTC (permalink / raw)
  To: Misono, Tomohiro; +Cc: kreijack, linux-btrfs

On Wed, Mar 07, 2018 at 09:40:18AM +0900, Misono, Tomohiro wrote:
> On 2018/03/07 5:29, Goffredo Baroncelli wrote:
> > On 03/06/2018 09:30 AM, Misono, Tomohiro wrote:
> >> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
> >> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
> >> from root tree. The arguments of this ioctl are the same as treesearch
> >> ioctl and can be used like treesearch ioctl.
> > 
> > Is it a pro ? The treesearch ioctl is tightly coupled to the btrfs internal structure, this means that if we would change the btrfs internal structure, we have to update all the clients of this api. For the treesearch it is an acceptable compromise between flexibility and speed of developing. But for a more specialized API, I think that it would make sense to provide a less coupled api to the internal structure.
> 
> Thanks for the comments.
> 
> The reason I choose the same api is just to minimize the code change in btrfs-progs.

That's not IMO a good reason. We can cahnge the code in btrfs-progs and
that's not going to be the only user of the ioctl so the interfact (ie.
the structures) should be adapted for the needs of the ioctl.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-07  0:40     ` Misono, Tomohiro
  2018-03-07 16:36       ` David Sterba
@ 2018-03-07 19:00       ` Goffredo Baroncelli
  2018-03-09  1:10         ` Misono, Tomohiro
  1 sibling, 1 reply; 14+ messages in thread
From: Goffredo Baroncelli @ 2018-03-07 19:00 UTC (permalink / raw)
  To: Misono, Tomohiro, linux-btrfs

On 03/07/2018 01:40 AM, Misono, Tomohiro wrote:
> On 2018/03/07 5:29, Goffredo Baroncelli wrote:
>> On 03/06/2018 09:30 AM, Misono, Tomohiro wrote:
>>> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
>>> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
>>> from root tree. The arguments of this ioctl are the same as treesearch
>>> ioctl and can be used like treesearch ioctl.
>>
>> Is it a pro ? The treesearch ioctl is tightly coupled to the btrfs internal structure, this means that if we would change the btrfs internal structure, we have to update all the clients of this api. For the treesearch it is an acceptable compromise between flexibility and speed of developing. But for a more specialized API, I think that it would make sense to provide a less coupled api to the internal structure.
> 
> Thanks for the comments.
> 
> The reason I choose the same api is just to minimize the code change in btrfs-progs.
> For tree search part, it works just switching the ioctl number from BTRFS_IOC_TREE_SEARCH
> to BTRFS_IOC_GET_SUBVOL_INFO in list_subvol_search()[1].
> 
> [1] https://marc.info/?l=linux-btrfs&m=152032537911218&w=2

I suggest to avoid this approach. An API is forever; we already have a "root-only" one which is quite unfriendly and error prone; I think that we should put all the energies to make a better one. 

I think that the major weaknesses of this api are:
- it exports the the data in "le" format  (see struct btrfs_root_item as example); 
- it requires the user to increase the key for the next ioctl call. This could be doable in the kernel space before returning
- this ioctl exports both the ROOT_BACKREF and ROOT_ITEM info. Why not make two separate (simplers) ioctl(s) ?

>>
>> Below some comments
[...]

>>> +	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
>>> +		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
>>> +		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&
>>> +	    key->type >= BTRFS_ROOT_ITEM_KEY &&
>>> +	    key->type <= BTRFS_ROOT_BACKREF_KEY)
>>
>> Why returning all the range [BTRFS_ROOT_ITEM_KEY...BTRFS_ROOT_BACKREF_KEY]. It would be sufficient to return only
>>
>>   +	    (key->type == BTRFS_ROOT_ITEM_KEY ||
>>   +	     key->type == BTRFS_ROOT_BACKREF_KEY))
> 
> Sorry, it is a mistake, I mean "key->type <= BTRFS_ROOTREF_KEY".
> Although btrfs-progs only uses BTRFS_ROOT_BACKREF_KEY, I notice libbtrfs 
> uses BTRFS_ROOT_REF_KEY instead. So, I think it is better to return both
> ROOT_BACKREF_KEY and ROOT_REF_KEY. I will fix this in v2.
> 

I was referring to the '>=' and '<=' instead of '=='. If another type is added in the middle, it would be returned. I find it a bit error prone.

BR
G.Baroncelli
-- 
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D  17B2 0EDA 9B37 8B82 E0B5

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-06  8:30 ` [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl Misono, Tomohiro
  2018-03-06 20:29   ` Goffredo Baroncelli
@ 2018-03-08  8:29   ` Nikolay Borisov
  2018-03-09  1:16     ` Misono, Tomohiro
  1 sibling, 1 reply; 14+ messages in thread
From: Nikolay Borisov @ 2018-03-08  8:29 UTC (permalink / raw)
  To: Misono, Tomohiro, linux-btrfs



On  6.03.2018 10:30, Misono, Tomohiro wrote:
> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
> from root tree. The arguments of this ioctl are the same as treesearch
> ioctl and can be used like treesearch ioctl.
> 
> Since treesearch ioctl requires root privilege, this ioctl is needed
> to allow normal users to call "btrfs subvolume list/show" etc.
> 
> Note that the subvolume name in ROOT_BACKREF/ROOT_REF will not be copied to
> prevent potential name leak. The name can be obtained by calling
> user version's ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER).
> 
> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
> ---
>  fs/btrfs/ioctl.c           | 254 +++++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/btrfs.h |   2 +
>  2 files changed, 256 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 111ee282b777..1dba309dce31 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1921,6 +1921,28 @@ static noinline int key_in_sk(struct btrfs_key *key,
>  	return 1;
>  }
>  
> +/*
> + * check if key is in sk and subvolume related range
> + */
> +static noinline int key_in_sk_and_subvol(struct btrfs_key *key,
> +			      struct btrfs_ioctl_search_key *sk)
> +{
> +	int ret;
> +
> +	ret = key_in_sk(key, sk);
> +	if (!ret)
> +		return 0;
> +
> +	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
> +		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
> +		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&

Why do you need the FIRST_FREE/LAST_FREE object id checks? The range
described by those are ordinary files. Since you are only interested in
subvolume data then you should omit those, no?

> +	    key->type >= BTRFS_ROOT_ITEM_KEY &&
> +	    key->type <= BTRFS_ROOT_BACKREF_KEY)
I think this check should really be:

if (key->objectid == BTRFS_FS_TREE_OBJECTID || key->objectid ==
BTRFS_ROOT_ITEM_KEY || key->type == BTRFS_ROOT_BACKREF_KEY

> +		return 1;
> +
> +	return 0;
> +}
> +
>  static noinline int copy_to_sk(struct btrfs_path *path,
>  			       struct btrfs_key *key,
>  			       struct btrfs_ioctl_search_key *sk,
> @@ -2045,6 +2067,142 @@ static noinline int copy_to_sk(struct btrfs_path *path,
>  	return ret;
>  }
>  
> +/*
> + * Basically the same as copy_to_sk() but restricts the copied item
> + * within subvolume related one using key_in_sk_and_subvol().
> + * Also, the name of subvolume will be omitted from
> + * ROOT_BACKREF/ROOT_REF item.
> + */
> +static noinline int copy_subvol_item(struct btrfs_path *path,
> +			       struct btrfs_key *key,
> +			       struct btrfs_ioctl_search_key *sk,
> +			       size_t *buf_size,
> +			       char __user *ubuf,
> +			       unsigned long *sk_offset,
> +			       int *num_found)
> +{
> +	u64 found_transid;
> +	struct extent_buffer *leaf;
> +	struct btrfs_ioctl_search_header sh;
> +	struct btrfs_key test;
> +	unsigned long item_off;
> +	unsigned long item_len;
> +	int nritems;
> +	int i;
> +	int slot;
> +	int ret = 0;
> +
> +	leaf = path->nodes[0];
> +	slot = path->slots[0];
> +	nritems = btrfs_header_nritems(leaf);
> +
> +	if (btrfs_header_generation(leaf) > sk->max_transid) {
> +		i = nritems;
> +		goto advance_key;

I don't see why you need a goto label at all. Just put the necessary
code inside the if and return directly.

> +	}
> +	found_transid = btrfs_header_generation(leaf);
> +
> +	for (i = slot; i < nritems; i++) {
> +		item_off = btrfs_item_ptr_offset(leaf, i);
> +		item_len = btrfs_item_size_nr(leaf, i);
> +
> +		btrfs_item_key_to_cpu(leaf, key, i);
> +		if (!key_in_sk_and_subvol(key, sk))
> +			continue;
> +
> +		/* will not copy the name of subvolume */
> +		if (key->type == BTRFS_ROOT_BACKREF_KEY ||
> +		    key->type == BTRFS_ROOT_REF_KEY)
> +			item_len = sizeof(struct btrfs_root_ref);
> +
> +		if (sizeof(sh) + item_len > *buf_size) {
> +			if (*num_found) {
> +				ret = 1;
> +				goto out;
 This can be a simple return 1;


> +			}
> +
> +			/*
> +			 * return one empty item back for v1, which does not
> +			 * handle -EOVERFLOW
> +			 */
> +
> +			*buf_size = sizeof(sh) + item_len;
> +			item_len = 0;
> +			ret = -EOVERFLOW;
> +		}
> +
> +		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
> +			ret = 1;
> +			goto out;
ditto
> +		}
> +
> +		sh.objectid = key->objectid;
> +		sh.offset = key->offset;
> +		sh.type = key->type;
> +		sh.len = item_len;
> +		sh.transid = found_transid;
> +
> +		/* copy search result header */
> +		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
> +			ret = -EFAULT;
> +			goto out;
ditto
> +		}
> +
> +		*sk_offset += sizeof(sh);
> +
> +		if (item_len) {
> +			char __user *up = ubuf + *sk_offset;
> +
> +			/* copy the item */
> +			if (read_extent_buffer_to_user(leaf, up,
> +						       item_off, item_len)) {
> +				ret = -EFAULT;
> +				goto out;
ditto
> +			}
> +
> +			*sk_offset += item_len;
> +		}
> +		(*num_found)++;
> +
> +		if (ret) /* -EOVERFLOW from above */
> +			goto out;
return ret
> +
> +		if (*num_found >= sk->nr_items) {
> +			ret = 1;
> +			goto out;
return 1
> +		}
> +	}
> +advance_key:
> +	ret = 0;
> +	test.objectid = sk->max_objectid;
> +	test.type = sk->max_type;
> +	test.offset = sk->max_offset;
> +	if (btrfs_comp_cpu_keys(key, &test) >= 0)
> +		ret = 1;
> +	else if (key->offset < (u64)-1)
> +		key->offset++;
> +	else if (key->type < (u8)-1) {
> +		key->offset = 0;
> +		key->type++;
> +	} else if (key->objectid < (u64)-1) {
> +		key->offset = 0;
> +		key->type = 0;
> +		key->objectid++;
> +	} else
> +		ret = 1;
> +out:

Given that you are not doing any unwinding of locks or some such I don't
see any reason for the label at all.

> +	/*
> +	 *  0: all items from this leaf copied, continue with next
> +	 *  1: * more items can be copied, but unused buffer is too small
> +	 *     * all items were found
> +	 *     Either way, it will stops the loop which iterates to the next
> +	 *     leaf
> +	 *  -EOVERFLOW: item was to large for buffer
> +	 *  -EFAULT: could not copy extent buffer back to userspace
> +	 */
Put the description of the return value above the function definition
> +	return ret;
> +}
> +
>  static noinline int search_ioctl(struct inode *inode,
>  				 struct btrfs_ioctl_search_key *sk,
>  				 size_t *buf_size,
> @@ -2309,6 +2467,100 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>  	return ret;
>  }
>  
> +static noinline int search_subvol(struct inode *inode,
> +				 struct btrfs_ioctl_search_key *sk,
> +				 size_t *buf_size,
> +				 char __user *ubuf)
> +{
> +	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
> +	struct btrfs_root *root;
> +	struct btrfs_key key;
> +	struct btrfs_path *path;
> +	unsigned long sk_offset = 0;
> +	int num_found = 0;
> +	int ret;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	/* search ROOT TREEE */
> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_read_fs_root_no_name(info, &key);
> +	if (IS_ERR(root)) {
> +		btrfs_free_path(path);
> +		return -ENOENT;
> +	}

So why do you have to do this, when you can just do :
struct btrfs_root *root = fs_info->tree_root;

Looking at the wider codebase, the root tree is referenced directly
almost all the time. btrfs_read_fs_root_no_name is generally used when
you have the objectid of a particular subvol root item and you want to
get that one. But this is not the case here.

> +
> +	key.objectid = sk->min_objectid;
> +	key.type = sk->min_type;
> +	key.offset = sk->min_offset;
> +
> +	while (1) {
> +		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
> +		if (ret != 0) {
> +			if (ret > 0)
> +				ret = 0;
> +			goto err;
> +		}
> +
> +		ret = copy_subvol_item(path, &key, sk, buf_size, ubuf,
> +				 &sk_offset, &num_found);
> +		btrfs_release_path(path);
> +		if (ret)
> +			break;
> +	}
> +
> +	if (ret > 0)
> +		ret = 0;
> +err:
> +	sk->nr_items = num_found;
> +	btrfs_free_path(path);
> +	return ret;
> +}
> +
> +/*
> + * Unprivileged ioctl for getting subvolume related item.
> + * The arguments are the same as btrfs_ioctl_tree_search()
> + * and can be used like tree search ioctl.
> + *
> + * Only returns ROOT_ITEM/ROOT_BACKREF/ROOT_REF key of subvolumes
> + * from root tree. Also, the name of subvolume will be omitted from
> + * ROOT_BACKREF/ROOT_REF item to prevent name leak.
> + */
> +static noinline int btrfs_ioctl_get_subvol_info(struct file *file,
> +					   void __user *argp)
> +{
> +	struct btrfs_ioctl_search_args __user *uargs;
> +	struct btrfs_ioctl_search_key sk;
> +	struct inode *inode;
> +	size_t buf_size;
> +	int ret;
> +
> +	uargs = (struct btrfs_ioctl_search_args __user *)argp;
> +
> +	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
> +		return -EFAULT;
> +
> +	buf_size = sizeof(uargs->buf);
> +
> +	inode = file_inode(file);
> +	ret = search_subvol(inode, &sk, &buf_size, uargs->buf);
> +
> +	/*
> +	 * In the original implementation an overflow is handled by returning a
> +	 * search header with a len of zero, so reset ret.
> +	 */
> +	if (ret == -EOVERFLOW)
> +		ret = 0;
> +
> +	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
> +		ret = -EFAULT;
> +	return ret;
> +}
> +
>  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  					     void __user *arg)
>  {
> @@ -5663,6 +5915,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_get_features(file, argp);
>  	case BTRFS_IOC_SET_FEATURES:
>  		return btrfs_ioctl_set_features(file, argp);
> +	case BTRFS_IOC_GET_SUBVOL_INFO:
> +		return btrfs_ioctl_get_subvol_info(file, argp);
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index c8d99b9ca550..1e9361cf66d5 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -843,5 +843,7 @@ enum btrfs_err_code {
>  				   struct btrfs_ioctl_vol_args_v2)
>  #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
>  					struct btrfs_ioctl_logical_ino_args)
> +#define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
> +					struct btrfs_ioctl_search_args)
>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl
  2018-03-06  8:31 ` [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl Misono, Tomohiro
  2018-03-07  0:42   ` Misono, Tomohiro
@ 2018-03-08  9:47   ` Nikolay Borisov
  2018-03-09  1:29     ` Misono, Tomohiro
  1 sibling, 1 reply; 14+ messages in thread
From: Nikolay Borisov @ 2018-03-08  9:47 UTC (permalink / raw)
  To: Misono, Tomohiro, linux-btrfs



On  6.03.2018 10:31, Misono, Tomohiro wrote:
> Add unprivileged version of ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER)
> to allow normal users to call "btrfs subvololume list/show" etc. in
> combination with BTRFS_IOC_GET_SUBVOL_INFO.
> 
> This can be used like BTRFS_IOC_INO_LOOKUP but the argument is
> different because it also returns the name of bottom subvolume,
> which is ommited by BTRFS_IOC_GET_SUBVOL_INFO ioctl.
> Also, this must be called with fd which exists under the tree of
> args->treeid to prevent for user to search any tree.
> 
> The main differences from original ino_lookup ioctl are:
>   1. Read permission will be checked using inode_permission()
>      during path construction. -EACCES will be returned in case
>      of failure.
>   2. Path construction will be stopped at the inode number which
>      corresponds to the fd with which this ioctl is called. If
>      constructed path does not exist under fd's inode, -EACCES
>      will be returned.
>   3. The name of bottom subvolume is also searched and filled.
> 
> Note that the maximum length of path is shorter 272 bytes than

Did you mean 255? Since BTRFS_VOL_NAME_MAX is defined as 255?

> ino_lookup ioctl because of space of subvolume's id and name.
> 
> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
> ---
>  fs/btrfs/ioctl.c           | 218 +++++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/btrfs.h |  16 ++++
>  2 files changed, 234 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1dba309dce31..ac23da98b7e7 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2425,6 +2425,179 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
>  	return ret;
>  }
>  
> +static noinline int btrfs_search_path_in_tree_user(struct btrfs_fs_info *info,
> +				struct super_block *sb,
> +				struct btrfs_key upper_limit,
> +				struct btrfs_ioctl_ino_lookup_user_args *args)
> +{
> +	struct btrfs_root *root;
> +	struct btrfs_key key, key2;
> +	char *ptr;
> +	int ret = -1;
> +	int slot;
> +	int len;
> +	int total_len = 0;
> +	u64 dirid = args->dirid;
> +	struct btrfs_inode_ref *iref;
> +	struct btrfs_root_ref rref;
> +
> +	unsigned long item_off;
> +	unsigned long item_len;
> +
> +	struct extent_buffer *l;
> +	struct btrfs_path *path = NULL;
> +
> +	struct inode *inode;
> +	int *new = 0;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	if (dirid == upper_limit.objectid)
> +		/*
> +		 * If the bottom subvolume exists directly under upper limit,
> +		 * there is no need to construct the path and just get the
> +		 * subvolume's name
> +		 */
> +		goto get_subvol_name;

Eliminate the label and factor out the code in a new function or just
put in the body of the if.

> +	if (dirid == BTRFS_FIRST_FREE_OBJECTID)
> +		/* The subvolume does not exist under upper_limit */
> +		goto access_err;

just set ret to -EACCESS and goto out. And eliminate access_err label
altogether. Furthermore, shouldn't this check really ether:

a) Be the first one in this function so that we return ASAP (i.e. even
before calling btrfs_alloc_path)

or

b) Put in the ioctl function itself. Currently for the
btrfs_ioctl_ino_lookup case it's duplicated in both that function and
btrfs_search_path_in_tree

> +
> +	ptr = &args->path[BTRFS_INO_LOOKUP_PATH_MAX - 1];
> +
> +	key.objectid = args->treeid;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_read_fs_root_no_name(info, &key);
> +	if (IS_ERR(root)) {
> +		btrfs_err(info, "could not find root %llu", args->treeid);
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	key.objectid = dirid;
> +	key.type = BTRFS_INODE_REF_KEY;
> +	key.offset = (u64)-1;
> +
> +	while (1) {
> +		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +		if (ret < 0)
> +			goto out;
> +		else if (ret > 0) {
> +			ret = btrfs_previous_item(root, path, dirid,
> +						  BTRFS_INODE_REF_KEY);
> +			if (ret < 0)
> +				goto out;
> +			else if (ret > 0) {
> +				ret = -ENOENT;
> +				goto out;
> +			}
> +		}
> +
> +		l = path->nodes[0];
> +		slot = path->slots[0];
> +		btrfs_item_key_to_cpu(l, &key, slot);
> +
> +		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
> +		len = btrfs_inode_ref_name_len(l, iref);
> +		ptr -= len + 1;
> +		total_len += len + 1;
> +		if (ptr < args->path) {
> +			ret = -ENAMETOOLONG;
> +			goto out;
> +		}
> +
> +		*(ptr + len) = '/';
> +		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
> +
> +		/* Check the read permission */
> +		ret = btrfs_previous_item(root, path, dirid,
> +					  BTRFS_INODE_ITEM_KEY);
> +		if (ret < 0) {
> +			goto out;
> +		} else if (ret > 0) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +		l = path->nodes[0];
> +		slot = path->slots[0];
> +
> +		btrfs_item_key_to_cpu(l, &key2, slot);
> +		inode = btrfs_iget(sb, &key2, root, new);
> +		ret = inode_permission(inode, MAY_READ);
> +		iput(inode);
> +		if (ret)
> +			goto access_err;
> +
> +		if (key.offset == BTRFS_FIRST_FREE_OBJECTID ||
> +		    key.offset == upper_limit.objectid)
> +			break;
> +
> +		btrfs_release_path(path);
> +		key.objectid = key.offset;
> +		key.offset = (u64)-1;
> +		dirid = key.objectid;
> +	}
> +	 /* Check if the searched path exists under the upper_limit */
> +	if (key.offset == BTRFS_FIRST_FREE_OBJECTID &&
> +	    upper_limit.objectid != BTRFS_FIRST_FREE_OBJECTID)
> +		goto access_err;
> +
> +	memmove(args->path, ptr, total_len);
> +	args->path[total_len] = '\0';
> +	btrfs_release_path(path);
> +	ret = 0;
> +
> +get_subvol_name:
> +	/* Get subolume's name from ROOT_REF */
> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_read_fs_root_no_name(info, &key);
> +	if (IS_ERR(root)) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	key.objectid = args->treeid;
> +	key.type = BTRFS_ROOT_REF_KEY;
> +	key.offset = args->subid;
> +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +	if (ret != 0) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +	l = path->nodes[0];
> +	slot = path->slots[0];
> +	btrfs_item_key_to_cpu(l, &key, slot);
> +
> +	item_off = btrfs_item_ptr_offset(l, slot);
> +	item_len = btrfs_item_size_nr(l, slot);
> +	/* Check if dirid in ROOT_REF corresponds to passed arg */
> +	read_extent_buffer(l, &rref, item_off, sizeof(struct btrfs_root_ref));
> +	if (rref.dirid != args->dirid) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	/* Copy subvolume's name */
> +	item_off += sizeof(struct btrfs_root_ref);
> +	item_len -= sizeof(struct btrfs_root_ref);
> +	read_extent_buffer(l, args->name, item_off, item_len);
> +	args->name[item_len] = '\0';
> +
> +out:
> +	btrfs_free_path(path);
> +	return ret;
> +
> +access_err:
> +	btrfs_free_path(path);
> +	ret = -EACCES;
> +	return ret;
> +}
> +
>  static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>  					   void __user *argp)
>  {
> @@ -2467,6 +2640,49 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>  	return ret;
>  }
>  
> +/*
> + * User version of ino_lookup ioctl (unprivileged)
> + *
> + * The main differences from original ino_lookup ioctl are:
> + *   1. Read permission will be checked using inode_permission()
> + *      during path construction. -EACCES will be returned in case
> + *      of failure.
> + *   2. Path construction will be stopped at the inode number which
> + *      corresponds to the fd with which this ioctl is called. If
> + *      constructed path does not exist under fd's inode, -EACCES
> + *      will be returned.
> + *   3. The name of bottom subvolume is also searched and filled.
> + */
> +static noinline int btrfs_ioctl_ino_lookup_user(struct file *file,
> +					   void __user *argp)
> +{
> +	struct btrfs_ioctl_ino_lookup_user_args *args;
> +	struct inode *inode;
> +	int ret = 0;
> +
> +	args = memdup_user(argp, sizeof(*args));
> +	if (IS_ERR(args))
> +		return PTR_ERR(args);
> +
> +	inode = file_inode(file);
> +	/*
> +	 * args->treeid must be matched with this inode's objectid
> +	 * to previent for user to search any fs/file tree.
> +	 */
> +	if (args->treeid != BTRFS_I(inode)->root->root_key.objectid)
> +		return -EINVAL;

Do we want something like :
if (args->treeid == 0)
                args->treeid = BTRFS_I(inode)->root->root_key.objectid;

or does the ioctl impose a hard requirement for treeid to be set?

> +
> +	ret = btrfs_search_path_in_tree_user(BTRFS_I(inode)->root->fs_info,
> +					inode->i_sb, BTRFS_I(inode)->location,
> +					args);

Just pass in the the inode and reference all the parameters inside the
function. btrfs_search_path_in_tree_user really requires only 2 args:
inode and 'args'

> +
> +	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
> +		ret = -EFAULT;
> +
> +	kfree(args);
> +	return ret;
> +}
> +
>  static noinline int search_subvol(struct inode *inode,
>  				 struct btrfs_ioctl_search_key *sk,
>  				 size_t *buf_size,
> @@ -5917,6 +6133,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_set_features(file, argp);
>  	case BTRFS_IOC_GET_SUBVOL_INFO:
>  		return btrfs_ioctl_get_subvol_info(file, argp);
> +	case BTRFS_IOC_INO_LOOKUP_USER:
> +		return btrfs_ioctl_ino_lookup_user(file, argp);
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index 1e9361cf66d5..22326d631417 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -422,6 +422,20 @@ struct btrfs_ioctl_ino_lookup_args {
>  	char name[BTRFS_INO_LOOKUP_PATH_MAX];
>  };
>  
> +#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4072-BTRFS_VOL_NAME_MAX)
Where does 4072 come from? Is it related to
#define BTRFS_INO_LOOKUP_PATH_MAX 4080

in any way ?
> +struct btrfs_ioctl_ino_lookup_user_args {
> +	/* in */
> +	__u64 treeid;
> +	/* in, 'dirid' is the same as objectid in btrfs_ioctl_ino_lookup_args */

Given there is no description of the objectid parameter of
ino_lookup_args. Your reference to it doesn't really make its purpose
any more clear.

> +	__u64 dirid;
> +	/* in, subvolume id which exists under the directory of 'dirid' */
> +	__u64 subid;
> +	/* out, name of the subvolume of 'subid' */
> +	char name[BTRFS_VOL_NAME_MAX];
> +	/* out, 'path' is the same as 'name' in btrfs_ioctl_ino_lookup_args */
> +	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
> +};
> +
>  /* Search criteria for the btrfs SEARCH ioctl family. */
>  struct btrfs_ioctl_search_key {
>  	/*
> @@ -845,5 +859,7 @@ enum btrfs_err_code {
>  					struct btrfs_ioctl_logical_ino_args)
>  #define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
>  					struct btrfs_ioctl_search_args)
> +#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 61, \
> +					struct btrfs_ioctl_ino_lookup_args)
>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-07 19:00       ` Goffredo Baroncelli
@ 2018-03-09  1:10         ` Misono, Tomohiro
  0 siblings, 0 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-09  1:10 UTC (permalink / raw)
  To: kreijack, linux-btrfs



On 2018/03/08 4:00, Goffredo Baroncelli wrote:
> On 03/07/2018 01:40 AM, Misono, Tomohiro wrote:
>> On 2018/03/07 5:29, Goffredo Baroncelli wrote:
>>> On 03/06/2018 09:30 AM, Misono, Tomohiro wrote:
>>>> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
>>>> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
>>>> from root tree. The arguments of this ioctl are the same as treesearch
>>>> ioctl and can be used like treesearch ioctl.
>>>
>>> Is it a pro ? The treesearch ioctl is tightly coupled to the btrfs internal structure, this means that if we would change the btrfs internal structure, we have to update all the clients of this api. For the treesearch it is an acceptable compromise between flexibility and speed of developing. But for a more specialized API, I think that it would make sense to provide a less coupled api to the internal structure.
>>
>> Thanks for the comments.
>>
>> The reason I choose the same api is just to minimize the code change in btrfs-progs.
>> For tree search part, it works just switching the ioctl number from BTRFS_IOC_TREE_SEARCH
>> to BTRFS_IOC_GET_SUBVOL_INFO in list_subvol_search()[1].
>>
>> [1] https://marc.info/?l=linux-btrfs&m=152032537911218&w=2
> 
> I suggest to avoid this approach. An API is forever; we already have a "root-only" one which is quite unfriendly and error prone; I think that we should put all the energies to make a better one. 
> 
> I think that the major weaknesses of this api are:
> - it exports the the data in "le" format  (see struct btrfs_root_item as example); 
> - it requires the user to increase the key for the next ioctl call. This could be doable in the kernel space before returning
> - this ioctl exports both the ROOT_BACKREF and ROOT_ITEM info. Why not make two separate (simplers) ioctl(s) ?

I agree with you and will split this ioctl into two pats (one is for getting ROOT_ITEM and the other for ROOT_REF/BACKREF)
and make them to have user friendly apis.

> 
>>>
>>> Below some comments
> [...]
> 
>>>> +	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
>>>> +		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
>>>> +		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&
>>>> +	    key->type >= BTRFS_ROOT_ITEM_KEY &&
>>>> +	    key->type <= BTRFS_ROOT_BACKREF_KEY)
>>>
>>> Why returning all the range [BTRFS_ROOT_ITEM_KEY...BTRFS_ROOT_BACKREF_KEY]. It would be sufficient to return only
>>>
>>>   +	    (key->type == BTRFS_ROOT_ITEM_KEY ||
>>>   +	     key->type == BTRFS_ROOT_BACKREF_KEY))
>>
>> Sorry, it is a mistake, I mean "key->type <= BTRFS_ROOTREF_KEY".
>> Although btrfs-progs only uses BTRFS_ROOT_BACKREF_KEY, I notice libbtrfs 
>> uses BTRFS_ROOT_REF_KEY instead. So, I think it is better to return both
>> ROOT_BACKREF_KEY and ROOT_REF_KEY. I will fix this in v2.
>>
> 
> I was referring to the '>=' and '<=' instead of '=='. If another type is added in the middle, it would be returned. I find it a bit error prone.

ok, I will change this. thanks.
Tomohiro Misono

> 
> BR
> G.Baroncelli
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl
  2018-03-08  8:29   ` Nikolay Borisov
@ 2018-03-09  1:16     ` Misono, Tomohiro
  0 siblings, 0 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-09  1:16 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs


On 2018/03/08 17:29, Nikolay Borisov wrote:
> 
> 
> On  6.03.2018 10:30, Misono, Tomohiro wrote:
>> Add new unprivileged ioctl (BTRFS_IOC_GET_SUBVOL_INFO) which searches
>> and returns only subvolume related item (ROOT_ITEM/ROOT_BACKREF/ROOT_REF)
>> from root tree. The arguments of this ioctl are the same as treesearch
>> ioctl and can be used like treesearch ioctl.
>>
>> Since treesearch ioctl requires root privilege, this ioctl is needed
>> to allow normal users to call "btrfs subvolume list/show" etc.
>>
>> Note that the subvolume name in ROOT_BACKREF/ROOT_REF will not be copied to
>> prevent potential name leak. The name can be obtained by calling
>> user version's ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER).
>>
>> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
>> ---
>>  fs/btrfs/ioctl.c           | 254 +++++++++++++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/btrfs.h |   2 +
>>  2 files changed, 256 insertions(+)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 111ee282b777..1dba309dce31 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -1921,6 +1921,28 @@ static noinline int key_in_sk(struct btrfs_key *key,
>>  	return 1;
>>  }
>>  
>> +/*
>> + * check if key is in sk and subvolume related range
>> + */
>> +static noinline int key_in_sk_and_subvol(struct btrfs_key *key,
>> +			      struct btrfs_ioctl_search_key *sk)
>> +{
>> +	int ret;
>> +
>> +	ret = key_in_sk(key, sk);
>> +	if (!ret)
>> +		return 0;
>> +
>> +	if ((key->objectid == BTRFS_FS_TREE_OBJECTID ||
>> +		(key->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
>> +		 key->objectid <= BTRFS_LAST_FREE_OBJECTID)) &&
> 
> Why do you need the FIRST_FREE/LAST_FREE object id checks? The range
> described by those are ordinary files. Since you are only interested in
> subvolume data then you should omit those, no?

There are ROOT_ITEM for other trees (EXTETN_TREE, DEV_TREE etc.) and they should be skipped.

> 
>> +	    key->type >= BTRFS_ROOT_ITEM_KEY &&
>> +	    key->type <= BTRFS_ROOT_BACKREF_KEY)
> I think this check should really be:
> 
> if (key->objectid == BTRFS_FS_TREE_OBJECTID || key->objectid ==
> BTRFS_ROOT_ITEM_KEY || key->type == BTRFS_ROOT_BACKREF_KEY
>>> +		return 1;
>> +
>> +	return 0;
>> +}
>> +
>>  static noinline int copy_to_sk(struct btrfs_path *path,
>>  			       struct btrfs_key *key,
>>  			       struct btrfs_ioctl_search_key *sk,
>> @@ -2045,6 +2067,142 @@ static noinline int copy_to_sk(struct btrfs_path *path,
>>  	return ret;
>>  }
>>  
>> +/*
>> + * Basically the same as copy_to_sk() but restricts the copied item
>> + * within subvolume related one using key_in_sk_and_subvol().
>> + * Also, the name of subvolume will be omitted from
>> + * ROOT_BACKREF/ROOT_REF item.
>> + */
>> +static noinline int copy_subvol_item(struct btrfs_path *path,
>> +			       struct btrfs_key *key,
>> +			       struct btrfs_ioctl_search_key *sk,
>> +			       size_t *buf_size,
>> +			       char __user *ubuf,
>> +			       unsigned long *sk_offset,
>> +			       int *num_found)
>> +{
>> +	u64 found_transid;
>> +	struct extent_buffer *leaf;
>> +	struct btrfs_ioctl_search_header sh;
>> +	struct btrfs_key test;
>> +	unsigned long item_off;
>> +	unsigned long item_len;
>> +	int nritems;
>> +	int i;
>> +	int slot;
>> +	int ret = 0;
>> +
>> +	leaf = path->nodes[0];
>> +	slot = path->slots[0];
>> +	nritems = btrfs_header_nritems(leaf);
>> +
>> +	if (btrfs_header_generation(leaf) > sk->max_transid) {
>> +		i = nritems;
>> +		goto advance_key;
> 
> I don't see why you need a goto label at all. Just put the necessary
> code inside the if and return directly.

Most of this code is just copied from copy_to_sk().
I will rewrite code in more appropriate way in future version.

> 
>> +	}
>> +	found_transid = btrfs_header_generation(leaf);
>> +
>> +	for (i = slot; i < nritems; i++) {
>> +		item_off = btrfs_item_ptr_offset(leaf, i);
>> +		item_len = btrfs_item_size_nr(leaf, i);
>> +
>> +		btrfs_item_key_to_cpu(leaf, key, i);
>> +		if (!key_in_sk_and_subvol(key, sk))
>> +			continue;
>> +
>> +		/* will not copy the name of subvolume */
>> +		if (key->type == BTRFS_ROOT_BACKREF_KEY ||
>> +		    key->type == BTRFS_ROOT_REF_KEY)
>> +			item_len = sizeof(struct btrfs_root_ref);
>> +
>> +		if (sizeof(sh) + item_len > *buf_size) {
>> +			if (*num_found) {
>> +				ret = 1;
>> +				goto out;
>  This can be a simple return 1;
> 
> 
>> +			}
>> +
>> +			/*
>> +			 * return one empty item back for v1, which does not
>> +			 * handle -EOVERFLOW
>> +			 */
>> +
>> +			*buf_size = sizeof(sh) + item_len;
>> +			item_len = 0;
>> +			ret = -EOVERFLOW;
>> +		}
>> +
>> +		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
>> +			ret = 1;
>> +			goto out;
> ditto
>> +		}
>> +
>> +		sh.objectid = key->objectid;
>> +		sh.offset = key->offset;
>> +		sh.type = key->type;
>> +		sh.len = item_len;
>> +		sh.transid = found_transid;
>> +
>> +		/* copy search result header */
>> +		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
>> +			ret = -EFAULT;
>> +			goto out;
> ditto
>> +		}
>> +
>> +		*sk_offset += sizeof(sh);
>> +
>> +		if (item_len) {
>> +			char __user *up = ubuf + *sk_offset;
>> +
>> +			/* copy the item */
>> +			if (read_extent_buffer_to_user(leaf, up,
>> +						       item_off, item_len)) {
>> +				ret = -EFAULT;
>> +				goto out;
> ditto
>> +			}
>> +
>> +			*sk_offset += item_len;
>> +		}
>> +		(*num_found)++;
>> +
>> +		if (ret) /* -EOVERFLOW from above */
>> +			goto out;
> return ret
>> +
>> +		if (*num_found >= sk->nr_items) {
>> +			ret = 1;
>> +			goto out;
> return 1
>> +		}
>> +	}
>> +advance_key:
>> +	ret = 0;
>> +	test.objectid = sk->max_objectid;
>> +	test.type = sk->max_type;
>> +	test.offset = sk->max_offset;
>> +	if (btrfs_comp_cpu_keys(key, &test) >= 0)
>> +		ret = 1;
>> +	else if (key->offset < (u64)-1)
>> +		key->offset++;
>> +	else if (key->type < (u8)-1) {
>> +		key->offset = 0;
>> +		key->type++;
>> +	} else if (key->objectid < (u64)-1) {
>> +		key->offset = 0;
>> +		key->type = 0;
>> +		key->objectid++;
>> +	} else
>> +		ret = 1;
>> +out:
> 
> Given that you are not doing any unwinding of locks or some such I don't
> see any reason for the label at all.
> 
>> +	/*
>> +	 *  0: all items from this leaf copied, continue with next
>> +	 *  1: * more items can be copied, but unused buffer is too small
>> +	 *     * all items were found
>> +	 *     Either way, it will stops the loop which iterates to the next
>> +	 *     leaf
>> +	 *  -EOVERFLOW: item was to large for buffer
>> +	 *  -EFAULT: could not copy extent buffer back to userspace
>> +	 */
> Put the description of the return value above the function definition
>> +	return ret;
>> +}
>> +
>>  static noinline int search_ioctl(struct inode *inode,
>>  				 struct btrfs_ioctl_search_key *sk,
>>  				 size_t *buf_size,
>> @@ -2309,6 +2467,100 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>>  	return ret;
>>  }
>>  
>> +static noinline int search_subvol(struct inode *inode,
>> +				 struct btrfs_ioctl_search_key *sk,
>> +				 size_t *buf_size,
>> +				 char __user *ubuf)
>> +{
>> +	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
>> +	struct btrfs_root *root;
>> +	struct btrfs_key key;
>> +	struct btrfs_path *path;
>> +	unsigned long sk_offset = 0;
>> +	int num_found = 0;
>> +	int ret;
>> +
>> +	path = btrfs_alloc_path();
>> +	if (!path)
>> +		return -ENOMEM;
>> +
>> +	/* search ROOT TREEE */
>> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
>> +	key.type = BTRFS_ROOT_ITEM_KEY;
>> +	key.offset = (u64)-1;
>> +	root = btrfs_read_fs_root_no_name(info, &key);
>> +	if (IS_ERR(root)) {
>> +		btrfs_free_path(path);
>> +		return -ENOENT;
>> +	}
> 
> So why do you have to do this, when you can just do :
> struct btrfs_root *root = fs_info->tree_root;
> 
> Looking at the wider codebase, the root tree is referenced directly
> almost all the time. btrfs_read_fs_root_no_name is generally used when
> you have the objectid of a particular subvol root item and you want to
> get that one. But this is not the case here.
Ok, thanks.
Tomohiro Misono

> 
>> +
>> +	key.objectid = sk->min_objectid;
>> +	key.type = sk->min_type;
>> +	key.offset = sk->min_offset;
>> +
>> +	while (1) {
>> +		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
>> +		if (ret != 0) {
>> +			if (ret > 0)
>> +				ret = 0;
>> +			goto err;
>> +		}
>> +
>> +		ret = copy_subvol_item(path, &key, sk, buf_size, ubuf,
>> +				 &sk_offset, &num_found);
>> +		btrfs_release_path(path);
>> +		if (ret)
>> +			break;
>> +	}
>> +
>> +	if (ret > 0)
>> +		ret = 0;
>> +err:
>> +	sk->nr_items = num_found;
>> +	btrfs_free_path(path);
>> +	return ret;
>> +}
>> +
>> +/*
>> + * Unprivileged ioctl for getting subvolume related item.
>> + * The arguments are the same as btrfs_ioctl_tree_search()
>> + * and can be used like tree search ioctl.
>> + *
>> + * Only returns ROOT_ITEM/ROOT_BACKREF/ROOT_REF key of subvolumes
>> + * from root tree. Also, the name of subvolume will be omitted from
>> + * ROOT_BACKREF/ROOT_REF item to prevent name leak.
>> + */
>> +static noinline int btrfs_ioctl_get_subvol_info(struct file *file,
>> +					   void __user *argp)
>> +{
>> +	struct btrfs_ioctl_search_args __user *uargs;
>> +	struct btrfs_ioctl_search_key sk;
>> +	struct inode *inode;
>> +	size_t buf_size;
>> +	int ret;
>> +
>> +	uargs = (struct btrfs_ioctl_search_args __user *)argp;
>> +
>> +	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
>> +		return -EFAULT;
>> +
>> +	buf_size = sizeof(uargs->buf);
>> +
>> +	inode = file_inode(file);
>> +	ret = search_subvol(inode, &sk, &buf_size, uargs->buf);
>> +
>> +	/*
>> +	 * In the original implementation an overflow is handled by returning a
>> +	 * search header with a len of zero, so reset ret.
>> +	 */
>> +	if (ret == -EOVERFLOW)
>> +		ret = 0;
>> +
>> +	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
>> +		ret = -EFAULT;
>> +	return ret;
>> +}
>> +
>>  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>>  					     void __user *arg)
>>  {
>> @@ -5663,6 +5915,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>>  		return btrfs_ioctl_get_features(file, argp);
>>  	case BTRFS_IOC_SET_FEATURES:
>>  		return btrfs_ioctl_set_features(file, argp);
>> +	case BTRFS_IOC_GET_SUBVOL_INFO:
>> +		return btrfs_ioctl_get_subvol_info(file, argp);
>>  	}
>>  
>>  	return -ENOTTY;
>> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
>> index c8d99b9ca550..1e9361cf66d5 100644
>> --- a/include/uapi/linux/btrfs.h
>> +++ b/include/uapi/linux/btrfs.h
>> @@ -843,5 +843,7 @@ enum btrfs_err_code {
>>  				   struct btrfs_ioctl_vol_args_v2)
>>  #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
>>  					struct btrfs_ioctl_logical_ino_args)
>> +#define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
>> +					struct btrfs_ioctl_search_args)
>>  
>>  #endif /* _UAPI_LINUX_BTRFS_H */
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl
  2018-03-08  9:47   ` Nikolay Borisov
@ 2018-03-09  1:29     ` Misono, Tomohiro
  0 siblings, 0 replies; 14+ messages in thread
From: Misono, Tomohiro @ 2018-03-09  1:29 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs

On 2018/03/08 18:47, Nikolay Borisov wrote:
> 
> 
> On  6.03.2018 10:31, Misono, Tomohiro wrote:
>> Add unprivileged version of ino_lookup ioctl (BTRFS_IOC_INO_LOOKUP_USER)
>> to allow normal users to call "btrfs subvololume list/show" etc. in
>> combination with BTRFS_IOC_GET_SUBVOL_INFO.
>>
>> This can be used like BTRFS_IOC_INO_LOOKUP but the argument is
>> different because it also returns the name of bottom subvolume,
>> which is ommited by BTRFS_IOC_GET_SUBVOL_INFO ioctl.
>> Also, this must be called with fd which exists under the tree of
>> args->treeid to prevent for user to search any tree.
>>
>> The main differences from original ino_lookup ioctl are:
>>   1. Read permission will be checked using inode_permission()
>>      during path construction. -EACCES will be returned in case
>>      of failure.
>>   2. Path construction will be stopped at the inode number which
>>      corresponds to the fd with which this ioctl is called. If
>>      constructed path does not exist under fd's inode, -EACCES
>>      will be returned.
>>   3. The name of bottom subvolume is also searched and filled.
>>
>> Note that the maximum length of path is shorter 272 bytes than
> 
> Did you mean 255? Since BTRFS_VOL_NAME_MAX is defined as 255?

My mistake, actually 255 + 1 (for null) + 8 (for subvolume id) = 264 bytes in this version.

> 
>> ino_lookup ioctl because of space of subvolume's id and name.
>>
>> Signed-off-by: Tomohiro Misono <misono.tomohiro@jp.fujitsu.com>
>> ---
>>  fs/btrfs/ioctl.c           | 218 +++++++++++++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/btrfs.h |  16 ++++
>>  2 files changed, 234 insertions(+)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 1dba309dce31..ac23da98b7e7 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -2425,6 +2425,179 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
>>  	return ret;
>>  }
>>  
>> +static noinline int btrfs_search_path_in_tree_user(struct btrfs_fs_info *info,
>> +				struct super_block *sb,
>> +				struct btrfs_key upper_limit,
>> +				struct btrfs_ioctl_ino_lookup_user_args *args)
>> +{
>> +	struct btrfs_root *root;
>> +	struct btrfs_key key, key2;
>> +	char *ptr;
>> +	int ret = -1;
>> +	int slot;
>> +	int len;
>> +	int total_len = 0;
>> +	u64 dirid = args->dirid;
>> +	struct btrfs_inode_ref *iref;
>> +	struct btrfs_root_ref rref;
>> +
>> +	unsigned long item_off;
>> +	unsigned long item_len;
>> +
>> +	struct extent_buffer *l;
>> +	struct btrfs_path *path = NULL;
>> +
>> +	struct inode *inode;
>> +	int *new = 0;
>> +
>> +	path = btrfs_alloc_path();
>> +	if (!path)
>> +		return -ENOMEM;
>> +
>> +	if (dirid == upper_limit.objectid)
>> +		/*
>> +		 * If the bottom subvolume exists directly under upper limit,
>> +		 * there is no need to construct the path and just get the
>> +		 * subvolume's name
>> +		 */
>> +		goto get_subvol_name;
> 
> Eliminate the label and factor out the code in a new function or just
> put in the body of the if.

ok.

> 
>> +	if (dirid == BTRFS_FIRST_FREE_OBJECTID)
>> +		/* The subvolume does not exist under upper_limit */
>> +		goto access_err;
> 
> just set ret to -EACCESS and goto out. And eliminate access_err label
> altogether. Furthermore, shouldn't this check really ether:
> 
> a) Be the first one in this function so that we return ASAP (i.e. even
> before calling btrfs_alloc_path)
> 
> or
> 
> b) Put in the ioctl function itself. Currently for the

I'd like to adopt b) approach.

> btrfs_ioctl_ino_lookup case it's duplicated in both that function and
> btrfs_search_path_in_tree
> >> +
>> +	ptr = &args->path[BTRFS_INO_LOOKUP_PATH_MAX - 1];
>> +
>> +	key.objectid = args->treeid;
>> +	key.type = BTRFS_ROOT_ITEM_KEY;
>> +	key.offset = (u64)-1;
>> +	root = btrfs_read_fs_root_no_name(info, &key);
>> +	if (IS_ERR(root)) {
>> +		btrfs_err(info, "could not find root %llu", args->treeid);
>> +		ret = -ENOENT;
>> +		goto out;
>> +	}
>> +
>> +	key.objectid = dirid;
>> +	key.type = BTRFS_INODE_REF_KEY;
>> +	key.offset = (u64)-1;
>> +
>> +	while (1) {
>> +		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>> +		if (ret < 0)
>> +			goto out;
>> +		else if (ret > 0) {
>> +			ret = btrfs_previous_item(root, path, dirid,
>> +						  BTRFS_INODE_REF_KEY);
>> +			if (ret < 0)
>> +				goto out;
>> +			else if (ret > 0) {
>> +				ret = -ENOENT;
>> +				goto out;
>> +			}
>> +		}
>> +
>> +		l = path->nodes[0];
>> +		slot = path->slots[0];
>> +		btrfs_item_key_to_cpu(l, &key, slot);
>> +
>> +		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
>> +		len = btrfs_inode_ref_name_len(l, iref);
>> +		ptr -= len + 1;
>> +		total_len += len + 1;
>> +		if (ptr < args->path) {
>> +			ret = -ENAMETOOLONG;
>> +			goto out;
>> +		}
>> +
>> +		*(ptr + len) = '/';
>> +		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
>> +
>> +		/* Check the read permission */
>> +		ret = btrfs_previous_item(root, path, dirid,
>> +					  BTRFS_INODE_ITEM_KEY);
>> +		if (ret < 0) {
>> +			goto out;
>> +		} else if (ret > 0) {
>> +			ret = -ENOENT;
>> +			goto out;
>> +		}
>> +		l = path->nodes[0];
>> +		slot = path->slots[0];
>> +
>> +		btrfs_item_key_to_cpu(l, &key2, slot);
>> +		inode = btrfs_iget(sb, &key2, root, new);
>> +		ret = inode_permission(inode, MAY_READ);
>> +		iput(inode);
>> +		if (ret)
>> +			goto access_err;
>> +
>> +		if (key.offset == BTRFS_FIRST_FREE_OBJECTID ||
>> +		    key.offset == upper_limit.objectid)
>> +			break;
>> +
>> +		btrfs_release_path(path);
>> +		key.objectid = key.offset;
>> +		key.offset = (u64)-1;
>> +		dirid = key.objectid;
>> +	}
>> +	 /* Check if the searched path exists under the upper_limit */
>> +	if (key.offset == BTRFS_FIRST_FREE_OBJECTID &&
>> +	    upper_limit.objectid != BTRFS_FIRST_FREE_OBJECTID)
>> +		goto access_err;
>> +
>> +	memmove(args->path, ptr, total_len);
>> +	args->path[total_len] = '\0';
>> +	btrfs_release_path(path);
>> +	ret = 0;
>> +
>> +get_subvol_name:
>> +	/* Get subolume's name from ROOT_REF */
>> +	key.objectid = BTRFS_ROOT_TREE_OBJECTID;
>> +	key.type = BTRFS_ROOT_ITEM_KEY;
>> +	key.offset = (u64)-1;
>> +	root = btrfs_read_fs_root_no_name(info, &key);
>> +	if (IS_ERR(root)) {
>> +		ret = -ENOENT;
>> +		goto out;
>> +	}
>> +
>> +	key.objectid = args->treeid;
>> +	key.type = BTRFS_ROOT_REF_KEY;
>> +	key.offset = args->subid;
>> +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>> +	if (ret != 0) {
>> +		ret = -ENOENT;
>> +		goto out;
>> +	}
>> +	l = path->nodes[0];
>> +	slot = path->slots[0];
>> +	btrfs_item_key_to_cpu(l, &key, slot);
>> +
>> +	item_off = btrfs_item_ptr_offset(l, slot);
>> +	item_len = btrfs_item_size_nr(l, slot);
>> +	/* Check if dirid in ROOT_REF corresponds to passed arg */
>> +	read_extent_buffer(l, &rref, item_off, sizeof(struct btrfs_root_ref));
>> +	if (rref.dirid != args->dirid) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	/* Copy subvolume's name */
>> +	item_off += sizeof(struct btrfs_root_ref);
>> +	item_len -= sizeof(struct btrfs_root_ref);
>> +	read_extent_buffer(l, args->name, item_off, item_len);
>> +	args->name[item_len] = '\0';
>> +
>> +out:
>> +	btrfs_free_path(path);
>> +	return ret;
>> +
>> +access_err:
>> +	btrfs_free_path(path);
>> +	ret = -EACCES;
>> +	return ret;
>> +}
>> +
>>  static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>>  					   void __user *argp)
>>  {
>> @@ -2467,6 +2640,49 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
>>  	return ret;
>>  }
>>  
>> +/*
>> + * User version of ino_lookup ioctl (unprivileged)
>> + *
>> + * The main differences from original ino_lookup ioctl are:
>> + *   1. Read permission will be checked using inode_permission()
>> + *      during path construction. -EACCES will be returned in case
>> + *      of failure.
>> + *   2. Path construction will be stopped at the inode number which
>> + *      corresponds to the fd with which this ioctl is called. If
>> + *      constructed path does not exist under fd's inode, -EACCES
>> + *      will be returned.
>> + *   3. The name of bottom subvolume is also searched and filled.
>> + */
>> +static noinline int btrfs_ioctl_ino_lookup_user(struct file *file,
>> +					   void __user *argp)
>> +{
>> +	struct btrfs_ioctl_ino_lookup_user_args *args;
>> +	struct inode *inode;
>> +	int ret = 0;
>> +
>> +	args = memdup_user(argp, sizeof(*args));
>> +	if (IS_ERR(args))
>> +		return PTR_ERR(args);
>> +
>> +	inode = file_inode(file);
>> +	/*
>> +	 * args->treeid must be matched with this inode's objectid
>> +	 * to previent for user to search any fs/file tree.
>> +	 */
>> +	if (args->treeid != BTRFS_I(inode)->root->root_key.objectid)
>> +		return -EINVAL;
> 
> Do we want something like :
> if (args->treeid == 0)
>                 args->treeid = BTRFS_I(inode)->root->root_key.objectid;
> 
> or does the ioctl impose a hard requirement for treeid to be set?

My first plan is yes, but this seems unreasonable.
so I will remove the variable 'treeid' from struct ino_lookup_args_user.

>> +
>> +	ret = btrfs_search_path_in_tree_user(BTRFS_I(inode)->root->fs_info,
>> +					inode->i_sb, BTRFS_I(inode)->location,
>> +					args);
> 
> Just pass in the the inode and reference all the parameters inside the
> function. btrfs_search_path_in_tree_user really requires only 2 args:
> inode and 'args'
> 

ok.

>> +
>> +	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
>> +		ret = -EFAULT;
>> +
>> +	kfree(args);
>> +	return ret;
>> +}
>> +
>>  static noinline int search_subvol(struct inode *inode,
>>  				 struct btrfs_ioctl_search_key *sk,
>>  				 size_t *buf_size,
>> @@ -5917,6 +6133,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>>  		return btrfs_ioctl_set_features(file, argp);
>>  	case BTRFS_IOC_GET_SUBVOL_INFO:
>>  		return btrfs_ioctl_get_subvol_info(file, argp);
>> +	case BTRFS_IOC_INO_LOOKUP_USER:
>> +		return btrfs_ioctl_ino_lookup_user(file, argp);
>>  	}
>>  
>>  	return -ENOTTY;
>> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
>> index 1e9361cf66d5..22326d631417 100644
>> --- a/include/uapi/linux/btrfs.h
>> +++ b/include/uapi/linux/btrfs.h
>> @@ -422,6 +422,20 @@ struct btrfs_ioctl_ino_lookup_args {
>>  	char name[BTRFS_INO_LOOKUP_PATH_MAX];
>>  };
>>  
>> +#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4072-BTRFS_VOL_NAME_MAX)
> Where does 4072 come from? Is it related to
> #define BTRFS_INO_LOOKUP_PATH_MAX 4080
> 
> in any way ?
>> +struct btrfs_ioctl_ino_lookup_user_args {
>> +	/* in */
>> +	__u64 treeid;
>> +	/* in, 'dirid' is the same as objectid in btrfs_ioctl_ino_lookup_args */
> 
> Given there is no description of the objectid parameter of
> ino_lookup_args. Your reference to it doesn't really make its purpose
> any more clear.

The variable names in struct ino_lookup_args are rather confusing and I will add a comment too.

Thanks,
Tomohiro Misono

> 
>> +	__u64 dirid;
>> +	/* in, subvolume id which exists under the directory of 'dirid' */
>> +	__u64 subid;
>> +	/* out, name of the subvolume of 'subid' */
>> +	char name[BTRFS_VOL_NAME_MAX];
>> +	/* out, 'path' is the same as 'name' in btrfs_ioctl_ino_lookup_args */
>> +	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
>> +};
>> +
>>  /* Search criteria for the btrfs SEARCH ioctl family. */
>>  struct btrfs_ioctl_search_key {
>>  	/*
>> @@ -845,5 +859,7 @@ enum btrfs_err_code {
>>  					struct btrfs_ioctl_logical_ino_args)
>>  #define BTRFS_IOC_GET_SUBVOL_INFO _IOWR(BTRFS_IOCTL_MAGIC, 60, \
>>  					struct btrfs_ioctl_search_args)
>> +#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 61, \
>> +					struct btrfs_ioctl_ino_lookup_args)
>>  
>>  #endif /* _UAPI_LINUX_BTRFS_H */
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2018-03-09  1:29 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-06  8:29 [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc Misono, Tomohiro
2018-03-06  8:30 ` [PATCH 1/2] btrfs: Add unprivileged subvolume search ioctl Misono, Tomohiro
2018-03-06 20:29   ` Goffredo Baroncelli
2018-03-07  0:40     ` Misono, Tomohiro
2018-03-07 16:36       ` David Sterba
2018-03-07 19:00       ` Goffredo Baroncelli
2018-03-09  1:10         ` Misono, Tomohiro
2018-03-08  8:29   ` Nikolay Borisov
2018-03-09  1:16     ` Misono, Tomohiro
2018-03-06  8:31 ` [PATCH 2/2] btrfs: Add unprivileged version of ino_lookup ioctl Misono, Tomohiro
2018-03-07  0:42   ` Misono, Tomohiro
2018-03-08  9:47   ` Nikolay Borisov
2018-03-09  1:29     ` Misono, Tomohiro
2018-03-06 16:51 ` [PATCH 0/2] btrfs: Add two new unprivileged ioctls to allow normal users to call "sub list/show" etc David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.