* [PATCH 1/2][v3] btrfs: fix readdir deadlock with pagefault
@ 2017-08-01 19:50 josef
2017-08-01 19:51 ` [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index josef
0 siblings, 1 reply; 4+ messages in thread
From: josef @ 2017-08-01 19:50 UTC (permalink / raw)
To: kernel-team, linux-btrfs; +Cc: Josef Bacik
From: Josef Bacik <jbacik@fb.com>
Readdir does dir_emit while under the btree lock. dir_emit can trigger
the page fault which means we can deadlock. Fix this by allocating a
buffer on opening a directory and copying the readdir into this buffer
and doing dir_emit from outside of the tree lock.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
v2->v3: actually set the filp->private_data properly for ioctl trans.
fs/btrfs/ctree.h | 5 +++
fs/btrfs/file.c | 9 ++++-
fs/btrfs/inode.c | 107 +++++++++++++++++++++++++++++++++++++++++--------------
fs/btrfs/ioctl.c | 22 ++++++++----
4 files changed, 109 insertions(+), 34 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5ee9f10..33e942b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1264,6 +1264,11 @@ struct btrfs_root {
atomic64_t qgroup_meta_rsv;
};
+struct btrfs_file_private {
+ struct btrfs_trans_handle *trans;
+ void *filldir_buf;
+};
+
static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
{
return btrfs_sb(inode->i_sb)->sectorsize;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0f102a1..1897c3b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1973,8 +1973,15 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
int btrfs_release_file(struct inode *inode, struct file *filp)
{
- if (filp->private_data)
+ struct btrfs_file_private *private = filp->private_data;
+
+ if (private && private->trans)
btrfs_ioctl_trans_end(filp);
+ if (private && private->filldir_buf)
+ kfree(private->filldir_buf);
+ kfree(private);
+ filp->private_data = NULL;
+
/*
* ordered_data_close is set by settattr when we are about to truncate
* a file from a non-zero size to a zero size. This tries to
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9a4413a..bbdbeea 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5877,25 +5877,73 @@ unsigned char btrfs_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
+/*
+ * All this infrastructure exists because dir_emit can fault, and we are holding
+ * the tree lock when doing readdir. For now just allocate a buffer and copy
+ * our information into that, and then dir_emit from the buffer. This is
+ * similar to what NFS does, only we don't keep the buffer around in pagecache
+ * because I'm afraid I'll fuck that up. Long term we need to make filldir do
+ * copy_to_user_inatomic so we don't have to worry about page faulting under the
+ * tree lock.
+ */
+static int btrfs_opendir(struct inode *inode, struct file *file)
+{
+ struct btrfs_file_private *private;
+
+ private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
+ if (!private)
+ return -ENOMEM;
+ private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!private->filldir_buf) {
+ kfree(private);
+ return -ENOMEM;
+ }
+ file->private_data = private;
+ return 0;
+}
+
+struct dir_entry {
+ u64 ino;
+ u64 offset;
+ unsigned type;
+ int name_len;
+};
+
+static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
+{
+ while (entries--) {
+ struct dir_entry *entry = addr;
+ char *name = (char *)(entry + 1);
+ ctx->pos = entry->offset;
+ if (!dir_emit(ctx, name, entry->name_len, entry->ino,
+ entry->type))
+ return 1;
+ addr += sizeof(struct dir_entry) + entry->name_len;
+ ctx->pos++;
+ }
+ return 0;
+}
+
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_file_private *private = file->private_data;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path *path;
+ void *addr;
struct list_head ins_list;
struct list_head del_list;
int ret;
struct extent_buffer *leaf;
int slot;
- unsigned char d_type;
- int over = 0;
- char tmp_name[32];
char *name_ptr;
int name_len;
+ int entries = 0;
+ int total_len = 0;
bool put = false;
struct btrfs_key location;
@@ -5906,12 +5954,14 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (!path)
return -ENOMEM;
+ addr = private->filldir_buf;
path->reada = READA_FORWARD;
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+again:
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = ctx->pos;
key.objectid = btrfs_ino(BTRFS_I(inode));
@@ -5921,6 +5971,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
goto err;
while (1) {
+ struct dir_entry *entry;
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
@@ -5942,41 +5993,44 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
goto next;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
-
- ctx->pos = found_key.offset;
-
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(fs_info, leaf, slot, di))
goto next;
name_len = btrfs_dir_name_len(leaf, di);
- if (name_len <= sizeof(tmp_name)) {
- name_ptr = tmp_name;
- } else {
- name_ptr = kmalloc(name_len, GFP_KERNEL);
- if (!name_ptr) {
- ret = -ENOMEM;
- goto err;
- }
+ if ((total_len + sizeof(struct dir_entry) + name_len) >=
+ PAGE_SIZE) {
+ btrfs_release_path(path);
+ ret = btrfs_filldir(private->filldir_buf, entries,
+ ctx);
+ if (ret)
+ goto nopos;
+ addr = private->filldir_buf;
+ entries = 0;
+ total_len = 0;
+ goto again;
}
+
+ entry = addr;
+ entry->name_len = name_len;
+ name_ptr = (char *)(entry + 1);
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
name_len);
-
- d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+ entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
-
- over = !dir_emit(ctx, name_ptr, name_len, location.objectid,
- d_type);
-
- if (name_ptr != tmp_name)
- kfree(name_ptr);
-
- if (over)
- goto nopos;
- ctx->pos++;
+ entry->ino = location.objectid;
+ entry->offset = found_key.offset;
+ entries++;
+ addr += sizeof(struct dir_entry) + name_len;
+ total_len += sizeof(struct dir_entry) + name_len;
next:
path->slots[0]++;
}
+ btrfs_release_path(path);
+
+ ret = btrfs_filldir(private->filldir_buf, entries, ctx);
+ if (ret)
+ goto nopos;
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
if (ret)
@@ -10777,6 +10831,7 @@ static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = btrfs_real_readdir,
+ .open = btrfs_opendir,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bedeec6..2c79ba0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3966,6 +3966,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
+ struct btrfs_file_private *private;
int ret;
ret = -EPERM;
@@ -3973,8 +3974,16 @@ static long btrfs_ioctl_trans_start(struct file *file)
goto out;
ret = -EINPROGRESS;
- if (file->private_data)
+ private = file->private_data;
+ if (private && private->trans)
goto out;
+ if (!private) {
+ private = kzalloc(sizeof(struct btrfs_file_private),
+ GFP_KERNEL);
+ if (!private)
+ return -ENOMEM;
+ file->private_data = private;
+ }
ret = -EROFS;
if (btrfs_root_readonly(root))
@@ -3991,7 +4000,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
if (IS_ERR(trans))
goto out_drop;
- file->private_data = trans;
+ private->trans = trans;
return 0;
out_drop:
@@ -4246,14 +4255,13 @@ long btrfs_ioctl_trans_end(struct file *file)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
+ struct btrfs_file_private *private = file->private_data;
- trans = file->private_data;
- if (!trans)
+ if (!private || !private->trans)
return -EINVAL;
- file->private_data = NULL;
- btrfs_end_transaction(trans);
+ btrfs_end_transaction(private->trans);
+ private->trans = NULL;
atomic_dec(&root->fs_info->open_ioctl_trans);
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index
2017-08-01 19:50 [PATCH 1/2][v3] btrfs: fix readdir deadlock with pagefault josef
@ 2017-08-01 19:51 ` josef
0 siblings, 0 replies; 4+ messages in thread
From: josef @ 2017-08-01 19:51 UTC (permalink / raw)
To: kernel-team, linux-btrfs; +Cc: Josef Bacik
From: Josef Bacik <jbacik@fb.com>
Our dir_context->pos is supposed to hold the next position we're
supposed to look. If we successfully insert a delayed dir index we
could end up with a duplicate entry because we don't increase ctx->pos
after doing the dir_emit.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
fs/btrfs/delayed-inode.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8ae409b..19e4ad2 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1727,6 +1727,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
if (over)
return 1;
+ ctx->pos++;
}
return 0;
}
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index
2017-07-24 19:14 ` [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index josef
@ 2017-07-28 17:33 ` Liu Bo
0 siblings, 0 replies; 4+ messages in thread
From: Liu Bo @ 2017-07-28 17:33 UTC (permalink / raw)
To: josef; +Cc: linux-btrfs, kernel-team, Josef Bacik
On Mon, Jul 24, 2017 at 03:14:26PM -0400, josef@toxicpanda.com wrote:
> From: Josef Bacik <jbacik@fb.com>
>
> Our dir_context->pos is supposed to hold the next position we're
> supposed to look. If we successfully insert a delayed dir index we
> could end up with a duplicate entry because we don't increase ctx->pos
> after doing the dir_emit.
>
Looks good.
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Thanks,
-liubo
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
> fs/btrfs/delayed-inode.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
> index 8ae409b..19e4ad2 100644
> --- a/fs/btrfs/delayed-inode.c
> +++ b/fs/btrfs/delayed-inode.c
> @@ -1727,6 +1727,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
>
> if (over)
> return 1;
> + ctx->pos++;
> }
> return 0;
> }
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index
2017-07-24 19:14 [PATCH 1/2][v2] btrfs: fix readdir deadlock with pagefault josef
@ 2017-07-24 19:14 ` josef
2017-07-28 17:33 ` Liu Bo
0 siblings, 1 reply; 4+ messages in thread
From: josef @ 2017-07-24 19:14 UTC (permalink / raw)
To: linux-btrfs, kernel-team; +Cc: Josef Bacik
From: Josef Bacik <jbacik@fb.com>
Our dir_context->pos is supposed to hold the next position we're
supposed to look. If we successfully insert a delayed dir index we
could end up with a duplicate entry because we don't increase ctx->pos
after doing the dir_emit.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
fs/btrfs/delayed-inode.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8ae409b..19e4ad2 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1727,6 +1727,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
if (over)
return 1;
+ ctx->pos++;
}
return 0;
}
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-08-01 19:51 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-01 19:50 [PATCH 1/2][v3] btrfs: fix readdir deadlock with pagefault josef
2017-08-01 19:51 ` [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index josef
-- strict thread matches above, loose matches on Subject: below --
2017-07-24 19:14 [PATCH 1/2][v2] btrfs: fix readdir deadlock with pagefault josef
2017-07-24 19:14 ` [PATCH 2/2] btrfs: increase ctx->pos for delayed dir index josef
2017-07-28 17:33 ` Liu Bo
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.