* [PATCH 1/7] btrfs: defrag: pass file_ra_state instead of file for btrfs_defrag_file()
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
2021-05-28 9:46 ` Johannes Thumshirn
2021-05-28 2:28 ` [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper Qu Wenruo
` (5 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
Currently btrfs_defrag_file() accepts both "struct inode" and "struct
file" as parameters, which can't be more confusing.
We can easily grab "struct inode" from "struct file" using the
file_inode() helper.
The reason why we need "struct file" is just to re-use its f_ra.
This patch will change this by passing "struct file_ra_state" parameter,
so that it's more clear what we really want.
Since we're here, also add some comments to the function
btrfs_defrag_file() to make later readers less miserable.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ctree.h | 4 ++--
fs/btrfs/ioctl.c | 27 ++++++++++++++++++---------
2 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 45798e68331a..918df8877b45 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3212,9 +3212,9 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(u8 *uuid);
-int btrfs_defrag_file(struct inode *inode, struct file *file,
+int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
- u64 newer_than, unsigned long max_pages);
+ u64 newer_than, unsigned long max_to_defrag);
void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 66f259895914..67ef9c535eb5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1354,13 +1354,22 @@ static int cluster_pages_for_defrag(struct inode *inode,
}
-int btrfs_defrag_file(struct inode *inode, struct file *file,
+/*
+ * Btrfs entrance for defrag.
+ *
+ * @inode: Inode to be defraged
+ * @ra: Readahead state. If NULL, one will be allocated at runtime.
+ * @range: Defrag options including range and flags.
+ * @newer_than: Minimal transid to defrag
+ * @max_to_defrag: Max number of sectors to be defraged, if 0, the whole inode
+ * will be defraged.
+ */
+int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_to_defrag)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct file_ra_state *ra = NULL;
unsigned long last_index;
u64 isize = i_size_read(inode);
u64 last_len = 0;
@@ -1378,6 +1387,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
u64 new_align = ~((u64)SZ_128K - 1);
struct page **pages = NULL;
bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;
+ bool ra_allocated = false;
if (isize == 0)
return 0;
@@ -1396,16 +1406,15 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
extent_thresh = SZ_256K;
/*
- * If we were not given a file, allocate a readahead context. As
+ * If we were not given a ra, allocate a readahead context. As
* readahead is just an optimization, defrag will work without it so
* we don't error out.
*/
- if (!file) {
+ if (!ra) {
+ ra_allocated = true;
ra = kzalloc(sizeof(*ra), GFP_KERNEL);
if (ra)
file_ra_state_init(ra, inode->i_mapping);
- } else {
- ra = &file->f_ra;
}
pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
@@ -1487,7 +1496,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra_index = max(i, ra_index);
if (ra)
page_cache_sync_readahead(inode->i_mapping, ra,
- file, ra_index, cluster);
+ NULL, ra_index, cluster);
ra_index += cluster;
}
@@ -1558,7 +1567,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
btrfs_inode_unlock(inode, 0);
}
- if (!file)
+ if (ra_allocated)
kfree(ra);
kfree(pages);
return ret;
@@ -3074,7 +3083,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
/* the rest are all set to zero by kzalloc */
range->len = (u64)-1;
}
- ret = btrfs_defrag_file(file_inode(file), file,
+ ret = btrfs_defrag_file(file_inode(file), &file->f_ra,
range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
2021-05-28 2:28 ` [PATCH 1/7] btrfs: defrag: pass file_ra_state instead of file for btrfs_defrag_file() Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
2021-05-28 10:23 ` Johannes Thumshirn
2021-05-28 2:28 ` [PATCH 3/7] btrfs: defrag: introduce a new helper to collect target file extents Qu Wenruo
` (4 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
In cluster_pages_for_defrag(), we have complex code block inside one
for() loop.
The code block is to prepare one page for defrag, this will ensure:
- The page is locked and set up properly
- No ordered extent exists in the page range
- The page is uptodate
- The writeback has finished
This behavior is pretty common and will be reused by later defrag
rework.
So extract the code into its own helper, defrag_prepare_one_page(), for
later usage, and cleanup the code by a little.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ioctl.c | 151 +++++++++++++++++++++++++++--------------------
1 file changed, 86 insertions(+), 65 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 67ef9c535eb5..ba69991bca10 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1144,6 +1144,89 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
return ret;
}
+/*
+ * Prepare one page to be defraged.
+ *
+ * This will ensure:
+ * - Returned page is locked and has been set up properly
+ * - No ordered extent exists in the page
+ * - The page is uptodate
+ * - The writeback has finished
+ */
+static struct page *defrag_prepare_one_page(struct btrfs_inode *inode,
+ unsigned long index)
+{
+ gfp_t mask = btrfs_alloc_write_mask(inode->vfs_inode.i_mapping);
+ u64 page_start = index << PAGE_SHIFT;
+ u64 page_end = page_start + PAGE_SIZE - 1;
+ struct extent_state *cached_state = NULL;
+ struct page *page;
+ int ret;
+
+again:
+ page = find_or_create_page(inode->vfs_inode.i_mapping, index, mask);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ ret = set_page_extent_mapped(page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(ret);
+ }
+
+ /* Wait for any exists ordered extent in the range */
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+
+ lock_extent_bits(&inode->io_tree, page_start, page_end,
+ &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, page_start,
+ PAGE_SIZE);
+ unlock_extent_cached(&inode->io_tree, page_start, page_end,
+ &cached_state);
+ if (!ordered)
+ break;
+
+ unlock_page(page);
+ btrfs_start_ordered_extent(ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ lock_page(page);
+ /*
+ * we unlocked the page above, so we need check if
+ * it was released or not.
+ */
+ if (page->mapping != inode->vfs_inode.i_mapping ||
+ !PagePrivate(page)) {
+ unlock_page(page);
+ put_page(page);
+ goto again;
+ }
+ }
+
+ /*
+ * Now the page range has no ordered extent any more.
+ * Read the page to make it uptodate.
+ */
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (page->mapping != inode->vfs_inode.i_mapping ||
+ !PagePrivate(page)) {
+ unlock_page(page);
+ put_page(page);
+ goto again;
+ }
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ }
+ wait_on_page_writeback(page);
+ return page;
+}
+
/*
* it doesn't do much good to defrag one or two pages
* at a time. This pulls in a nice chunk of pages
@@ -1172,11 +1255,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
int ret;
int i;
int i_done;
- struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
- struct extent_io_tree *tree;
struct extent_changeset *data_reserved = NULL;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
file_end = (isize - 1) >> PAGE_SHIFT;
if (!isize || start_index > file_end)
@@ -1189,68 +1269,16 @@ static int cluster_pages_for_defrag(struct inode *inode,
if (ret)
return ret;
i_done = 0;
- tree = &BTRFS_I(inode)->io_tree;
/* step one, lock all the pages */
for (i = 0; i < page_cnt; i++) {
struct page *page;
-again:
- page = find_or_create_page(inode->i_mapping,
- start_index + i, mask);
- if (!page)
- break;
- ret = set_page_extent_mapped(page);
- if (ret < 0) {
- unlock_page(page);
- put_page(page);
+ page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
break;
}
-
- page_start = page_offset(page);
- page_end = page_start + PAGE_SIZE - 1;
- while (1) {
- lock_extent_bits(tree, page_start, page_end,
- &cached_state);
- ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
- page_start);
- unlock_extent_cached(tree, page_start, page_end,
- &cached_state);
- if (!ordered)
- break;
-
- unlock_page(page);
- btrfs_start_ordered_extent(ordered, 1);
- btrfs_put_ordered_extent(ordered);
- lock_page(page);
- /*
- * we unlocked the page above, so we need check if
- * it was released or not.
- */
- if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
- unlock_page(page);
- put_page(page);
- goto again;
- }
- }
-
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- put_page(page);
- ret = -EIO;
- break;
- }
- }
-
- if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
- unlock_page(page);
- put_page(page);
- goto again;
- }
-
pages[i] = page;
i_done++;
}
@@ -1260,13 +1288,6 @@ static int cluster_pages_for_defrag(struct inode *inode,
if (!(inode->i_sb->s_flags & SB_ACTIVE))
goto out;
- /*
- * so now we have a nice long stream of locked
- * and up to date pages, lets wait on them
- */
- for (i = 0; i < i_done; i++)
- wait_on_page_writeback(pages[i]);
-
page_start = page_offset(pages[0]);
page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper
2021-05-28 2:28 ` [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper Qu Wenruo
@ 2021-05-28 10:23 ` Johannes Thumshirn
2021-05-28 10:36 ` Qu Wenruo
0 siblings, 1 reply; 14+ messages in thread
From: Johannes Thumshirn @ 2021-05-28 10:23 UTC (permalink / raw)
To: Qu Wenruo, linux-btrfs
On 28/05/2021 04:28, Qu Wenruo wrote:
> In cluster_pages_for_defrag(), we have complex code block inside one
> for() loop.
>
> The code block is to prepare one page for defrag, this will ensure:
> - The page is locked and set up properly
> - No ordered extent exists in the page range
> - The page is uptodate
> - The writeback has finished
>
> This behavior is pretty common and will be reused by later defrag
> rework.
>
> So extract the code into its own helper, defrag_prepare_one_page(), for
> later usage, and cleanup the code by a little.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> fs/btrfs/ioctl.c | 151 +++++++++++++++++++++++++++--------------------
> 1 file changed, 86 insertions(+), 65 deletions(-)
>
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 67ef9c535eb5..ba69991bca10 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1144,6 +1144,89 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
> return ret;
> }
>
> +/*
> + * Prepare one page to be defraged.
> + *
> + * This will ensure:
> + * - Returned page is locked and has been set up properly
> + * - No ordered extent exists in the page
> + * - The page is uptodate
> + * - The writeback has finished
> + */
> +static struct page *defrag_prepare_one_page(struct btrfs_inode *inode,
> + unsigned long index)
> +{
struct address_space *mapping = inode->vfs_inode.i_mapping;
gfp_t mask = btrfs_alloc_write_mask(mapping);
> + gfp_t mask = btrfs_alloc_write_mask(inode->vfs_inode.i_mapping);
> + u64 page_start = index << PAGE_SHIFT;
> + u64 page_end = page_start + PAGE_SIZE - 1;
> + struct extent_state *cached_state = NULL;
> + struct page *page;
> + int ret;
> +
> +again:
> + page = find_or_create_page(inode->vfs_inode.i_mapping, index, mask);
page = find_or_create_page(mapping, index, mask);
While you're at it?
> + if (!page)
> + return ERR_PTR(-ENOMEM);
> +
> + ret = set_page_extent_mapped(page);
> + if (ret < 0) {
> + unlock_page(page);
> + put_page(page);
> + return ERR_PTR(ret);
> + }
> +
> + /* Wait for any exists ordered extent in the range */
> + while (1) {
> + struct btrfs_ordered_extent *ordered;
> +
> + lock_extent_bits(&inode->io_tree, page_start, page_end,
> + &cached_state);
> + ordered = btrfs_lookup_ordered_range(inode, page_start,
> + PAGE_SIZE);
> + unlock_extent_cached(&inode->io_tree, page_start, page_end,
> + &cached_state);
> + if (!ordered)
> + break;
> +
> + unlock_page(page);
> + btrfs_start_ordered_extent(ordered, 1);
> + btrfs_put_ordered_extent(ordered);
> + lock_page(page);
> + /*
> + * we unlocked the page above, so we need check if
> + * it was released or not.
> + */
> + if (page->mapping != inode->vfs_inode.i_mapping ||
if (page->mapping != mapping ||
> + !PagePrivate(page)) {
> + unlock_page(page);
> + put_page(page);
> + goto again;
> + }
> + }
> +
> + /*
> + * Now the page range has no ordered extent any more.
> + * Read the page to make it uptodate.
> + */
> + if (!PageUptodate(page)) {
> + btrfs_readpage(NULL, page);
> + lock_page(page);
> + if (page->mapping != inode->vfs_inode.i_mapping ||
if (page->mapping != mapping ||
> + !PagePrivate(page)) {
> + unlock_page(page);
> + put_page(page);
> + goto again;
> + }
> + if (!PageUptodate(page)) {
> + unlock_page(page);
> + put_page(page);
> + return ERR_PTR(-EIO);
> + }
> + }
> + wait_on_page_writeback(page);
> + return page;
> +}
> +
> /*
> * it doesn't do much good to defrag one or two pages
> * at a time. This pulls in a nice chunk of pages
> @@ -1172,11 +1255,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
> int ret;
> int i;
> int i_done;
> - struct btrfs_ordered_extent *ordered;
> struct extent_state *cached_state = NULL;
> - struct extent_io_tree *tree;
> struct extent_changeset *data_reserved = NULL;
> - gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
>
> file_end = (isize - 1) >> PAGE_SHIFT;
> if (!isize || start_index > file_end)
> @@ -1189,68 +1269,16 @@ static int cluster_pages_for_defrag(struct inode *inode,
> if (ret)
> return ret;
> i_done = 0;
> - tree = &BTRFS_I(inode)->io_tree;
>
> /* step one, lock all the pages */
> for (i = 0; i < page_cnt; i++) {
> struct page *page;
> -again:
> - page = find_or_create_page(inode->i_mapping,
> - start_index + i, mask);
> - if (!page)
> - break;
>
> - ret = set_page_extent_mapped(page);
> - if (ret < 0) {
> - unlock_page(page);
> - put_page(page);
> + page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i);
> + if (IS_ERR(page)) {
> + ret = PTR_ERR(page);
> break;
> }
> -
> - page_start = page_offset(page);
> - page_end = page_start + PAGE_SIZE - 1;
> - while (1) {
> - lock_extent_bits(tree, page_start, page_end,
> - &cached_state);
> - ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
> - page_start);
> - unlock_extent_cached(tree, page_start, page_end,
> - &cached_state);
> - if (!ordered)
> - break;
> -
> - unlock_page(page);
> - btrfs_start_ordered_extent(ordered, 1);
> - btrfs_put_ordered_extent(ordered);
> - lock_page(page);
> - /*
> - * we unlocked the page above, so we need check if
> - * it was released or not.
> - */
> - if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
> - unlock_page(page);
> - put_page(page);
> - goto again;
> - }
> - }
> -
> - if (!PageUptodate(page)) {
> - btrfs_readpage(NULL, page);
> - lock_page(page);
> - if (!PageUptodate(page)) {
> - unlock_page(page);
> - put_page(page);
> - ret = -EIO;
> - break;
> - }
> - }
> -
> - if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
> - unlock_page(page);
> - put_page(page);
> - goto again;
> - }
> -
> pages[i] = page;
> i_done++;
> }
> @@ -1260,13 +1288,6 @@ static int cluster_pages_for_defrag(struct inode *inode,
> if (!(inode->i_sb->s_flags & SB_ACTIVE))
> goto out;
>
> - /*
> - * so now we have a nice long stream of locked
> - * and up to date pages, lets wait on them
> - */
> - for (i = 0; i < i_done; i++)
> - wait_on_page_writeback(pages[i]);
> -
Doesn't this introduce a behavioral change? previously we didn't
wait for page writeback in case of a parallel unmount, now we do.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper
2021-05-28 10:23 ` Johannes Thumshirn
@ 2021-05-28 10:36 ` Qu Wenruo
2021-05-28 10:38 ` Johannes Thumshirn
0 siblings, 1 reply; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 10:36 UTC (permalink / raw)
To: Johannes Thumshirn, linux-btrfs
On 2021/5/28 下午6:23, Johannes Thumshirn wrote:
> On 28/05/2021 04:28, Qu Wenruo wrote:
>> In cluster_pages_for_defrag(), we have complex code block inside one
>> for() loop.
>>
>> The code block is to prepare one page for defrag, this will ensure:
>> - The page is locked and set up properly
>> - No ordered extent exists in the page range
>> - The page is uptodate
>> - The writeback has finished
>>
>> This behavior is pretty common and will be reused by later defrag
>> rework.
>>
>> So extract the code into its own helper, defrag_prepare_one_page(), for
>> later usage, and cleanup the code by a little.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>> fs/btrfs/ioctl.c | 151 +++++++++++++++++++++++++++--------------------
>> 1 file changed, 86 insertions(+), 65 deletions(-)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 67ef9c535eb5..ba69991bca10 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -1144,6 +1144,89 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
>> return ret;
>> }
>>
>> +/*
>> + * Prepare one page to be defraged.
>> + *
>> + * This will ensure:
>> + * - Returned page is locked and has been set up properly
>> + * - No ordered extent exists in the page
>> + * - The page is uptodate
>> + * - The writeback has finished
>> + */
>> +static struct page *defrag_prepare_one_page(struct btrfs_inode *inode,
>> + unsigned long index)
>> +{
>
> struct address_space *mapping = inode->vfs_inode.i_mapping;
> gfp_t mask = btrfs_alloc_write_mask(mapping);
>
>> + gfp_t mask = btrfs_alloc_write_mask(inode->vfs_inode.i_mapping);
>> + u64 page_start = index << PAGE_SHIFT;
>> + u64 page_end = page_start + PAGE_SIZE - 1;
>> + struct extent_state *cached_state = NULL;
>> + struct page *page;
>> + int ret;
>> +
>> +again:
>> + page = find_or_create_page(inode->vfs_inode.i_mapping, index, mask);
>
> page = find_or_create_page(mapping, index, mask);
>
> While you're at it?
>
>> + if (!page)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + ret = set_page_extent_mapped(page);
>> + if (ret < 0) {
>> + unlock_page(page);
>> + put_page(page);
>> + return ERR_PTR(ret);
>> + }
>> +
>> + /* Wait for any exists ordered extent in the range */
>> + while (1) {
>> + struct btrfs_ordered_extent *ordered;
>> +
>> + lock_extent_bits(&inode->io_tree, page_start, page_end,
>> + &cached_state);
>> + ordered = btrfs_lookup_ordered_range(inode, page_start,
>> + PAGE_SIZE);
>> + unlock_extent_cached(&inode->io_tree, page_start, page_end,
>> + &cached_state);
>> + if (!ordered)
>> + break;
>> +
>> + unlock_page(page);
>> + btrfs_start_ordered_extent(ordered, 1);
>> + btrfs_put_ordered_extent(ordered);
>> + lock_page(page);
>> + /*
>> + * we unlocked the page above, so we need check if
>> + * it was released or not.
>> + */
>> + if (page->mapping != inode->vfs_inode.i_mapping ||
>
> if (page->mapping != mapping ||
>
>> + !PagePrivate(page)) {
>> + unlock_page(page);
>> + put_page(page);
>> + goto again;
>> + }
>> + }
>> +
>> + /*
>> + * Now the page range has no ordered extent any more.
>> + * Read the page to make it uptodate.
>> + */
>> + if (!PageUptodate(page)) {
>> + btrfs_readpage(NULL, page);
>> + lock_page(page);
>> + if (page->mapping != inode->vfs_inode.i_mapping ||
>
> if (page->mapping != mapping ||
>
>
>> + !PagePrivate(page)) {
>> + unlock_page(page);
>> + put_page(page);
>> + goto again;
>> + }
>> + if (!PageUptodate(page)) {
>> + unlock_page(page);
>> + put_page(page);
>> + return ERR_PTR(-EIO);
>> + }
>> + }
>> + wait_on_page_writeback(page);
>> + return page;
>> +}
>> +
>> /*
>> * it doesn't do much good to defrag one or two pages
>> * at a time. This pulls in a nice chunk of pages
>> @@ -1172,11 +1255,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
>> int ret;
>> int i;
>> int i_done;
>> - struct btrfs_ordered_extent *ordered;
>> struct extent_state *cached_state = NULL;
>> - struct extent_io_tree *tree;
>> struct extent_changeset *data_reserved = NULL;
>> - gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
>>
>> file_end = (isize - 1) >> PAGE_SHIFT;
>> if (!isize || start_index > file_end)
>> @@ -1189,68 +1269,16 @@ static int cluster_pages_for_defrag(struct inode *inode,
>> if (ret)
>> return ret;
>> i_done = 0;
>> - tree = &BTRFS_I(inode)->io_tree;
>>
>> /* step one, lock all the pages */
>> for (i = 0; i < page_cnt; i++) {
>> struct page *page;
>> -again:
>> - page = find_or_create_page(inode->i_mapping,
>> - start_index + i, mask);
>> - if (!page)
>> - break;
>>
>> - ret = set_page_extent_mapped(page);
>> - if (ret < 0) {
>> - unlock_page(page);
>> - put_page(page);
>> + page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i);
>> + if (IS_ERR(page)) {
>> + ret = PTR_ERR(page);
>> break;
>> }
>> -
>> - page_start = page_offset(page);
>> - page_end = page_start + PAGE_SIZE - 1;
>> - while (1) {
>> - lock_extent_bits(tree, page_start, page_end,
>> - &cached_state);
>> - ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
>> - page_start);
>> - unlock_extent_cached(tree, page_start, page_end,
>> - &cached_state);
>> - if (!ordered)
>> - break;
>> -
>> - unlock_page(page);
>> - btrfs_start_ordered_extent(ordered, 1);
>> - btrfs_put_ordered_extent(ordered);
>> - lock_page(page);
>> - /*
>> - * we unlocked the page above, so we need check if
>> - * it was released or not.
>> - */
>> - if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
>> - unlock_page(page);
>> - put_page(page);
>> - goto again;
>> - }
>> - }
>> -
>> - if (!PageUptodate(page)) {
>> - btrfs_readpage(NULL, page);
>> - lock_page(page);
>> - if (!PageUptodate(page)) {
>> - unlock_page(page);
>> - put_page(page);
>> - ret = -EIO;
>> - break;
>> - }
>> - }
>> -
>> - if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
>> - unlock_page(page);
>> - put_page(page);
>> - goto again;
>> - }
>> -
>> pages[i] = page;
>> i_done++;
>> }
>> @@ -1260,13 +1288,6 @@ static int cluster_pages_for_defrag(struct inode *inode,
>> if (!(inode->i_sb->s_flags & SB_ACTIVE))
>> goto out;
>>
>> - /*
>> - * so now we have a nice long stream of locked
>> - * and up to date pages, lets wait on them
>> - */
>> - for (i = 0; i < i_done; i++)
>> - wait_on_page_writeback(pages[i]);
>> -
>
> Doesn't this introduce a behavioral change? previously we didn't
> wait for page writeback in case of a parallel unmount, now we do.
The behavior only changes when we wait; we still check SB_ACTIVE.
IMHO the extra wait shouldn't cause much difference for unmount.
Thanks,
Qu
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper
2021-05-28 10:36 ` Qu Wenruo
@ 2021-05-28 10:38 ` Johannes Thumshirn
0 siblings, 0 replies; 14+ messages in thread
From: Johannes Thumshirn @ 2021-05-28 10:38 UTC (permalink / raw)
To: Qu Wenruo, linux-btrfs
On 28/05/2021 12:36, Qu Wenruo wrote:
>
>
> On 2021/5/28 下午6:23, Johannes Thumshirn wrote:
>> On 28/05/2021 04:28, Qu Wenruo wrote:
>>> In cluster_pages_for_defrag(), we have complex code block inside one
>>> for() loop.
>>>
>>> The code block is to prepare one page for defrag, this will ensure:
>>> - The page is locked and set up properly
>>> - No ordered extent exists in the page range
>>> - The page is uptodate
>>> - The writeback has finished
>>>
>>> This behavior is pretty common and will be reused by later defrag
>>> rework.
>>>
>>> So extract the code into its own helper, defrag_prepare_one_page(), for
>>> later usage, and cleanup the code by a little.
>>>
>>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>>> ---
>>> fs/btrfs/ioctl.c | 151 +++++++++++++++++++++++++++--------------------
>>> 1 file changed, 86 insertions(+), 65 deletions(-)
>>>
>>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>>> index 67ef9c535eb5..ba69991bca10 100644
>>> --- a/fs/btrfs/ioctl.c
>>> +++ b/fs/btrfs/ioctl.c
>>> @@ -1144,6 +1144,89 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
>>> return ret;
>>> }
>>>
>>> +/*
>>> + * Prepare one page to be defraged.
>>> + *
>>> + * This will ensure:
>>> + * - Returned page is locked and has been set up properly
>>> + * - No ordered extent exists in the page
>>> + * - The page is uptodate
>>> + * - The writeback has finished
>>> + */
>>> +static struct page *defrag_prepare_one_page(struct btrfs_inode *inode,
>>> + unsigned long index)
>>> +{
>>
>> struct address_space *mapping = inode->vfs_inode.i_mapping;
>> gfp_t mask = btrfs_alloc_write_mask(mapping);
>>
>>> + gfp_t mask = btrfs_alloc_write_mask(inode->vfs_inode.i_mapping);
>>> + u64 page_start = index << PAGE_SHIFT;
>>> + u64 page_end = page_start + PAGE_SIZE - 1;
>>> + struct extent_state *cached_state = NULL;
>>> + struct page *page;
>>> + int ret;
>>> +
>>> +again:
>>> + page = find_or_create_page(inode->vfs_inode.i_mapping, index, mask);
>>
>> page = find_or_create_page(mapping, index, mask);
>>
>> While you're at it?
>>
>>> + if (!page)
>>> + return ERR_PTR(-ENOMEM);
>>> +
>>> + ret = set_page_extent_mapped(page);
>>> + if (ret < 0) {
>>> + unlock_page(page);
>>> + put_page(page);
>>> + return ERR_PTR(ret);
>>> + }
>>> +
>>> + /* Wait for any exists ordered extent in the range */
>>> + while (1) {
>>> + struct btrfs_ordered_extent *ordered;
>>> +
>>> + lock_extent_bits(&inode->io_tree, page_start, page_end,
>>> + &cached_state);
>>> + ordered = btrfs_lookup_ordered_range(inode, page_start,
>>> + PAGE_SIZE);
>>> + unlock_extent_cached(&inode->io_tree, page_start, page_end,
>>> + &cached_state);
>>> + if (!ordered)
>>> + break;
>>> +
>>> + unlock_page(page);
>>> + btrfs_start_ordered_extent(ordered, 1);
>>> + btrfs_put_ordered_extent(ordered);
>>> + lock_page(page);
>>> + /*
>>> + * we unlocked the page above, so we need check if
>>> + * it was released or not.
>>> + */
>>> + if (page->mapping != inode->vfs_inode.i_mapping ||
>>
>> if (page->mapping != mapping ||
>>
>>> + !PagePrivate(page)) {
>>> + unlock_page(page);
>>> + put_page(page);
>>> + goto again;
>>> + }
>>> + }
>>> +
>>> + /*
>>> + * Now the page range has no ordered extent any more.
>>> + * Read the page to make it uptodate.
>>> + */
>>> + if (!PageUptodate(page)) {
>>> + btrfs_readpage(NULL, page);
>>> + lock_page(page);
>>> + if (page->mapping != inode->vfs_inode.i_mapping ||
>>
>> if (page->mapping != mapping ||
>>
>>
>>> + !PagePrivate(page)) {
>>> + unlock_page(page);
>>> + put_page(page);
>>> + goto again;
>>> + }
>>> + if (!PageUptodate(page)) {
>>> + unlock_page(page);
>>> + put_page(page);
>>> + return ERR_PTR(-EIO);
>>> + }
>>> + }
>>> + wait_on_page_writeback(page);
>>> + return page;
>>> +}
>>> +
>>> /*
>>> * it doesn't do much good to defrag one or two pages
>>> * at a time. This pulls in a nice chunk of pages
>>> @@ -1172,11 +1255,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
>>> int ret;
>>> int i;
>>> int i_done;
>>> - struct btrfs_ordered_extent *ordered;
>>> struct extent_state *cached_state = NULL;
>>> - struct extent_io_tree *tree;
>>> struct extent_changeset *data_reserved = NULL;
>>> - gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
>>>
>>> file_end = (isize - 1) >> PAGE_SHIFT;
>>> if (!isize || start_index > file_end)
>>> @@ -1189,68 +1269,16 @@ static int cluster_pages_for_defrag(struct inode *inode,
>>> if (ret)
>>> return ret;
>>> i_done = 0;
>>> - tree = &BTRFS_I(inode)->io_tree;
>>>
>>> /* step one, lock all the pages */
>>> for (i = 0; i < page_cnt; i++) {
>>> struct page *page;
>>> -again:
>>> - page = find_or_create_page(inode->i_mapping,
>>> - start_index + i, mask);
>>> - if (!page)
>>> - break;
>>>
>>> - ret = set_page_extent_mapped(page);
>>> - if (ret < 0) {
>>> - unlock_page(page);
>>> - put_page(page);
>>> + page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i);
>>> + if (IS_ERR(page)) {
>>> + ret = PTR_ERR(page);
>>> break;
>>> }
>>> -
>>> - page_start = page_offset(page);
>>> - page_end = page_start + PAGE_SIZE - 1;
>>> - while (1) {
>>> - lock_extent_bits(tree, page_start, page_end,
>>> - &cached_state);
>>> - ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
>>> - page_start);
>>> - unlock_extent_cached(tree, page_start, page_end,
>>> - &cached_state);
>>> - if (!ordered)
>>> - break;
>>> -
>>> - unlock_page(page);
>>> - btrfs_start_ordered_extent(ordered, 1);
>>> - btrfs_put_ordered_extent(ordered);
>>> - lock_page(page);
>>> - /*
>>> - * we unlocked the page above, so we need check if
>>> - * it was released or not.
>>> - */
>>> - if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
>>> - unlock_page(page);
>>> - put_page(page);
>>> - goto again;
>>> - }
>>> - }
>>> -
>>> - if (!PageUptodate(page)) {
>>> - btrfs_readpage(NULL, page);
>>> - lock_page(page);
>>> - if (!PageUptodate(page)) {
>>> - unlock_page(page);
>>> - put_page(page);
>>> - ret = -EIO;
>>> - break;
>>> - }
>>> - }
>>> -
>>> - if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
>>> - unlock_page(page);
>>> - put_page(page);
>>> - goto again;
>>> - }
>>> -
>>> pages[i] = page;
>>> i_done++;
>>> }
>>> @@ -1260,13 +1288,6 @@ static int cluster_pages_for_defrag(struct inode *inode,
>>> if (!(inode->i_sb->s_flags & SB_ACTIVE))
>>> goto out;
>>>
>>> - /*
>>> - * so now we have a nice long stream of locked
>>> - * and up to date pages, lets wait on them
>>> - */
>>> - for (i = 0; i < i_done; i++)
>>> - wait_on_page_writeback(pages[i]);
>>> -
>>
>> Doesn't this introduce a behavioral change? previously we didn't
>> wait for page writeback in case of a parallel unmount, now we do.
>
> The behavior is only changing when we wait, we still check the SB_ACTIVE.
>
> IMHO the extra wait shouldn't cause much difference for unmount.
Yes, I wasn't sure if this does have any impact or not (I don't think
so), but as I've spotted the 'inode->vfs_inode.i_mapping' pointer
chasings I thought I'd leave the comment here.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/7] btrfs: defrag: introduce a new helper to collect target file extents
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
2021-05-28 2:28 ` [PATCH 1/7] btrfs: defrag: pass file_ra_state instead of file for btrfs_defrag_file() Qu Wenruo
2021-05-28 2:28 ` [PATCH 2/7] btrfs: defrag: extract the page preparation code into one helper Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
2021-05-28 2:28 ` [PATCH 4/7] btrfs: defrag: introduce a helper to defrag a continuous range Qu Wenruo
` (3 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
Introduce a new helper, defrag_collect_targets(), to collect all
possible targets to be defragged.
This function will not consider things like max_sectors_to_defrag, thus
caller should be responsible to ensure we don't exceed the limit.
This function will be the first stage of later defrag rework.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ioctl.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 111 insertions(+)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ba69991bca10..1e57293a05f2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1375,6 +1375,117 @@ static int cluster_pages_for_defrag(struct inode *inode,
}
+struct defrag_target_range {
+ struct list_head list;
+ u64 start;
+ u64 len;
+};
+
+/*
+ * Helper to collect all valid target extents.
+ *
+ * @start: The file offset to lookup
+ * @len: The length to lookup
+ * @extent_thresh: File extent size threshold, any extent size >= this value
+ * will be ignored
+ * @newer_than: Only defrag extents newer than this value
+ * @do_compress: Whether the defrag is doing compression
+ * If true, @extent_thresh will be ignored and all regular
+ * file extents meeting @newer_than will be targets.
+ * @target_list: The list of targets file extents
+ */
+static int defrag_collect_targets(struct btrfs_inode *inode,
+ u64 start, u64 len, u32 extent_thresh,
+ u64 newer_than, bool do_compress,
+ struct list_head *target_list)
+{
+ u64 cur = start;
+ int ret = 0;
+
+ while (cur < start + len) {
+ struct extent_map *em;
+ struct defrag_target_range *new;
+ bool next_mergeable = true;
+ u64 range_len;
+
+ em = defrag_lookup_extent(&inode->vfs_inode, cur);
+ if (!em)
+ break;
+
+ /* Skip hole/inline/preallocated extents */
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ goto next;
+
+ /* Skip older extent */
+ if (em->generation < newer_than)
+ goto next;
+
+ /*
+ * For do_compress case, we want to compress all valid file
+ * extents, thus no @extent_thresh or mergable check.
+ */
+ if (do_compress)
+ goto add;
+
+ /* Skip too large extent */
+ if (em->len >= extent_thresh)
+ goto next;
+
+ next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em);
+ if (!next_mergeable) {
+ struct defrag_target_range *last;
+
+ /* Empty target list, no way to merge with last entry */
+ if (list_empty(target_list))
+ goto next;
+ last = list_entry(target_list->prev,
+ struct defrag_target_range, list);
+ /* Not mergable with last entry */
+ if (last->start + last->len != cur)
+ goto next;
+
+ /* Mergable, fall throught to add it to @target_list. */
+ }
+
+add:
+ range_len = min(extent_map_end(em), start + len) - cur;
+ /*
+ * This one is a good target, check if it can be merged into
+ * last range of the target list
+ */
+ if (!list_empty(target_list)) {
+ struct defrag_target_range *last;
+
+ last = list_entry(target_list->prev,
+ struct defrag_target_range, list);
+ ASSERT(last->start + last->len <= cur);
+ if (last->start + last->len == cur) {
+ /* Mergable, enlarge the last entry */
+ last->len += range_len;
+ goto next;
+ }
+ /* Fall through to allocate a new entry */
+ }
+
+ /* Allocate new defrag_target_range */
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new) {
+ ret = -ENOMEM;
+ free_extent_map(em);
+ break;
+ }
+ new->start = cur;
+ new->len = range_len;
+ list_add_tail(&new->list, target_list);
+
+next:
+ cur = extent_map_end(em);
+ free_extent_map(em);
+ }
+ return ret;
+}
+
/*
* Btrfs entrace for defrag.
*
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/7] btrfs: defrag: introduce a helper to defrag a continuous range
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
` (2 preceding siblings ...)
2021-05-28 2:28 ` [PATCH 3/7] btrfs: defrag: introduce a new helper to collect target file extents Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
2021-05-28 9:07 ` Filipe Manana
2021-05-28 2:28 ` [PATCH 5/7] btrfs: defrag: introduce a new helper to defrag one cluster Qu Wenruo
` (2 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
Introduce a helper, defrag_one_target(), to defrag one continuous range
by:
- Lock and read the page
- Set the extent range defrag
- Set the involved page range dirty
There is a special note here, since the target range may be a hole now,
we use btrfs_set_extent_delalloc() with EXTENT_DEFRAG as @extra_bits,
other than set_extent_defrag().
This would properly add EXTENT_DELALLOC_NEW bit to make inode nbytes
updated properly, and still handle regular extents without any problem.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1e57293a05f2..cd7650bcc70c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1486,6 +1486,82 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
return ret;
}
+#define CLUSTER_SIZE (SZ_256K)
+static int defrag_one_target(struct btrfs_inode *inode,
+ struct file_ra_state *ra, u64 start, u32 len)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_changeset *data_reserved = NULL;
+ struct extent_state *cached_state = NULL;
+ struct page **pages;
+ const u32 sectorsize = inode->root->fs_info->sectorsize;
+ unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
+ unsigned long start_index = start >> PAGE_SHIFT;
+ unsigned int nr_pages = last_index - start_index + 1;
+ int ret = 0;
+ int i;
+
+ ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
+ ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
+
+ pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+
+ /* Kick in readahead */
+ if (ra)
+ page_cache_sync_readahead(inode->vfs_inode.i_mapping, ra, NULL,
+ start_index, nr_pages);
+
+ /* Prepare all pages */
+ for (i = 0; i < nr_pages; i++) {
+ pages[i] = defrag_prepare_one_page(inode, start_index + i);
+ if (IS_ERR(pages[i])) {
+ ret = PTR_ERR(pages[i]);
+ pages[i] = NULL;
+ goto free_pages;
+ }
+ }
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, len);
+ if (ret < 0)
+ goto free_pages;
+
+ /* Lock the extent bits and update the extent bits*/
+ lock_extent_bits(&inode->io_tree, start, start + len - 1,
+ &cached_state);
+ clear_extent_bit(&inode->io_tree, start, start + len - 1,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ 0, 0, &cached_state);
+
+ /*
+ * Since the target list is gathered without inode nor extent lock, we
+ * may get a range which is now a hole.
+ * In that case, we have to set it with DELALLOC_NEW as if we're
+ * writing a new data, or inode nbytes will mismatch.
+ */
+ ret = btrfs_set_extent_delalloc(inode, start, start + len - 1,
+ EXTENT_DEFRAG, &cached_state);
+ /* Update the page status */
+ for (i = 0; i < nr_pages; i++) {
+ ClearPageChecked(pages[i]);
+ btrfs_page_clamp_set_dirty(fs_info, pages[i], start, len);
+ }
+ unlock_extent_cached(&inode->io_tree, start, start + len - 1,
+ &cached_state);
+ btrfs_delalloc_release_extents(inode, len);
+ extent_changeset_free(data_reserved);
+
+free_pages:
+ for (i = 0; i < nr_pages; i++) {
+ if (pages[i]) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ }
+ }
+ kfree(pages);
+ return ret;
+}
+
/*
* Btrfs entrace for defrag.
*
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 4/7] btrfs: defrag: introduce a helper to defrag a continuous range
2021-05-28 2:28 ` [PATCH 4/7] btrfs: defrag: introduce a helper to defrag a continuous range Qu Wenruo
@ 2021-05-28 9:07 ` Filipe Manana
2021-05-28 10:27 ` Qu Wenruo
0 siblings, 1 reply; 14+ messages in thread
From: Filipe Manana @ 2021-05-28 9:07 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs
On Fri, May 28, 2021 at 7:00 AM Qu Wenruo <wqu@suse.com> wrote:
>
> Intrudouce a helper, defrag_one_target(), to defrag one continuous range
> by:
>
> - Lock and read the page
> - Set the extent range defrag
> - Set the involved page range dirty
>
> There is a special note here, since the target range may be a hole now,
> we use btrfs_set_extent_delalloc() with EXTENT_DEFRAG as @extra_bits,
> other than set_extent_defrag().
>
> This would properly add EXTENT_DELALLOC_NEW bit to make inode nbytes
> updated properly, and still handle regular extents without any problem.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 76 insertions(+)
>
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1e57293a05f2..cd7650bcc70c 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1486,6 +1486,82 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
> return ret;
> }
>
> +#define CLUSTER_SIZE (SZ_256K)
> +static int defrag_one_target(struct btrfs_inode *inode,
> + struct file_ra_state *ra, u64 start, u32 len)
> +{
> + struct btrfs_fs_info *fs_info = inode->root->fs_info;
> + struct extent_changeset *data_reserved = NULL;
> + struct extent_state *cached_state = NULL;
> + struct page **pages;
> + const u32 sectorsize = inode->root->fs_info->sectorsize;
> + unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
> + unsigned long start_index = start >> PAGE_SHIFT;
> + unsigned int nr_pages = last_index - start_index + 1;
> + int ret = 0;
> + int i;
> +
> + ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
> + ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
> +
> + pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
> + if (!pages)
> + return -ENOMEM;
> +
> + /* Kick in readahead */
> + if (ra)
> + page_cache_sync_readahead(inode->vfs_inode.i_mapping, ra, NULL,
> + start_index, nr_pages);
> +
> + /* Prepare all pages */
> + for (i = 0; i < nr_pages; i++) {
> + pages[i] = defrag_prepare_one_page(inode, start_index + i);
> + if (IS_ERR(pages[i])) {
> + ret = PTR_ERR(pages[i]);
> + pages[i] = NULL;
> + goto free_pages;
> + }
> + }
> + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, len);
> + if (ret < 0)
> + goto free_pages;
> +
> + /* Lock the extent bits and update the extent bits*/
> + lock_extent_bits(&inode->io_tree, start, start + len - 1,
> + &cached_state);
> + clear_extent_bit(&inode->io_tree, start, start + len - 1,
> + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
> + 0, 0, &cached_state);
> +
> + /*
> + * Since the target list is gathered without inode nor extent lock, we
> + * may get a range which is now a hole.
You are undoing what was done by commit
7f458a3873ae94efe1f37c8b96c97e7298769e98.
In case there's a hole this results in allocating extents and filling
them with zeroes, doing unnecessary IO and using disk space.
Please add back the logic to skip defrag if it's now a hole.
Thanks.
> + * In that case, we have to set it with DELALLOC_NEW as if we're
> + * writing a new data, or inode nbytes will mismatch.
> + */
> + ret = btrfs_set_extent_delalloc(inode, start, start + len - 1,
> + EXTENT_DEFRAG, &cached_state);
> + /* Update the page status */
> + for (i = 0; i < nr_pages; i++) {
> + ClearPageChecked(pages[i]);
> + btrfs_page_clamp_set_dirty(fs_info, pages[i], start, len);
> + }
> + unlock_extent_cached(&inode->io_tree, start, start + len - 1,
> + &cached_state);
> + btrfs_delalloc_release_extents(inode, len);
> + extent_changeset_free(data_reserved);
> +
> +free_pages:
> + for (i = 0; i < nr_pages; i++) {
> + if (pages[i]) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> + }
> + kfree(pages);
> + return ret;
> +}
> +
> /*
> * Btrfs entrace for defrag.
> *
> --
> 2.31.1
>
--
Filipe David Manana,
“Whether you think you can, or you think you can't — you're right.”
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/7] btrfs: defrag: introduce a helper to defrag a continuous range
2021-05-28 9:07 ` Filipe Manana
@ 2021-05-28 10:27 ` Qu Wenruo
0 siblings, 0 replies; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 10:27 UTC (permalink / raw)
To: fdmanana, Qu Wenruo; +Cc: linux-btrfs
On 2021/5/28 下午5:07, Filipe Manana wrote:
> On Fri, May 28, 2021 at 7:00 AM Qu Wenruo <wqu@suse.com> wrote:
>>
>> Intrudouce a helper, defrag_one_target(), to defrag one continuous range
>> by:
>>
>> - Lock and read the page
>> - Set the extent range defrag
>> - Set the involved page range dirty
>>
>> There is a special note here, since the target range may be a hole now,
>> we use btrfs_set_extent_delalloc() with EXTENT_DEFRAG as @extra_bits,
>> other than set_extent_defrag().
>>
>> This would properly add EXTENT_DELALLOC_NEW bit to make inode nbytes
>> updated properly, and still handle regular extents without any problem.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>> fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 76 insertions(+)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 1e57293a05f2..cd7650bcc70c 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -1486,6 +1486,82 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
>> return ret;
>> }
>>
>> +#define CLUSTER_SIZE (SZ_256K)
>> +static int defrag_one_target(struct btrfs_inode *inode,
>> + struct file_ra_state *ra, u64 start, u32 len)
>> +{
>> + struct btrfs_fs_info *fs_info = inode->root->fs_info;
>> + struct extent_changeset *data_reserved = NULL;
>> + struct extent_state *cached_state = NULL;
>> + struct page **pages;
>> + const u32 sectorsize = inode->root->fs_info->sectorsize;
>> + unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
>> + unsigned long start_index = start >> PAGE_SHIFT;
>> + unsigned int nr_pages = last_index - start_index + 1;
>> + int ret = 0;
>> + int i;
>> +
>> + ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
>> + ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
>> +
>> + pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
>> + if (!pages)
>> + return -ENOMEM;
>> +
>> + /* Kick in readahead */
>> + if (ra)
>> + page_cache_sync_readahead(inode->vfs_inode.i_mapping, ra, NULL,
>> + start_index, nr_pages);
>> +
>> + /* Prepare all pages */
>> + for (i = 0; i < nr_pages; i++) {
>> + pages[i] = defrag_prepare_one_page(inode, start_index + i);
>> + if (IS_ERR(pages[i])) {
>> + ret = PTR_ERR(pages[i]);
>> + pages[i] = NULL;
>> + goto free_pages;
>> + }
>> + }
>> + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, len);
>> + if (ret < 0)
>> + goto free_pages;
>> +
>> + /* Lock the extent bits and update the extent bits*/
>> + lock_extent_bits(&inode->io_tree, start, start + len - 1,
>> + &cached_state);
>> + clear_extent_bit(&inode->io_tree, start, start + len - 1,
>> + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
>> + 0, 0, &cached_state);
>> +
>> + /*
>> + * Since the target list is gathered without inode nor extent lock, we
>> + * may get a range which is now a hole.
>
> You are undoing what was done by commit
> 7f458a3873ae94efe1f37c8b96c97e7298769e98.
> In case there's a hole this results in allocating extents and filling
> them with zeroes, doing unnecessary IO and using disk space.
> Please add back the logic to skip defrag if it's now a hole.
OK, let me try to reuse the defrag_collect_targets() inside the lock to
exclude the holes.
Thanks,
Qu
>
> Thanks.
>
>> + * In that case, we have to set it with DELALLOC_NEW as if we're
>> + * writing a new data, or inode nbytes will mismatch.
>> + */
>> + ret = btrfs_set_extent_delalloc(inode, start, start + len - 1,
>> + EXTENT_DEFRAG, &cached_state);
>> + /* Update the page status */
>> + for (i = 0; i < nr_pages; i++) {
>> + ClearPageChecked(pages[i]);
>> + btrfs_page_clamp_set_dirty(fs_info, pages[i], start, len);
>> + }
>> + unlock_extent_cached(&inode->io_tree, start, start + len - 1,
>> + &cached_state);
>> + btrfs_delalloc_release_extents(inode, len);
>> + extent_changeset_free(data_reserved);
>> +
>> +free_pages:
>> + for (i = 0; i < nr_pages; i++) {
>> + if (pages[i]) {
>> + unlock_page(pages[i]);
>> + put_page(pages[i]);
>> + }
>> + }
>> + kfree(pages);
>> + return ret;
>> +}
>> +
>> /*
>> * Btrfs entrace for defrag.
>> *
>> --
>> 2.31.1
>>
>
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 5/7] btrfs: defrag: introduce a new helper to defrag one cluster
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
` (3 preceding siblings ...)
2021-05-28 2:28 ` [PATCH 4/7] btrfs: defrag: introduce a helper to defrag a continuous range Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
2021-05-28 2:28 ` [PATCH 6/7] btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file() Qu Wenruo
2021-05-28 2:28 ` [PATCH 7/7] btrfs: defrag: remove the old infrastructure Qu Wenruo
6 siblings, 0 replies; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
This new helper, defrag_one_cluster(), will defrag one cluster (at most
256K) by:
- Collect all targets
- Call defrag_one_target() on each target
With some extra range clamping.
- Update @sectors_defraged parameter
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ioctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cd7650bcc70c..911db470aad6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1562,6 +1562,48 @@ static int defrag_one_target(struct btrfs_inode *inode,
return ret;
}
+static int defrag_one_cluster(struct btrfs_inode *inode,
+ struct file_ra_state *ra,
+ u64 start, u32 len, u32 extent_thresh,
+ u64 newer_than, bool do_compress,
+ unsigned long *sectors_defraged,
+ unsigned long max_sectors)
+{
+ const u32 sectorsize = inode->root->fs_info->sectorsize;
+ struct defrag_target_range *entry;
+ struct defrag_target_range *tmp;
+ LIST_HEAD(target_list);
+ int ret;
+
+ BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
+ ret = defrag_collect_targets(inode, start, len, extent_thresh,
+ newer_than, do_compress, &target_list);
+ if (ret < 0)
+ goto out;
+
+ list_for_each_entry(entry, &target_list, list) {
+ u32 range_len = entry->len;
+
+ /* Reached the limit */
+ if (max_sectors && max_sectors == *sectors_defraged)
+ break;
+
+ if (max_sectors)
+ range_len = min_t(u32, range_len,
+ (max_sectors - *sectors_defraged) * sectorsize);
+ ret = defrag_one_target(inode, ra, entry->start, range_len);
+ if (ret < 0)
+ break;
+ *sectors_defraged += range_len;
+ }
+out:
+ list_for_each_entry_safe(entry, tmp, &target_list, list) {
+ list_del_init(&entry->list);
+ kfree(entry);
+ }
+ return ret;
+}
+
/*
* Btrfs entrace for defrag.
*
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/7] btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
` (4 preceding siblings ...)
2021-05-28 2:28 ` [PATCH 5/7] btrfs: defrag: introduce a new helper to defrag one cluster Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
2021-05-28 2:28 ` [PATCH 7/7] btrfs: defrag: remove the old infrastructure Qu Wenruo
6 siblings, 0 replies; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
The function defrag_one_cluster() is able to defrag one range well
enough, we only need to do preparation for it, including:
- Clamp and align the defrag range
- Exclude invalid cases
- Proper inode locking
The old infrastructures will not be removed in this patch, as it would
be too noisy to review.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ioctl.c | 219 ++++++++++++++---------------------------------
1 file changed, 63 insertions(+), 156 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 911db470aad6..1a5a461d94a5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1619,25 +1619,15 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 newer_than, unsigned long max_to_defrag)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
- unsigned long last_index;
+ unsigned long sectors_defraged = 0;
u64 isize = i_size_read(inode);
- u64 last_len = 0;
- u64 skip = 0;
- u64 defrag_end = 0;
- u64 newer_off = range->start;
- unsigned long i;
- unsigned long ra_index = 0;
- int ret;
- int defrag_count = 0;
- int compress_type = BTRFS_COMPRESS_ZLIB;
- u32 extent_thresh = range->extent_thresh;
- unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
- unsigned long cluster = max_cluster;
- u64 new_align = ~((u64)SZ_128K - 1);
- struct page **pages = NULL;
+ u64 cur;
+ u64 last_byte;
bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;
bool ra_allocated = false;
+ int compress_type = BTRFS_COMPRESS_ZLIB;
+ int ret;
+ u32 extent_thresh = range->extent_thresh;
if (isize == 0)
return 0;
@@ -1655,6 +1645,14 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
if (extent_thresh == 0)
extent_thresh = SZ_256K;
+ if (range->start + range->len > range->start) {
+ /* Got a specific range */
+ last_byte = min(isize, range->start + range->len) - 1;
+ } else {
+ /* Defrag until file end */
+ last_byte = isize - 1;
+ }
+
/*
* If we were not given a ra, allocate a readahead context. As
* readahead is just an optimization, defrag will work without it so
@@ -1667,159 +1665,68 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
file_ra_state_init(ra, inode->i_mapping);
}
- pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- goto out_ra;
- }
+ /* Align the range */
+ cur = round_down(range->start, fs_info->sectorsize);
+ last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
- /* find the last page to defrag */
- if (range->start + range->len > range->start) {
- last_index = min_t(u64, isize - 1,
- range->start + range->len - 1) >> PAGE_SHIFT;
- } else {
- last_index = (isize - 1) >> PAGE_SHIFT;
- }
+ while (cur < last_byte) {
+ u64 cluster_end;
- if (newer_than) {
- ret = find_new_extents(root, inode, newer_than,
- &newer_off, SZ_64K);
- if (!ret) {
- range->start = newer_off;
- /*
- * we always align our defrag to help keep
- * the extents in the file evenly spaced
- */
- i = (newer_off & new_align) >> PAGE_SHIFT;
- } else
- goto out_ra;
- } else {
- i = range->start >> PAGE_SHIFT;
- }
- if (!max_to_defrag)
- max_to_defrag = last_index - i + 1;
-
- /*
- * make writeback starts from i, so the defrag range can be
- * written sequentially.
- */
- if (i < inode->i_mapping->writeback_index)
- inode->i_mapping->writeback_index = i;
-
- while (i <= last_index && defrag_count < max_to_defrag &&
- (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
- /*
- * make sure we stop running if someone unmounts
- * the FS
- */
- if (!(inode->i_sb->s_flags & SB_ACTIVE))
- break;
+ /* The cluster size 256K should always be page aligned */
+ BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
- if (btrfs_defrag_cancelled(fs_info)) {
- btrfs_debug(fs_info, "defrag_file cancelled");
- ret = -EAGAIN;
- goto error;
- }
-
- if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
- extent_thresh, &last_len, &skip,
- &defrag_end, do_compress)){
- unsigned long next;
- /*
- * the should_defrag function tells us how much to skip
- * bump our counter by the suggested amount
- */
- next = DIV_ROUND_UP(skip, PAGE_SIZE);
- i = max(i + 1, next);
- continue;
- }
-
- if (!newer_than) {
- cluster = (PAGE_ALIGN(defrag_end) >>
- PAGE_SHIFT) - i;
- cluster = min(cluster, max_cluster);
- } else {
- cluster = max_cluster;
- }
+ /* We want the cluster ends at page boundary when possible */
+ cluster_end = (((cur >> PAGE_SHIFT) +
+ (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
+ cluster_end = min(cluster_end, last_byte);
- if (i + cluster > ra_index) {
- ra_index = max(i, ra_index);
- if (ra)
- page_cache_sync_readahead(inode->i_mapping, ra,
- NULL, ra_index, cluster);
- ra_index += cluster;
- }
-
- btrfs_inode_lock(inode, 0);
- if (IS_SWAPFILE(inode)) {
+ btrfs_inode_lock(inode, 0);
+ if (IS_SWAPFILE(inode)) {
ret = -ETXTBSY;
- } else {
- if (do_compress)
- BTRFS_I(inode)->defrag_compress = compress_type;
- ret = cluster_pages_for_defrag(inode, pages, i, cluster);
+ btrfs_inode_unlock(inode, 0);
+ break;
}
- if (ret < 0) {
+ if (!(inode->i_sb->s_flags & SB_ACTIVE)) {
btrfs_inode_unlock(inode, 0);
- goto out_ra;
+ break;
}
-
- defrag_count += ret;
- balance_dirty_pages_ratelimited(inode->i_mapping);
+ if (do_compress)
+ BTRFS_I(inode)->defrag_compress = compress_type;
+ ret = defrag_one_cluster(BTRFS_I(inode), ra, cur,
+ cluster_end + 1 - cur, extent_thresh,
+ newer_than, do_compress,
+ §ors_defraged, max_to_defrag);
btrfs_inode_unlock(inode, 0);
-
- if (newer_than) {
- if (newer_off == (u64)-1)
- break;
-
- if (ret > 0)
- i += ret;
-
- newer_off = max(newer_off + 1,
- (u64)i << PAGE_SHIFT);
-
- ret = find_new_extents(root, inode, newer_than,
- &newer_off, SZ_64K);
- if (!ret) {
- range->start = newer_off;
- i = (newer_off & new_align) >> PAGE_SHIFT;
- } else {
- break;
- }
- } else {
- if (ret > 0) {
- i += ret;
- last_len += ret << PAGE_SHIFT;
- } else {
- i++;
- last_len = 0;
- }
- }
- }
-
- ret = defrag_count;
-error:
- if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
- filemap_flush(inode->i_mapping);
- if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_flush(inode->i_mapping);
- }
-
- if (range->compress_type == BTRFS_COMPRESS_LZO) {
- btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
- } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
- btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+ if (ret < 0)
+ break;
+ cur = cluster_end + 1;
}
-out_ra:
- if (do_compress) {
- btrfs_inode_lock(inode, 0);
- BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
- btrfs_inode_unlock(inode, 0);
- }
if (ra_allocated)
kfree(ra);
- kfree(pages);
+ if (sectors_defraged) {
+ /*
+ * We have defraged some sectors, for compression case
+ * they need to be written back immediately.
+ */
+ if (range->flags & BTRFS_DEFRAG_RANGE_START_IO) {
+ filemap_flush(inode->i_mapping);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_flush(inode->i_mapping);
+ }
+ if (range->compress_type == BTRFS_COMPRESS_LZO) {
+ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+ } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
+ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+ }
+ ret = sectors_defraged;
+ }
+ if (do_compress) {
+ btrfs_inode_lock(inode, 0);
+ BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
+ btrfs_inode_unlock(inode, 0);
+ }
return ret;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 7/7] btrfs: defrag: remove the old infrastructure
2021-05-28 2:28 [PATCH 0/7] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
` (5 preceding siblings ...)
2021-05-28 2:28 ` [PATCH 6/7] btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file() Qu Wenruo
@ 2021-05-28 2:28 ` Qu Wenruo
6 siblings, 0 replies; 14+ messages in thread
From: Qu Wenruo @ 2021-05-28 2:28 UTC (permalink / raw)
To: linux-btrfs
Now the old infrastructure can all be removed.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/ioctl.c | 301 -----------------------------------------------
1 file changed, 301 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1a5a461d94a5..669f533060fd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -940,99 +940,6 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
* file you want to defrag, we return 0 to let you know to skip this
* part of the file
*/
-static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh)
-{
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_map *em = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- u64 end;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
- read_unlock(&em_tree->lock);
-
- if (em) {
- end = extent_map_end(em);
- free_extent_map(em);
- if (end - offset > thresh)
- return 0;
- }
- /* if we already have a nice delalloc here, just stop */
- thresh /= 2;
- end = count_range_bits(io_tree, &offset, offset + thresh,
- thresh, EXTENT_DELALLOC, 1);
- if (end >= thresh)
- return 0;
- return 1;
-}
-
-/*
- * helper function to walk through a file and find extents
- * newer than a specific transid, and smaller than thresh.
- *
- * This is used by the defragging code to find new and small
- * extents
- */
-static int find_new_extents(struct btrfs_root *root,
- struct inode *inode, u64 newer_than,
- u64 *off, u32 thresh)
-{
- struct btrfs_path *path;
- struct btrfs_key min_key;
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *extent;
- int type;
- int ret;
- u64 ino = btrfs_ino(BTRFS_I(inode));
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- min_key.objectid = ino;
- min_key.type = BTRFS_EXTENT_DATA_KEY;
- min_key.offset = *off;
-
- while (1) {
- ret = btrfs_search_forward(root, &min_key, path, newer_than);
- if (ret != 0)
- goto none;
-process_slot:
- if (min_key.objectid != ino)
- goto none;
- if (min_key.type != BTRFS_EXTENT_DATA_KEY)
- goto none;
-
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- type = btrfs_file_extent_type(leaf, extent);
- if (type == BTRFS_FILE_EXTENT_REG &&
- btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
- check_defrag_in_cache(inode, min_key.offset, thresh)) {
- *off = min_key.offset;
- btrfs_free_path(path);
- return 0;
- }
-
- path->slots[0]++;
- if (path->slots[0] < btrfs_header_nritems(leaf)) {
- btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]);
- goto process_slot;
- }
-
- if (min_key.offset == (u64)-1)
- goto none;
-
- min_key.offset++;
- btrfs_release_path(path);
- }
-none:
- btrfs_free_path(path);
- return -ENOENT;
-}
-
static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
@@ -1084,66 +991,6 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
return ret;
}
-static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
- u64 *last_len, u64 *skip, u64 *defrag_end,
- int compress)
-{
- struct extent_map *em;
- int ret = 1;
- bool next_mergeable = true;
- bool prev_mergeable = true;
-
- /*
- * make sure that once we start defragging an extent, we keep on
- * defragging it
- */
- if (start < *defrag_end)
- return 1;
-
- *skip = 0;
-
- em = defrag_lookup_extent(inode, start);
- if (!em)
- return 0;
-
- /* this will cover holes, and inline extents */
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- ret = 0;
- goto out;
- }
-
- if (!*defrag_end)
- prev_mergeable = false;
-
- next_mergeable = defrag_check_next_extent(inode, em);
- /*
- * we hit a real extent, if it is big or the next extent is not a
- * real extent, don't bother defragging it
- */
- if (!compress && (*last_len == 0 || *last_len >= thresh) &&
- (em->len >= thresh || (!next_mergeable && !prev_mergeable)))
- ret = 0;
-out:
- /*
- * last_len ends up being a counter of how many bytes we've defragged.
- * every time we choose not to defrag an extent, we reset *last_len
- * so that the next tiny extent will force a defrag.
- *
- * The end result of this is that tiny extents before a single big
- * extent will force at least part of that big extent to be defragged.
- */
- if (ret) {
- *defrag_end = extent_map_end(em);
- } else {
- *last_len = 0;
- *skip = extent_map_end(em);
- *defrag_end = 0;
- }
-
- free_extent_map(em);
- return ret;
-}
-
/*
* Prepare one page to be defraged.
*
@@ -1227,154 +1074,6 @@ static struct page *defrag_prepare_one_page(struct btrfs_inode *inode,
return page;
}
-/*
- * it doesn't do much good to defrag one or two pages
- * at a time. This pulls in a nice chunk of pages
- * to COW and defrag.
- *
- * It also makes sure the delalloc code has enough
- * dirty data to avoid making new small extents as part
- * of the defrag
- *
- * It's a good idea to start RA on this range
- * before calling this.
- */
-static int cluster_pages_for_defrag(struct inode *inode,
- struct page **pages,
- unsigned long start_index,
- unsigned long num_pages)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- unsigned long file_end;
- u64 isize = i_size_read(inode);
- u64 page_start;
- u64 page_end;
- u64 page_cnt;
- u64 start = (u64)start_index << PAGE_SHIFT;
- u64 search_start;
- int ret;
- int i;
- int i_done;
- struct extent_state *cached_state = NULL;
- struct extent_changeset *data_reserved = NULL;
-
- file_end = (isize - 1) >> PAGE_SHIFT;
- if (!isize || start_index > file_end)
- return 0;
-
- page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
-
- ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
- start, page_cnt << PAGE_SHIFT);
- if (ret)
- return ret;
- i_done = 0;
-
- /* step one, lock all the pages */
- for (i = 0; i < page_cnt; i++) {
- struct page *page;
-
- page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- break;
- }
- pages[i] = page;
- i_done++;
- }
- if (!i_done || ret)
- goto out;
-
- if (!(inode->i_sb->s_flags & SB_ACTIVE))
- goto out;
-
- page_start = page_offset(pages[0]);
- page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, &cached_state);
-
- /*
- * When defragmenting we skip ranges that have holes or inline extents,
- * (check should_defrag_range()), to avoid unnecessary IO and wasting
- * space. At btrfs_defrag_file(), we check if a range should be defragged
- * before locking the inode and then, if it should, we trigger a sync
- * page cache readahead - we lock the inode only after that to avoid
- * blocking for too long other tasks that possibly want to operate on
- * other file ranges. But before we were able to get the inode lock,
- * some other task may have punched a hole in the range, or we may have
- * now an inline extent, in which case we should not defrag. So check
- * for that here, where we have the inode and the range locked, and bail
- * out if that happened.
- */
- search_start = page_start;
- while (search_start < page_end) {
- struct extent_map *em;
-
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
- page_end - search_start);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out_unlock_range;
- }
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- free_extent_map(em);
- /* Ok, 0 means we did not defrag anything */
- ret = 0;
- goto out_unlock_range;
- }
- search_start = extent_map_end(em);
- free_extent_map(em);
- }
-
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
- page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 0, 0, &cached_state);
-
- if (i_done != page_cnt) {
- spin_lock(&BTRFS_I(inode)->lock);
- btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
- spin_unlock(&BTRFS_I(inode)->lock);
- btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
- start, (page_cnt - i_done) << PAGE_SHIFT, true);
- }
-
-
- set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
- &cached_state);
-
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, &cached_state);
-
- for (i = 0; i < i_done; i++) {
- btrfs_page_clear_dirty(fs_info, pages[i], page_offset(pages[i]),
- PAGE_SIZE);
- ClearPageChecked(pages[i]);
- btrfs_page_set_dirty(fs_info, pages[i], page_offset(pages[i]),
- PAGE_SIZE);
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
- extent_changeset_free(data_reserved);
- return i_done;
-
-out_unlock_range:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, &cached_state);
-out:
- for (i = 0; i < i_done; i++) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
- btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
- start, page_cnt << PAGE_SHIFT, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
- extent_changeset_free(data_reserved);
- return ret;
-
-}
-
struct defrag_target_range {
struct list_head list;
u64 start;
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread