From: David Sterba <dsterba@suse.cz>
To: Qu Wenruo <wqu@suse.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v5 07/11] btrfs: defrag: introduce a helper to defrag a range
Date: Mon, 23 Aug 2021 21:21:39 +0200 [thread overview]
Message-ID: <20210823192138.GQ5047@twin.jikos.cz> (raw)
In-Reply-To: <20210806081242.257996-8-wqu@suse.com>
On Fri, Aug 06, 2021 at 04:12:38PM +0800, Qu Wenruo wrote:
> A new helper, defrag_one_range(), is introduced to defrag one range.
>
> This function will mostly prepare the needed pages and extent status for
> defrag_one_locked_target().
>
> As we can only have a consistent view of extent map with page and
> extent bits locked, we need to re-check the range passed in to get a
> real target list for defrag_one_locked_target().
>
> Since defrag_collect_targets() will call defrag_lookup_extent() and lock
> extent range, we also need to teach those two functions to skip extent
> lock.
> Thus new parameter, @locked, is introruced to skip extent lock if the
> caller has already locked the range.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> fs/btrfs/ioctl.c | 105 ++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 95 insertions(+), 10 deletions(-)
>
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index a21c4c09269a..2f7196f9bd65 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1081,7 +1081,8 @@ static int find_new_extents(struct btrfs_root *root,
> return -ENOENT;
> }
>
> -static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
> +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
> + bool locked)
> {
> struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
> struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> @@ -1101,10 +1102,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
> u64 end = start + sectorsize - 1;
>
> /* get the big lock and read metadata off disk */
> - lock_extent_bits(io_tree, start, end, &cached);
> + if (!locked)
> + lock_extent_bits(io_tree, start, end, &cached);
> em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start,
> sectorsize);
> - unlock_extent_cached(io_tree, start, end, &cached);
> + if (!locked)
> + unlock_extent_cached(io_tree, start, end, &cached);
>
> if (IS_ERR(em))
> return NULL;
> @@ -1113,7 +1116,8 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
> return em;
> }
>
> -static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
> +static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
> + bool locked)
> {
> struct extent_map *next;
> bool ret = true;
> @@ -1122,7 +1126,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
> if (em->start + em->len >= i_size_read(inode))
> return false;
>
> - next = defrag_lookup_extent(inode, em->start + em->len);
> + next = defrag_lookup_extent(inode, em->start + em->len, locked);
> if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
> ret = false;
> else if ((em->block_start + em->block_len == next->block_start) &&
> @@ -1151,7 +1155,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
>
> *skip = 0;
>
> - em = defrag_lookup_extent(inode, start);
> + em = defrag_lookup_extent(inode, start, false);
> if (!em)
> return 0;
>
> @@ -1164,7 +1168,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
> if (!*defrag_end)
> prev_mergeable = false;
>
> - next_mergeable = defrag_check_next_extent(inode, em);
> + next_mergeable = defrag_check_next_extent(inode, em, false);
> /*
> * we hit a real extent, if it is big or the next extent is not a
> * real extent, don't bother defragging it
> @@ -1445,12 +1449,13 @@ struct defrag_target_range {
> * @do_compress: Whether the defrag is doing compression
> * If true, @extent_thresh will be ignored and all regular
> * file extents meeting @newer_than will be targets.
> + * @locked: If the range has already hold extent lock
> * @target_list: The list of targets file extents
> */
> static int defrag_collect_targets(struct btrfs_inode *inode,
> u64 start, u64 len, u32 extent_thresh,
> u64 newer_than, bool do_compress,
> - struct list_head *target_list)
> + bool locked, struct list_head *target_list)
> {
> u64 cur = start;
> int ret = 0;
> @@ -1461,7 +1466,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
> bool next_mergeable = true;
> u64 range_len;
>
> - em = defrag_lookup_extent(&inode->vfs_inode, cur);
> + em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
> if (!em)
> break;
>
> @@ -1485,7 +1490,8 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
> if (em->len >= extent_thresh)
> goto next;
>
> - next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em);
> + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
> + locked);
> if (!next_mergeable) {
> struct defrag_target_range *last;
>
> @@ -1603,6 +1609,85 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
> return ret;
> }
>
> +static int defrag_one_range(struct btrfs_inode *inode,
> + u64 start, u32 len,
> + u32 extent_thresh, u64 newer_than,
> + bool do_compress)
> +{
> + struct extent_state *cached_state = NULL;
> + struct defrag_target_range *entry;
> + struct defrag_target_range *tmp;
> + LIST_HEAD(target_list);
> + struct page **pages;
> + const u32 sectorsize = inode->root->fs_info->sectorsize;
> + unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
> + unsigned long start_index = start >> PAGE_SHIFT;
This is another instance of the unsafe page index type variable, here
it's fine because start is u64, but below
> + unsigned int nr_pages = last_index - start_index + 1;
> + int ret = 0;
> + int i;
> +
> + ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
> + ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
> +
> + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
> + if (!pages)
> + return -ENOMEM;
> +
> + /* Prepare all pages */
> + for (i = 0; i < nr_pages; i++) {
> + pages[i] = defrag_prepare_one_page(inode, start_index + i);
> + if (IS_ERR(pages[i])) {
> + ret = PTR_ERR(pages[i]);
> + pages[i] = NULL;
> + goto free_pages;
> + }
> + }
> + for (i = 0; i < nr_pages; i++)
> + wait_on_page_writeback(pages[i]);
> +
> + /* Also lock the pages range */
> + lock_extent_bits(&inode->io_tree, start_index << PAGE_SHIFT,
The shifts are on unsigned long which can break, so it's better to
declare them u64 right away.
> + (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
> + &cached_state);
> + /*
> + * Now we have a consistent view about the extent map, re-check
> + * which range really needs to be defragged.
> + *
> + * And this time we have extent locked already, pass @locked = true
> + * so that we won't re-lock the extent range and cause deadlock.
> + */
> + ret = defrag_collect_targets(inode, start, len, extent_thresh,
> + newer_than, do_compress, true,
> + &target_list);
> + if (ret < 0)
> + goto unlock_extent;
> +
> + list_for_each_entry(entry, &target_list, list) {
> + ret = defrag_one_locked_target(inode, entry, pages, nr_pages,
> + &cached_state);
> + if (ret < 0)
> + break;
> + }
> +
> + list_for_each_entry_safe(entry, tmp, &target_list, list) {
> + list_del_init(&entry->list);
> + kfree(entry);
> + }
> +unlock_extent:
> + unlock_extent_cached(&inode->io_tree, start_index << PAGE_SHIFT,
> + (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
And same here.
> + &cached_state);
> +free_pages:
> + for (i = 0; i < nr_pages; i++) {
> + if (pages[i]) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> + }
> + kfree(pages);
> + return ret;
> +}
> +
> /*
> * Btrfs entrace for defrag.
> *
> --
> 2.32.0
next prev parent reply other threads:[~2021-08-23 19:24 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-06 8:12 [PATCH v5 00/11] btrfs: defrag: rework to support sector perfect defrag Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 01/11] btrfs: defrag: pass file_ra_state instead of file for btrfs_defrag_file() Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 02/11] btrfs: defrag: also check PagePrivate for subpage cases in cluster_pages_for_defrag() Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 03/11] btrfs: defrag: replace hard coded PAGE_SIZE to sectorsize Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 04/11] btrfs: defrag: extract the page preparation code into one helper Qu Wenruo
2021-08-23 19:04 ` David Sterba
2021-08-06 8:12 ` [PATCH v5 05/11] btrfs: defrag: introduce a new helper to collect target file extents Qu Wenruo
2021-08-23 19:08 ` David Sterba
2021-08-06 8:12 ` [PATCH v5 06/11] btrfs: defrag: introduce a helper to defrag a continuous prepared range Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 07/11] btrfs: defrag: introduce a helper to defrag a range Qu Wenruo
2021-08-23 19:21 ` David Sterba [this message]
2021-08-06 8:12 ` [PATCH v5 08/11] btrfs: defrag: introduce a new helper to defrag one cluster Qu Wenruo
2021-08-23 19:27 ` David Sterba
2021-08-06 8:12 ` [PATCH v5 09/11] btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file() Qu Wenruo
2021-08-09 11:32 ` Dan Carpenter
2021-08-09 12:13 ` Qu Wenruo
2021-08-10 6:19 ` Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 10/11] btrfs: defrag: remove the old infrastructure Qu Wenruo
2021-08-06 8:12 ` [PATCH v5 11/11] btrfs: defrag: enable defrag for subpage case Qu Wenruo
2021-08-23 19:43 ` [PATCH v5 00/11] btrfs: defrag: rework to support sector perfect defrag David Sterba
2021-08-27 9:18 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210823192138.GQ5047@twin.jikos.cz \
--to=dsterba@suse.cz \
--cc=linux-btrfs@vger.kernel.org \
--cc=wqu@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).