linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: Dennis Zhou <dennis@kernel.org>
Cc: David Sterba <dsterba@suse.com>, Chris Mason <clm@fb.com>,
	Josef Bacik <josef@toxicpanda.com>,
	Omar Sandoval <osandov@osandov.com>,
	kernel-team@fb.com, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 13/19] btrfs: have multiple discard lists
Date: Thu, 10 Oct 2019 12:51:01 -0400	[thread overview]
Message-ID: <20191010165100.nz7tfn7kpclp6dkl@macbook-pro-91.dhcp.thefacebook.com> (raw)
In-Reply-To: <87b5ef751e8febd481065e475bd53b276e670ff6.1570479299.git.dennis@kernel.org>

On Mon, Oct 07, 2019 at 04:17:44PM -0400, Dennis Zhou wrote:
> Non-block group destruction discarding currently only had a single list
> with no minimum discard length. This can lead to caravaning more
> meaningful discards behind a heavily fragmented block group.
> 
> This adds support for multiple lists with minimum discard lengths to
> prevent the caravan effect. We promote block groups back up when we
> exceed the BTRFS_DISCARD_MAX_FILTER size, currently we support only 2
> lists with filters of 1MB and 32KB respectively.
> 
> Signed-off-by: Dennis Zhou <dennis@kernel.org>
> ---
>  fs/btrfs/ctree.h            |  2 +-
>  fs/btrfs/discard.c          | 60 +++++++++++++++++++++++++++++++++----
>  fs/btrfs/discard.h          |  4 +++
>  fs/btrfs/free-space-cache.c | 37 +++++++++++++++--------
>  fs/btrfs/free-space-cache.h |  2 +-
>  5 files changed, 85 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index e81f699347e0..b5608f8dc41a 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -439,7 +439,7 @@ struct btrfs_full_stripe_locks_tree {
>  };
>  
>  /* discard control */
> -#define BTRFS_NR_DISCARD_LISTS		2
> +#define BTRFS_NR_DISCARD_LISTS		3
>  
>  struct btrfs_discard_ctl {
>  	struct workqueue_struct *discard_workers;
> diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
> index 072c73f48297..296cbffc5957 100644
> --- a/fs/btrfs/discard.c
> +++ b/fs/btrfs/discard.c
> @@ -20,6 +20,10 @@
>  #define BTRFS_DISCARD_MAX_DELAY		(10000UL)
>  #define BTRFS_DISCARD_MAX_IOPS		(10UL)
>  
> +/* monotonically decreasing filters after 0 */
> +static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {0,
> +	BTRFS_DISCARD_MAX_FILTER, BTRFS_DISCARD_MIN_FILTER};
> +
>  static struct list_head *
>  btrfs_get_discard_list(struct btrfs_discard_ctl *discard_ctl,
>  		       struct btrfs_block_group_cache *cache)
> @@ -120,7 +124,7 @@ find_next_cache(struct btrfs_discard_ctl *discard_ctl, u64 now)
>  }
>  
>  static struct btrfs_block_group_cache *
> -peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
> +peek_discard_list(struct btrfs_discard_ctl *discard_ctl, int *discard_index)
>  {
>  	struct btrfs_block_group_cache *cache;
>  	u64 now = ktime_get_ns();
> @@ -132,6 +136,7 @@ peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
>  
>  	if (cache && now > cache->discard_delay) {
>  		discard_ctl->cache = cache;
> +		*discard_index = cache->discard_index;
>  		if (cache->discard_index == 0 &&
>  		    cache->free_space_ctl->free_space != cache->key.offset) {
>  			__btrfs_add_to_discard_list(discard_ctl, cache);
> @@ -150,6 +155,36 @@ peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
>  	return cache;
>  }
>  
> +void btrfs_discard_check_filter(struct btrfs_block_group_cache *cache,
> +				u64 bytes)
> +{
> +	struct btrfs_discard_ctl *discard_ctl;
> +
> +	if (!cache || !btrfs_test_opt(cache->fs_info, DISCARD_ASYNC))
> +		return;
> +
> +	discard_ctl = &cache->fs_info->discard_ctl;
> +
> +	if (cache && cache->discard_index > 1 &&
> +	    bytes >= BTRFS_DISCARD_MAX_FILTER) {
> +		remove_from_discard_list(discard_ctl, cache);
> +		cache->discard_index = 1;

Really need names here, I have no idea what 1 is.

> +		btrfs_add_to_discard_list(discard_ctl, cache);
> +	}
> +}
> +
> +static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
> +				       struct btrfs_block_group_cache *cache)
> +{
> +	cache->discard_index++;
> +	if (cache->discard_index == BTRFS_NR_DISCARD_LISTS) {
> +		cache->discard_index = 1;
> +		return;
> +	}
> +
> +	btrfs_add_to_discard_list(discard_ctl, cache);
> +}
> +
>  void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
>  			       struct btrfs_block_group_cache *cache)
>  {
> @@ -202,23 +237,34 @@ static void btrfs_discard_workfn(struct work_struct *work)
>  {
>  	struct btrfs_discard_ctl *discard_ctl;
>  	struct btrfs_block_group_cache *cache;
> +	int discard_index = 0;
>  	u64 trimmed = 0;
> +	u64 minlen = 0;
>  
>  	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
>  
>  again:
> -	cache = peek_discard_list(discard_ctl);
> +	cache = peek_discard_list(discard_ctl, &discard_index);
>  	if (!cache || !btrfs_run_discard_work(discard_ctl))
>  		return;
>  
> -	if (btrfs_discard_bitmaps(cache))
> +	minlen = discard_minlen[discard_index];
> +
> +	if (btrfs_discard_bitmaps(cache)) {
> +		u64 maxlen = 0;
> +
> +		if (discard_index)
> +			maxlen = discard_minlen[discard_index - 1];
> +
>  		btrfs_trim_block_group_bitmaps(cache, &trimmed,
>  					       cache->discard_cursor,
>  					       btrfs_block_group_end(cache),
> -					       0, true);
> -	else
> +					       minlen, maxlen, true);
> +	} else {
>  		btrfs_trim_block_group(cache, &trimmed, cache->discard_cursor,
> -				       btrfs_block_group_end(cache), 0, true);
> +				       btrfs_block_group_end(cache),
> +				       minlen, true);
> +	}
>  
>  	discard_ctl->prev_discard = trimmed;
>  
> @@ -231,6 +277,8 @@ static void btrfs_discard_workfn(struct work_struct *work)
>  				 cache->key.offset)
>  				btrfs_add_to_discard_free_list(discard_ctl,
>  							       cache);
> +			else
> +				btrfs_update_discard_index(discard_ctl, cache);
>  		} else {
>  			cache->discard_cursor = cache->key.objectid;
>  			cache->discard_flags |= BTRFS_DISCARD_BITMAPS;
> diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
> index 898dd92dbf8f..1daa8da4a1b5 100644
> --- a/fs/btrfs/discard.h
> +++ b/fs/btrfs/discard.h
> @@ -18,6 +18,8 @@
>  
>  /* discard size limits */
>  #define BTRFS_DISCARD_MAX_SIZE		(SZ_64M)
> +#define BTRFS_DISCARD_MAX_FILTER	(SZ_1M)
> +#define BTRFS_DISCARD_MIN_FILTER	(SZ_32K)
>  
>  /* discard flags */
>  #define BTRFS_DISCARD_RESET_CURSOR	(1UL << 0)
> @@ -39,6 +41,8 @@ void btrfs_add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
>  			       struct btrfs_block_group_cache *cache);
>  void btrfs_add_to_discard_free_list(struct btrfs_discard_ctl *discard_ctl,
>  				    struct btrfs_block_group_cache *cache);
> +void btrfs_discard_check_filter(struct btrfs_block_group_cache *cache,
> +				u64 bytes);
>  void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
>  
>  void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index ce33803a45b2..ed35dc090df6 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -2471,6 +2471,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
>  	if (ret)
>  		kmem_cache_free(btrfs_free_space_cachep, info);
>  out:
> +	btrfs_discard_check_filter(cache, bytes);

So we're only accounting the new space?  What if we merge with a larger area
here?  We should probably make our decision based on the actual trimmable area.

>  	btrfs_discard_update_discardable(cache, ctl);
>  	spin_unlock(&ctl->tree_lock);
>  
> @@ -3409,7 +3410,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
>  				goto next;
>  			}
>  			unlink_free_space(ctl, entry);
> -			if (bytes > BTRFS_DISCARD_MAX_SIZE) {
> +			/*
> +			 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
> +			 * If X < BTRFS_DISCARD_MIN_FILTER, we won't trim X when
> +			 * we come back around.  So trim it now.
> +			 */
> +			if (bytes > (BTRFS_DISCARD_MAX_SIZE +
> +				     BTRFS_DISCARD_MIN_FILTER)) {
>  				bytes = extent_bytes = BTRFS_DISCARD_MAX_SIZE;
>  				entry->offset += BTRFS_DISCARD_MAX_SIZE;
>  				entry->bytes -= BTRFS_DISCARD_MAX_SIZE;
> @@ -3510,7 +3517,7 @@ static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
>  
>  static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
>  			u64 *total_trimmed, u64 start, u64 end, u64 minlen,
> -			bool async)
> +			u64 maxlen, bool async)
>  {
>  	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
>  	struct btrfs_free_space *entry;
> @@ -3535,7 +3542,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
>  		}
>  
>  		entry = tree_search_offset(ctl, offset, 1, 0);
> -		if (!entry || (async && start == offset &&
> +		if (!entry || (async && minlen && start == offset &&
>  			       btrfs_free_space_trimmed(entry))) {

Huh?  Why do we care if minlen is set if our entry is already trimmed?  If we're
already trimmed we should just skip it even with minlen set, right?  Thanks,

Josef

  reply	other threads:[~2019-10-10 16:51 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-07 20:17 [RFC PATCH 00/19] btrfs: async discard support Dennis Zhou
2019-10-07 20:17 ` [PATCH 01/19] bitmap: genericize percpu bitmap region iterators Dennis Zhou
2019-10-07 20:26   ` Josef Bacik
2019-10-07 22:24     ` Dennis Zhou
2019-10-15 12:11       ` David Sterba
2019-10-15 18:35         ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 02/19] btrfs: rename DISCARD opt to DISCARD_SYNC Dennis Zhou
2019-10-07 20:27   ` Josef Bacik
2019-10-08 11:12   ` Johannes Thumshirn
2019-10-11  9:19   ` Nikolay Borisov
2019-10-07 20:17 ` [PATCH 03/19] btrfs: keep track of which extents have been discarded Dennis Zhou
2019-10-07 20:37   ` Josef Bacik
2019-10-07 22:38     ` Dennis Zhou
2019-10-10 13:40       ` Josef Bacik
2019-10-11 16:15         ` Dennis Zhou
2019-10-08 12:46   ` Nikolay Borisov
2019-10-11 16:08     ` Dennis Zhou
2019-10-15 12:17   ` David Sterba
2019-10-15 19:58     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 04/19] btrfs: keep track of cleanliness of the bitmap Dennis Zhou
2019-10-10 14:16   ` Josef Bacik
2019-10-11 16:17     ` Dennis Zhou
2019-10-15 12:23   ` David Sterba
2019-10-07 20:17 ` [PATCH 05/19] btrfs: add the beginning of async discard, discard workqueue Dennis Zhou
2019-10-10 14:38   ` Josef Bacik
2019-10-15 12:49   ` David Sterba
2019-10-15 19:57     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 06/19] btrfs: handle empty block_group removal Dennis Zhou
2019-10-10 15:00   ` Josef Bacik
2019-10-11 16:52     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 07/19] btrfs: discard one region at a time in async discard Dennis Zhou
2019-10-10 15:22   ` Josef Bacik
2019-10-14 19:42     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 08/19] btrfs: track discardable extents for asnyc discard Dennis Zhou
2019-10-10 15:36   ` Josef Bacik
2019-10-14 19:50     ` Dennis Zhou
2019-10-15 13:12   ` David Sterba
2019-10-15 18:41     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 09/19] btrfs: keep track of discardable_bytes Dennis Zhou
2019-10-10 15:38   ` Josef Bacik
2019-10-07 20:17 ` [PATCH 10/19] btrfs: calculate discard delay based on number of extents Dennis Zhou
2019-10-10 15:41   ` Josef Bacik
2019-10-11 18:07     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 11/19] btrfs: add bps discard rate limit Dennis Zhou
2019-10-10 15:47   ` Josef Bacik
2019-10-14 19:56     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 12/19] btrfs: limit max discard size for async discard Dennis Zhou
2019-10-10 16:16   ` Josef Bacik
2019-10-14 19:57     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 13/19] btrfs: have multiple discard lists Dennis Zhou
2019-10-10 16:51   ` Josef Bacik [this message]
2019-10-14 20:04     ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 14/19] btrfs: only keep track of data extents for async discard Dennis Zhou
2019-10-10 16:53   ` Josef Bacik
2019-10-07 20:17 ` [PATCH 15/19] btrfs: load block_groups into discard_list on mount Dennis Zhou
2019-10-10 17:11   ` Josef Bacik
2019-10-14 20:17     ` Dennis Zhou
2019-10-14 23:38       ` David Sterba
2019-10-15 15:42         ` Dennis Zhou
2019-10-07 20:17 ` [PATCH 16/19] btrfs: keep track of discard reuse stats Dennis Zhou
2019-10-10 17:13   ` Josef Bacik
2019-10-07 20:17 ` [PATCH 17/19] btrfs: add async discard header Dennis Zhou
2019-10-10 17:13   ` Josef Bacik
2019-10-07 20:17 ` [PATCH 18/19] btrfs: increase the metadata allowance for the free_space_cache Dennis Zhou
2019-10-10 17:16   ` Josef Bacik
2019-10-07 20:17 ` [PATCH 19/19] btrfs: make smaller extents more likely to go into bitmaps Dennis Zhou
2019-10-10 17:17   ` Josef Bacik
2019-10-11  7:49 ` [RFC PATCH 00/19] btrfs: async discard support Nikolay Borisov
2019-10-14 21:05   ` Dennis Zhou
2019-10-15 12:08 ` David Sterba
2019-10-15 15:41   ` Dennis Zhou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191010165100.nz7tfn7kpclp6dkl@macbook-pro-91.dhcp.thefacebook.com \
    --to=josef@toxicpanda.com \
    --cc=clm@fb.com \
    --cc=dennis@kernel.org \
    --cc=dsterba@suse.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=osandov@osandov.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).