Linux-ext4 Archive on lore.kernel.org
 help / color / Atom feed
From: Andreas Dilger <adilger@dilger.ca>
To: Theodore Ts'o <tytso@mit.edu>
Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>
Subject: Re: [PATCH 4/4] ext4: add prefetch_block_bitmaps mount options
Date: Sat, 1 Aug 2020 02:57:57 -0600
Message-ID: <F6E21B09-FA23-407A-9FAB-25570E1D89CE@dilger.ca> (raw)
In-Reply-To: <20200731190805.181253-5-tytso@mit.edu>


[-- Attachment #1: Type: text/plain, Size: 10664 bytes --]

On Jul 31, 2020, at 1:08 PM, Theodore Ts'o <tytso@mit.edu> wrote:
> 
> For file systems where we can afford to keep the buddy bitmaps cached,
> we can speed up initial writes to large file systems by starting to
> load the block allocation bitmaps as soon as the file system is
> mounted.  This won't work well for _super_ large file systems, or
> memory constrained systems, so we only enable this when it is
> requested via a mount option.
> 
> Addresses-Google-Bug: 159488342
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Andreas Dilger <adilger@dilger.ca>

> ---
> fs/ext4/ext4.h              | 15 +++++++++-
> fs/ext4/mballoc.c           | 10 +++----
> fs/ext4/super.c             | 59 +++++++++++++++++++++++++++----------
> include/trace/events/ext4.h | 44 +++++++++++++++++++++++++++
> 4 files changed, 105 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7451662e092a..4df6f429de1a 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1172,6 +1172,7 @@ struct ext4_inode_info {
> #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
> #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
> #define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
> +#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
> #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
> #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
> #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
> @@ -2315,9 +2316,15 @@ struct ext4_lazy_init {
> 	struct mutex		li_list_mtx;
> };
> 
> +enum ext4_li_mode {
> +	EXT4_LI_MODE_PREFETCH_BBITMAP,
> +	EXT4_LI_MODE_ITABLE,
> +};
> +
> struct ext4_li_request {
> 	struct super_block	*lr_super;
> -	struct ext4_sb_info	*lr_sbi;
> +	enum ext4_li_mode	lr_mode;
> +	ext4_group_t		lr_first_not_zeroed;
> 	ext4_group_t		lr_next_group;
> 	struct list_head	lr_request;
> 	unsigned long		lr_next_sched;
> @@ -2657,6 +2664,12 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
> extern void ext4_discard_preallocations(struct inode *);
> extern int __init ext4_init_mballoc(void);
> extern void ext4_exit_mballoc(void);
> +extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
> +				     ext4_group_t group,
> +				     unsigned int nr, int *cnt);
> +extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +				  unsigned int nr);
> +
> extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
> 			     struct buffer_head *bh, ext4_fsblk_t block,
> 			     unsigned long count, int flags);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index b1ef35a9e9f1..47de61e44db2 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2233,9 +2233,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
>  * Start prefetching @nr block bitmaps starting at @group.
>  * Return the next group which needs to be prefetched.
>  */
> -static ext4_group_t
> -ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> -		 unsigned int nr, int *cnt)
> +ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> +			      unsigned int nr, int *cnt)
> {
> 	ext4_group_t ngroups = ext4_get_groups_count(sb);
> 	struct buffer_head *bh;
> @@ -2285,9 +2284,8 @@ ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
>  * waiting for the block allocation bitmap read to finish when
>  * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
>  */
> -static void
> -ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> -		      unsigned int nr)
> +void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +			   unsigned int nr)
> {
> 	while (nr-- > 0) {
> 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..51e91a220ea9 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1521,6 +1521,7 @@ enum {
> 	Opt_dioread_nolock, Opt_dioread_lock,
> 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
> 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
> +	Opt_prefetch_block_bitmaps,
> };
> 
> static const match_table_t tokens = {
> @@ -1612,6 +1613,7 @@ static const match_table_t tokens = {
> 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> 	{Opt_nombcache, "nombcache"},
> 	{Opt_nombcache, "no_mbcache"},	/* for backward compatibility */
> +	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
> 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
> 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
> 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
> @@ -1829,6 +1831,8 @@ static const struct mount_opts {
> 	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
> 	{Opt_test_dummy_encryption, 0, MOPT_STRING},
> 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
> +	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
> +	 MOPT_SET},
> 	{Opt_err, 0, 0}
> };
> 
> @@ -3201,15 +3205,34 @@ static void print_daily_error_info(struct timer_list *t)
> static int ext4_run_li_request(struct ext4_li_request *elr)
> {
> 	struct ext4_group_desc *gdp = NULL;
> -	ext4_group_t group, ngroups;
> -	struct super_block *sb;
> +	struct super_block *sb = elr->lr_super;
> +	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
> +	ext4_group_t group = elr->lr_next_group;
> 	unsigned long timeout = 0;
> +	unsigned int prefetch_ios = 0;
> 	int ret = 0;
> 
> -	sb = elr->lr_super;
> -	ngroups = EXT4_SB(sb)->s_groups_count;
> +	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
> +		elr->lr_next_group = ext4_mb_prefetch(sb, group,
> +				EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
> +		if (prefetch_ios)
> +			ext4_mb_prefetch_fini(sb, elr->lr_next_group,
> +					      prefetch_ios);
> +		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
> +					    prefetch_ios);
> +		if (group >= elr->lr_next_group) {
> +			ret = 1;
> +			if (elr->lr_first_not_zeroed != ngroups &&
> +			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
> +				elr->lr_next_group = elr->lr_first_not_zeroed;
> +				elr->lr_mode = EXT4_LI_MODE_ITABLE;
> +				ret = 0;
> +			}
> +		}
> +		return ret;
> +	}
> 
> -	for (group = elr->lr_next_group; group < ngroups; group++) {
> +	for (; group < ngroups; group++) {
> 		gdp = ext4_get_group_desc(sb, group, NULL);
> 		if (!gdp) {
> 			ret = 1;
> @@ -3227,9 +3250,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
> 		timeout = jiffies;
> 		ret = ext4_init_inode_table(sb, group,
> 					    elr->lr_timeout ? 0 : 1);
> +		trace_ext4_lazy_itable_init(sb, group);
> 		if (elr->lr_timeout == 0) {
> 			timeout = (jiffies - timeout) *
> -				  elr->lr_sbi->s_li_wait_mult;
> +				EXT4_SB(elr->lr_super)->s_li_wait_mult;
> 			elr->lr_timeout = timeout;
> 		}
> 		elr->lr_next_sched = jiffies + elr->lr_timeout;
> @@ -3244,15 +3268,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
>  */
> static void ext4_remove_li_request(struct ext4_li_request *elr)
> {
> -	struct ext4_sb_info *sbi;
> -
> 	if (!elr)
> 		return;
> 
> -	sbi = elr->lr_sbi;
> -
> 	list_del(&elr->lr_request);
> -	sbi->s_li_request = NULL;
> +	EXT4_SB(elr->lr_super)->s_li_request = NULL;
> 	kfree(elr);
> }
> 
> @@ -3461,7 +3481,6 @@ static int ext4_li_info_new(void)
> static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> 					    ext4_group_t start)
> {
> -	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr;
> 
> 	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
> @@ -3469,8 +3488,13 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> 		return NULL;
> 
> 	elr->lr_super = sb;
> -	elr->lr_sbi = sbi;
> -	elr->lr_next_group = start;
> +	elr->lr_first_not_zeroed = start;
> +	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
> +		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +	else {
> +		elr->lr_mode = EXT4_LI_MODE_ITABLE;
> +		elr->lr_next_group = start;
> +	}
> 
> 	/*
> 	 * Randomize first schedule time of the request to
> @@ -3488,6 +3512,7 @@ int ext4_register_li_request(struct super_block *sb,
> 	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr = NULL;
> 	ext4_group_t ngroups = sbi->s_groups_count;
> +	enum ext4_li_mode lr_mode = EXT4_LI_MODE_ITABLE;
> 	int ret = 0;
> 
> 	mutex_lock(&ext4_li_mtx);
> @@ -3500,8 +3525,10 @@ int ext4_register_li_request(struct super_block *sb,
> 		goto out;
> 	}
> 
> -	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
> -	    !test_opt(sb, INIT_INODE_TABLE))
> +	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
> +		lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +	} else if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
> +		   !test_opt(sb, INIT_INODE_TABLE))
> 		goto out;
> 
> 	elr = ext4_li_request_new(sb, first_not_zeroed);
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index cbcd2e1a608d..8008d2e116b9 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2742,6 +2742,50 @@ TRACE_EVENT(ext4_error,
> 		  __entry->function, __entry->line)
> );
> 
> +TRACE_EVENT(ext4_prefetch_bitmaps,
> +	    TP_PROTO(struct super_block *sb, ext4_group_t group,
> +		     ext4_group_t next, unsigned int prefetch_ios),
> +
> +	TP_ARGS(sb, group, next, prefetch_ios),
> +
> +	TP_STRUCT__entry(
> +		__field(	dev_t,	dev			)
> +		__field(	__u32,	group			)
> +		__field(	__u32,	next			)
> +		__field(	__u32,	ios			)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->dev	= sb->s_dev;
> +		__entry->group	= group;
> +		__entry->next	= next;
> +		__entry->ios	= prefetch_ios;
> +	),
> +
> +	TP_printk("dev %d,%d group %u next %u ios %u",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> +		  __entry->group, __entry->next, __entry->ios)
> +);
> +
> +TRACE_EVENT(ext4_lazy_itable_init,
> +	    TP_PROTO(struct super_block *sb, ext4_group_t group),
> +
> +	TP_ARGS(sb, group),
> +
> +	TP_STRUCT__entry(
> +		__field(	dev_t,	dev			)
> +		__field(	__u32,	group			)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->dev	= sb->s_dev;
> +		__entry->group	= group;
> +	),
> +
> +	TP_printk("dev %d,%d group %u",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
> +);
> +
> #endif /* _TRACE_EXT4_H */
> 
> /* This part must be outside protection */
> --
> 2.24.1
> 


Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 873 bytes --]

  reply index

Thread overview: 10+ messages / expand [flat|nested]  mbox.gz  Atom feed  top
2020-07-31 19:08 [PATCH 0/4] V2- ext4 block bitmap prefetch patches Theodore Ts'o
2020-07-31 19:08 ` [PATCH 1/4] ext4: add prefetching for block allocation bitmaps Theodore Ts'o
2020-07-31 19:08 ` [PATCH 2/4] ext4: skip non-loaded groups at cr=0/1 when scanning for good groups Theodore Ts'o
2020-08-01  8:49   ` Andreas Dilger
2020-07-31 19:08 ` [PATCH 3/4] ext4: indicate that a block bitmap read is prefetched via a tracepoint Theodore Ts'o
2020-07-31 19:08 ` [PATCH 4/4] ext4: add prefetch_block_bitmaps mount options Theodore Ts'o
2020-08-01  8:57   ` Andreas Dilger [this message]
  -- strict thread matches above, loose matches on Subject: below --
2020-07-17 15:53 [PATCH 0/4] ext4 block bitmap prefetching Theodore Ts'o
2020-07-17 15:53 ` [PATCH 4/4] ext4: add prefetch_block_bitmaps mount options Theodore Ts'o
2020-07-21  8:20   ` Andreas Dilger
2020-07-24 13:58   ` Благодаренко Артём

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=F6E21B09-FA23-407A-9FAB-25570E1D89CE@dilger.ca \
    --to=adilger@dilger.ca \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-ext4 Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-ext4/0 linux-ext4/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-ext4 linux-ext4/ https://lore.kernel.org/linux-ext4 \
		linux-ext4@vger.kernel.org
	public-inbox-index linux-ext4

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-ext4


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git