[2/2] f2fs: tune discard speed with storage usage rate
diff mbox series

Message ID 20180810100806.9298-2-yuchao0@huawei.com
State New
Headers show
Series
  • [1/2] f2fs: set 4KB discard granularity by default
Related show

Commit Message

Chao Yu Aug. 10, 2018, 10:08 a.m. UTC
Previously, discard speed was fixed mostly, and in high usage rate
device, we will speed up issuing discard, but it doesn't make sense
that in a non-full filesystem, we still issue discard with slow speed.

Anyway, it comes out undiscarded block makes FTL GC be lower efficient
and causing high lifetime overhead.

Let's tune discard speed as below:

a. adjust default issue interval:
		original	after
min_interval:	50ms		100ms
mid_interval:	500ms		1000ms
max_interval:	60000ms		10000ms

b. if last time we stop issuing discard due to IO interruption of user,
let's reset all {min,mid,max}_interval to default one.

c. tune {min,mid,max}_interval with below calculation method:

base_interval = default_interval / 10;
total_interval = default_interval - base_interval;
interval = base_interval + total_interval * (100 - dev_util) / 100;

For example:
min_interval (:100ms)
dev_util (%)	interval (ms)
0		100
10		91
20		82
30		73
...
80		28
90		19
100		10

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/f2fs.h    | 11 ++++----
 fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
 fs/f2fs/segment.h |  9 +++++++
 fs/f2fs/super.c   |  2 +-
 4 files changed, 67 insertions(+), 19 deletions(-)

Comments

Jaegeuk Kim Aug. 14, 2018, 4:19 a.m. UTC | #1
On 08/10, Chao Yu wrote:
> Previously, discard speed was fixed mostly, and in high usage rate
> device, we will speed up issuing discard, but it doesn't make sense
> that in a non-full filesystem, we still issue discard with slow speed.

Could you please elaborate the problem in more detail? The speed depends
on how many candidates?

Thanks,

> 
> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> and causing high lifetime overhead.
> 
> Let's tune discard speed as below:
> 
> a. adjust default issue interval:
> 		original	after
> min_interval:	50ms		100ms
> mid_interval:	500ms		1000ms
> max_interval:	60000ms		10000ms
> 
> b. if last time we stop issuing discard due to IO interruption of user,
> let's reset all {min,mid,max}_interval to default one.
> 
> c. tune {min,mid,max}_interval with below calculation method:
> 
> base_interval = default_interval / 10;
> total_interval = default_interval - base_interval;
> interval = base_interval + total_interval * (100 - dev_util) / 100;
> 
> For example:
> min_interval (:100ms)
> dev_util (%)	interval (ms)
> 0		100
> 10		91
> 20		82
> 30		73
> ...
> 80		28
> 90		19
> 100		10
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/f2fs/f2fs.h    | 11 ++++----
>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
>  fs/f2fs/segment.h |  9 +++++++
>  fs/f2fs/super.c   |  2 +-
>  4 files changed, 67 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 273ffdaf4891..a1dd2e1c3cb9 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -185,10 +185,9 @@ enum {
>  
>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>  
> @@ -248,7 +247,8 @@ struct discard_entry {
>  };
>  
>  /* default discard granularity of inner discard thread, unit: block count */
> -#define DEFAULT_DISCARD_GRANULARITY		1
> +#define MID_DISCARD_GRANULARITY			16
> +#define MIN_DISCARD_GRANULARITY			1
>  
>  /* max discard pend list number */
>  #define MAX_PLIST_NUM		512
> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
>  	struct rb_root root;			/* root of discard rb-tree */
>  	bool rbtree_check;			/* config for consistence check */
> +	bool io_interrupted;			/* last state of io interrupted */
>  };
>  
>  /* for the list of fsync inodes, used only during recovery */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8b52e8dfb12f..9564aaf1f27b 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>  #endif
>  }
>  
> +static void __adjust_discard_speed(unsigned int *interval,
> +				unsigned int def_interval, int dev_util)
> +{
> +	unsigned int base_interval, total_interval;
> +
> +	base_interval = def_interval / 10;
> +	total_interval = def_interval - base_interval;
> +
> +	/*
> +	 * if def_interval = 100, adjusted interval should be in range of
> +	 * [10, 100].
> +	 */
> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
> +}
> +
> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> +					struct discard_policy *dpolicy)
> +{
> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> +	int dev_util;
> +
> +	if (dcc->io_interrupted) {
> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> +		return;
> +	}
> +
> +	dev_util = dev_utilization(sbi);
> +
> +	__adjust_discard_speed(&dpolicy->min_interval,
> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> +	__adjust_discard_speed(&dpolicy->mid_interval,
> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> +	__adjust_discard_speed(&dpolicy->max_interval,
> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
> +}
> +
>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>  				struct discard_policy *dpolicy,
>  				int discard_type, unsigned int granularity)
> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
>  
>  	if (discard_type == DPOLICY_BG) {
> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>  		dpolicy->io_aware = true;
>  		dpolicy->sync = false;
>  		dpolicy->ordered = true;
> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> -			dpolicy->granularity = 1;
> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> -		}
> +		__tune_discard_policy(sbi, dpolicy);
>  	} else if (discard_type == DPOLICY_FORCE) {
> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>  		dpolicy->io_aware = false;
>  	} else if (discard_type == DPOLICY_FSTRIM) {
>  		dpolicy->io_aware = false;
> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>  	if (!issued && io_interrupted)
>  		issued = -1;
>  
> +	dcc->io_interrupted = io_interrupted;
> +
>  	return issued;
>  }
>  
> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>  		if (i + 1 < dpolicy->granularity)
>  			break;
>  
> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
>  
>  		pend_list = &dcc->pend_list[i];
> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>  	if (!issued && io_interrupted)
>  		issued = -1;
>  
> +	dcc->io_interrupted = io_interrupted;
> +
>  	return issued;
>  }
>  
> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
>  	struct f2fs_sb_info *sbi = data;
>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
> -	struct discard_policy dpolicy;
> +	struct discard_policy dpolicy = {
> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
> +	};
>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>  	int issued;
>  
> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>  	if (!dcc)
>  		return -ENOMEM;
>  
> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>  	INIT_LIST_HEAD(&dcc->entry_list);
>  	for (i = 0; i < MAX_PLIST_NUM; i++)
>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>  	dcc->next_pos = 0;
>  	dcc->root = RB_ROOT;
>  	dcc->rbtree_check = false;
> +	dcc->io_interrupted = false;
>  
>  	init_waitqueue_head(&dcc->discard_wait_queue);
>  	SM_I(sbi)->dcc_info = dcc;
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 422b0ceb1eaa..63b4da72cd34 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
>  					sbi->user_block_count);
>  }
>  
> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
> +{
> +	unsigned int dev_blks;
> +
> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
> +	return div_u64((u64)dev_blks * 100,
> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
> +}
> +
>  /*
>   * Sometimes f2fs may be better to drop out-of-place update policy.
>   * And, users can control the policy through sysfs entries.
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index b055f2ea77c5..55ed76daad23 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
>  	/* adjust parameters according to the volume size */
>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
> -		sm_i->dcc_info->discard_granularity = 1;
> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>  	}
>  
> -- 
> 2.18.0.rc1
Chao Yu Aug. 14, 2018, 7:41 a.m. UTC | #2
On 2018/8/14 12:19, Jaegeuk Kim wrote:
> On 08/10, Chao Yu wrote:
>> Previously, discard speed was fixed mostly, and in high usage rate
>> device, we will speed up issuing discard, but it doesn't make sense
>> that in a non-full filesystem, we still issue discard with slow speed.
> 
> Could you please elaborate the problem in more detail? The speed depends
> on how many candidates?

undiscard blocks are all 4k granularity.
a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%


1. for case c), we need to speed up issuing discard based on utilization of
"filesystem + undiscard" instead of just utilization of filesystem.

-		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
-			dpolicy->granularity = 1;
-			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		}

2. If free space in storage touches therein threshold, performance will be very
sensitive. In low-end storage, with high usage in space, even free space is
reduced by 1%, performance will decrease a lot.

IMO, in above cases, we'd better to issue discard with high speed for c), middle
speed for b), and low speed for a).

How do you think?

Thanks,

> 
> Thanks,
> 
>>
>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
>> and causing high lifetime overhead.
>>
>> Let's tune discard speed as below:
>>
>> a. adjust default issue interval:
>> 		original	after
>> min_interval:	50ms		100ms
>> mid_interval:	500ms		1000ms
>> max_interval:	60000ms		10000ms
>>
>> b. if last time we stop issuing discard due to IO interruption of user,
>> let's reset all {min,mid,max}_interval to default one.
>>
>> c. tune {min,mid,max}_interval with below calculation method:
>>
>> base_interval = default_interval / 10;
>> total_interval = default_interval - base_interval;
>> interval = base_interval + total_interval * (100 - dev_util) / 100;
>>
>> For example:
>> min_interval (:100ms)
>> dev_util (%)	interval (ms)
>> 0		100
>> 10		91
>> 20		82
>> 30		73
>> ...
>> 80		28
>> 90		19
>> 100		10
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>>  fs/f2fs/f2fs.h    | 11 ++++----
>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
>>  fs/f2fs/segment.h |  9 +++++++
>>  fs/f2fs/super.c   |  2 +-
>>  4 files changed, 67 insertions(+), 19 deletions(-)
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 273ffdaf4891..a1dd2e1c3cb9 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -185,10 +185,9 @@ enum {
>>  
>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>>  
>> @@ -248,7 +247,8 @@ struct discard_entry {
>>  };
>>  
>>  /* default discard granularity of inner discard thread, unit: block count */
>> -#define DEFAULT_DISCARD_GRANULARITY		1
>> +#define MID_DISCARD_GRANULARITY			16
>> +#define MIN_DISCARD_GRANULARITY			1
>>  
>>  /* max discard pend list number */
>>  #define MAX_PLIST_NUM		512
>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
>>  	struct rb_root root;			/* root of discard rb-tree */
>>  	bool rbtree_check;			/* config for consistence check */
>> +	bool io_interrupted;			/* last state of io interrupted */
>>  };
>>  
>>  /* for the list of fsync inodes, used only during recovery */
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 8b52e8dfb12f..9564aaf1f27b 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>>  #endif
>>  }
>>  
>> +static void __adjust_discard_speed(unsigned int *interval,
>> +				unsigned int def_interval, int dev_util)
>> +{
>> +	unsigned int base_interval, total_interval;
>> +
>> +	base_interval = def_interval / 10;
>> +	total_interval = def_interval - base_interval;
>> +
>> +	/*
>> +	 * if def_interval = 100, adjusted interval should be in range of
>> +	 * [10, 100].
>> +	 */
>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
>> +}
>> +
>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
>> +					struct discard_policy *dpolicy)
>> +{
>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>> +	int dev_util;
>> +
>> +	if (dcc->io_interrupted) {
>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>> +		return;
>> +	}
>> +
>> +	dev_util = dev_utilization(sbi);
>> +
>> +	__adjust_discard_speed(&dpolicy->min_interval,
>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
>> +	__adjust_discard_speed(&dpolicy->mid_interval,
>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
>> +	__adjust_discard_speed(&dpolicy->max_interval,
>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
>> +}
>> +
>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>  				struct discard_policy *dpolicy,
>>  				int discard_type, unsigned int granularity)
>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>  
>>  	if (discard_type == DPOLICY_BG) {
>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>  		dpolicy->io_aware = true;
>>  		dpolicy->sync = false;
>>  		dpolicy->ordered = true;
>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>> -			dpolicy->granularity = 1;
>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>> -		}
>> +		__tune_discard_policy(sbi, dpolicy);
>>  	} else if (discard_type == DPOLICY_FORCE) {
>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>  		dpolicy->io_aware = false;
>>  	} else if (discard_type == DPOLICY_FSTRIM) {
>>  		dpolicy->io_aware = false;
>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>>  	if (!issued && io_interrupted)
>>  		issued = -1;
>>  
>> +	dcc->io_interrupted = io_interrupted;
>> +
>>  	return issued;
>>  }
>>  
>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>  		if (i + 1 < dpolicy->granularity)
>>  			break;
>>  
>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
>>  
>>  		pend_list = &dcc->pend_list[i];
>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>  	if (!issued && io_interrupted)
>>  		issued = -1;
>>  
>> +	dcc->io_interrupted = io_interrupted;
>> +
>>  	return issued;
>>  }
>>  
>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
>>  	struct f2fs_sb_info *sbi = data;
>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
>> -	struct discard_policy dpolicy;
>> +	struct discard_policy dpolicy = {
>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
>> +	};
>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>>  	int issued;
>>  
>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>  	if (!dcc)
>>  		return -ENOMEM;
>>  
>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>>  	INIT_LIST_HEAD(&dcc->entry_list);
>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>  	dcc->next_pos = 0;
>>  	dcc->root = RB_ROOT;
>>  	dcc->rbtree_check = false;
>> +	dcc->io_interrupted = false;
>>  
>>  	init_waitqueue_head(&dcc->discard_wait_queue);
>>  	SM_I(sbi)->dcc_info = dcc;
>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>> index 422b0ceb1eaa..63b4da72cd34 100644
>> --- a/fs/f2fs/segment.h
>> +++ b/fs/f2fs/segment.h
>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
>>  					sbi->user_block_count);
>>  }
>>  
>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
>> +{
>> +	unsigned int dev_blks;
>> +
>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
>> +	return div_u64((u64)dev_blks * 100,
>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
>> +}
>> +
>>  /*
>>   * Sometimes f2fs may be better to drop out-of-place update policy.
>>   * And, users can control the policy through sysfs entries.
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index b055f2ea77c5..55ed76daad23 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
>>  	/* adjust parameters according to the volume size */
>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
>> -		sm_i->dcc_info->discard_granularity = 1;
>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>>  	}
>>  
>> -- 
>> 2.18.0.rc1
> 
> .
>
Jaegeuk Kim Aug. 14, 2018, 5:23 p.m. UTC | #3
On 08/14, Chao Yu wrote:
> On 2018/8/14 12:19, Jaegeuk Kim wrote:
> > On 08/10, Chao Yu wrote:
> >> Previously, discard speed was fixed mostly, and in high usage rate
> >> device, we will speed up issuing discard, but it doesn't make sense
> >> that in a non-full filesystem, we still issue discard with slow speed.
> > 
> > Could you please elaborate the problem in more detail? The speed depends
> > on how many candidates?
> 
> undiscard blocks are all 4k granularity.
> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
> 
> 
> 1. for case c), we need to speed up issuing discard based on utilization of
> "filesystem + undiscard" instead of just utilization of filesystem.
> 
> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> -			dpolicy->granularity = 1;
> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> -		}
> 
> 2. If free space in storage touches therein threshold, performance will be very
> sensitive. In low-end storage, with high usage in space, even free space is
> reduced by 1%, performance will decrease a lot.

So, we may need to distinguish low-end vs. high-end storage. In high-end case,
it'd be better to avoid IO contention, while low-end device wants to get more
discard commands as much as possible. So, how about adding an option for this
as a tunable point?

> 
> IMO, in above cases, we'd better to issue discard with high speed for c), middle
> speed for b), and low speed for a).
> 
> How do you think?
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> >> and causing high lifetime overhead.
> >>
> >> Let's tune discard speed as below:
> >>
> >> a. adjust default issue interval:
> >> 		original	after
> >> min_interval:	50ms		100ms
> >> mid_interval:	500ms		1000ms
> >> max_interval:	60000ms		10000ms
> >>
> >> b. if last time we stop issuing discard due to IO interruption of user,
> >> let's reset all {min,mid,max}_interval to default one.
> >>
> >> c. tune {min,mid,max}_interval with below calculation method:
> >>
> >> base_interval = default_interval / 10;
> >> total_interval = default_interval - base_interval;
> >> interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>
> >> For example:
> >> min_interval (:100ms)
> >> dev_util (%)	interval (ms)
> >> 0		100
> >> 10		91
> >> 20		82
> >> 30		73
> >> ...
> >> 80		28
> >> 90		19
> >> 100		10
> >>
> >> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >> ---
> >>  fs/f2fs/f2fs.h    | 11 ++++----
> >>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
> >>  fs/f2fs/segment.h |  9 +++++++
> >>  fs/f2fs/super.c   |  2 +-
> >>  4 files changed, 67 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 273ffdaf4891..a1dd2e1c3cb9 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -185,10 +185,9 @@ enum {
> >>  
> >>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
> >>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> >> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
> >> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
> >> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
> >> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
> >> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
> >> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
> >> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
> >>  #define DEF_CP_INTERVAL			60	/* 60 secs */
> >>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
> >>  
> >> @@ -248,7 +247,8 @@ struct discard_entry {
> >>  };
> >>  
> >>  /* default discard granularity of inner discard thread, unit: block count */
> >> -#define DEFAULT_DISCARD_GRANULARITY		1
> >> +#define MID_DISCARD_GRANULARITY			16
> >> +#define MIN_DISCARD_GRANULARITY			1
> >>  
> >>  /* max discard pend list number */
> >>  #define MAX_PLIST_NUM		512
> >> @@ -330,6 +330,7 @@ struct discard_cmd_control {
> >>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
> >>  	struct rb_root root;			/* root of discard rb-tree */
> >>  	bool rbtree_check;			/* config for consistence check */
> >> +	bool io_interrupted;			/* last state of io interrupted */
> >>  };
> >>  
> >>  /* for the list of fsync inodes, used only during recovery */
> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >> index 8b52e8dfb12f..9564aaf1f27b 100644
> >> --- a/fs/f2fs/segment.c
> >> +++ b/fs/f2fs/segment.c
> >> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
> >>  #endif
> >>  }
> >>  
> >> +static void __adjust_discard_speed(unsigned int *interval,
> >> +				unsigned int def_interval, int dev_util)
> >> +{
> >> +	unsigned int base_interval, total_interval;
> >> +
> >> +	base_interval = def_interval / 10;
> >> +	total_interval = def_interval - base_interval;
> >> +
> >> +	/*
> >> +	 * if def_interval = 100, adjusted interval should be in range of
> >> +	 * [10, 100].
> >> +	 */
> >> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
> >> +}
> >> +
> >> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> >> +					struct discard_policy *dpolicy)
> >> +{
> >> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >> +	int dev_util;
> >> +
> >> +	if (dcc->io_interrupted) {
> >> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >> +		return;
> >> +	}
> >> +
> >> +	dev_util = dev_utilization(sbi);
> >> +
> >> +	__adjust_discard_speed(&dpolicy->min_interval,
> >> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> >> +	__adjust_discard_speed(&dpolicy->mid_interval,
> >> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> >> +	__adjust_discard_speed(&dpolicy->max_interval,
> >> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
> >> +}
> >> +
> >>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>  				struct discard_policy *dpolicy,
> >>  				int discard_type, unsigned int granularity)
> >> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
> >>  
> >>  	if (discard_type == DPOLICY_BG) {
> >> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>  		dpolicy->io_aware = true;
> >>  		dpolicy->sync = false;
> >>  		dpolicy->ordered = true;
> >> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >> -			dpolicy->granularity = 1;
> >> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >> -		}
> >> +		__tune_discard_policy(sbi, dpolicy);
> >>  	} else if (discard_type == DPOLICY_FORCE) {
> >> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>  		dpolicy->io_aware = false;
> >>  	} else if (discard_type == DPOLICY_FSTRIM) {
> >>  		dpolicy->io_aware = false;
> >> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
> >>  	if (!issued && io_interrupted)
> >>  		issued = -1;
> >>  
> >> +	dcc->io_interrupted = io_interrupted;
> >> +
> >>  	return issued;
> >>  }
> >>  
> >> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>  		if (i + 1 < dpolicy->granularity)
> >>  			break;
> >>  
> >> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
> >> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
> >>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
> >>  
> >>  		pend_list = &dcc->pend_list[i];
> >> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>  	if (!issued && io_interrupted)
> >>  		issued = -1;
> >>  
> >> +	dcc->io_interrupted = io_interrupted;
> >> +
> >>  	return issued;
> >>  }
> >>  
> >> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
> >>  	struct f2fs_sb_info *sbi = data;
> >>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
> >> -	struct discard_policy dpolicy;
> >> +	struct discard_policy dpolicy = {
> >> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
> >> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
> >> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
> >> +	};
> >>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
> >>  	int issued;
> >>  
> >> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>  	if (!dcc)
> >>  		return -ENOMEM;
> >>  
> >> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
> >> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>  	INIT_LIST_HEAD(&dcc->entry_list);
> >>  	for (i = 0; i < MAX_PLIST_NUM; i++)
> >>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
> >> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>  	dcc->next_pos = 0;
> >>  	dcc->root = RB_ROOT;
> >>  	dcc->rbtree_check = false;
> >> +	dcc->io_interrupted = false;
> >>  
> >>  	init_waitqueue_head(&dcc->discard_wait_queue);
> >>  	SM_I(sbi)->dcc_info = dcc;
> >> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> >> index 422b0ceb1eaa..63b4da72cd34 100644
> >> --- a/fs/f2fs/segment.h
> >> +++ b/fs/f2fs/segment.h
> >> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> >>  					sbi->user_block_count);
> >>  }
> >>  
> >> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
> >> +{
> >> +	unsigned int dev_blks;
> >> +
> >> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
> >> +	return div_u64((u64)dev_blks * 100,
> >> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
> >> +}
> >> +
> >>  /*
> >>   * Sometimes f2fs may be better to drop out-of-place update policy.
> >>   * And, users can control the policy through sysfs entries.
> >> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >> index b055f2ea77c5..55ed76daad23 100644
> >> --- a/fs/f2fs/super.c
> >> +++ b/fs/f2fs/super.c
> >> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
> >>  	/* adjust parameters according to the volume size */
> >>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
> >>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
> >> -		sm_i->dcc_info->discard_granularity = 1;
> >> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
> >>  	}
> >>  
> >> -- 
> >> 2.18.0.rc1
> > 
> > .
> >
Chao Yu Aug. 15, 2018, 1:46 a.m. UTC | #4
On 2018/8/15 1:23, Jaegeuk Kim wrote:
> On 08/14, Chao Yu wrote:
>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
>>> On 08/10, Chao Yu wrote:
>>>> Previously, discard speed was fixed mostly, and in high usage rate
>>>> device, we will speed up issuing discard, but it doesn't make sense
>>>> that in a non-full filesystem, we still issue discard with slow speed.
>>>
>>> Could you please elaborate the problem in more detail? The speed depends
>>> on how many candidates?
>>
>> undiscard blocks are all 4k granularity.
>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
>>
>>
>> 1. for case c), we need to speed up issuing discard based on utilization of
>> "filesystem + undiscard" instead of just utilization of filesystem.
>>
>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>> -			dpolicy->granularity = 1;
>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>> -		}
>>
>> 2. If free space in storage touches therein threshold, performance will be very
>> sensitive. In low-end storage, with high usage in space, even free space is
>> reduced by 1%, performance will decrease a lot.
> 
> So, we may need to distinguish low-end vs. high-end storage. In high-end case,
> it'd be better to avoid IO contention, while low-end device wants to get more
> discard commands as much as possible. So, how about adding an option for this
> as a tunable point?

Agreed, how about adding a sysfs entry discard_tunning:
1: enabled, use 4k granularity, self-adapted speed based on real device free space.
0: disabled, use dcc->discard_granularity, fixed speed.

By default: enabled

How do you think?

Thanks,

> 
>>
>> IMO, in above cases, we'd better to issue discard with high speed for c), middle
>> speed for b), and low speed for a).
>>
>> How do you think?
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
>>>> and causing high lifetime overhead.
>>>>
>>>> Let's tune discard speed as below:
>>>>
>>>> a. adjust default issue interval:
>>>> 		original	after
>>>> min_interval:	50ms		100ms
>>>> mid_interval:	500ms		1000ms
>>>> max_interval:	60000ms		10000ms
>>>>
>>>> b. if last time we stop issuing discard due to IO interruption of user,
>>>> let's reset all {min,mid,max}_interval to default one.
>>>>
>>>> c. tune {min,mid,max}_interval with below calculation method:
>>>>
>>>> base_interval = default_interval / 10;
>>>> total_interval = default_interval - base_interval;
>>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>
>>>> For example:
>>>> min_interval (:100ms)
>>>> dev_util (%)	interval (ms)
>>>> 0		100
>>>> 10		91
>>>> 20		82
>>>> 30		73
>>>> ...
>>>> 80		28
>>>> 90		19
>>>> 100		10
>>>>
>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>> ---
>>>>  fs/f2fs/f2fs.h    | 11 ++++----
>>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
>>>>  fs/f2fs/segment.h |  9 +++++++
>>>>  fs/f2fs/super.c   |  2 +-
>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -185,10 +185,9 @@ enum {
>>>>  
>>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
>>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
>>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
>>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
>>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>>>>  
>>>> @@ -248,7 +247,8 @@ struct discard_entry {
>>>>  };
>>>>  
>>>>  /* default discard granularity of inner discard thread, unit: block count */
>>>> -#define DEFAULT_DISCARD_GRANULARITY		1
>>>> +#define MID_DISCARD_GRANULARITY			16
>>>> +#define MIN_DISCARD_GRANULARITY			1
>>>>  
>>>>  /* max discard pend list number */
>>>>  #define MAX_PLIST_NUM		512
>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
>>>>  	struct rb_root root;			/* root of discard rb-tree */
>>>>  	bool rbtree_check;			/* config for consistence check */
>>>> +	bool io_interrupted;			/* last state of io interrupted */
>>>>  };
>>>>  
>>>>  /* for the list of fsync inodes, used only during recovery */
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>>>>  #endif
>>>>  }
>>>>  
>>>> +static void __adjust_discard_speed(unsigned int *interval,
>>>> +				unsigned int def_interval, int dev_util)
>>>> +{
>>>> +	unsigned int base_interval, total_interval;
>>>> +
>>>> +	base_interval = def_interval / 10;
>>>> +	total_interval = def_interval - base_interval;
>>>> +
>>>> +	/*
>>>> +	 * if def_interval = 100, adjusted interval should be in range of
>>>> +	 * [10, 100].
>>>> +	 */
>>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>> +}
>>>> +
>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
>>>> +					struct discard_policy *dpolicy)
>>>> +{
>>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>> +	int dev_util;
>>>> +
>>>> +	if (dcc->io_interrupted) {
>>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	dev_util = dev_utilization(sbi);
>>>> +
>>>> +	__adjust_discard_speed(&dpolicy->min_interval,
>>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
>>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
>>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
>>>> +	__adjust_discard_speed(&dpolicy->max_interval,
>>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
>>>> +}
>>>> +
>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>  				struct discard_policy *dpolicy,
>>>>  				int discard_type, unsigned int granularity)
>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>>>  
>>>>  	if (discard_type == DPOLICY_BG) {
>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>  		dpolicy->io_aware = true;
>>>>  		dpolicy->sync = false;
>>>>  		dpolicy->ordered = true;
>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>> -			dpolicy->granularity = 1;
>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>> -		}
>>>> +		__tune_discard_policy(sbi, dpolicy);
>>>>  	} else if (discard_type == DPOLICY_FORCE) {
>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>  		dpolicy->io_aware = false;
>>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
>>>>  		dpolicy->io_aware = false;
>>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>>>>  	if (!issued && io_interrupted)
>>>>  		issued = -1;
>>>>  
>>>> +	dcc->io_interrupted = io_interrupted;
>>>> +
>>>>  	return issued;
>>>>  }
>>>>  
>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>  		if (i + 1 < dpolicy->granularity)
>>>>  			break;
>>>>  
>>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
>>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
>>>>  
>>>>  		pend_list = &dcc->pend_list[i];
>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>  	if (!issued && io_interrupted)
>>>>  		issued = -1;
>>>>  
>>>> +	dcc->io_interrupted = io_interrupted;
>>>> +
>>>>  	return issued;
>>>>  }
>>>>  
>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
>>>>  	struct f2fs_sb_info *sbi = data;
>>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
>>>> -	struct discard_policy dpolicy;
>>>> +	struct discard_policy dpolicy = {
>>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
>>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
>>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
>>>> +	};
>>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>  	int issued;
>>>>  
>>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>  	if (!dcc)
>>>>  		return -ENOMEM;
>>>>  
>>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
>>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>  	INIT_LIST_HEAD(&dcc->entry_list);
>>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
>>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
>>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>  	dcc->next_pos = 0;
>>>>  	dcc->root = RB_ROOT;
>>>>  	dcc->rbtree_check = false;
>>>> +	dcc->io_interrupted = false;
>>>>  
>>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
>>>>  	SM_I(sbi)->dcc_info = dcc;
>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>> index 422b0ceb1eaa..63b4da72cd34 100644
>>>> --- a/fs/f2fs/segment.h
>>>> +++ b/fs/f2fs/segment.h
>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
>>>>  					sbi->user_block_count);
>>>>  }
>>>>  
>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	unsigned int dev_blks;
>>>> +
>>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
>>>> +	return div_u64((u64)dev_blks * 100,
>>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
>>>> +}
>>>> +
>>>>  /*
>>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
>>>>   * And, users can control the policy through sysfs entries.
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index b055f2ea77c5..55ed76daad23 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
>>>>  	/* adjust parameters according to the volume size */
>>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
>>>> -		sm_i->dcc_info->discard_granularity = 1;
>>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>>>>  	}
>>>>  
>>>> -- 
>>>> 2.18.0.rc1
>>>
>>> .
>>>
> 
> .
>
Jaegeuk Kim Aug. 15, 2018, 2:33 a.m. UTC | #5
On 08/15, Chao Yu wrote:
> On 2018/8/15 1:23, Jaegeuk Kim wrote:
> > On 08/14, Chao Yu wrote:
> >> On 2018/8/14 12:19, Jaegeuk Kim wrote:
> >>> On 08/10, Chao Yu wrote:
> >>>> Previously, discard speed was fixed mostly, and in high usage rate
> >>>> device, we will speed up issuing discard, but it doesn't make sense
> >>>> that in a non-full filesystem, we still issue discard with slow speed.
> >>>
> >>> Could you please elaborate the problem in more detail? The speed depends
> >>> on how many candidates?
> >>
> >> undiscard blocks are all 4k granularity.
> >> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
> >> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
> >> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
> >>
> >>
> >> 1. for case c), we need to speed up issuing discard based on utilization of
> >> "filesystem + undiscard" instead of just utilization of filesystem.
> >>
> >> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >> -			dpolicy->granularity = 1;
> >> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >> -		}
> >>
> >> 2. If free space in storage touches therein threshold, performance will be very
> >> sensitive. In low-end storage, with high usage in space, even free space is
> >> reduced by 1%, performance will decrease a lot.
> > 
> > So, we may need to distinguish low-end vs. high-end storage. In high-end case,
> > it'd be better to avoid IO contention, while low-end device wants to get more
> > discard commands as much as possible. So, how about adding an option for this
> > as a tunable point?
> 
> Agreed, how about adding a sysfs entry discard_tunning:
> 1: enabled, use 4k granularity, self-adapted speed based on real device free space.
> 0: disabled, use dcc->discard_granularity, fixed speed.
> 
> By default: enabled
> 
> How do you think?

I don't think this is proper with a sysfs entry, since we already know the
device type when mounting the partition. We won't require to change the policy
on the fly. And, I still don't get to change the default.

> 
> Thanks,
> 
> > 
> >>
> >> IMO, in above cases, we'd better to issue discard with high speed for c), middle
> >> speed for b), and low speed for a).
> >>
> >> How do you think?
> >>
> >> Thanks,
> >>
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> >>>> and causing high lifetime overhead.
> >>>>
> >>>> Let's tune discard speed as below:
> >>>>
> >>>> a. adjust default issue interval:
> >>>> 		original	after
> >>>> min_interval:	50ms		100ms
> >>>> mid_interval:	500ms		1000ms
> >>>> max_interval:	60000ms		10000ms
> >>>>
> >>>> b. if last time we stop issuing discard due to IO interruption of user,
> >>>> let's reset all {min,mid,max}_interval to default one.
> >>>>
> >>>> c. tune {min,mid,max}_interval with below calculation method:
> >>>>
> >>>> base_interval = default_interval / 10;
> >>>> total_interval = default_interval - base_interval;
> >>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>>>
> >>>> For example:
> >>>> min_interval (:100ms)
> >>>> dev_util (%)	interval (ms)
> >>>> 0		100
> >>>> 10		91
> >>>> 20		82
> >>>> 30		73
> >>>> ...
> >>>> 80		28
> >>>> 90		19
> >>>> 100		10
> >>>>
> >>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>> ---
> >>>>  fs/f2fs/f2fs.h    | 11 ++++----
> >>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
> >>>>  fs/f2fs/segment.h |  9 +++++++
> >>>>  fs/f2fs/super.c   |  2 +-
> >>>>  4 files changed, 67 insertions(+), 19 deletions(-)
> >>>>
> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
> >>>> --- a/fs/f2fs/f2fs.h
> >>>> +++ b/fs/f2fs/f2fs.h
> >>>> @@ -185,10 +185,9 @@ enum {
> >>>>  
> >>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
> >>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> >>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
> >>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
> >>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
> >>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
> >>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
> >>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
> >>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
> >>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
> >>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
> >>>>  
> >>>> @@ -248,7 +247,8 @@ struct discard_entry {
> >>>>  };
> >>>>  
> >>>>  /* default discard granularity of inner discard thread, unit: block count */
> >>>> -#define DEFAULT_DISCARD_GRANULARITY		1
> >>>> +#define MID_DISCARD_GRANULARITY			16
> >>>> +#define MIN_DISCARD_GRANULARITY			1
> >>>>  
> >>>>  /* max discard pend list number */
> >>>>  #define MAX_PLIST_NUM		512
> >>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
> >>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
> >>>>  	struct rb_root root;			/* root of discard rb-tree */
> >>>>  	bool rbtree_check;			/* config for consistence check */
> >>>> +	bool io_interrupted;			/* last state of io interrupted */
> >>>>  };
> >>>>  
> >>>>  /* for the list of fsync inodes, used only during recovery */
> >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>> index 8b52e8dfb12f..9564aaf1f27b 100644
> >>>> --- a/fs/f2fs/segment.c
> >>>> +++ b/fs/f2fs/segment.c
> >>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
> >>>>  #endif
> >>>>  }
> >>>>  
> >>>> +static void __adjust_discard_speed(unsigned int *interval,
> >>>> +				unsigned int def_interval, int dev_util)
> >>>> +{
> >>>> +	unsigned int base_interval, total_interval;
> >>>> +
> >>>> +	base_interval = def_interval / 10;
> >>>> +	total_interval = def_interval - base_interval;
> >>>> +
> >>>> +	/*
> >>>> +	 * if def_interval = 100, adjusted interval should be in range of
> >>>> +	 * [10, 100].
> >>>> +	 */
> >>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>>> +}
> >>>> +
> >>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> >>>> +					struct discard_policy *dpolicy)
> >>>> +{
> >>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>> +	int dev_util;
> >>>> +
> >>>> +	if (dcc->io_interrupted) {
> >>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>> +		return;
> >>>> +	}
> >>>> +
> >>>> +	dev_util = dev_utilization(sbi);
> >>>> +
> >>>> +	__adjust_discard_speed(&dpolicy->min_interval,
> >>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> >>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
> >>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> >>>> +	__adjust_discard_speed(&dpolicy->max_interval,
> >>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
> >>>> +}
> >>>> +
> >>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>>>  				struct discard_policy *dpolicy,
> >>>>  				int discard_type, unsigned int granularity)
> >>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
> >>>>  
> >>>>  	if (discard_type == DPOLICY_BG) {
> >>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>  		dpolicy->io_aware = true;
> >>>>  		dpolicy->sync = false;
> >>>>  		dpolicy->ordered = true;
> >>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >>>> -			dpolicy->granularity = 1;
> >>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>> -		}
> >>>> +		__tune_discard_policy(sbi, dpolicy);
> >>>>  	} else if (discard_type == DPOLICY_FORCE) {
> >>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>  		dpolicy->io_aware = false;
> >>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
> >>>>  		dpolicy->io_aware = false;
> >>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
> >>>>  	if (!issued && io_interrupted)
> >>>>  		issued = -1;
> >>>>  
> >>>> +	dcc->io_interrupted = io_interrupted;
> >>>> +
> >>>>  	return issued;
> >>>>  }
> >>>>  
> >>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>>>  		if (i + 1 < dpolicy->granularity)
> >>>>  			break;
> >>>>  
> >>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
> >>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
> >>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
> >>>>  
> >>>>  		pend_list = &dcc->pend_list[i];
> >>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>>>  	if (!issued && io_interrupted)
> >>>>  		issued = -1;
> >>>>  
> >>>> +	dcc->io_interrupted = io_interrupted;
> >>>> +
> >>>>  	return issued;
> >>>>  }
> >>>>  
> >>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
> >>>>  	struct f2fs_sb_info *sbi = data;
> >>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
> >>>> -	struct discard_policy dpolicy;
> >>>> +	struct discard_policy dpolicy = {
> >>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
> >>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
> >>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
> >>>> +	};
> >>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>  	int issued;
> >>>>  
> >>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>>>  	if (!dcc)
> >>>>  		return -ENOMEM;
> >>>>  
> >>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
> >>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>>>  	INIT_LIST_HEAD(&dcc->entry_list);
> >>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
> >>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
> >>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>>>  	dcc->next_pos = 0;
> >>>>  	dcc->root = RB_ROOT;
> >>>>  	dcc->rbtree_check = false;
> >>>> +	dcc->io_interrupted = false;
> >>>>  
> >>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
> >>>>  	SM_I(sbi)->dcc_info = dcc;
> >>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> >>>> index 422b0ceb1eaa..63b4da72cd34 100644
> >>>> --- a/fs/f2fs/segment.h
> >>>> +++ b/fs/f2fs/segment.h
> >>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> >>>>  					sbi->user_block_count);
> >>>>  }
> >>>>  
> >>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +	unsigned int dev_blks;
> >>>> +
> >>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
> >>>> +	return div_u64((u64)dev_blks * 100,
> >>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
> >>>> +}
> >>>> +
> >>>>  /*
> >>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
> >>>>   * And, users can control the policy through sysfs entries.
> >>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>> index b055f2ea77c5..55ed76daad23 100644
> >>>> --- a/fs/f2fs/super.c
> >>>> +++ b/fs/f2fs/super.c
> >>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
> >>>>  	/* adjust parameters according to the volume size */
> >>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
> >>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
> >>>> -		sm_i->dcc_info->discard_granularity = 1;
> >>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
> >>>>  	}
> >>>>  
> >>>> -- 
> >>>> 2.18.0.rc1
> >>>
> >>> .
> >>>
> > 
> > .
> >
Chao Yu Aug. 15, 2018, 2:51 a.m. UTC | #6
On 2018/8/15 10:33, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 1:23, Jaegeuk Kim wrote:
>>> On 08/14, Chao Yu wrote:
>>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
>>>>> On 08/10, Chao Yu wrote:
>>>>>> Previously, discard speed was fixed mostly, and in high usage rate
>>>>>> device, we will speed up issuing discard, but it doesn't make sense
>>>>>> that in a non-full filesystem, we still issue discard with slow speed.
>>>>>
>>>>> Could you please elaborate the problem in more detail? The speed depends
>>>>> on how many candidates?
>>>>
>>>> undiscard blocks are all 4k granularity.
>>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
>>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
>>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
>>>>
>>>>
>>>> 1. for case c), we need to speed up issuing discard based on utilization of
>>>> "filesystem + undiscard" instead of just utilization of filesystem.
>>>>
>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>> -			dpolicy->granularity = 1;
>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>> -		}
>>>>
>>>> 2. If free space in storage touches therein threshold, performance will be very
>>>> sensitive. In low-end storage, with high usage in space, even free space is
>>>> reduced by 1%, performance will decrease a lot.
>>>
>>> So, we may need to distinguish low-end vs. high-end storage. In high-end case,
>>> it'd be better to avoid IO contention, while low-end device wants to get more
>>> discard commands as much as possible. So, how about adding an option for this
>>> as a tunable point?
>>
>> Agreed, how about adding a sysfs entry discard_tunning:
>> 1: enabled, use 4k granularity, self-adapted speed based on real device free space.
>> 0: disabled, use dcc->discard_granularity, fixed speed.
>>
>> By default: enabled
>>
>> How do you think?
> 
> I don't think this is proper with a sysfs entry, since we already know the

You mean by storage capacity? <= 32GB means low-end?

Thanks,

> device type when mounting the partition. We won't require to change the policy
> on the fly. And, I still don't get to change the default.
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> IMO, in above cases, we'd better to issue discard with high speed for c), middle
>>>> speed for b), and low speed for a).
>>>>
>>>> How do you think?
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
>>>>>> and causing high lifetime overhead.
>>>>>>
>>>>>> Let's tune discard speed as below:
>>>>>>
>>>>>> a. adjust default issue interval:
>>>>>> 		original	after
>>>>>> min_interval:	50ms		100ms
>>>>>> mid_interval:	500ms		1000ms
>>>>>> max_interval:	60000ms		10000ms
>>>>>>
>>>>>> b. if last time we stop issuing discard due to IO interruption of user,
>>>>>> let's reset all {min,mid,max}_interval to default one.
>>>>>>
>>>>>> c. tune {min,mid,max}_interval with below calculation method:
>>>>>>
>>>>>> base_interval = default_interval / 10;
>>>>>> total_interval = default_interval - base_interval;
>>>>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>>>
>>>>>> For example:
>>>>>> min_interval (:100ms)
>>>>>> dev_util (%)	interval (ms)
>>>>>> 0		100
>>>>>> 10		91
>>>>>> 20		82
>>>>>> 30		73
>>>>>> ...
>>>>>> 80		28
>>>>>> 90		19
>>>>>> 100		10
>>>>>>
>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>> ---
>>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
>>>>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
>>>>>>  fs/f2fs/segment.h |  9 +++++++
>>>>>>  fs/f2fs/super.c   |  2 +-
>>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -185,10 +185,9 @@ enum {
>>>>>>  
>>>>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>>>>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
>>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
>>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
>>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
>>>>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
>>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
>>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
>>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
>>>>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>>>>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>>>>>>  
>>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
>>>>>>  };
>>>>>>  
>>>>>>  /* default discard granularity of inner discard thread, unit: block count */
>>>>>> -#define DEFAULT_DISCARD_GRANULARITY		1
>>>>>> +#define MID_DISCARD_GRANULARITY			16
>>>>>> +#define MIN_DISCARD_GRANULARITY			1
>>>>>>  
>>>>>>  /* max discard pend list number */
>>>>>>  #define MAX_PLIST_NUM		512
>>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>>>>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
>>>>>>  	struct rb_root root;			/* root of discard rb-tree */
>>>>>>  	bool rbtree_check;			/* config for consistence check */
>>>>>> +	bool io_interrupted;			/* last state of io interrupted */
>>>>>>  };
>>>>>>  
>>>>>>  /* for the list of fsync inodes, used only during recovery */
>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
>>>>>> --- a/fs/f2fs/segment.c
>>>>>> +++ b/fs/f2fs/segment.c
>>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>>>>>>  #endif
>>>>>>  }
>>>>>>  
>>>>>> +static void __adjust_discard_speed(unsigned int *interval,
>>>>>> +				unsigned int def_interval, int dev_util)
>>>>>> +{
>>>>>> +	unsigned int base_interval, total_interval;
>>>>>> +
>>>>>> +	base_interval = def_interval / 10;
>>>>>> +	total_interval = def_interval - base_interval;
>>>>>> +
>>>>>> +	/*
>>>>>> +	 * if def_interval = 100, adjusted interval should be in range of
>>>>>> +	 * [10, 100].
>>>>>> +	 */
>>>>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>>> +}
>>>>>> +
>>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
>>>>>> +					struct discard_policy *dpolicy)
>>>>>> +{
>>>>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>> +	int dev_util;
>>>>>> +
>>>>>> +	if (dcc->io_interrupted) {
>>>>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>> +		return;
>>>>>> +	}
>>>>>> +
>>>>>> +	dev_util = dev_utilization(sbi);
>>>>>> +
>>>>>> +	__adjust_discard_speed(&dpolicy->min_interval,
>>>>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
>>>>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
>>>>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
>>>>>> +	__adjust_discard_speed(&dpolicy->max_interval,
>>>>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
>>>>>> +}
>>>>>> +
>>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>  				struct discard_policy *dpolicy,
>>>>>>  				int discard_type, unsigned int granularity)
>>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>>>>>  
>>>>>>  	if (discard_type == DPOLICY_BG) {
>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>  		dpolicy->io_aware = true;
>>>>>>  		dpolicy->sync = false;
>>>>>>  		dpolicy->ordered = true;
>>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>> -			dpolicy->granularity = 1;
>>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>> -		}
>>>>>> +		__tune_discard_policy(sbi, dpolicy);
>>>>>>  	} else if (discard_type == DPOLICY_FORCE) {
>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>  		dpolicy->io_aware = false;
>>>>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
>>>>>>  		dpolicy->io_aware = false;
>>>>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>>>>>>  	if (!issued && io_interrupted)
>>>>>>  		issued = -1;
>>>>>>  
>>>>>> +	dcc->io_interrupted = io_interrupted;
>>>>>> +
>>>>>>  	return issued;
>>>>>>  }
>>>>>>  
>>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>>>  		if (i + 1 < dpolicy->granularity)
>>>>>>  			break;
>>>>>>  
>>>>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
>>>>>>  
>>>>>>  		pend_list = &dcc->pend_list[i];
>>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>>>  	if (!issued && io_interrupted)
>>>>>>  		issued = -1;
>>>>>>  
>>>>>> +	dcc->io_interrupted = io_interrupted;
>>>>>> +
>>>>>>  	return issued;
>>>>>>  }
>>>>>>  
>>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
>>>>>>  	struct f2fs_sb_info *sbi = data;
>>>>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
>>>>>> -	struct discard_policy dpolicy;
>>>>>> +	struct discard_policy dpolicy = {
>>>>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
>>>>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
>>>>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
>>>>>> +	};
>>>>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>  	int issued;
>>>>>>  
>>>>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>  	if (!dcc)
>>>>>>  		return -ENOMEM;
>>>>>>  
>>>>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
>>>>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>  	INIT_LIST_HEAD(&dcc->entry_list);
>>>>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
>>>>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
>>>>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>  	dcc->next_pos = 0;
>>>>>>  	dcc->root = RB_ROOT;
>>>>>>  	dcc->rbtree_check = false;
>>>>>> +	dcc->io_interrupted = false;
>>>>>>  
>>>>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
>>>>>>  	SM_I(sbi)->dcc_info = dcc;
>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
>>>>>> --- a/fs/f2fs/segment.h
>>>>>> +++ b/fs/f2fs/segment.h
>>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
>>>>>>  					sbi->user_block_count);
>>>>>>  }
>>>>>>  
>>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	unsigned int dev_blks;
>>>>>> +
>>>>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
>>>>>> +	return div_u64((u64)dev_blks * 100,
>>>>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
>>>>>> +}
>>>>>> +
>>>>>>  /*
>>>>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
>>>>>>   * And, users can control the policy through sysfs entries.
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index b055f2ea77c5..55ed76daad23 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
>>>>>>  	/* adjust parameters according to the volume size */
>>>>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>>>>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
>>>>>> -		sm_i->dcc_info->discard_granularity = 1;
>>>>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>>>>>>  	}
>>>>>>  
>>>>>> -- 
>>>>>> 2.18.0.rc1
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
>
Jaegeuk Kim Aug. 15, 2018, 2:56 a.m. UTC | #7
On 08/15, Chao Yu wrote:
> On 2018/8/15 10:33, Jaegeuk Kim wrote:
> > On 08/15, Chao Yu wrote:
> >> On 2018/8/15 1:23, Jaegeuk Kim wrote:
> >>> On 08/14, Chao Yu wrote:
> >>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
> >>>>> On 08/10, Chao Yu wrote:
> >>>>>> Previously, discard speed was fixed mostly, and in high usage rate
> >>>>>> device, we will speed up issuing discard, but it doesn't make sense
> >>>>>> that in a non-full filesystem, we still issue discard with slow speed.
> >>>>>
> >>>>> Could you please elaborate the problem in more detail? The speed depends
> >>>>> on how many candidates?
> >>>>
> >>>> undiscard blocks are all 4k granularity.
> >>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
> >>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
> >>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
> >>>>
> >>>>
> >>>> 1. for case c), we need to speed up issuing discard based on utilization of
> >>>> "filesystem + undiscard" instead of just utilization of filesystem.
> >>>>
> >>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >>>> -			dpolicy->granularity = 1;
> >>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>> -		}
> >>>>
> >>>> 2. If free space in storage touches therein threshold, performance will be very
> >>>> sensitive. In low-end storage, with high usage in space, even free space is
> >>>> reduced by 1%, performance will decrease a lot.
> >>>
> >>> So, we may need to distinguish low-end vs. high-end storage. In high-end case,
> >>> it'd be better to avoid IO contention, while low-end device wants to get more
> >>> discard commands as much as possible. So, how about adding an option for this
> >>> as a tunable point?
> >>
> >> Agreed, how about adding a sysfs entry discard_tunning:
> >> 1: enabled, use 4k granularity, self-adapted speed based on real device free space.
> >> 0: disabled, use dcc->discard_granularity, fixed speed.
> >>
> >> By default: enabled
> >>
> >> How do you think?
> > 
> > I don't think this is proper with a sysfs entry, since we already know the
> 
> You mean by storage capacity? <= 32GB means low-end?

Yes, that's current condition to judge it. If there is any other method, it'd be
better to change it.

> 
> Thanks,
> 
> > device type when mounting the partition. We won't require to change the policy
> > on the fly. And, I still don't get to change the default.
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> IMO, in above cases, we'd better to issue discard with high speed for c), middle
> >>>> speed for b), and low speed for a).
> >>>>
> >>>> How do you think?
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>> Thanks,
> >>>>>
> >>>>>>
> >>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> >>>>>> and causing high lifetime overhead.
> >>>>>>
> >>>>>> Let's tune discard speed as below:
> >>>>>>
> >>>>>> a. adjust default issue interval:
> >>>>>> 		original	after
> >>>>>> min_interval:	50ms		100ms
> >>>>>> mid_interval:	500ms		1000ms
> >>>>>> max_interval:	60000ms		10000ms
> >>>>>>
> >>>>>> b. if last time we stop issuing discard due to IO interruption of user,
> >>>>>> let's reset all {min,mid,max}_interval to default one.
> >>>>>>
> >>>>>> c. tune {min,mid,max}_interval with below calculation method:
> >>>>>>
> >>>>>> base_interval = default_interval / 10;
> >>>>>> total_interval = default_interval - base_interval;
> >>>>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>>>>>
> >>>>>> For example:
> >>>>>> min_interval (:100ms)
> >>>>>> dev_util (%)	interval (ms)
> >>>>>> 0		100
> >>>>>> 10		91
> >>>>>> 20		82
> >>>>>> 30		73
> >>>>>> ...
> >>>>>> 80		28
> >>>>>> 90		19
> >>>>>> 100		10
> >>>>>>
> >>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>>>> ---
> >>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
> >>>>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
> >>>>>>  fs/f2fs/segment.h |  9 +++++++
> >>>>>>  fs/f2fs/super.c   |  2 +-
> >>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
> >>>>>>
> >>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
> >>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>> @@ -185,10 +185,9 @@ enum {
> >>>>>>  
> >>>>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
> >>>>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> >>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
> >>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
> >>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
> >>>>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
> >>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
> >>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
> >>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
> >>>>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
> >>>>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
> >>>>>>  
> >>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
> >>>>>>  };
> >>>>>>  
> >>>>>>  /* default discard granularity of inner discard thread, unit: block count */
> >>>>>> -#define DEFAULT_DISCARD_GRANULARITY		1
> >>>>>> +#define MID_DISCARD_GRANULARITY			16
> >>>>>> +#define MIN_DISCARD_GRANULARITY			1
> >>>>>>  
> >>>>>>  /* max discard pend list number */
> >>>>>>  #define MAX_PLIST_NUM		512
> >>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
> >>>>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
> >>>>>>  	struct rb_root root;			/* root of discard rb-tree */
> >>>>>>  	bool rbtree_check;			/* config for consistence check */
> >>>>>> +	bool io_interrupted;			/* last state of io interrupted */
> >>>>>>  };
> >>>>>>  
> >>>>>>  /* for the list of fsync inodes, used only during recovery */
> >>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
> >>>>>> --- a/fs/f2fs/segment.c
> >>>>>> +++ b/fs/f2fs/segment.c
> >>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
> >>>>>>  #endif
> >>>>>>  }
> >>>>>>  
> >>>>>> +static void __adjust_discard_speed(unsigned int *interval,
> >>>>>> +				unsigned int def_interval, int dev_util)
> >>>>>> +{
> >>>>>> +	unsigned int base_interval, total_interval;
> >>>>>> +
> >>>>>> +	base_interval = def_interval / 10;
> >>>>>> +	total_interval = def_interval - base_interval;
> >>>>>> +
> >>>>>> +	/*
> >>>>>> +	 * if def_interval = 100, adjusted interval should be in range of
> >>>>>> +	 * [10, 100].
> >>>>>> +	 */
> >>>>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> >>>>>> +					struct discard_policy *dpolicy)
> >>>>>> +{
> >>>>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>>>> +	int dev_util;
> >>>>>> +
> >>>>>> +	if (dcc->io_interrupted) {
> >>>>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>>> +		return;
> >>>>>> +	}
> >>>>>> +
> >>>>>> +	dev_util = dev_utilization(sbi);
> >>>>>> +
> >>>>>> +	__adjust_discard_speed(&dpolicy->min_interval,
> >>>>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> >>>>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
> >>>>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> >>>>>> +	__adjust_discard_speed(&dpolicy->max_interval,
> >>>>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
> >>>>>> +}
> >>>>>> +
> >>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>>>>>  				struct discard_policy *dpolicy,
> >>>>>>  				int discard_type, unsigned int granularity)
> >>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>>>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
> >>>>>>  
> >>>>>>  	if (discard_type == DPOLICY_BG) {
> >>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>>>  		dpolicy->io_aware = true;
> >>>>>>  		dpolicy->sync = false;
> >>>>>>  		dpolicy->ordered = true;
> >>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >>>>>> -			dpolicy->granularity = 1;
> >>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>> -		}
> >>>>>> +		__tune_discard_policy(sbi, dpolicy);
> >>>>>>  	} else if (discard_type == DPOLICY_FORCE) {
> >>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>>>  		dpolicy->io_aware = false;
> >>>>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
> >>>>>>  		dpolicy->io_aware = false;
> >>>>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
> >>>>>>  	if (!issued && io_interrupted)
> >>>>>>  		issued = -1;
> >>>>>>  
> >>>>>> +	dcc->io_interrupted = io_interrupted;
> >>>>>> +
> >>>>>>  	return issued;
> >>>>>>  }
> >>>>>>  
> >>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>>>>>  		if (i + 1 < dpolicy->granularity)
> >>>>>>  			break;
> >>>>>>  
> >>>>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
> >>>>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
> >>>>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
> >>>>>>  
> >>>>>>  		pend_list = &dcc->pend_list[i];
> >>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>>>>>  	if (!issued && io_interrupted)
> >>>>>>  		issued = -1;
> >>>>>>  
> >>>>>> +	dcc->io_interrupted = io_interrupted;
> >>>>>> +
> >>>>>>  	return issued;
> >>>>>>  }
> >>>>>>  
> >>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
> >>>>>>  	struct f2fs_sb_info *sbi = data;
> >>>>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
> >>>>>> -	struct discard_policy dpolicy;
> >>>>>> +	struct discard_policy dpolicy = {
> >>>>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
> >>>>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
> >>>>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
> >>>>>> +	};
> >>>>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>>  	int issued;
> >>>>>>  
> >>>>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>>>>>  	if (!dcc)
> >>>>>>  		return -ENOMEM;
> >>>>>>  
> >>>>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
> >>>>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>>>>>  	INIT_LIST_HEAD(&dcc->entry_list);
> >>>>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
> >>>>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
> >>>>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>>>>>  	dcc->next_pos = 0;
> >>>>>>  	dcc->root = RB_ROOT;
> >>>>>>  	dcc->rbtree_check = false;
> >>>>>> +	dcc->io_interrupted = false;
> >>>>>>  
> >>>>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
> >>>>>>  	SM_I(sbi)->dcc_info = dcc;
> >>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> >>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
> >>>>>> --- a/fs/f2fs/segment.h
> >>>>>> +++ b/fs/f2fs/segment.h
> >>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> >>>>>>  					sbi->user_block_count);
> >>>>>>  }
> >>>>>>  
> >>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
> >>>>>> +{
> >>>>>> +	unsigned int dev_blks;
> >>>>>> +
> >>>>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
> >>>>>> +	return div_u64((u64)dev_blks * 100,
> >>>>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
> >>>>>> +}
> >>>>>> +
> >>>>>>  /*
> >>>>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
> >>>>>>   * And, users can control the policy through sysfs entries.
> >>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>>>> index b055f2ea77c5..55ed76daad23 100644
> >>>>>> --- a/fs/f2fs/super.c
> >>>>>> +++ b/fs/f2fs/super.c
> >>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
> >>>>>>  	/* adjust parameters according to the volume size */
> >>>>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
> >>>>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
> >>>>>> -		sm_i->dcc_info->discard_granularity = 1;
> >>>>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>>>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
> >>>>>>  	}
> >>>>>>  
> >>>>>> -- 
> >>>>>> 2.18.0.rc1
> >>>>>
> >>>>> .
> >>>>>
> >>>
> >>> .
> >>>
> > 
> > .
> >
Chao Yu Aug. 15, 2018, 3:01 a.m. UTC | #8
On 2018/8/15 10:56, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 10:33, Jaegeuk Kim wrote:
>>> On 08/15, Chao Yu wrote:
>>>> On 2018/8/15 1:23, Jaegeuk Kim wrote:
>>>>> On 08/14, Chao Yu wrote:
>>>>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
>>>>>>> On 08/10, Chao Yu wrote:
>>>>>>>> Previously, discard speed was fixed mostly, and in high usage rate
>>>>>>>> device, we will speed up issuing discard, but it doesn't make sense
>>>>>>>> that in a non-full filesystem, we still issue discard with slow speed.
>>>>>>>
>>>>>>> Could you please elaborate the problem in more detail? The speed depends
>>>>>>> on how many candidates?
>>>>>>
>>>>>> undiscard blocks are all 4k granularity.
>>>>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
>>>>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
>>>>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
>>>>>>
>>>>>>
>>>>>> 1. for case c), we need to speed up issuing discard based on utilization of
>>>>>> "filesystem + undiscard" instead of just utilization of filesystem.
>>>>>>
>>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>> -			dpolicy->granularity = 1;
>>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>> -		}
>>>>>>
>>>>>> 2. If free space in storage touches therein threshold, performance will be very
>>>>>> sensitive. In low-end storage, with high usage in space, even free space is
>>>>>> reduced by 1%, performance will decrease a lot.
>>>>>
>>>>> So, we may need to distinguish low-end vs. high-end storage. In high-end case,
>>>>> it'd be better to avoid IO contention, while low-end device wants to get more
>>>>> discard commands as much as possible. So, how about adding an option for this
>>>>> as a tunable point?
>>>>
>>>> Agreed, how about adding a sysfs entry discard_tunning:
>>>> 1: enabled, use 4k granularity, self-adapted speed based on real device free space.
>>>> 0: disabled, use dcc->discard_granularity, fixed speed.
>>>>
>>>> By default: enabled
>>>>
>>>> How do you think?
>>>
>>> I don't think this is proper with a sysfs entry, since we already know the
>>
>> You mean by storage capacity? <= 32GB means low-end?
> 
> Yes, that's current condition to judge it. If there is any other method, it'd be

That would be hard code...

Still I have not got any other method to do the judgment except capacity.

Thanks,

> better to change it.
> 
>>
>> Thanks,
>>
>>> device type when mounting the partition. We won't require to change the policy
>>> on the fly. And, I still don't get to change the default.
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> IMO, in above cases, we'd better to issue discard with high speed for c), middle
>>>>>> speed for b), and low speed for a).
>>>>>>
>>>>>> How do you think?
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>>>
>>>>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
>>>>>>>> and causing high lifetime overhead.
>>>>>>>>
>>>>>>>> Let's tune discard speed as below:
>>>>>>>>
>>>>>>>> a. adjust default issue interval:
>>>>>>>> 		original	after
>>>>>>>> min_interval:	50ms		100ms
>>>>>>>> mid_interval:	500ms		1000ms
>>>>>>>> max_interval:	60000ms		10000ms
>>>>>>>>
>>>>>>>> b. if last time we stop issuing discard due to IO interruption of user,
>>>>>>>> let's reset all {min,mid,max}_interval to default one.
>>>>>>>>
>>>>>>>> c. tune {min,mid,max}_interval with below calculation method:
>>>>>>>>
>>>>>>>> base_interval = default_interval / 10;
>>>>>>>> total_interval = default_interval - base_interval;
>>>>>>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>>>>>
>>>>>>>> For example:
>>>>>>>> min_interval (:100ms)
>>>>>>>> dev_util (%)	interval (ms)
>>>>>>>> 0		100
>>>>>>>> 10		91
>>>>>>>> 20		82
>>>>>>>> 30		73
>>>>>>>> ...
>>>>>>>> 80		28
>>>>>>>> 90		19
>>>>>>>> 100		10
>>>>>>>>
>>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>>> ---
>>>>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
>>>>>>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
>>>>>>>>  fs/f2fs/segment.h |  9 +++++++
>>>>>>>>  fs/f2fs/super.c   |  2 +-
>>>>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>> @@ -185,10 +185,9 @@ enum {
>>>>>>>>  
>>>>>>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>>>>>>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
>>>>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
>>>>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
>>>>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
>>>>>>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
>>>>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
>>>>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
>>>>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
>>>>>>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>>>>>>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>>>>>>>>  
>>>>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
>>>>>>>>  };
>>>>>>>>  
>>>>>>>>  /* default discard granularity of inner discard thread, unit: block count */
>>>>>>>> -#define DEFAULT_DISCARD_GRANULARITY		1
>>>>>>>> +#define MID_DISCARD_GRANULARITY			16
>>>>>>>> +#define MIN_DISCARD_GRANULARITY			1
>>>>>>>>  
>>>>>>>>  /* max discard pend list number */
>>>>>>>>  #define MAX_PLIST_NUM		512
>>>>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>>>>>>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
>>>>>>>>  	struct rb_root root;			/* root of discard rb-tree */
>>>>>>>>  	bool rbtree_check;			/* config for consistence check */
>>>>>>>> +	bool io_interrupted;			/* last state of io interrupted */
>>>>>>>>  };
>>>>>>>>  
>>>>>>>>  /* for the list of fsync inodes, used only during recovery */
>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>>>>>>>>  #endif
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +static void __adjust_discard_speed(unsigned int *interval,
>>>>>>>> +				unsigned int def_interval, int dev_util)
>>>>>>>> +{
>>>>>>>> +	unsigned int base_interval, total_interval;
>>>>>>>> +
>>>>>>>> +	base_interval = def_interval / 10;
>>>>>>>> +	total_interval = def_interval - base_interval;
>>>>>>>> +
>>>>>>>> +	/*
>>>>>>>> +	 * if def_interval = 100, adjusted interval should be in range of
>>>>>>>> +	 * [10, 100].
>>>>>>>> +	 */
>>>>>>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>> +					struct discard_policy *dpolicy)
>>>>>>>> +{
>>>>>>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>>> +	int dev_util;
>>>>>>>> +
>>>>>>>> +	if (dcc->io_interrupted) {
>>>>>>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>> +		return;
>>>>>>>> +	}
>>>>>>>> +
>>>>>>>> +	dev_util = dev_utilization(sbi);
>>>>>>>> +
>>>>>>>> +	__adjust_discard_speed(&dpolicy->min_interval,
>>>>>>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
>>>>>>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
>>>>>>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
>>>>>>>> +	__adjust_discard_speed(&dpolicy->max_interval,
>>>>>>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>  				struct discard_policy *dpolicy,
>>>>>>>>  				int discard_type, unsigned int granularity)
>>>>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>>>>>>>  
>>>>>>>>  	if (discard_type == DPOLICY_BG) {
>>>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>  		dpolicy->io_aware = true;
>>>>>>>>  		dpolicy->sync = false;
>>>>>>>>  		dpolicy->ordered = true;
>>>>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>>>> -			dpolicy->granularity = 1;
>>>>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>> -		}
>>>>>>>> +		__tune_discard_policy(sbi, dpolicy);
>>>>>>>>  	} else if (discard_type == DPOLICY_FORCE) {
>>>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>  		dpolicy->io_aware = false;
>>>>>>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
>>>>>>>>  		dpolicy->io_aware = false;
>>>>>>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>>>>>>>>  	if (!issued && io_interrupted)
>>>>>>>>  		issued = -1;
>>>>>>>>  
>>>>>>>> +	dcc->io_interrupted = io_interrupted;
>>>>>>>> +
>>>>>>>>  	return issued;
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>>>>>  		if (i + 1 < dpolicy->granularity)
>>>>>>>>  			break;
>>>>>>>>  
>>>>>>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>>>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>>>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
>>>>>>>>  
>>>>>>>>  		pend_list = &dcc->pend_list[i];
>>>>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>>>>>  	if (!issued && io_interrupted)
>>>>>>>>  		issued = -1;
>>>>>>>>  
>>>>>>>> +	dcc->io_interrupted = io_interrupted;
>>>>>>>> +
>>>>>>>>  	return issued;
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
>>>>>>>>  	struct f2fs_sb_info *sbi = data;
>>>>>>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
>>>>>>>> -	struct discard_policy dpolicy;
>>>>>>>> +	struct discard_policy dpolicy = {
>>>>>>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
>>>>>>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
>>>>>>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
>>>>>>>> +	};
>>>>>>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>  	int issued;
>>>>>>>>  
>>>>>>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>>>  	if (!dcc)
>>>>>>>>  		return -ENOMEM;
>>>>>>>>  
>>>>>>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
>>>>>>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>>>  	INIT_LIST_HEAD(&dcc->entry_list);
>>>>>>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
>>>>>>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
>>>>>>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>>>  	dcc->next_pos = 0;
>>>>>>>>  	dcc->root = RB_ROOT;
>>>>>>>>  	dcc->rbtree_check = false;
>>>>>>>> +	dcc->io_interrupted = false;
>>>>>>>>  
>>>>>>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
>>>>>>>>  	SM_I(sbi)->dcc_info = dcc;
>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
>>>>>>>>  					sbi->user_block_count);
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
>>>>>>>> +{
>>>>>>>> +	unsigned int dev_blks;
>>>>>>>> +
>>>>>>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
>>>>>>>> +	return div_u64((u64)dev_blks * 100,
>>>>>>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  /*
>>>>>>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
>>>>>>>>   * And, users can control the policy through sysfs entries.
>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>> index b055f2ea77c5..55ed76daad23 100644
>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
>>>>>>>>  	/* adjust parameters according to the volume size */
>>>>>>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>>>>>>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
>>>>>>>> -		sm_i->dcc_info->discard_granularity = 1;
>>>>>>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>>>>>>>>  	}
>>>>>>>>  
>>>>>>>> -- 
>>>>>>>> 2.18.0.rc1
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
>
Jaegeuk Kim Aug. 15, 2018, 3:20 a.m. UTC | #9
On 08/15, Chao Yu wrote:
> On 2018/8/15 10:56, Jaegeuk Kim wrote:
> > On 08/15, Chao Yu wrote:
> >> On 2018/8/15 10:33, Jaegeuk Kim wrote:
> >>> On 08/15, Chao Yu wrote:
> >>>> On 2018/8/15 1:23, Jaegeuk Kim wrote:
> >>>>> On 08/14, Chao Yu wrote:
> >>>>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
> >>>>>>> On 08/10, Chao Yu wrote:
> >>>>>>>> Previously, discard speed was fixed mostly, and in high usage rate
> >>>>>>>> device, we will speed up issuing discard, but it doesn't make sense
> >>>>>>>> that in a non-full filesystem, we still issue discard with slow speed.
> >>>>>>>
> >>>>>>> Could you please elaborate the problem in more detail? The speed depends
> >>>>>>> on how many candidates?
> >>>>>>
> >>>>>> undiscard blocks are all 4k granularity.
> >>>>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
> >>>>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
> >>>>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
> >>>>>>
> >>>>>>
> >>>>>> 1. for case c), we need to speed up issuing discard based on utilization of
> >>>>>> "filesystem + undiscard" instead of just utilization of filesystem.
> >>>>>>
> >>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >>>>>> -			dpolicy->granularity = 1;
> >>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>> -		}
> >>>>>>
> >>>>>> 2. If free space in storage touches therein threshold, performance will be very
> >>>>>> sensitive. In low-end storage, with high usage in space, even free space is
> >>>>>> reduced by 1%, performance will decrease a lot.
> >>>>>
> >>>>> So, we may need to distinguish low-end vs. high-end storage. In high-end case,
> >>>>> it'd be better to avoid IO contention, while low-end device wants to get more
> >>>>> discard commands as much as possible. So, how about adding an option for this
> >>>>> as a tunable point?
> >>>>
> >>>> Agreed, how about adding a sysfs entry discard_tunning:
> >>>> 1: enabled, use 4k granularity, self-adapted speed based on real device free space.
> >>>> 0: disabled, use dcc->discard_granularity, fixed speed.
> >>>>
> >>>> By default: enabled
> >>>>
> >>>> How do you think?
> >>>
> >>> I don't think this is proper with a sysfs entry, since we already know the
> >>
> >> You mean by storage capacity? <= 32GB means low-end?
> > 
> > Yes, that's current condition to judge it. If there is any other method, it'd be
> 
> That would be hard code...
> 
> Still I have not got any other method to do the judgment except capacity.

Maybe ufs or emmc?

> 
> Thanks,
> 
> > better to change it.
> > 
> >>
> >> Thanks,
> >>
> >>> device type when mounting the partition. We won't require to change the policy
> >>> on the fly. And, I still don't get to change the default.
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>>>
> >>>>>> IMO, in above cases, we'd better to issue discard with high speed for c), middle
> >>>>>> speed for b), and low speed for a).
> >>>>>>
> >>>>>> How do you think?
> >>>>>>
> >>>>>> Thanks,
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks,
> >>>>>>>
> >>>>>>>>
> >>>>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> >>>>>>>> and causing high lifetime overhead.
> >>>>>>>>
> >>>>>>>> Let's tune discard speed as below:
> >>>>>>>>
> >>>>>>>> a. adjust default issue interval:
> >>>>>>>> 		original	after
> >>>>>>>> min_interval:	50ms		100ms
> >>>>>>>> mid_interval:	500ms		1000ms
> >>>>>>>> max_interval:	60000ms		10000ms
> >>>>>>>>
> >>>>>>>> b. if last time we stop issuing discard due to IO interruption of user,
> >>>>>>>> let's reset all {min,mid,max}_interval to default one.
> >>>>>>>>
> >>>>>>>> c. tune {min,mid,max}_interval with below calculation method:
> >>>>>>>>
> >>>>>>>> base_interval = default_interval / 10;
> >>>>>>>> total_interval = default_interval - base_interval;
> >>>>>>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>>>>>>>
> >>>>>>>> For example:
> >>>>>>>> min_interval (:100ms)
> >>>>>>>> dev_util (%)	interval (ms)
> >>>>>>>> 0		100
> >>>>>>>> 10		91
> >>>>>>>> 20		82
> >>>>>>>> 30		73
> >>>>>>>> ...
> >>>>>>>> 80		28
> >>>>>>>> 90		19
> >>>>>>>> 100		10
> >>>>>>>>
> >>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>>>>>> ---
> >>>>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
> >>>>>>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
> >>>>>>>>  fs/f2fs/segment.h |  9 +++++++
> >>>>>>>>  fs/f2fs/super.c   |  2 +-
> >>>>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
> >>>>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>>>> @@ -185,10 +185,9 @@ enum {
> >>>>>>>>  
> >>>>>>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
> >>>>>>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> >>>>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
> >>>>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
> >>>>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
> >>>>>>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
> >>>>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
> >>>>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
> >>>>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
> >>>>>>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
> >>>>>>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
> >>>>>>>>  
> >>>>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
> >>>>>>>>  };
> >>>>>>>>  
> >>>>>>>>  /* default discard granularity of inner discard thread, unit: block count */
> >>>>>>>> -#define DEFAULT_DISCARD_GRANULARITY		1
> >>>>>>>> +#define MID_DISCARD_GRANULARITY			16
> >>>>>>>> +#define MIN_DISCARD_GRANULARITY			1
> >>>>>>>>  
> >>>>>>>>  /* max discard pend list number */
> >>>>>>>>  #define MAX_PLIST_NUM		512
> >>>>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
> >>>>>>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
> >>>>>>>>  	struct rb_root root;			/* root of discard rb-tree */
> >>>>>>>>  	bool rbtree_check;			/* config for consistence check */
> >>>>>>>> +	bool io_interrupted;			/* last state of io interrupted */
> >>>>>>>>  };
> >>>>>>>>  
> >>>>>>>>  /* for the list of fsync inodes, used only during recovery */
> >>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
> >>>>>>>> --- a/fs/f2fs/segment.c
> >>>>>>>> +++ b/fs/f2fs/segment.c
> >>>>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
> >>>>>>>>  #endif
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +static void __adjust_discard_speed(unsigned int *interval,
> >>>>>>>> +				unsigned int def_interval, int dev_util)
> >>>>>>>> +{
> >>>>>>>> +	unsigned int base_interval, total_interval;
> >>>>>>>> +
> >>>>>>>> +	base_interval = def_interval / 10;
> >>>>>>>> +	total_interval = def_interval - base_interval;
> >>>>>>>> +
> >>>>>>>> +	/*
> >>>>>>>> +	 * if def_interval = 100, adjusted interval should be in range of
> >>>>>>>> +	 * [10, 100].
> >>>>>>>> +	 */
> >>>>>>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> >>>>>>>> +					struct discard_policy *dpolicy)
> >>>>>>>> +{
> >>>>>>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>>>>>> +	int dev_util;
> >>>>>>>> +
> >>>>>>>> +	if (dcc->io_interrupted) {
> >>>>>>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>>>>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>>>>> +		return;
> >>>>>>>> +	}
> >>>>>>>> +
> >>>>>>>> +	dev_util = dev_utilization(sbi);
> >>>>>>>> +
> >>>>>>>> +	__adjust_discard_speed(&dpolicy->min_interval,
> >>>>>>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> >>>>>>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
> >>>>>>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> >>>>>>>> +	__adjust_discard_speed(&dpolicy->max_interval,
> >>>>>>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>>>>>>>  				struct discard_policy *dpolicy,
> >>>>>>>>  				int discard_type, unsigned int granularity)
> >>>>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >>>>>>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
> >>>>>>>>  
> >>>>>>>>  	if (discard_type == DPOLICY_BG) {
> >>>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>>>>>  		dpolicy->io_aware = true;
> >>>>>>>>  		dpolicy->sync = false;
> >>>>>>>>  		dpolicy->ordered = true;
> >>>>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> >>>>>>>> -			dpolicy->granularity = 1;
> >>>>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>>>> -		}
> >>>>>>>> +		__tune_discard_policy(sbi, dpolicy);
> >>>>>>>>  	} else if (discard_type == DPOLICY_FORCE) {
> >>>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> >>>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> >>>>>>>>  		dpolicy->io_aware = false;
> >>>>>>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
> >>>>>>>>  		dpolicy->io_aware = false;
> >>>>>>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
> >>>>>>>>  	if (!issued && io_interrupted)
> >>>>>>>>  		issued = -1;
> >>>>>>>>  
> >>>>>>>> +	dcc->io_interrupted = io_interrupted;
> >>>>>>>> +
> >>>>>>>>  	return issued;
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>>>>>>>  		if (i + 1 < dpolicy->granularity)
> >>>>>>>>  			break;
> >>>>>>>>  
> >>>>>>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
> >>>>>>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
> >>>>>>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
> >>>>>>>>  
> >>>>>>>>  		pend_list = &dcc->pend_list[i];
> >>>>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> >>>>>>>>  	if (!issued && io_interrupted)
> >>>>>>>>  		issued = -1;
> >>>>>>>>  
> >>>>>>>> +	dcc->io_interrupted = io_interrupted;
> >>>>>>>> +
> >>>>>>>>  	return issued;
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
> >>>>>>>>  	struct f2fs_sb_info *sbi = data;
> >>>>>>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> >>>>>>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
> >>>>>>>> -	struct discard_policy dpolicy;
> >>>>>>>> +	struct discard_policy dpolicy = {
> >>>>>>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
> >>>>>>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
> >>>>>>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
> >>>>>>>> +	};
> >>>>>>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
> >>>>>>>>  	int issued;
> >>>>>>>>  
> >>>>>>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>>>>>>>  	if (!dcc)
> >>>>>>>>  		return -ENOMEM;
> >>>>>>>>  
> >>>>>>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
> >>>>>>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>>>>>>>  	INIT_LIST_HEAD(&dcc->entry_list);
> >>>>>>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
> >>>>>>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
> >>>>>>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> >>>>>>>>  	dcc->next_pos = 0;
> >>>>>>>>  	dcc->root = RB_ROOT;
> >>>>>>>>  	dcc->rbtree_check = false;
> >>>>>>>> +	dcc->io_interrupted = false;
> >>>>>>>>  
> >>>>>>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
> >>>>>>>>  	SM_I(sbi)->dcc_info = dcc;
> >>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> >>>>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
> >>>>>>>> --- a/fs/f2fs/segment.h
> >>>>>>>> +++ b/fs/f2fs/segment.h
> >>>>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> >>>>>>>>  					sbi->user_block_count);
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
> >>>>>>>> +{
> >>>>>>>> +	unsigned int dev_blks;
> >>>>>>>> +
> >>>>>>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
> >>>>>>>> +	return div_u64((u64)dev_blks * 100,
> >>>>>>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  /*
> >>>>>>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
> >>>>>>>>   * And, users can control the policy through sysfs entries.
> >>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>>>>>> index b055f2ea77c5..55ed76daad23 100644
> >>>>>>>> --- a/fs/f2fs/super.c
> >>>>>>>> +++ b/fs/f2fs/super.c
> >>>>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
> >>>>>>>>  	/* adjust parameters according to the volume size */
> >>>>>>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
> >>>>>>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
> >>>>>>>> -		sm_i->dcc_info->discard_granularity = 1;
> >>>>>>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
> >>>>>>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
> >>>>>>>>  	}
> >>>>>>>>  
> >>>>>>>> -- 
> >>>>>>>> 2.18.0.rc1
> >>>>>>>
> >>>>>>> .
> >>>>>>>
> >>>>>
> >>>>> .
> >>>>>
> >>>
> >>> .
> >>>
> > 
> > .
> >
Chao Yu Aug. 15, 2018, 3:30 a.m. UTC | #10
On 2018/8/15 11:20, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 10:56, Jaegeuk Kim wrote:
>>> On 08/15, Chao Yu wrote:
>>>> On 2018/8/15 10:33, Jaegeuk Kim wrote:
>>>>> On 08/15, Chao Yu wrote:
>>>>>> On 2018/8/15 1:23, Jaegeuk Kim wrote:
>>>>>>> On 08/14, Chao Yu wrote:
>>>>>>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
>>>>>>>>> On 08/10, Chao Yu wrote:
>>>>>>>>>> Previously, discard speed was fixed mostly, and in high usage rate
>>>>>>>>>> device, we will speed up issuing discard, but it doesn't make sense
>>>>>>>>>> that in a non-full filesystem, we still issue discard with slow speed.
>>>>>>>>>
>>>>>>>>> Could you please elaborate the problem in more detail? The speed depends
>>>>>>>>> on how many candidates?
>>>>>>>>
>>>>>>>> undiscard blocks are all 4k granularity.
>>>>>>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash storage: 40%
>>>>>>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash storage: 65%
>>>>>>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash storage: 100%
>>>>>>>>
>>>>>>>>
>>>>>>>> 1. for case c), we need to speed up issuing discard based on utilization of
>>>>>>>> "filesystem + undiscard" instead of just utilization of filesystem.
>>>>>>>>
>>>>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>>>> -			dpolicy->granularity = 1;
>>>>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>> -		}
>>>>>>>>
>>>>>>>> 2. If free space in storage touches therein threshold, performance will be very
>>>>>>>> sensitive. In low-end storage, with high usage in space, even free space is
>>>>>>>> reduced by 1%, performance will decrease a lot.
>>>>>>>
>>>>>>> So, we may need to distinguish low-end vs. high-end storage. In high-end case,
>>>>>>> it'd be better to avoid IO contention, while low-end device wants to get more
>>>>>>> discard commands as much as possible. So, how about adding an option for this
>>>>>>> as a tunable point?
>>>>>>
>>>>>> Agreed, how about adding a sysfs entry discard_tunning:
>>>>>> 1: enabled, use 4k granularity, self-adapted speed based on real device free space.
>>>>>> 0: disabled, use dcc->discard_granularity, fixed speed.
>>>>>>
>>>>>> By default: enabled
>>>>>>
>>>>>> How do you think?
>>>>>
>>>>> I don't think this is proper with a sysfs entry, since we already know the
>>>>
>>>> You mean by storage capacity? <= 32GB means low-end?
>>>
>>> Yes, that's current condition to judge it. If there is any other method, it'd be
>>
>> That would be hard code...
>>
>> Still I have not got any other method to do the judgment except capacity.
> 
> Maybe ufs or emmc?

Yeah, that's may be a good way.

I remember very initial version UFS has very poor discard performance, for that
kind of storage, it will be not accurate?

Let me check whether there is a flag to distinguish ufs/emmc.

Thanks,

> 
>>
>> Thanks,
>>
>>> better to change it.
>>>
>>>>
>>>> Thanks,
>>>>
>>>>> device type when mounting the partition. We won't require to change the policy
>>>>> on the fly. And, I still don't get to change the default.
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> IMO, in above cases, we'd better to issue discard with high speed for c), middle
>>>>>>>> speed for b), and low speed for a).
>>>>>>>>
>>>>>>>> How do you think?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
>>>>>>>>>> and causing high lifetime overhead.
>>>>>>>>>>
>>>>>>>>>> Let's tune discard speed as below:
>>>>>>>>>>
>>>>>>>>>> a. adjust default issue interval:
>>>>>>>>>> 		original	after
>>>>>>>>>> min_interval:	50ms		100ms
>>>>>>>>>> mid_interval:	500ms		1000ms
>>>>>>>>>> max_interval:	60000ms		10000ms
>>>>>>>>>>
>>>>>>>>>> b. if last time we stop issuing discard due to IO interruption of user,
>>>>>>>>>> let's reset all {min,mid,max}_interval to default one.
>>>>>>>>>>
>>>>>>>>>> c. tune {min,mid,max}_interval with below calculation method:
>>>>>>>>>>
>>>>>>>>>> base_interval = default_interval / 10;
>>>>>>>>>> total_interval = default_interval - base_interval;
>>>>>>>>>> interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>>>>>>>
>>>>>>>>>> For example:
>>>>>>>>>> min_interval (:100ms)
>>>>>>>>>> dev_util (%)	interval (ms)
>>>>>>>>>> 0		100
>>>>>>>>>> 10		91
>>>>>>>>>> 20		82
>>>>>>>>>> 30		73
>>>>>>>>>> ...
>>>>>>>>>> 80		28
>>>>>>>>>> 90		19
>>>>>>>>>> 100		10
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>>>>> ---
>>>>>>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
>>>>>>>>>>  fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++----------
>>>>>>>>>>  fs/f2fs/segment.h |  9 +++++++
>>>>>>>>>>  fs/f2fs/super.c   |  2 +-
>>>>>>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
>>>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>>>> @@ -185,10 +185,9 @@ enum {
>>>>>>>>>>  
>>>>>>>>>>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>>>>>>>>>>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
>>>>>>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
>>>>>>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
>>>>>>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
>>>>>>>>>> -#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
>>>>>>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
>>>>>>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
>>>>>>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
>>>>>>>>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>>>>>>>>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>>>>>>>>>>  
>>>>>>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
>>>>>>>>>>  };
>>>>>>>>>>  
>>>>>>>>>>  /* default discard granularity of inner discard thread, unit: block count */
>>>>>>>>>> -#define DEFAULT_DISCARD_GRANULARITY		1
>>>>>>>>>> +#define MID_DISCARD_GRANULARITY			16
>>>>>>>>>> +#define MIN_DISCARD_GRANULARITY			1
>>>>>>>>>>  
>>>>>>>>>>  /* max discard pend list number */
>>>>>>>>>>  #define MAX_PLIST_NUM		512
>>>>>>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>>>>>>>>>>  	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
>>>>>>>>>>  	struct rb_root root;			/* root of discard rb-tree */
>>>>>>>>>>  	bool rbtree_check;			/* config for consistence check */
>>>>>>>>>> +	bool io_interrupted;			/* last state of io interrupted */
>>>>>>>>>>  };
>>>>>>>>>>  
>>>>>>>>>>  /* for the list of fsync inodes, used only during recovery */
>>>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
>>>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>>>>>>>>>>  #endif
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>> +static void __adjust_discard_speed(unsigned int *interval,
>>>>>>>>>> +				unsigned int def_interval, int dev_util)
>>>>>>>>>> +{
>>>>>>>>>> +	unsigned int base_interval, total_interval;
>>>>>>>>>> +
>>>>>>>>>> +	base_interval = def_interval / 10;
>>>>>>>>>> +	total_interval = def_interval - base_interval;
>>>>>>>>>> +
>>>>>>>>>> +	/*
>>>>>>>>>> +	 * if def_interval = 100, adjusted interval should be in range of
>>>>>>>>>> +	 * [10, 100].
>>>>>>>>>> +	 */
>>>>>>>>>> +	*interval = base_interval + total_interval * (100 - dev_util) / 100;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>>> +					struct discard_policy *dpolicy)
>>>>>>>>>> +{
>>>>>>>>>> +	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>>>>> +	int dev_util;
>>>>>>>>>> +
>>>>>>>>>> +	if (dcc->io_interrupted) {
>>>>>>>>>> +		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>> +		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>>>> +		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>>> +		return;
>>>>>>>>>> +	}
>>>>>>>>>> +
>>>>>>>>>> +	dev_util = dev_utilization(sbi);
>>>>>>>>>> +
>>>>>>>>>> +	__adjust_discard_speed(&dpolicy->min_interval,
>>>>>>>>>> +				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
>>>>>>>>>> +	__adjust_discard_speed(&dpolicy->mid_interval,
>>>>>>>>>> +				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
>>>>>>>>>> +	__adjust_discard_speed(&dpolicy->max_interval,
>>>>>>>>>> +				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>>>  				struct discard_policy *dpolicy,
>>>>>>>>>>  				int discard_type, unsigned int granularity)
>>>>>>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>>>  	dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>>>>>>>>>  
>>>>>>>>>>  	if (discard_type == DPOLICY_BG) {
>>>>>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>>>  		dpolicy->io_aware = true;
>>>>>>>>>>  		dpolicy->sync = false;
>>>>>>>>>>  		dpolicy->ordered = true;
>>>>>>>>>> -		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>>>>>> -			dpolicy->granularity = 1;
>>>>>>>>>> -			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>> -		}
>>>>>>>>>> +		__tune_discard_policy(sbi, dpolicy);
>>>>>>>>>>  	} else if (discard_type == DPOLICY_FORCE) {
>>>>>>>>>> -		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>> -		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>>>> -		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>>>  		dpolicy->io_aware = false;
>>>>>>>>>>  	} else if (discard_type == DPOLICY_FSTRIM) {
>>>>>>>>>>  		dpolicy->io_aware = false;
>>>>>>>>>> @@ -1353,6 +1382,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>>>>>>>>>>  	if (!issued && io_interrupted)
>>>>>>>>>>  		issued = -1;
>>>>>>>>>>  
>>>>>>>>>> +	dcc->io_interrupted = io_interrupted;
>>>>>>>>>> +
>>>>>>>>>>  	return issued;
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>>>>>>>  		if (i + 1 < dpolicy->granularity)
>>>>>>>>>>  			break;
>>>>>>>>>>  
>>>>>>>>>> -		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>>>>>>> +		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
>>>>>>>>>>  			return __issue_discard_cmd_orderly(sbi, dpolicy);
>>>>>>>>>>  
>>>>>>>>>>  		pend_list = &dcc->pend_list[i];
>>>>>>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>>>>>>>>>  	if (!issued && io_interrupted)
>>>>>>>>>>  		issued = -1;
>>>>>>>>>>  
>>>>>>>>>> +	dcc->io_interrupted = io_interrupted;
>>>>>>>>>> +
>>>>>>>>>>  	return issued;
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void *data)
>>>>>>>>>>  	struct f2fs_sb_info *sbi = data;
>>>>>>>>>>  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>>>>>  	wait_queue_head_t *q = &dcc->discard_wait_queue;
>>>>>>>>>> -	struct discard_policy dpolicy;
>>>>>>>>>> +	struct discard_policy dpolicy = {
>>>>>>>>>> +		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
>>>>>>>>>> +		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
>>>>>>>>>> +		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
>>>>>>>>>> +	};
>>>>>>>>>>  	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>  	int issued;
>>>>>>>>>>  
>>>>>>>>>> @@ -1929,7 +1966,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>>>>>  	if (!dcc)
>>>>>>>>>>  		return -ENOMEM;
>>>>>>>>>>  
>>>>>>>>>> -	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
>>>>>>>>>> +	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>>>>>  	INIT_LIST_HEAD(&dcc->entry_list);
>>>>>>>>>>  	for (i = 0; i < MAX_PLIST_NUM; i++)
>>>>>>>>>>  		INIT_LIST_HEAD(&dcc->pend_list[i]);
>>>>>>>>>> @@ -1945,6 +1982,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>>>>>  	dcc->next_pos = 0;
>>>>>>>>>>  	dcc->root = RB_ROOT;
>>>>>>>>>>  	dcc->rbtree_check = false;
>>>>>>>>>> +	dcc->io_interrupted = false;
>>>>>>>>>>  
>>>>>>>>>>  	init_waitqueue_head(&dcc->discard_wait_queue);
>>>>>>>>>>  	SM_I(sbi)->dcc_info = dcc;
>>>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
>>>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct f2fs_sb_info *sbi)
>>>>>>>>>>  					sbi->user_block_count);
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
>>>>>>>>>> +{
>>>>>>>>>> +	unsigned int dev_blks;
>>>>>>>>>> +
>>>>>>>>>> +	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
>>>>>>>>>> +	return div_u64((u64)dev_blks * 100,
>>>>>>>>>> +			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  /*
>>>>>>>>>>   * Sometimes f2fs may be better to drop out-of-place update policy.
>>>>>>>>>>   * And, users can control the policy through sysfs entries.
>>>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>>>> index b055f2ea77c5..55ed76daad23 100644
>>>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
>>>>>>>>>>  	/* adjust parameters according to the volume size */
>>>>>>>>>>  	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>>>>>>>>>>  		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
>>>>>>>>>> -		sm_i->dcc_info->discard_granularity = 1;
>>>>>>>>>> +		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>>>>>  		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>>>>>>>>>>  	}
>>>>>>>>>>  
>>>>>>>>>> -- 
>>>>>>>>>> 2.18.0.rc1
>>>>>>>>>
>>>>>>>>> .
>>>>>>>>>
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
>
Jaegeuk Kim Aug. 16, 2018, 1:23 a.m. UTC | #11
On 08/15, Ju Hyung Park wrote:
> btrfs also distinguishes high/low end SSDs with mount option "ssd" and
> "ssd_spread".
> I believe there is no implementation inside btrfs to do it automatically.
> 
> Providing users an option with a well made documentation is acceptable imo.

Yeah, I was thinking about this as well. How about adding "-o disk=%s" where
%s can be ufs/emmc/ssd/low_ssd/nvme/...?
And, we also can decide the device type automatically w/ kmsg by default.

> 
> And I personally think, if the default goes to a dynamically deciding
> method, it should tell it to kmsg.
> 
> Thanks,
> 
> On Wed, Aug 15, 2018, 12:31 PM Chao Yu <yuchao0@huawei.com> wrote:
> 
> > On 2018/8/15 11:20, Jaegeuk Kim wrote:
> > > On 08/15, Chao Yu wrote:
> > >> On 2018/8/15 10:56, Jaegeuk Kim wrote:
> > >>> On 08/15, Chao Yu wrote:
> > >>>> On 2018/8/15 10:33, Jaegeuk Kim wrote:
> > >>>>> On 08/15, Chao Yu wrote:
> > >>>>>> On 2018/8/15 1:23, Jaegeuk Kim wrote:
> > >>>>>>> On 08/14, Chao Yu wrote:
> > >>>>>>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
> > >>>>>>>>> On 08/10, Chao Yu wrote:
> > >>>>>>>>>> Previously, discard speed was fixed mostly, and in high usage
> > rate
> > >>>>>>>>>> device, we will speed up issuing discard, but it doesn't make
> > sense
> > >>>>>>>>>> that in a non-full filesystem, we still issue discard with slow
> > speed.
> > >>>>>>>>>
> > >>>>>>>>> Could you please elaborate the problem in more detail? The speed
> > depends
> > >>>>>>>>> on how many candidates?
> > >>>>>>>>
> > >>>>>>>> undiscard blocks are all 4k granularity.
> > >>>>>>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash
> > storage: 40%
> > >>>>>>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash
> > storage: 65%
> > >>>>>>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash
> > storage: 100%
> > >>>>>>>>
> > >>>>>>>>
> > >>>>>>>> 1. for case c), we need to speed up issuing discard based on
> > utilization of
> > >>>>>>>> "filesystem + undiscard" instead of just utilization of
> > filesystem.
> > >>>>>>>>
> > >>>>>>>> -              if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> > >>>>>>>> -                      dpolicy->granularity = 1;
> > >>>>>>>> -                      dpolicy->max_interval =
> > DEF_MIN_DISCARD_ISSUE_TIME;
> > >>>>>>>> -              }
> > >>>>>>>>
> > >>>>>>>> 2. If free space in storage touches therein threshold,
> > performance will be very
> > >>>>>>>> sensitive. In low-end storage, with high usage in space, even
> > free space is
> > >>>>>>>> reduced by 1%, performance will decrease a lot.
> > >>>>>>>
> > >>>>>>> So, we may need to distinguish low-end vs. high-end storage. In
> > high-end case,
> > >>>>>>> it'd be better to avoid IO contention, while low-end device wants
> > to get more
> > >>>>>>> discard commands as much as possible. So, how about adding an
> > option for this
> > >>>>>>> as a tunable point?
> > >>>>>>
> > >>>>>> Agreed, how about adding a sysfs entry discard_tunning:
> > >>>>>> 1: enabled, use 4k granularity, self-adapted speed based on real
> > device free space.
> > >>>>>> 0: disabled, use dcc->discard_granularity, fixed speed.
> > >>>>>>
> > >>>>>> By default: enabled
> > >>>>>>
> > >>>>>> How do you think?
> > >>>>>
> > >>>>> I don't think this is proper with a sysfs entry, since we already
> > know the
> > >>>>
> > >>>> You mean by storage capacity? <= 32GB means low-end?
> > >>>
> > >>> Yes, that's current condition to judge it. If there is any other
> > method, it'd be
> > >>
> > >> That would be hard code...
> > >>
> > >> Still I have not got any other method to do the judgment except
> > capacity.
> > >
> > > Maybe ufs or emmc?
> >
> > Yeah, that's may be a good way.
> >
> > I remember very initial version UFS has very poor discard performance, for
> > that
> > kind of storage, it will be not accurate?
> >
> > Let me check whether there is a flag to distinguish ufs/emmc.
> >
> > Thanks,
> >
> > >
> > >>
> > >> Thanks,
> > >>
> > >>> better to change it.
> > >>>
> > >>>>
> > >>>> Thanks,
> > >>>>
> > >>>>> device type when mounting the partition. We won't require to change
> > the policy
> > >>>>> on the fly. And, I still don't get to change the default.
> > >>>>>
> > >>>>>>
> > >>>>>> Thanks,
> > >>>>>>
> > >>>>>>>
> > >>>>>>>>
> > >>>>>>>> IMO, in above cases, we'd better to issue discard with high speed
> > for c), middle
> > >>>>>>>> speed for b), and low speed for a).
> > >>>>>>>>
> > >>>>>>>> How do you think?
> > >>>>>>>>
> > >>>>>>>> Thanks,
> > >>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>> Thanks,
> > >>>>>>>>>
> > >>>>>>>>>>
> > >>>>>>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower
> > efficient
> > >>>>>>>>>> and causing high lifetime overhead.
> > >>>>>>>>>>
> > >>>>>>>>>> Let's tune discard speed as below:
> > >>>>>>>>>>
> > >>>>>>>>>> a. adjust default issue interval:
> > >>>>>>>>>>              original        after
> > >>>>>>>>>> min_interval:        50ms            100ms
> > >>>>>>>>>> mid_interval:        500ms           1000ms
> > >>>>>>>>>> max_interval:        60000ms         10000ms
> > >>>>>>>>>>
> > >>>>>>>>>> b. if last time we stop issuing discard due to IO interruption
> > of user,
> > >>>>>>>>>> let's reset all {min,mid,max}_interval to default one.
> > >>>>>>>>>>
> > >>>>>>>>>> c. tune {min,mid,max}_interval with below calculation method:
> > >>>>>>>>>>
> > >>>>>>>>>> base_interval = default_interval / 10;
> > >>>>>>>>>> total_interval = default_interval - base_interval;
> > >>>>>>>>>> interval = base_interval + total_interval * (100 - dev_util) /
> > 100;
> > >>>>>>>>>>
> > >>>>>>>>>> For example:
> > >>>>>>>>>> min_interval (:100ms)
> > >>>>>>>>>> dev_util (%) interval (ms)
> > >>>>>>>>>> 0            100
> > >>>>>>>>>> 10           91
> > >>>>>>>>>> 20           82
> > >>>>>>>>>> 30           73
> > >>>>>>>>>> ...
> > >>>>>>>>>> 80           28
> > >>>>>>>>>> 90           19
> > >>>>>>>>>> 100          10
> > >>>>>>>>>>
> > >>>>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> > >>>>>>>>>> ---
> > >>>>>>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
> > >>>>>>>>>>  fs/f2fs/segment.c | 64
> > +++++++++++++++++++++++++++++++++++++----------
> > >>>>>>>>>>  fs/f2fs/segment.h |  9 +++++++
> > >>>>>>>>>>  fs/f2fs/super.c   |  2 +-
> > >>>>>>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
> > >>>>>>>>>>
> > >>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > >>>>>>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
> > >>>>>>>>>> --- a/fs/f2fs/f2fs.h
> > >>>>>>>>>> +++ b/fs/f2fs/f2fs.h
> > >>>>>>>>>> @@ -185,10 +185,9 @@ enum {
> > >>>>>>>>>>
> > >>>>>>>>>>  #define MAX_DISCARD_BLOCKS(sbi)             BLKS_PER_SEC(sbi)
> > >>>>>>>>>>  #define DEF_MAX_DISCARD_REQUEST             8       /* issue 8
> > discards per round */
> > >>>>>>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME  50      /* 50 ms, if
> > exists */
> > >>>>>>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME  500     /* 500 ms, if
> > device busy */
> > >>>>>>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME  60000   /* 60 s, if no
> > candidates */
> > >>>>>>>>>> -#define DEF_DISCARD_URGENT_UTIL             80      /* do more
> > discard over 80% */
> > >>>>>>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME  100     /* 100 ms, if
> > exists */
> > >>>>>>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME  1000    /* 1000 ms, if
> > device busy */
> > >>>>>>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME  10000   /* 10000 ms, if no
> > candidates */
> > >>>>>>>>>>  #define DEF_CP_INTERVAL                     60      /* 60 secs
> > */
> > >>>>>>>>>>  #define DEF_IDLE_INTERVAL           5       /* 5 secs */
> > >>>>>>>>>>
> > >>>>>>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
> > >>>>>>>>>>  };
> > >>>>>>>>>>
> > >>>>>>>>>>  /* default discard granularity of inner discard thread, unit:
> > block count */
> > >>>>>>>>>> -#define DEFAULT_DISCARD_GRANULARITY         1
> > >>>>>>>>>> +#define MID_DISCARD_GRANULARITY                     16
> > >>>>>>>>>> +#define MIN_DISCARD_GRANULARITY                     1
> > >>>>>>>>>>
> > >>>>>>>>>>  /* max discard pend list number */
> > >>>>>>>>>>  #define MAX_PLIST_NUM               512
> > >>>>>>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
> > >>>>>>>>>>      atomic_t discard_cmd_cnt;               /* # of cached cmd
> > count */
> > >>>>>>>>>>      struct rb_root root;                    /* root of discard
> > rb-tree */
> > >>>>>>>>>>      bool rbtree_check;                      /* config for
> > consistence check */
> > >>>>>>>>>> +    bool io_interrupted;                    /* last state of
> > io interrupted */
> > >>>>>>>>>>  };
> > >>>>>>>>>>
> > >>>>>>>>>>  /* for the list of fsync inodes, used only during recovery */
> > >>>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > >>>>>>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
> > >>>>>>>>>> --- a/fs/f2fs/segment.c
> > >>>>>>>>>> +++ b/fs/f2fs/segment.c
> > >>>>>>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct
> > f2fs_sb_info *sbi,
> > >>>>>>>>>>  #endif
> > >>>>>>>>>>  }
> > >>>>>>>>>>
> > >>>>>>>>>> +static void __adjust_discard_speed(unsigned int *interval,
> > >>>>>>>>>> +                            unsigned int def_interval, int
> > dev_util)
> > >>>>>>>>>> +{
> > >>>>>>>>>> +    unsigned int base_interval, total_interval;
> > >>>>>>>>>> +
> > >>>>>>>>>> +    base_interval = def_interval / 10;
> > >>>>>>>>>> +    total_interval = def_interval - base_interval;
> > >>>>>>>>>> +
> > >>>>>>>>>> +    /*
> > >>>>>>>>>> +     * if def_interval = 100, adjusted interval should be in
> > range of
> > >>>>>>>>>> +     * [10, 100].
> > >>>>>>>>>> +     */
> > >>>>>>>>>> +    *interval = base_interval + total_interval * (100 -
> > dev_util) / 100;
> > >>>>>>>>>> +}
> > >>>>>>>>>> +
> > >>>>>>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> > >>>>>>>>>> +                                    struct discard_policy
> > *dpolicy)
> > >>>>>>>>>> +{
> > >>>>>>>>>> +    struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > >>>>>>>>>> +    int dev_util;
> > >>>>>>>>>> +
> > >>>>>>>>>> +    if (dcc->io_interrupted) {
> > >>>>>>>>>> +            dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> +            dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> +            dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> +            return;
> > >>>>>>>>>> +    }
> > >>>>>>>>>> +
> > >>>>>>>>>> +    dev_util = dev_utilization(sbi);
> > >>>>>>>>>> +
> > >>>>>>>>>> +    __adjust_discard_speed(&dpolicy->min_interval,
> > >>>>>>>>>> +                            DEF_MIN_DISCARD_ISSUE_TIME,
> > dev_util);
> > >>>>>>>>>> +    __adjust_discard_speed(&dpolicy->mid_interval,
> > >>>>>>>>>> +                            DEF_MID_DISCARD_ISSUE_TIME,
> > dev_util);
> > >>>>>>>>>> +    __adjust_discard_speed(&dpolicy->max_interval,
> > >>>>>>>>>> +                            DEF_MAX_DISCARD_ISSUE_TIME,
> > dev_util);
> > >>>>>>>>>> +}
> > >>>>>>>>>> +
> > >>>>>>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
> > >>>>>>>>>>                              struct discard_policy *dpolicy,
> > >>>>>>>>>>                              int discard_type, unsigned int
> > granularity)
> > >>>>>>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct
> > f2fs_sb_info *sbi,
> > >>>>>>>>>>      dpolicy->io_aware_gran = MAX_PLIST_NUM;
> > >>>>>>>>>>
> > >>>>>>>>>>      if (discard_type == DPOLICY_BG) {
> > >>>>>>>>>> -            dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> -            dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> -            dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> > >>>>>>>>>>              dpolicy->io_aware = true;
> > >>>>>>>>>>              dpolicy->sync = false;
> > >>>>>>>>>>              dpolicy->ordered = true;
> > >>>>>>>>>> -            if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
> > >>>>>>>>>> -                    dpolicy->granularity = 1;
> > >>>>>>>>>> -                    dpolicy->max_interval =
> > DEF_MIN_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> -            }
> > >>>>>>>>>> +            __tune_discard_policy(sbi, dpolicy);
> > >>>>>>>>>>      } else if (discard_type == DPOLICY_FORCE) {
> > >>>>>>>>>> -            dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> -            dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> > >>>>>>>>>> -            dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> > >>>>>>>>>>              dpolicy->io_aware = false;
> > >>>>>>>>>>      } else if (discard_type == DPOLICY_FSTRIM) {
> > >>>>>>>>>>              dpolicy->io_aware = false;
> > >>>>>>>>>> @@ -1353,6 +1382,8 @@ static unsigned int
> > __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
> > >>>>>>>>>>      if (!issued && io_interrupted)
> > >>>>>>>>>>              issued = -1;
> > >>>>>>>>>>
> > >>>>>>>>>> +    dcc->io_interrupted = io_interrupted;
> > >>>>>>>>>> +
> > >>>>>>>>>>      return issued;
> > >>>>>>>>>>  }
> > >>>>>>>>>>
> > >>>>>>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct
> > f2fs_sb_info *sbi,
> > >>>>>>>>>>              if (i + 1 < dpolicy->granularity)
> > >>>>>>>>>>                      break;
> > >>>>>>>>>>
> > >>>>>>>>>> -            if (i < DEFAULT_DISCARD_GRANULARITY &&
> > dpolicy->ordered)
> > >>>>>>>>>> +            if (i < MID_DISCARD_GRANULARITY &&
> > dpolicy->ordered)
> > >>>>>>>>>>                      return __issue_discard_cmd_orderly(sbi,
> > dpolicy);
> > >>>>>>>>>>
> > >>>>>>>>>>              pend_list = &dcc->pend_list[i];
> > >>>>>>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct
> > f2fs_sb_info *sbi,
> > >>>>>>>>>>      if (!issued && io_interrupted)
> > >>>>>>>>>>              issued = -1;
> > >>>>>>>>>>
> > >>>>>>>>>> +    dcc->io_interrupted = io_interrupted;
> > >>>>>>>>>> +
> > >>>>>>>>>>      return issued;
> > >>>>>>>>>>  }
> > >>>>>>>>>>
> > >>>>>>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void
> > *data)
> > >>>>>>>>>>      struct f2fs_sb_info *sbi = data;
> > >>>>>>>>>>      struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > >>>>>>>>>>      wait_queue_head_t *q = &dcc->discard_wait_queue;
> > >>>>>>>>>> -    struct discard_policy dpolicy;
> > >>>>>>>>>> +    struct discard_policy dpolicy = {
> > >>>>>>>>>> +            .min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
> > >>>>>>>>>> +            .mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
> > >>>>>>>>>> +            .max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
> > >>>>>>>>>> +    };
> > >>>>>>>>>>      unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
> > >>>>>>>>>>      int issued;
> > >>>>>>>>>>
> > >>>>>>>>>> @@ -1929,7 +1966,7 @@ static int
> > create_discard_cmd_control(struct f2fs_sb_info *sbi)
> > >>>>>>>>>>      if (!dcc)
> > >>>>>>>>>>              return -ENOMEM;
> > >>>>>>>>>>
> > >>>>>>>>>> -    dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
> > >>>>>>>>>> +    dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
> > >>>>>>>>>>      INIT_LIST_HEAD(&dcc->entry_list);
> > >>>>>>>>>>      for (i = 0; i < MAX_PLIST_NUM; i++)
> > >>>>>>>>>>              INIT_LIST_HEAD(&dcc->pend_list[i]);
> > >>>>>>>>>> @@ -1945,6 +1982,7 @@ static int
> > create_discard_cmd_control(struct f2fs_sb_info *sbi)
> > >>>>>>>>>>      dcc->next_pos = 0;
> > >>>>>>>>>>      dcc->root = RB_ROOT;
> > >>>>>>>>>>      dcc->rbtree_check = false;
> > >>>>>>>>>> +    dcc->io_interrupted = false;
> > >>>>>>>>>>
> > >>>>>>>>>>      init_waitqueue_head(&dcc->discard_wait_queue);
> > >>>>>>>>>>      SM_I(sbi)->dcc_info = dcc;
> > >>>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > >>>>>>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
> > >>>>>>>>>> --- a/fs/f2fs/segment.h
> > >>>>>>>>>> +++ b/fs/f2fs/segment.h
> > >>>>>>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct
> > f2fs_sb_info *sbi)
> > >>>>>>>>>>                                      sbi->user_block_count);
> > >>>>>>>>>>  }
> > >>>>>>>>>>
> > >>>>>>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
> > >>>>>>>>>> +{
> > >>>>>>>>>> +    unsigned int dev_blks;
> > >>>>>>>>>> +
> > >>>>>>>>>> +    dev_blks = valid_user_blocks(sbi) +
> > SM_I(sbi)->dcc_info->undiscard_blks;
> > >>>>>>>>>> +    return div_u64((u64)dev_blks * 100,
> > >>>>>>>>>> +                    MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
> > >>>>>>>>>> +}
> > >>>>>>>>>> +
> > >>>>>>>>>>  /*
> > >>>>>>>>>>   * Sometimes f2fs may be better to drop out-of-place update
> > policy.
> > >>>>>>>>>>   * And, users can control the policy through sysfs entries.
> > >>>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > >>>>>>>>>> index b055f2ea77c5..55ed76daad23 100644
> > >>>>>>>>>> --- a/fs/f2fs/super.c
> > >>>>>>>>>> +++ b/fs/f2fs/super.c
> > >>>>>>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct
> > f2fs_sb_info *sbi)
> > >>>>>>>>>>      /* adjust parameters according to the volume size */
> > >>>>>>>>>>      if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
> > >>>>>>>>>>              F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
> > >>>>>>>>>> -            sm_i->dcc_info->discard_granularity = 1;
> > >>>>>>>>>> +            sm_i->dcc_info->discard_granularity =
> > MIN_DISCARD_GRANULARITY;
> > >>>>>>>>>>              sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
> > >>>>>>>>>>      }
> > >>>>>>>>>>
> > >>>>>>>>>> --
> > >>>>>>>>>> 2.18.0.rc1
> > >>>>>>>>>
> > >>>>>>>>> .
> > >>>>>>>>>
> > >>>>>>>
> > >>>>>>> .
> > >>>>>>>
> > >>>>>
> > >>>>> .
> > >>>>>
> > >>>
> > >>> .
> > >>>
> > >
> > > .
> > >
> >
> >
> >
> > ------------------------------------------------------------------------------
> > Check out the vibrant tech community on one of the world's most
> > engaging tech sites, Slashdot.org! http://sdm.link/slashdot
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
Chao Yu Aug. 16, 2018, 11:25 a.m. UTC | #12
On 2018/8/16 9:23, Jaegeuk Kim wrote:
> On 08/15, Ju Hyung Park wrote:
>> btrfs also distinguishes high/low end SSDs with mount option "ssd" and
>> "ssd_spread".
>> I believe there is no implementation inside btrfs to do it automatically.
>>
>> Providing users an option with a well made documentation is acceptable imo.
> 
> Yeah, I was thinking about this as well. How about adding "-o disk=%s" where
> %s can be ufs/emmc/ssd/low_ssd/nvme/...?

Agreed,

> And, we also can decide the device type automatically w/ kmsg by default.

You mean by scanning kmsg to see the device type?

Thanks,

> 
>>
>> And I personally think, if the default goes to a dynamically deciding
>> method, it should tell it to kmsg.
>>
>> Thanks,
>>
>> On Wed, Aug 15, 2018, 12:31 PM Chao Yu <yuchao0@huawei.com> wrote:
>>
>>> On 2018/8/15 11:20, Jaegeuk Kim wrote:
>>>> On 08/15, Chao Yu wrote:
>>>>> On 2018/8/15 10:56, Jaegeuk Kim wrote:
>>>>>> On 08/15, Chao Yu wrote:
>>>>>>> On 2018/8/15 10:33, Jaegeuk Kim wrote:
>>>>>>>> On 08/15, Chao Yu wrote:
>>>>>>>>> On 2018/8/15 1:23, Jaegeuk Kim wrote:
>>>>>>>>>> On 08/14, Chao Yu wrote:
>>>>>>>>>>> On 2018/8/14 12:19, Jaegeuk Kim wrote:
>>>>>>>>>>>> On 08/10, Chao Yu wrote:
>>>>>>>>>>>>> Previously, discard speed was fixed mostly, and in high usage
>>> rate
>>>>>>>>>>>>> device, we will speed up issuing discard, but it doesn't make
>>> sense
>>>>>>>>>>>>> that in a non-full filesystem, we still issue discard with slow
>>> speed.
>>>>>>>>>>>>
>>>>>>>>>>>> Could you please elaborate the problem in more detail? The speed
>>> depends
>>>>>>>>>>>> on how many candidates?
>>>>>>>>>>>
>>>>>>>>>>> undiscard blocks are all 4k granularity.
>>>>>>>>>>> a) utility: filesystem: 20% + undiscard blocks: 20% = flash
>>> storage: 40%
>>>>>>>>>>> b) utility: filesystem: 40% + undiscard blocks: 25% = flash
>>> storage: 65%
>>>>>>>>>>> c) utility: filesystem: 60% + undiscard blocks: 30% = flash
>>> storage: 100%
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> 1. for case c), we need to speed up issuing discard based on
>>> utilization of
>>>>>>>>>>> "filesystem + undiscard" instead of just utilization of
>>> filesystem.
>>>>>>>>>>>
>>>>>>>>>>> -              if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>>>>>>> -                      dpolicy->granularity = 1;
>>>>>>>>>>> -                      dpolicy->max_interval =
>>> DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>> -              }
>>>>>>>>>>>
>>>>>>>>>>> 2. If free space in storage touches therein threshold,
>>> performance will be very
>>>>>>>>>>> sensitive. In low-end storage, with high usage in space, even
>>> free space is
>>>>>>>>>>> reduced by 1%, performance will decrease a lot.
>>>>>>>>>>
>>>>>>>>>> So, we may need to distinguish low-end vs. high-end storage. In
>>> high-end case,
>>>>>>>>>> it'd be better to avoid IO contention, while low-end device wants
>>> to get more
>>>>>>>>>> discard commands as much as possible. So, how about adding an
>>> option for this
>>>>>>>>>> as a tunable point?
>>>>>>>>>
>>>>>>>>> Agreed, how about adding a sysfs entry discard_tunning:
>>>>>>>>> 1: enabled, use 4k granularity, self-adapted speed based on real
>>> device free space.
>>>>>>>>> 0: disabled, use dcc->discard_granularity, fixed speed.
>>>>>>>>>
>>>>>>>>> By default: enabled
>>>>>>>>>
>>>>>>>>> How do you think?
>>>>>>>>
>>>>>>>> I don't think this is proper with a sysfs entry, since we already
>>> know the
>>>>>>>
>>>>>>> You mean by storage capacity? <= 32GB means low-end?
>>>>>>
>>>>>> Yes, that's current condition to judge it. If there is any other
>>> method, it'd be
>>>>>
>>>>> That would be hard code...
>>>>>
>>>>> Still I have not got any other method to do the judgment except
>>> capacity.
>>>>
>>>> Maybe ufs or emmc?
>>>
>>> Yeah, that's may be a good way.
>>>
>>> I remember very initial version UFS has very poor discard performance, for
>>> that
>>> kind of storage, it will be not accurate?
>>>
>>> Let me check whether there is a flag to distinguish ufs/emmc.
>>>
>>> Thanks,
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>>> better to change it.
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>>> device type when mounting the partition. We won't require to change
>>> the policy
>>>>>>>> on the fly. And, I still don't get to change the default.
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> IMO, in above cases, we'd better to issue discard with high speed
>>> for c), middle
>>>>>>>>>>> speed for b), and low speed for a).
>>>>>>>>>>>
>>>>>>>>>>> How do you think?
>>>>>>>>>>>
>>>>>>>>>>> Thanks,
>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Anyway, it comes out undiscarded block makes FTL GC be lower
>>> efficient
>>>>>>>>>>>>> and causing high lifetime overhead.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Let's tune discard speed as below:
>>>>>>>>>>>>>
>>>>>>>>>>>>> a. adjust default issue interval:
>>>>>>>>>>>>>              original        after
>>>>>>>>>>>>> min_interval:        50ms            100ms
>>>>>>>>>>>>> mid_interval:        500ms           1000ms
>>>>>>>>>>>>> max_interval:        60000ms         10000ms
>>>>>>>>>>>>>
>>>>>>>>>>>>> b. if last time we stop issuing discard due to IO interruption
>>> of user,
>>>>>>>>>>>>> let's reset all {min,mid,max}_interval to default one.
>>>>>>>>>>>>>
>>>>>>>>>>>>> c. tune {min,mid,max}_interval with below calculation method:
>>>>>>>>>>>>>
>>>>>>>>>>>>> base_interval = default_interval / 10;
>>>>>>>>>>>>> total_interval = default_interval - base_interval;
>>>>>>>>>>>>> interval = base_interval + total_interval * (100 - dev_util) /
>>> 100;
>>>>>>>>>>>>>
>>>>>>>>>>>>> For example:
>>>>>>>>>>>>> min_interval (:100ms)
>>>>>>>>>>>>> dev_util (%) interval (ms)
>>>>>>>>>>>>> 0            100
>>>>>>>>>>>>> 10           91
>>>>>>>>>>>>> 20           82
>>>>>>>>>>>>> 30           73
>>>>>>>>>>>>> ...
>>>>>>>>>>>>> 80           28
>>>>>>>>>>>>> 90           19
>>>>>>>>>>>>> 100          10
>>>>>>>>>>>>>
>>>>>>>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>  fs/f2fs/f2fs.h    | 11 ++++----
>>>>>>>>>>>>>  fs/f2fs/segment.c | 64
>>> +++++++++++++++++++++++++++++++++++++----------
>>>>>>>>>>>>>  fs/f2fs/segment.h |  9 +++++++
>>>>>>>>>>>>>  fs/f2fs/super.c   |  2 +-
>>>>>>>>>>>>>  4 files changed, 67 insertions(+), 19 deletions(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>>>>>>> index 273ffdaf4891..a1dd2e1c3cb9 100644
>>>>>>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>>>>>>> @@ -185,10 +185,9 @@ enum {
>>>>>>>>>>>>>
>>>>>>>>>>>>>  #define MAX_DISCARD_BLOCKS(sbi)             BLKS_PER_SEC(sbi)
>>>>>>>>>>>>>  #define DEF_MAX_DISCARD_REQUEST             8       /* issue 8
>>> discards per round */
>>>>>>>>>>>>> -#define DEF_MIN_DISCARD_ISSUE_TIME  50      /* 50 ms, if
>>> exists */
>>>>>>>>>>>>> -#define DEF_MID_DISCARD_ISSUE_TIME  500     /* 500 ms, if
>>> device busy */
>>>>>>>>>>>>> -#define DEF_MAX_DISCARD_ISSUE_TIME  60000   /* 60 s, if no
>>> candidates */
>>>>>>>>>>>>> -#define DEF_DISCARD_URGENT_UTIL             80      /* do more
>>> discard over 80% */
>>>>>>>>>>>>> +#define DEF_MIN_DISCARD_ISSUE_TIME  100     /* 100 ms, if
>>> exists */
>>>>>>>>>>>>> +#define DEF_MID_DISCARD_ISSUE_TIME  1000    /* 1000 ms, if
>>> device busy */
>>>>>>>>>>>>> +#define DEF_MAX_DISCARD_ISSUE_TIME  10000   /* 10000 ms, if no
>>> candidates */
>>>>>>>>>>>>>  #define DEF_CP_INTERVAL                     60      /* 60 secs
>>> */
>>>>>>>>>>>>>  #define DEF_IDLE_INTERVAL           5       /* 5 secs */
>>>>>>>>>>>>>
>>>>>>>>>>>>> @@ -248,7 +247,8 @@ struct discard_entry {
>>>>>>>>>>>>>  };
>>>>>>>>>>>>>
>>>>>>>>>>>>>  /* default discard granularity of inner discard thread, unit:
>>> block count */
>>>>>>>>>>>>> -#define DEFAULT_DISCARD_GRANULARITY         1
>>>>>>>>>>>>> +#define MID_DISCARD_GRANULARITY                     16
>>>>>>>>>>>>> +#define MIN_DISCARD_GRANULARITY                     1
>>>>>>>>>>>>>
>>>>>>>>>>>>>  /* max discard pend list number */
>>>>>>>>>>>>>  #define MAX_PLIST_NUM               512
>>>>>>>>>>>>> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>>>>>>>>>>>>>      atomic_t discard_cmd_cnt;               /* # of cached cmd
>>> count */
>>>>>>>>>>>>>      struct rb_root root;                    /* root of discard
>>> rb-tree */
>>>>>>>>>>>>>      bool rbtree_check;                      /* config for
>>> consistence check */
>>>>>>>>>>>>> +    bool io_interrupted;                    /* last state of
>>> io interrupted */
>>>>>>>>>>>>>  };
>>>>>>>>>>>>>
>>>>>>>>>>>>>  /* for the list of fsync inodes, used only during recovery */
>>>>>>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>>>>>>> index 8b52e8dfb12f..9564aaf1f27b 100644
>>>>>>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>>>>>>> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct
>>> f2fs_sb_info *sbi,
>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>>  }
>>>>>>>>>>>>>
>>>>>>>>>>>>> +static void __adjust_discard_speed(unsigned int *interval,
>>>>>>>>>>>>> +                            unsigned int def_interval, int
>>> dev_util)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    unsigned int base_interval, total_interval;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    base_interval = def_interval / 10;
>>>>>>>>>>>>> +    total_interval = def_interval - base_interval;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    /*
>>>>>>>>>>>>> +     * if def_interval = 100, adjusted interval should be in
>>> range of
>>>>>>>>>>>>> +     * [10, 100].
>>>>>>>>>>>>> +     */
>>>>>>>>>>>>> +    *interval = base_interval + total_interval * (100 -
>>> dev_util) / 100;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>>>>>> +                                    struct discard_policy
>>> *dpolicy)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>>>>>>>> +    int dev_util;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    if (dcc->io_interrupted) {
>>>>>>>>>>>>> +            dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> +            dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> +            dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> +            return;
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    dev_util = dev_utilization(sbi);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    __adjust_discard_speed(&dpolicy->min_interval,
>>>>>>>>>>>>> +                            DEF_MIN_DISCARD_ISSUE_TIME,
>>> dev_util);
>>>>>>>>>>>>> +    __adjust_discard_speed(&dpolicy->mid_interval,
>>>>>>>>>>>>> +                            DEF_MID_DISCARD_ISSUE_TIME,
>>> dev_util);
>>>>>>>>>>>>> +    __adjust_discard_speed(&dpolicy->max_interval,
>>>>>>>>>>>>> +                            DEF_MAX_DISCARD_ISSUE_TIME,
>>> dev_util);
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>>>>>>>>>>>                              struct discard_policy *dpolicy,
>>>>>>>>>>>>>                              int discard_type, unsigned int
>>> granularity)
>>>>>>>>>>>>> @@ -982,20 +1020,11 @@ static void __init_discard_policy(struct
>>> f2fs_sb_info *sbi,
>>>>>>>>>>>>>      dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>>>>>>>>>>>>
>>>>>>>>>>>>>      if (discard_type == DPOLICY_BG) {
>>>>>>>>>>>>> -            dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> -            dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> -            dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>>              dpolicy->io_aware = true;
>>>>>>>>>>>>>              dpolicy->sync = false;
>>>>>>>>>>>>>              dpolicy->ordered = true;
>>>>>>>>>>>>> -            if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
>>>>>>>>>>>>> -                    dpolicy->granularity = 1;
>>>>>>>>>>>>> -                    dpolicy->max_interval =
>>> DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> -            }
>>>>>>>>>>>>> +            __tune_discard_policy(sbi, dpolicy);
>>>>>>>>>>>>>      } else if (discard_type == DPOLICY_FORCE) {
>>>>>>>>>>>>> -            dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> -            dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>> -            dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>>              dpolicy->io_aware = false;
>>>>>>>>>>>>>      } else if (discard_type == DPOLICY_FSTRIM) {
>>>>>>>>>>>>>              dpolicy->io_aware = false;
>>>>>>>>>>>>> @@ -1353,6 +1382,8 @@ static unsigned int
>>> __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
>>>>>>>>>>>>>      if (!issued && io_interrupted)
>>>>>>>>>>>>>              issued = -1;
>>>>>>>>>>>>>
>>>>>>>>>>>>> +    dcc->io_interrupted = io_interrupted;
>>>>>>>>>>>>> +
>>>>>>>>>>>>>      return issued;
>>>>>>>>>>>>>  }
>>>>>>>>>>>>>
>>>>>>>>>>>>> @@ -1370,7 +1401,7 @@ static int __issue_discard_cmd(struct
>>> f2fs_sb_info *sbi,
>>>>>>>>>>>>>              if (i + 1 < dpolicy->granularity)
>>>>>>>>>>>>>                      break;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -            if (i < DEFAULT_DISCARD_GRANULARITY &&
>>> dpolicy->ordered)
>>>>>>>>>>>>> +            if (i < MID_DISCARD_GRANULARITY &&
>>> dpolicy->ordered)
>>>>>>>>>>>>>                      return __issue_discard_cmd_orderly(sbi,
>>> dpolicy);
>>>>>>>>>>>>>
>>>>>>>>>>>>>              pend_list = &dcc->pend_list[i];
>>>>>>>>>>>>> @@ -1407,6 +1438,8 @@ static int __issue_discard_cmd(struct
>>> f2fs_sb_info *sbi,
>>>>>>>>>>>>>      if (!issued && io_interrupted)
>>>>>>>>>>>>>              issued = -1;
>>>>>>>>>>>>>
>>>>>>>>>>>>> +    dcc->io_interrupted = io_interrupted;
>>>>>>>>>>>>> +
>>>>>>>>>>>>>      return issued;
>>>>>>>>>>>>>  }
>>>>>>>>>>>>>
>>>>>>>>>>>>> @@ -1576,7 +1609,11 @@ static int issue_discard_thread(void
>>> *data)
>>>>>>>>>>>>>      struct f2fs_sb_info *sbi = data;
>>>>>>>>>>>>>      struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>>>>>>>>>>>>      wait_queue_head_t *q = &dcc->discard_wait_queue;
>>>>>>>>>>>>> -    struct discard_policy dpolicy;
>>>>>>>>>>>>> +    struct discard_policy dpolicy = {
>>>>>>>>>>>>> +            .min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
>>>>>>>>>>>>> +            .mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
>>>>>>>>>>>>> +            .max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
>>>>>>>>>>>>> +    };
>>>>>>>>>>>>>      unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
>>>>>>>>>>>>>      int issued;
>>>>>>>>>>>>>
>>>>>>>>>>>>> @@ -1929,7 +1966,7 @@ static int
>>> create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>>>>>>>>      if (!dcc)
>>>>>>>>>>>>>              return -ENOMEM;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -    dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
>>>>>>>>>>>>> +    dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
>>>>>>>>>>>>>      INIT_LIST_HEAD(&dcc->entry_list);
>>>>>>>>>>>>>      for (i = 0; i < MAX_PLIST_NUM; i++)
>>>>>>>>>>>>>              INIT_LIST_HEAD(&dcc->pend_list[i]);
>>>>>>>>>>>>> @@ -1945,6 +1982,7 @@ static int
>>> create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>>>>>>>>>>>>      dcc->next_pos = 0;
>>>>>>>>>>>>>      dcc->root = RB_ROOT;
>>>>>>>>>>>>>      dcc->rbtree_check = false;
>>>>>>>>>>>>> +    dcc->io_interrupted = false;
>>>>>>>>>>>>>
>>>>>>>>>>>>>      init_waitqueue_head(&dcc->discard_wait_queue);
>>>>>>>>>>>>>      SM_I(sbi)->dcc_info = dcc;
>>>>>>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>>>>>>> index 422b0ceb1eaa..63b4da72cd34 100644
>>>>>>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>>>>>>> @@ -616,6 +616,15 @@ static inline int utilization(struct
>>> f2fs_sb_info *sbi)
>>>>>>>>>>>>>                                      sbi->user_block_count);
>>>>>>>>>>>>>  }
>>>>>>>>>>>>>
>>>>>>>>>>>>> +static inline int dev_utilization(struct f2fs_sb_info *sbi)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    unsigned int dev_blks;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    dev_blks = valid_user_blocks(sbi) +
>>> SM_I(sbi)->dcc_info->undiscard_blks;
>>>>>>>>>>>>> +    return div_u64((u64)dev_blks * 100,
>>>>>>>>>>>>> +                    MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  /*
>>>>>>>>>>>>>   * Sometimes f2fs may be better to drop out-of-place update
>>> policy.
>>>>>>>>>>>>>   * And, users can control the policy through sysfs entries.
>>>>>>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>>>>>>> index b055f2ea77c5..55ed76daad23 100644
>>>>>>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>>>>>>> @@ -2862,7 +2862,7 @@ static void f2fs_tuning_parameters(struct
>>> f2fs_sb_info *sbi)
>>>>>>>>>>>>>      /* adjust parameters according to the volume size */
>>>>>>>>>>>>>      if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
>>>>>>>>>>>>>              F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
>>>>>>>>>>>>> -            sm_i->dcc_info->discard_granularity = 1;
>>>>>>>>>>>>> +            sm_i->dcc_info->discard_granularity =
>>> MIN_DISCARD_GRANULARITY;
>>>>>>>>>>>>>              sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
>>>>>>>>>>>>>      }
>>>>>>>>>>>>>
>>>>>>>>>>>>> --
>>>>>>>>>>>>> 2.18.0.rc1
>>>>>>>>>>>>
>>>>>>>>>>>> .
>>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> .
>>>>>>>>>>
>>>>>>>>
>>>>>>>> .
>>>>>>>>
>>>>>>
>>>>>> .
>>>>>>
>>>>
>>>> .
>>>>
>>>
>>>
>>>
>>> ------------------------------------------------------------------------------
>>> Check out the vibrant tech community on one of the world's most
>>> engaging tech sites, Slashdot.org! http://sdm.link/slashdot
>>> _______________________________________________
>>> Linux-f2fs-devel mailing list
>>> Linux-f2fs-devel@lists.sourceforge.net
>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
> 
> .
>

Patch
diff mbox series

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 273ffdaf4891..a1dd2e1c3cb9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -185,10 +185,9 @@  enum {
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
-#define DEF_MIN_DISCARD_ISSUE_TIME	50	/* 50 ms, if exists */
-#define DEF_MID_DISCARD_ISSUE_TIME	500	/* 500 ms, if device busy */
-#define DEF_MAX_DISCARD_ISSUE_TIME	60000	/* 60 s, if no candidates */
-#define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
+#define DEF_MIN_DISCARD_ISSUE_TIME	100	/* 100 ms, if exists */
+#define DEF_MID_DISCARD_ISSUE_TIME	1000	/* 1000 ms, if device busy */
+#define DEF_MAX_DISCARD_ISSUE_TIME	10000	/* 10000 ms, if no candidates */
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
 
@@ -248,7 +247,8 @@  struct discard_entry {
 };
 
 /* default discard granularity of inner discard thread, unit: block count */
-#define DEFAULT_DISCARD_GRANULARITY		1
+#define MID_DISCARD_GRANULARITY			16
+#define MIN_DISCARD_GRANULARITY			1
 
 /* max discard pend list number */
 #define MAX_PLIST_NUM		512
@@ -330,6 +330,7 @@  struct discard_cmd_control {
 	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
 	struct rb_root root;			/* root of discard rb-tree */
 	bool rbtree_check;			/* config for consistence check */
+	bool io_interrupted;			/* last state of io interrupted */
 };
 
 /* for the list of fsync inodes, used only during recovery */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8b52e8dfb12f..9564aaf1f27b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -968,6 +968,44 @@  static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 #endif
 }
 
+static void __adjust_discard_speed(unsigned int *interval,
+				unsigned int def_interval, int dev_util)
+{
+	unsigned int base_interval, total_interval;
+
+	base_interval = def_interval / 10;
+	total_interval = def_interval - base_interval;
+
+	/*
+	 * if def_interval = 100, adjusted interval should be in range of
+	 * [10, 100].
+	 */
+	*interval = base_interval + total_interval * (100 - dev_util) / 100;
+}
+
+static void __tune_discard_policy(struct f2fs_sb_info *sbi,
+					struct discard_policy *dpolicy)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	int dev_util;
+
+	if (dcc->io_interrupted) {
+		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
+		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+		return;
+	}
+
+	dev_util = dev_utilization(sbi);
+
+	__adjust_discard_speed(&dpolicy->min_interval,
+				DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
+	__adjust_discard_speed(&dpolicy->mid_interval,
+				DEF_MID_DISCARD_ISSUE_TIME, dev_util);
+	__adjust_discard_speed(&dpolicy->max_interval,
+				DEF_MAX_DISCARD_ISSUE_TIME, dev_util);
+}
+
 static void __init_discard_policy(struct f2fs_sb_info *sbi,
 				struct discard_policy *dpolicy,
 				int discard_type, unsigned int granularity)
@@ -982,20 +1020,11 @@  static void __init_discard_policy(struct f2fs_sb_info *sbi,
 	dpolicy->io_aware_gran = MAX_PLIST_NUM;
 
 	if (discard_type == DPOLICY_BG) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
 		dpolicy->io_aware = true;
 		dpolicy->sync = false;
 		dpolicy->ordered = true;
-		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
-			dpolicy->granularity = 1;
-			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		}
+		__tune_discard_policy(sbi, dpolicy);
 	} else if (discard_type == DPOLICY_FORCE) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
 		dpolicy->io_aware = false;
 	} else if (discard_type == DPOLICY_FSTRIM) {
 		dpolicy->io_aware = false;
@@ -1353,6 +1382,8 @@  static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
 	if (!issued && io_interrupted)
 		issued = -1;
 
+	dcc->io_interrupted = io_interrupted;
+
 	return issued;
 }
 
@@ -1370,7 +1401,7 @@  static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 		if (i + 1 < dpolicy->granularity)
 			break;
 
-		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+		if (i < MID_DISCARD_GRANULARITY && dpolicy->ordered)
 			return __issue_discard_cmd_orderly(sbi, dpolicy);
 
 		pend_list = &dcc->pend_list[i];
@@ -1407,6 +1438,8 @@  static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 	if (!issued && io_interrupted)
 		issued = -1;
 
+	dcc->io_interrupted = io_interrupted;
+
 	return issued;
 }
 
@@ -1576,7 +1609,11 @@  static int issue_discard_thread(void *data)
 	struct f2fs_sb_info *sbi = data;
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	wait_queue_head_t *q = &dcc->discard_wait_queue;
-	struct discard_policy dpolicy;
+	struct discard_policy dpolicy = {
+		.min_interval = DEF_MIN_DISCARD_ISSUE_TIME,
+		.mid_interval = DEF_MID_DISCARD_ISSUE_TIME,
+		.max_interval = DEF_MAX_DISCARD_ISSUE_TIME,
+	};
 	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
 	int issued;
 
@@ -1929,7 +1966,7 @@  static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	if (!dcc)
 		return -ENOMEM;
 
-	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+	dcc->discard_granularity = MIN_DISCARD_GRANULARITY;
 	INIT_LIST_HEAD(&dcc->entry_list);
 	for (i = 0; i < MAX_PLIST_NUM; i++)
 		INIT_LIST_HEAD(&dcc->pend_list[i]);
@@ -1945,6 +1982,7 @@  static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	dcc->next_pos = 0;
 	dcc->root = RB_ROOT;
 	dcc->rbtree_check = false;
+	dcc->io_interrupted = false;
 
 	init_waitqueue_head(&dcc->discard_wait_queue);
 	SM_I(sbi)->dcc_info = dcc;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 422b0ceb1eaa..63b4da72cd34 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -616,6 +616,15 @@  static inline int utilization(struct f2fs_sb_info *sbi)
 					sbi->user_block_count);
 }
 
+static inline int dev_utilization(struct f2fs_sb_info *sbi)
+{
+	unsigned int dev_blks;
+
+	dev_blks = valid_user_blocks(sbi) + SM_I(sbi)->dcc_info->undiscard_blks;
+	return div_u64((u64)dev_blks * 100,
+			MAIN_SEGS(sbi) << sbi->log_blocks_per_seg);
+}
+
 /*
  * Sometimes f2fs may be better to drop out-of-place update policy.
  * And, users can control the policy through sysfs entries.
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b055f2ea77c5..55ed76daad23 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2862,7 +2862,7 @@  static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 	/* adjust parameters according to the volume size */
 	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
 		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
-		sm_i->dcc_info->discard_granularity = 1;
+		sm_i->dcc_info->discard_granularity = MIN_DISCARD_GRANULARITY;
 		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
 	}