LKML Archive on lore.kernel.org
 help / Atom feed
* [PATCH] f2fs: fix performance issue observed with multi-thread sequential read
@ 2018-08-10  2:37 Jaegeuk Kim
  2018-08-10  2:48 ` [PATCH v2] " Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-10  2:37 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim, Sahitya Tummala

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c  | 14 ++++++++++++--
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/super.c |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..e042d050b87d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2157,15 +2158,24 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
-	if (wbc->sync_mode == WB_SYNC_ALL)
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		atomic_inc(&sbi->wb_sync_req[DATA]);
-	else if (atomic_read(&sbi->wb_sync_req[DATA]))
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	} else if (atomic_read(&sbi->wb_sync_req[DATA])) {
 		goto skip_write;
+	} else {
+		atomic_inc(&sbi->wb_sync_req[DATA]);
+		mutex_lock(&sbi->writepages);
+	}
 
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b1c127d8383b..29bbc55cbc0b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-10  2:37 [PATCH] f2fs: fix performance issue observed with multi-thread sequential read Jaegeuk Kim
@ 2018-08-10  2:48 ` " Jaegeuk Kim
  2018-08-10 18:56   ` [f2fs-dev] [PATCH v3] " Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-10  2:48 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Sahitya Tummala

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
 - quick code fix

 fs/f2fs/data.c  | 14 ++++++++++++--
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/super.c |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..6da143a27b3a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2157,15 +2158,24 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
-	if (wbc->sync_mode == WB_SYNC_ALL)
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		atomic_inc(&sbi->wb_sync_req[DATA]);
-	else if (atomic_read(&sbi->wb_sync_req[DATA]))
+		goto writepages_lock;
+	} else if (atomic_read(&sbi->wb_sync_req[DATA])) {
 		goto skip_write;
+	} else {
+writepages_lock:
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
 
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b1c127d8383b..29bbc55cbc0b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-10  2:48 ` [PATCH v2] " Jaegeuk Kim
@ 2018-08-10 18:56   ` " Jaegeuk Kim
  2018-08-13  2:20     ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-10 18:56 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Changelog from v2:
 - fix deadlock in f2fs_gc

 fs/f2fs/data.c  | 9 +++++++++
 fs/f2fs/f2fs.h  | 1 +
 fs/f2fs/super.c | 1 +
 3 files changed, 11 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..9f0149cdeed8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2162,10 +2163,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	else if (atomic_read(&sbi->wb_sync_req[DATA]))
 		goto skip_write;
 
+	if (!S_ISDIR(inode->i_mode)) {
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
+
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b1c127d8383b..29bbc55cbc0b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-10 18:56   ` [f2fs-dev] [PATCH v3] " Jaegeuk Kim
@ 2018-08-13  2:20     ` Chao Yu
  2018-08-13 20:11       ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-13  2:20 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

Hi Jaegeuk,

On 2018/8/11 2:56, Jaegeuk Kim wrote:
> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> to fix the drop in sequential read throughput.
> 
> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> device: UFS
> 
> Before -
> read throughput: 185 MB/s
> total read requests: 85177 (of these ~80000 are 4KB size requests).
> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> 
> After -
> read throughput: 758 MB/s
> total read requests: 2417 (of these ~2042 are 512KB reads).
> total write requests: 2701 (of these ~2034 requests are written in 512KB).

IMO, it only impacts sequential read performance in a large file which may be
fragmented during multi-thread writing.

In android environment, mostly, the large file should be cold type, such as apk,
mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
data area writer.

So how about adding a mount option to serialize writepages() for different types
of logs, e.g. in android, using serialize=4; by default, using serialize=7
HOT_DATA	1
WARM_DATA	2
COLD_DATA	4

Thanks,

> 
> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> Changelog from v2:
>  - fix deadlock in f2fs_gc
> 
>  fs/f2fs/data.c  | 9 +++++++++
>  fs/f2fs/f2fs.h  | 1 +
>  fs/f2fs/super.c | 1 +
>  3 files changed, 11 insertions(+)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 45f043ee48bd..9f0149cdeed8 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct blk_plug plug;
>  	int ret;
> +	bool locked = false;
>  
>  	/* deal with chardevs and other special file */
>  	if (!mapping->a_ops->writepage)
> @@ -2162,10 +2163,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>  		goto skip_write;
>  
> +	if (!S_ISDIR(inode->i_mode)) {
> +		mutex_lock(&sbi->writepages);
> +		locked = true;
> +	}
> +
>  	blk_start_plug(&plug);
>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>  	blk_finish_plug(&plug);
>  
> +	if (locked)
> +		mutex_unlock(&sbi->writepages);
> +
>  	if (wbc->sync_mode == WB_SYNC_ALL)
>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>  	/*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b1c127d8383b..29bbc55cbc0b 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>  	int valid_super_block;			/* valid super block no */
>  	unsigned long s_flag;				/* flags for sbi */
> +	struct mutex writepages;		/* mutex for writepages() */
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index be41dbd7b261..53d70b64fea1 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	/* init f2fs-specific super block info */
>  	sbi->valid_super_block = valid_super_block;
>  	mutex_init(&sbi->gc_mutex);
> +	mutex_init(&sbi->writepages);
>  	mutex_init(&sbi->cp_mutex);
>  	init_rwsem(&sbi->node_write);
>  	init_rwsem(&sbi->node_change);
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-13  2:20     ` Chao Yu
@ 2018-08-13 20:11       ` Jaegeuk Kim
  2018-08-14  3:49         ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-13 20:11 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/13, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> > This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> > to fix the drop in sequential read throughput.
> > 
> > Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> > device: UFS
> > 
> > Before -
> > read throughput: 185 MB/s
> > total read requests: 85177 (of these ~80000 are 4KB size requests).
> > total write requests: 2546 (of these ~2208 requests are written in 512KB).
> > 
> > After -
> > read throughput: 758 MB/s
> > total read requests: 2417 (of these ~2042 are 512KB reads).
> > total write requests: 2701 (of these ~2034 requests are written in 512KB).
> 
> IMO, it only impact sequential read performance in a large file which may be
> fragmented during multi-thread writing.
> 
> In android environment, mostly, the large file should be cold type, such as apk,
> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> data area writer.
> 
> So how about adding a mount option to serialize writepage() for different type
> of log, e.g. in android, using serialize=4; by default, using serialize=7
> HOT_DATA	1
> WARM_DATA	2
> COLD_DATA	4

Well, I don't think we need to give too many mount options for this fragmented
case. How about doing this for the large files only like this?

From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 9 Aug 2018 17:53:34 -0700
Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
 sequential read

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
 fs/f2fs/data.c                          | 10 ++++++++++
 fs/f2fs/f2fs.h                          |  2 ++
 fs/f2fs/segment.c                       |  1 +
 fs/f2fs/super.c                         |  1 +
 fs/f2fs/sysfs.c                         |  2 ++
 6 files changed, 24 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 9b0123388f18..94a24aedcdb2 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,14 @@ Description:
 		 Controls the dirty page count condition for the in-place-update
 		 policies.
 
+What:		/sys/fs/f2fs/<disk>/min_seq_blocks
+Date:		August 2018
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+		 Controls the dirty page count condition for batched sequential
+		 writes in ->writepages.
+
+
 What:		/sys/fs/f2fs/<disk>/min_hot_blocks
 Date:		March 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..f09231b1cc74 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	else if (atomic_read(&sbi->wb_sync_req[DATA]))
 		goto skip_write;
 
+	if (!S_ISDIR(inode->i_mode) &&
+			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
+
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 375aa9f30cfa..098bdedc28bf 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -913,6 +913,7 @@ struct f2fs_sm_info {
 	unsigned int ipu_policy;	/* in-place-update policy */
 	unsigned int min_ipu_util;	/* in-place-update threshold */
 	unsigned int min_fsync_blocks;	/* threshold for fsync */
+	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
 	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
 	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
 
@@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 63fc647f9ac2..ffea2d1303bd 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
 		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
 	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
 	sm_info->min_ssr_sections = reserved_sections(sbi);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index cd2e030e47b8..81c0e5337443 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
@@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
 	ATTR_LIST(min_fsync_blocks),
+	ATTR_LIST(min_seq_blocks),
 	ATTR_LIST(min_hot_blocks),
 	ATTR_LIST(min_ssr_sections),
 	ATTR_LIST(max_victim_search),
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-13 20:11       ` Jaegeuk Kim
@ 2018-08-14  3:49         ` Chao Yu
  2018-08-14  4:04           ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-14  3:49 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/14 4:11, Jaegeuk Kim wrote:
> On 08/13, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>> to fix the drop in sequential read throughput.
>>>
>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>> device: UFS
>>>
>>> Before -
>>> read throughput: 185 MB/s
>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>
>>> After -
>>> read throughput: 758 MB/s
>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>
>> IMO, it only impact sequential read performance in a large file which may be
>> fragmented during multi-thread writing.
>>
>> In android environment, mostly, the large file should be cold type, such as apk,
>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>> data area writer.
>>
>> So how about adding a mount option to serialize writepage() for different type
>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>> HOT_DATA	1
>> WARM_DATA	2
>> COLD_DATA	4
> 
> Well, I don't think we need to give too many mount options for this fragmented
> case. How about doing this for the large files only like this?

Thread A write 512 pages			Thread B write 8 pages

- writepages()
 - mutex_lock(&sbi->writepages);
  - writepage();
...
						- writepages()
						 - writepage()
						  ....
  - writepage();
...
 - mutex_unlock(&sbi->writepages);

The above case will also cause fragmentation since we didn't serialize all
concurrent IO with the lock.

Do we need to consider such a case?

Thanks,

> 
>>From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Thu, 9 Aug 2018 17:53:34 -0700
> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>  sequential read
> 
> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> to fix the drop in sequential read throughput.
> 
> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> device: UFS
> 
> Before -
> read throughput: 185 MB/s
> total read requests: 85177 (of these ~80000 are 4KB size requests).
> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> 
> After -
> read throughput: 758 MB/s
> total read requests: 2417 (of these ~2042 are 512KB reads).
> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> 
> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>  fs/f2fs/data.c                          | 10 ++++++++++
>  fs/f2fs/f2fs.h                          |  2 ++
>  fs/f2fs/segment.c                       |  1 +
>  fs/f2fs/super.c                         |  1 +
>  fs/f2fs/sysfs.c                         |  2 ++
>  6 files changed, 24 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 9b0123388f18..94a24aedcdb2 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -51,6 +51,14 @@ Description:
>  		 Controls the dirty page count condition for the in-place-update
>  		 policies.
>  
> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> +Date:		August 2018
> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> +Description:
> +		 Controls the dirty page count condition for batched sequential
> +		 writes in ->writepages.
> +
> +
>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>  Date:		March 2017
>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 45f043ee48bd..f09231b1cc74 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct blk_plug plug;
>  	int ret;
> +	bool locked = false;
>  
>  	/* deal with chardevs and other special file */
>  	if (!mapping->a_ops->writepage)
> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>  		goto skip_write;
>  
> +	if (!S_ISDIR(inode->i_mode) &&
> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> +		mutex_lock(&sbi->writepages);
> +		locked = true;
> +	}
> +
>  	blk_start_plug(&plug);
>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>  	blk_finish_plug(&plug);
>  
> +	if (locked)
> +		mutex_unlock(&sbi->writepages);
> +
>  	if (wbc->sync_mode == WB_SYNC_ALL)
>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>  	/*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 375aa9f30cfa..098bdedc28bf 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>  	unsigned int ipu_policy;	/* in-place-update policy */
>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>  
> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>  	int valid_super_block;			/* valid super block no */
>  	unsigned long s_flag;				/* flags for sbi */
> +	struct mutex writepages;		/* mutex for writepages() */
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 63fc647f9ac2..ffea2d1303bd 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>  
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index be41dbd7b261..53d70b64fea1 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	/* init f2fs-specific super block info */
>  	sbi->valid_super_block = valid_super_block;
>  	mutex_init(&sbi->gc_mutex);
> +	mutex_init(&sbi->writepages);
>  	mutex_init(&sbi->cp_mutex);
>  	init_rwsem(&sbi->node_write);
>  	init_rwsem(&sbi->node_change);
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index cd2e030e47b8..81c0e5337443 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>  	ATTR_LIST(ipu_policy),
>  	ATTR_LIST(min_ipu_util),
>  	ATTR_LIST(min_fsync_blocks),
> +	ATTR_LIST(min_seq_blocks),
>  	ATTR_LIST(min_hot_blocks),
>  	ATTR_LIST(min_ssr_sections),
>  	ATTR_LIST(max_victim_search),
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14  3:49         ` Chao Yu
@ 2018-08-14  4:04           ` Jaegeuk Kim
  2018-08-14  6:18             ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-14  4:04 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/14, Chao Yu wrote:
> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> > On 08/13, Chao Yu wrote:
> >> Hi Jaegeuk,
> >>
> >> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>> to fix the drop in sequential read throughput.
> >>>
> >>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>> device: UFS
> >>>
> >>> Before -
> >>> read throughput: 185 MB/s
> >>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>
> >>> After -
> >>> read throughput: 758 MB/s
> >>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>
> >> IMO, it only impact sequential read performance in a large file which may be
> >> fragmented during multi-thread writing.
> >>
> >> In android environment, mostly, the large file should be cold type, such as apk,
> >> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >> data area writer.
> >>
> >> So how about adding a mount option to serialize writepage() for different type
> >> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >> HOT_DATA	1
> >> WARM_DATA	2
> >> COLD_DATA	4
> > 
> > Well, I don't think we need to give too many mount options for this fragmented
> > case. How about doing this for the large files only like this?
> 
> Thread A write 512 pages			Thread B write 8 pages
> 
> - writepages()
>  - mutex_lock(&sbi->writepages);
>   - writepage();
> ...
> 						- writepages()
> 						 - writepage()
> 						  ....
>   - writepage();
> ...
>  - mutex_unlock(&sbi->writepages);
> 
> Above case will also cause fragmentation since we didn't serialize all
> concurrent IO with the lock.
> 
> Do we need to consider such case?

We can simply allow 512 and 8 in the same segment, which would not be a big deal,
when considering starvation of Thread B.

> 
> Thanks,
> 
> > 
> >>From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> > From: Jaegeuk Kim <jaegeuk@kernel.org>
> > Date: Thu, 9 Aug 2018 17:53:34 -0700
> > Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >  sequential read
> > 
> > This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> > to fix the drop in sequential read throughput.
> > 
> > Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> > device: UFS
> > 
> > Before -
> > read throughput: 185 MB/s
> > total read requests: 85177 (of these ~80000 are 4KB size requests).
> > total write requests: 2546 (of these ~2208 requests are written in 512KB).
> > 
> > After -
> > read throughput: 758 MB/s
> > total read requests: 2417 (of these ~2042 are 512KB reads).
> > total write requests: 2701 (of these ~2034 requests are written in 512KB).
> > 
> > Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >  fs/f2fs/data.c                          | 10 ++++++++++
> >  fs/f2fs/f2fs.h                          |  2 ++
> >  fs/f2fs/segment.c                       |  1 +
> >  fs/f2fs/super.c                         |  1 +
> >  fs/f2fs/sysfs.c                         |  2 ++
> >  6 files changed, 24 insertions(+)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index 9b0123388f18..94a24aedcdb2 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -51,6 +51,14 @@ Description:
> >  		 Controls the dirty page count condition for the in-place-update
> >  		 policies.
> >  
> > +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> > +Date:		August 2018
> > +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > +Description:
> > +		 Controls the dirty page count condition for batched sequential
> > +		 writes in ->writepages.
> > +
> > +
> >  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >  Date:		March 2017
> >  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 45f043ee48bd..f09231b1cc74 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >  	struct blk_plug plug;
> >  	int ret;
> > +	bool locked = false;
> >  
> >  	/* deal with chardevs and other special file */
> >  	if (!mapping->a_ops->writepage)
> > @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >  		goto skip_write;
> >  
> > +	if (!S_ISDIR(inode->i_mode) &&
> > +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> > +		mutex_lock(&sbi->writepages);
> > +		locked = true;
> > +	}
> > +
> >  	blk_start_plug(&plug);
> >  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >  	blk_finish_plug(&plug);
> >  
> > +	if (locked)
> > +		mutex_unlock(&sbi->writepages);
> > +
> >  	if (wbc->sync_mode == WB_SYNC_ALL)
> >  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >  	/*
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 375aa9f30cfa..098bdedc28bf 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >  	unsigned int ipu_policy;	/* in-place-update policy */
> >  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> > +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >  
> > @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >  	int valid_super_block;			/* valid super block no */
> >  	unsigned long s_flag;				/* flags for sbi */
> > +	struct mutex writepages;		/* mutex for writepages() */
> >  
> >  #ifdef CONFIG_BLK_DEV_ZONED
> >  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 63fc647f9ac2..ffea2d1303bd 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> > +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >  
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index be41dbd7b261..53d70b64fea1 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >  	/* init f2fs-specific super block info */
> >  	sbi->valid_super_block = valid_super_block;
> >  	mutex_init(&sbi->gc_mutex);
> > +	mutex_init(&sbi->writepages);
> >  	mutex_init(&sbi->cp_mutex);
> >  	init_rwsem(&sbi->node_write);
> >  	init_rwsem(&sbi->node_change);
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index cd2e030e47b8..81c0e5337443 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> > +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> > @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >  	ATTR_LIST(ipu_policy),
> >  	ATTR_LIST(min_ipu_util),
> >  	ATTR_LIST(min_fsync_blocks),
> > +	ATTR_LIST(min_seq_blocks),
> >  	ATTR_LIST(min_hot_blocks),
> >  	ATTR_LIST(min_ssr_sections),
> >  	ATTR_LIST(max_victim_search),
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14  4:04           ` Jaegeuk Kim
@ 2018-08-14  6:18             ` Chao Yu
  2018-08-14 17:28               ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-14  6:18 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/14 12:04, Jaegeuk Kim wrote:
> On 08/14, Chao Yu wrote:
>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>> On 08/13, Chao Yu wrote:
>>>> Hi Jaegeuk,
>>>>
>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>> to fix the drop in sequential read throughput.
>>>>>
>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>> device: UFS
>>>>>
>>>>> Before -
>>>>> read throughput: 185 MB/s
>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>
>>>>> After -
>>>>> read throughput: 758 MB/s
>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>
>>>> IMO, it only impact sequential read performance in a large file which may be
>>>> fragmented during multi-thread writing.
>>>>
>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>> data area writer.
>>>>
>>>> So how about adding a mount option to serialize writepage() for different type
>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>> HOT_DATA	1
>>>> WARM_DATA	2
>>>> COLD_DATA	4
>>>
>>> Well, I don't think we need to give too many mount options for this fragmented
>>> case. How about doing this for the large files only like this?
>>
>> Thread A write 512 pages			Thread B write 8 pages
>>
>> - writepages()
>>  - mutex_lock(&sbi->writepages);
>>   - writepage();
>> ...
>> 						- writepages()
>> 						 - writepage()
>> 						  ....
>>   - writepage();
>> ...
>>  - mutex_unlock(&sbi->writepages);
>>
>> Above case will also cause fragmentation since we didn't serialize all
>> concurrent IO with the lock.
>>
>> Do we need to consider such case?
> 
> We can simply allow 512 and 8 in the same segment, which would not a big deal,
> when considering starvation of Thread B.

Yeah, but in reality, there would be more threads competing on the same log header,
so I worry that the effect of defragmenting will not be as good as we expect;
anyway, for a benchmark, it's enough.

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>  sequential read
>>>
>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>> to fix the drop in sequential read throughput.
>>>
>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>> device: UFS
>>>
>>> Before -
>>> read throughput: 185 MB/s
>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>
>>> After -
>>> read throughput: 758 MB/s
>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>
>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>  fs/f2fs/segment.c                       |  1 +
>>>  fs/f2fs/super.c                         |  1 +
>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>  6 files changed, 24 insertions(+)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> index 9b0123388f18..94a24aedcdb2 100644
>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> @@ -51,6 +51,14 @@ Description:
>>>  		 Controls the dirty page count condition for the in-place-update
>>>  		 policies.
>>>  
>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>> +Date:		August 2018
>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> +Description:
>>> +		 Controls the dirty page count condition for batched sequential
>>> +		 writes in ->writepages.
>>> +
>>> +
>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>  Date:		March 2017
>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>> index 45f043ee48bd..f09231b1cc74 100644
>>> --- a/fs/f2fs/data.c
>>> +++ b/fs/f2fs/data.c
>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>  	struct blk_plug plug;
>>>  	int ret;
>>> +	bool locked = false;
>>>  
>>>  	/* deal with chardevs and other special file */
>>>  	if (!mapping->a_ops->writepage)
>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>  		goto skip_write;
>>>  
>>> +	if (!S_ISDIR(inode->i_mode) &&
>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>> +		mutex_lock(&sbi->writepages);
>>> +		locked = true;
>>> +	}
>>> +
>>>  	blk_start_plug(&plug);
>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>  	blk_finish_plug(&plug);
>>>  
>>> +	if (locked)
>>> +		mutex_unlock(&sbi->writepages);
>>> +
>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>  	/*
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 375aa9f30cfa..098bdedc28bf 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>  
>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>  	int valid_super_block;			/* valid super block no */
>>>  	unsigned long s_flag;				/* flags for sbi */
>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>  
>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>  
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index be41dbd7b261..53d70b64fea1 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>  	/* init f2fs-specific super block info */
>>>  	sbi->valid_super_block = valid_super_block;
>>>  	mutex_init(&sbi->gc_mutex);
>>> +	mutex_init(&sbi->writepages);
>>>  	mutex_init(&sbi->cp_mutex);
>>>  	init_rwsem(&sbi->node_write);
>>>  	init_rwsem(&sbi->node_change);
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index cd2e030e47b8..81c0e5337443 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>  	ATTR_LIST(ipu_policy),
>>>  	ATTR_LIST(min_ipu_util),
>>>  	ATTR_LIST(min_fsync_blocks),
>>> +	ATTR_LIST(min_seq_blocks),
>>>  	ATTR_LIST(min_hot_blocks),
>>>  	ATTR_LIST(min_ssr_sections),
>>>  	ATTR_LIST(max_victim_search),
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14  6:18             ` Chao Yu
@ 2018-08-14 17:28               ` Jaegeuk Kim
  2018-08-15  1:52                 ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-14 17:28 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/14, Chao Yu wrote:
> On 2018/8/14 12:04, Jaegeuk Kim wrote:
> > On 08/14, Chao Yu wrote:
> >> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> >>> On 08/13, Chao Yu wrote:
> >>>> Hi Jaegeuk,
> >>>>
> >>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>> to fix the drop in sequential read throughput.
> >>>>>
> >>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>> device: UFS
> >>>>>
> >>>>> Before -
> >>>>> read throughput: 185 MB/s
> >>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>
> >>>>> After -
> >>>>> read throughput: 758 MB/s
> >>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>
> >>>> IMO, it only impact sequential read performance in a large file which may be
> >>>> fragmented during multi-thread writing.
> >>>>
> >>>> In android environment, mostly, the large file should be cold type, such as apk,
> >>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >>>> data area writer.
> >>>>
> >>>> So how about adding a mount option to serialize writepage() for different type
> >>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >>>> HOT_DATA	1
> >>>> WARM_DATA	2
> >>>> COLD_DATA	4
> >>>
> >>> Well, I don't think we need to give too many mount options for this fragmented
> >>> case. How about doing this for the large files only like this?
> >>
> >> Thread A write 512 pages			Thread B write 8 pages
> >>
> >> - writepages()
> >>  - mutex_lock(&sbi->writepages);
> >>   - writepage();
> >> ...
> >> 						- writepages()
> >> 						 - writepage()
> >> 						  ....
> >>   - writepage();
> >> ...
> >>  - mutex_unlock(&sbi->writepages);
> >>
> >> Above case will also cause fragmentation since we didn't serialize all
> >> concurrent IO with the lock.
> >>
> >> Do we need to consider such case?
> > 
> > We can simply allow 512 and 8 in the same segment, which would not a big deal,
> > when considering starvation of Thread B.
> 
> Yeah, but in reality, there would be more threads competing in same log header,
> so I worry that the effect of defragmenting will not so good as we expect,
> anyway, for benchmark, it's enough.

Basically, I think this is not a benchmark issue. :) It just reveals the issue
much more easily. Let me think about three cases:
1) WB_SYNC_NONE & WB_SYNC_NONE
 -> can simply use mutex_lock

2) WB_SYNC_ALL & WB_SYNC_NONE
 -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
    will skip writing blocks

3) WB_SYNC_ALL & WB_SYNC_ALL
 -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
    starvation.


I've been testing the below.

if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
                get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
        mutex_lock(&sbi->writepages);
        locked = true;
}

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> >>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> Date: Thu, 9 Aug 2018 17:53:34 -0700
> >>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >>>  sequential read
> >>>
> >>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>> to fix the drop in sequential read throughput.
> >>>
> >>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>> device: UFS
> >>>
> >>> Before -
> >>> read throughput: 185 MB/s
> >>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>
> >>> After -
> >>> read throughput: 758 MB/s
> >>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>
> >>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> >>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> ---
> >>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >>>  fs/f2fs/data.c                          | 10 ++++++++++
> >>>  fs/f2fs/f2fs.h                          |  2 ++
> >>>  fs/f2fs/segment.c                       |  1 +
> >>>  fs/f2fs/super.c                         |  1 +
> >>>  fs/f2fs/sysfs.c                         |  2 ++
> >>>  6 files changed, 24 insertions(+)
> >>>
> >>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> index 9b0123388f18..94a24aedcdb2 100644
> >>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> @@ -51,6 +51,14 @@ Description:
> >>>  		 Controls the dirty page count condition for the in-place-update
> >>>  		 policies.
> >>>  
> >>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> >>> +Date:		August 2018
> >>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>> +Description:
> >>> +		 Controls the dirty page count condition for batched sequential
> >>> +		 writes in ->writepages.
> >>> +
> >>> +
> >>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >>>  Date:		March 2017
> >>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>> index 45f043ee48bd..f09231b1cc74 100644
> >>> --- a/fs/f2fs/data.c
> >>> +++ b/fs/f2fs/data.c
> >>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>  	struct blk_plug plug;
> >>>  	int ret;
> >>> +	bool locked = false;
> >>>  
> >>>  	/* deal with chardevs and other special file */
> >>>  	if (!mapping->a_ops->writepage)
> >>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >>>  		goto skip_write;
> >>>  
> >>> +	if (!S_ISDIR(inode->i_mode) &&
> >>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> >>> +		mutex_lock(&sbi->writepages);
> >>> +		locked = true;
> >>> +	}
> >>> +
> >>>  	blk_start_plug(&plug);
> >>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >>>  	blk_finish_plug(&plug);
> >>>  
> >>> +	if (locked)
> >>> +		mutex_unlock(&sbi->writepages);
> >>> +
> >>>  	if (wbc->sync_mode == WB_SYNC_ALL)
> >>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >>>  	/*
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index 375aa9f30cfa..098bdedc28bf 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> >>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >>>  
> >>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >>>  	int valid_super_block;			/* valid super block no */
> >>>  	unsigned long s_flag;				/* flags for sbi */
> >>> +	struct mutex writepages;		/* mutex for writepages() */
> >>>  
> >>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 63fc647f9ac2..ffea2d1303bd 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> >>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >>>  
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index be41dbd7b261..53d70b64fea1 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>  	/* init f2fs-specific super block info */
> >>>  	sbi->valid_super_block = valid_super_block;
> >>>  	mutex_init(&sbi->gc_mutex);
> >>> +	mutex_init(&sbi->writepages);
> >>>  	mutex_init(&sbi->cp_mutex);
> >>>  	init_rwsem(&sbi->node_write);
> >>>  	init_rwsem(&sbi->node_change);
> >>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>> index cd2e030e47b8..81c0e5337443 100644
> >>> --- a/fs/f2fs/sysfs.c
> >>> +++ b/fs/f2fs/sysfs.c
> >>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> >>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >>>  	ATTR_LIST(ipu_policy),
> >>>  	ATTR_LIST(min_ipu_util),
> >>>  	ATTR_LIST(min_fsync_blocks),
> >>> +	ATTR_LIST(min_seq_blocks),
> >>>  	ATTR_LIST(min_hot_blocks),
> >>>  	ATTR_LIST(min_ssr_sections),
> >>>  	ATTR_LIST(max_victim_search),
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14 17:28               ` Jaegeuk Kim
@ 2018-08-15  1:52                 ` Chao Yu
  2018-08-15  2:15                   ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-15  1:52 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/15 1:28, Jaegeuk Kim wrote:
> On 08/14, Chao Yu wrote:
>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
>>> On 08/14, Chao Yu wrote:
>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>>>> On 08/13, Chao Yu wrote:
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>
>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>> device: UFS
>>>>>>>
>>>>>>> Before -
>>>>>>> read throughput: 185 MB/s
>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>
>>>>>>> After -
>>>>>>> read throughput: 758 MB/s
>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>
>>>>>> IMO, it only impact sequential read performance in a large file which may be
>>>>>> fragmented during multi-thread writing.
>>>>>>
>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>>>> data area writer.
>>>>>>
>>>>>> So how about adding a mount option to serialize writepage() for different type
>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>>>> HOT_DATA	1
>>>>>> WARM_DATA	2
>>>>>> COLD_DATA	4
>>>>>
>>>>> Well, I don't think we need to give too many mount options for this fragmented
>>>>> case. How about doing this for the large files only like this?
>>>>
>>>> Thread A write 512 pages			Thread B write 8 pages
>>>>
>>>> - writepages()
>>>>  - mutex_lock(&sbi->writepages);
>>>>   - writepage();
>>>> ...
>>>> 						- writepages()
>>>> 						 - writepage()
>>>> 						  ....
>>>>   - writepage();
>>>> ...
>>>>  - mutex_unlock(&sbi->writepages);
>>>>
>>>> Above case will also cause fragmentation since we didn't serialize all
>>>> concurrent IO with the lock.
>>>>
>>>> Do we need to consider such case?
>>>
>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
>>> when considering starvation of Thread B.
>>
>> Yeah, but in reality, there would be more threads competing in same log header,
>> so I worry that the effect of defragmenting will not so good as we expect,
>> anyway, for benchmark, it's enough.
> 
> Basically, I think this is not a benchmark issue. :) It just reveals the issue
> much easily. Let me think about three cases:
> 1) WB_SYNC_NONE & WB_SYNC_NONE
>  -> can simply use mutex_lock
> 
> 2) WB_SYNC_ALL & WB_SYNC_NONE
>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
>     will skip writing blocks
> 
> 3) WB_SYNC_ALL & WB_SYNC_ALL
>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
>     starvation.
> 
> 
> I've been testing the below.
> 
> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
>         mutex_lock(&sbi->writepages);
>         locked = true;

Just cover buffered IO? how about covering Direct IO and atomic write as well?

Thanks,

> }
> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>>>  sequential read
>>>>>
>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>> to fix the drop in sequential read throughput.
>>>>>
>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>> device: UFS
>>>>>
>>>>> Before -
>>>>> read throughput: 185 MB/s
>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>
>>>>> After -
>>>>> read throughput: 758 MB/s
>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>
>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>> ---
>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>>>  fs/f2fs/segment.c                       |  1 +
>>>>>  fs/f2fs/super.c                         |  1 +
>>>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>>>  6 files changed, 24 insertions(+)
>>>>>
>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>> index 9b0123388f18..94a24aedcdb2 100644
>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>> @@ -51,6 +51,14 @@ Description:
>>>>>  		 Controls the dirty page count condition for the in-place-update
>>>>>  		 policies.
>>>>>  
>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>>>> +Date:		August 2018
>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>> +Description:
>>>>> +		 Controls the dirty page count condition for batched sequential
>>>>> +		 writes in ->writepages.
>>>>> +
>>>>> +
>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>>>  Date:		March 2017
>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>> index 45f043ee48bd..f09231b1cc74 100644
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>  	struct blk_plug plug;
>>>>>  	int ret;
>>>>> +	bool locked = false;
>>>>>  
>>>>>  	/* deal with chardevs and other special file */
>>>>>  	if (!mapping->a_ops->writepage)
>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>  		goto skip_write;
>>>>>  
>>>>> +	if (!S_ISDIR(inode->i_mode) &&
>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>>>> +		mutex_lock(&sbi->writepages);
>>>>> +		locked = true;
>>>>> +	}
>>>>> +
>>>>>  	blk_start_plug(&plug);
>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>>>  	blk_finish_plug(&plug);
>>>>>  
>>>>> +	if (locked)
>>>>> +		mutex_unlock(&sbi->writepages);
>>>>> +
>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>>>  	/*
>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>> index 375aa9f30cfa..098bdedc28bf 100644
>>>>> --- a/fs/f2fs/f2fs.h
>>>>> +++ b/fs/f2fs/f2fs.h
>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>>>  
>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>>>  	int valid_super_block;			/* valid super block no */
>>>>>  	unsigned long s_flag;				/* flags for sbi */
>>>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>>>  
>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>>>  
>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>> index be41dbd7b261..53d70b64fea1 100644
>>>>> --- a/fs/f2fs/super.c
>>>>> +++ b/fs/f2fs/super.c
>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>  	/* init f2fs-specific super block info */
>>>>>  	sbi->valid_super_block = valid_super_block;
>>>>>  	mutex_init(&sbi->gc_mutex);
>>>>> +	mutex_init(&sbi->writepages);
>>>>>  	mutex_init(&sbi->cp_mutex);
>>>>>  	init_rwsem(&sbi->node_write);
>>>>>  	init_rwsem(&sbi->node_change);
>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>> index cd2e030e47b8..81c0e5337443 100644
>>>>> --- a/fs/f2fs/sysfs.c
>>>>> +++ b/fs/f2fs/sysfs.c
>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>>>  	ATTR_LIST(ipu_policy),
>>>>>  	ATTR_LIST(min_ipu_util),
>>>>>  	ATTR_LIST(min_fsync_blocks),
>>>>> +	ATTR_LIST(min_seq_blocks),
>>>>>  	ATTR_LIST(min_hot_blocks),
>>>>>  	ATTR_LIST(min_ssr_sections),
>>>>>  	ATTR_LIST(max_victim_search),
>>>>>
>>>
>>> .
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-15  1:52                 ` Chao Yu
@ 2018-08-15  2:15                   ` Jaegeuk Kim
  2018-08-15  3:44                     ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-15  2:15 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/15, Chao Yu wrote:
> On 2018/8/15 1:28, Jaegeuk Kim wrote:
> > On 08/14, Chao Yu wrote:
> >> On 2018/8/14 12:04, Jaegeuk Kim wrote:
> >>> On 08/14, Chao Yu wrote:
> >>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> >>>>> On 08/13, Chao Yu wrote:
> >>>>>> Hi Jaegeuk,
> >>>>>>
> >>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>>>> to fix the drop in sequential read throughput.
> >>>>>>>
> >>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>>>> device: UFS
> >>>>>>>
> >>>>>>> Before -
> >>>>>>> read throughput: 185 MB/s
> >>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>>>
> >>>>>>> After -
> >>>>>>> read throughput: 758 MB/s
> >>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>>
> >>>>>> IMO, it only impact sequential read performance in a large file which may be
> >>>>>> fragmented during multi-thread writing.
> >>>>>>
> >>>>>> In android environment, mostly, the large file should be cold type, such as apk,
> >>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >>>>>> data area writer.
> >>>>>>
> >>>>>> So how about adding a mount option to serialize writepage() for different type
> >>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >>>>>> HOT_DATA	1
> >>>>>> WARM_DATA	2
> >>>>>> COLD_DATA	4
> >>>>>
> >>>>> Well, I don't think we need to give too many mount options for this fragmented
> >>>>> case. How about doing this for the large files only like this?
> >>>>
> >>>> Thread A write 512 pages			Thread B write 8 pages
> >>>>
> >>>> - writepages()
> >>>>  - mutex_lock(&sbi->writepages);
> >>>>   - writepage();
> >>>> ...
> >>>> 						- writepages()
> >>>> 						 - writepage()
> >>>> 						  ....
> >>>>   - writepage();
> >>>> ...
> >>>>  - mutex_unlock(&sbi->writepages);
> >>>>
> >>>> Above case will also cause fragmentation since we didn't serialize all
> >>>> concurrent IO with the lock.
> >>>>
> >>>> Do we need to consider such case?
> >>>
> >>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
> >>> when considering starvation of Thread B.
> >>
> >> Yeah, but in reality, there would be more threads competing in same log header,
> >> so I worry that the effect of defragmenting will not so good as we expect,
> >> anyway, for benchmark, it's enough.
> > 
> > Basically, I think this is not a benchmark issue. :) It just reveals the issue
> > much easily. Let me think about three cases:
> > 1) WB_SYNC_NONE & WB_SYNC_NONE
> >  -> can simply use mutex_lock
> > 
> > 2) WB_SYNC_ALL & WB_SYNC_NONE
> >  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
> >     will skip writing blocks
> > 
> > 3) WB_SYNC_ALL & WB_SYNC_ALL
> >  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
> >     starvation.
> > 
> > 
> > I've been testing the below.
> > 
> > if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
> >                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
> >         mutex_lock(&sbi->writepages);
> >         locked = true;
> 
> Just cover buffered IO? how about covering Direct IO and atomic write as well?

I'd expect direct IO does in-place-updates, and not sure whether we need to
add another lock contention between buffered or direct IO. Atomic writes
would be covered by ->min_seq_blocks.

> 
> Thanks,
> 
> > }
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> >>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
> >>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >>>>>  sequential read
> >>>>>
> >>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>> to fix the drop in sequential read throughput.
> >>>>>
> >>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>> device: UFS
> >>>>>
> >>>>> Before -
> >>>>> read throughput: 185 MB/s
> >>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>
> >>>>> After -
> >>>>> read throughput: 758 MB/s
> >>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>
> >>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> >>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>> ---
> >>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >>>>>  fs/f2fs/data.c                          | 10 ++++++++++
> >>>>>  fs/f2fs/f2fs.h                          |  2 ++
> >>>>>  fs/f2fs/segment.c                       |  1 +
> >>>>>  fs/f2fs/super.c                         |  1 +
> >>>>>  fs/f2fs/sysfs.c                         |  2 ++
> >>>>>  6 files changed, 24 insertions(+)
> >>>>>
> >>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>> index 9b0123388f18..94a24aedcdb2 100644
> >>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>> @@ -51,6 +51,14 @@ Description:
> >>>>>  		 Controls the dirty page count condition for the in-place-update
> >>>>>  		 policies.
> >>>>>  
> >>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> >>>>> +Date:		August 2018
> >>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>> +Description:
> >>>>> +		 Controls the dirty page count condition for batched sequential
> >>>>> +		 writes in ->writepages.
> >>>>> +
> >>>>> +
> >>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >>>>>  Date:		March 2017
> >>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>>> index 45f043ee48bd..f09231b1cc74 100644
> >>>>> --- a/fs/f2fs/data.c
> >>>>> +++ b/fs/f2fs/data.c
> >>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>  	struct blk_plug plug;
> >>>>>  	int ret;
> >>>>> +	bool locked = false;
> >>>>>  
> >>>>>  	/* deal with chardevs and other special file */
> >>>>>  	if (!mapping->a_ops->writepage)
> >>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >>>>>  		goto skip_write;
> >>>>>  
> >>>>> +	if (!S_ISDIR(inode->i_mode) &&
> >>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> >>>>> +		mutex_lock(&sbi->writepages);
> >>>>> +		locked = true;
> >>>>> +	}
> >>>>> +
> >>>>>  	blk_start_plug(&plug);
> >>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >>>>>  	blk_finish_plug(&plug);
> >>>>>  
> >>>>> +	if (locked)
> >>>>> +		mutex_unlock(&sbi->writepages);
> >>>>> +
> >>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
> >>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >>>>>  	/*
> >>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>> index 375aa9f30cfa..098bdedc28bf 100644
> >>>>> --- a/fs/f2fs/f2fs.h
> >>>>> +++ b/fs/f2fs/f2fs.h
> >>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> >>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >>>>>  
> >>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >>>>>  	int valid_super_block;			/* valid super block no */
> >>>>>  	unsigned long s_flag;				/* flags for sbi */
> >>>>> +	struct mutex writepages;		/* mutex for writepages() */
> >>>>>  
> >>>>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>> index 63fc647f9ac2..ffea2d1303bd 100644
> >>>>> --- a/fs/f2fs/segment.c
> >>>>> +++ b/fs/f2fs/segment.c
> >>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> >>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >>>>>  
> >>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>>> index be41dbd7b261..53d70b64fea1 100644
> >>>>> --- a/fs/f2fs/super.c
> >>>>> +++ b/fs/f2fs/super.c
> >>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>>  	/* init f2fs-specific super block info */
> >>>>>  	sbi->valid_super_block = valid_super_block;
> >>>>>  	mutex_init(&sbi->gc_mutex);
> >>>>> +	mutex_init(&sbi->writepages);
> >>>>>  	mutex_init(&sbi->cp_mutex);
> >>>>>  	init_rwsem(&sbi->node_write);
> >>>>>  	init_rwsem(&sbi->node_change);
> >>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>>>> index cd2e030e47b8..81c0e5337443 100644
> >>>>> --- a/fs/f2fs/sysfs.c
> >>>>> +++ b/fs/f2fs/sysfs.c
> >>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> >>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >>>>>  	ATTR_LIST(ipu_policy),
> >>>>>  	ATTR_LIST(min_ipu_util),
> >>>>>  	ATTR_LIST(min_fsync_blocks),
> >>>>> +	ATTR_LIST(min_seq_blocks),
> >>>>>  	ATTR_LIST(min_hot_blocks),
> >>>>>  	ATTR_LIST(min_ssr_sections),
> >>>>>  	ATTR_LIST(max_victim_search),
> >>>>>
> >>>
> >>> .
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-15  2:15                   ` Jaegeuk Kim
@ 2018-08-15  3:44                     ` Chao Yu
  2018-08-16  1:34                       ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-15  3:44 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/15 10:15, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 1:28, Jaegeuk Kim wrote:
>>> On 08/14, Chao Yu wrote:
>>>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
>>>>> On 08/14, Chao Yu wrote:
>>>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>>>>>> On 08/13, Chao Yu wrote:
>>>>>>>> Hi Jaegeuk,
>>>>>>>>
>>>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>>>
>>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>>>> device: UFS
>>>>>>>>>
>>>>>>>>> Before -
>>>>>>>>> read throughput: 185 MB/s
>>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>>>
>>>>>>>>> After -
>>>>>>>>> read throughput: 758 MB/s
>>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>>
>>>>>>>> IMO, it only impact sequential read performance in a large file which may be
>>>>>>>> fragmented during multi-thread writing.
>>>>>>>>
>>>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>>>>>> data area writer.
>>>>>>>>
>>>>>>>> So how about adding a mount option to serialize writepage() for different type
>>>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>>>>>> HOT_DATA	1
>>>>>>>> WARM_DATA	2
>>>>>>>> COLD_DATA	4
>>>>>>>
>>>>>>> Well, I don't think we need to give too many mount options for this fragmented
>>>>>>> case. How about doing this for the large files only like this?
>>>>>>
>>>>>> Thread A write 512 pages			Thread B write 8 pages
>>>>>>
>>>>>> - writepages()
>>>>>>  - mutex_lock(&sbi->writepages);
>>>>>>   - writepage();
>>>>>> ...
>>>>>> 						- writepages()
>>>>>> 						 - writepage()
>>>>>> 						  ....
>>>>>>   - writepage();
>>>>>> ...
>>>>>>  - mutex_unlock(&sbi->writepages);
>>>>>>
>>>>>> Above case will also cause fragmentation since we didn't serialize all
>>>>>> concurrent IO with the lock.
>>>>>>
>>>>>> Do we need to consider such case?
>>>>>
>>>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
>>>>> when considering starvation of Thread B.
>>>>
>>>> Yeah, but in reality, there would be more threads competing in same log header,
>>>> so I worry that the effect of defragmenting will not so good as we expect,
>>>> anyway, for benchmark, it's enough.
>>>
>>> Basically, I think this is not a benchmark issue. :) It just reveals the issue
>>> much easily. Let me think about three cases:
>>> 1) WB_SYNC_NONE & WB_SYNC_NONE
>>>  -> can simply use mutex_lock
>>>
>>> 2) WB_SYNC_ALL & WB_SYNC_NONE
>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
>>>     will skip writing blocks
>>>
>>> 3) WB_SYNC_ALL & WB_SYNC_ALL
>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
>>>     starvation.
>>>
>>>
>>> I've been testing the below.
>>>
>>> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
>>>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
>>>         mutex_lock(&sbi->writepages);
>>>         locked = true;
>>
>> Just cover buffered IO? how about covering Direct IO and atomic write as well?
> 
> I'd expect direct IO does in-place-updates, and not sure whether we need to

For initial writes, they are not IPU.

> add another lock contention between buffered or direct IO. Atomic writes
> would be covered by ->min_seq_blocks.

Okay. :)

Thanks,

> 
>>
>> Thanks,
>>
>>> }
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>>>>>  sequential read
>>>>>>>
>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>
>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>> device: UFS
>>>>>>>
>>>>>>> Before -
>>>>>>> read throughput: 185 MB/s
>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>
>>>>>>> After -
>>>>>>> read throughput: 758 MB/s
>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>
>>>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>> ---
>>>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>>>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>>>>>  fs/f2fs/segment.c                       |  1 +
>>>>>>>  fs/f2fs/super.c                         |  1 +
>>>>>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>>>>>  6 files changed, 24 insertions(+)
>>>>>>>
>>>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>> index 9b0123388f18..94a24aedcdb2 100644
>>>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>> @@ -51,6 +51,14 @@ Description:
>>>>>>>  		 Controls the dirty page count condition for the in-place-update
>>>>>>>  		 policies.
>>>>>>>  
>>>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>>>>>> +Date:		August 2018
>>>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>> +Description:
>>>>>>> +		 Controls the dirty page count condition for batched sequential
>>>>>>> +		 writes in ->writepages.
>>>>>>> +
>>>>>>> +
>>>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>>>>>  Date:		March 2017
>>>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>> index 45f043ee48bd..f09231b1cc74 100644
>>>>>>> --- a/fs/f2fs/data.c
>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>>  	struct blk_plug plug;
>>>>>>>  	int ret;
>>>>>>> +	bool locked = false;
>>>>>>>  
>>>>>>>  	/* deal with chardevs and other special file */
>>>>>>>  	if (!mapping->a_ops->writepage)
>>>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>>  		goto skip_write;
>>>>>>>  
>>>>>>> +	if (!S_ISDIR(inode->i_mode) &&
>>>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>>>>>> +		mutex_lock(&sbi->writepages);
>>>>>>> +		locked = true;
>>>>>>> +	}
>>>>>>> +
>>>>>>>  	blk_start_plug(&plug);
>>>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>>>>>  	blk_finish_plug(&plug);
>>>>>>>  
>>>>>>> +	if (locked)
>>>>>>> +		mutex_unlock(&sbi->writepages);
>>>>>>> +
>>>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>>>>>  	/*
>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>> index 375aa9f30cfa..098bdedc28bf 100644
>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>>>>>  
>>>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>>>>>  	int valid_super_block;			/* valid super block no */
>>>>>>>  	unsigned long s_flag;				/* flags for sbi */
>>>>>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>>>>>  
>>>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>>>>>  
>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>> index be41dbd7b261..53d70b64fea1 100644
>>>>>>> --- a/fs/f2fs/super.c
>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>>  	/* init f2fs-specific super block info */
>>>>>>>  	sbi->valid_super_block = valid_super_block;
>>>>>>>  	mutex_init(&sbi->gc_mutex);
>>>>>>> +	mutex_init(&sbi->writepages);
>>>>>>>  	mutex_init(&sbi->cp_mutex);
>>>>>>>  	init_rwsem(&sbi->node_write);
>>>>>>>  	init_rwsem(&sbi->node_change);
>>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>>>> index cd2e030e47b8..81c0e5337443 100644
>>>>>>> --- a/fs/f2fs/sysfs.c
>>>>>>> +++ b/fs/f2fs/sysfs.c
>>>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>>>>>  	ATTR_LIST(ipu_policy),
>>>>>>>  	ATTR_LIST(min_ipu_util),
>>>>>>>  	ATTR_LIST(min_fsync_blocks),
>>>>>>> +	ATTR_LIST(min_seq_blocks),
>>>>>>>  	ATTR_LIST(min_hot_blocks),
>>>>>>>  	ATTR_LIST(min_ssr_sections),
>>>>>>>  	ATTR_LIST(max_victim_search),
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-15  3:44                     ` Chao Yu
@ 2018-08-16  1:34                       ` Jaegeuk Kim
  2018-08-16 11:39                         ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-16  1:34 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/15, Chao Yu wrote:
> On 2018/8/15 10:15, Jaegeuk Kim wrote:
> > On 08/15, Chao Yu wrote:
> >> On 2018/8/15 1:28, Jaegeuk Kim wrote:
> >>> On 08/14, Chao Yu wrote:
> >>>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
> >>>>> On 08/14, Chao Yu wrote:
> >>>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> >>>>>>> On 08/13, Chao Yu wrote:
> >>>>>>>> Hi Jaegeuk,
> >>>>>>>>
> >>>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>>>>>> to fix the drop in sequential read throughput.
> >>>>>>>>>
> >>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>>>>>> device: UFS
> >>>>>>>>>
> >>>>>>>>> Before -
> >>>>>>>>> read throughput: 185 MB/s
> >>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>>>>>
> >>>>>>>>> After -
> >>>>>>>>> read throughput: 758 MB/s
> >>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>>>>
> >>>>>>>> IMO, it only impact sequential read performance in a large file which may be
> >>>>>>>> fragmented during multi-thread writing.
> >>>>>>>>
> >>>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
> >>>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >>>>>>>> data area writer.
> >>>>>>>>
> >>>>>>>> So how about adding a mount option to serialize writepage() for different type
> >>>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >>>>>>>> HOT_DATA	1
> >>>>>>>> WARM_DATA	2
> >>>>>>>> COLD_DATA	4
> >>>>>>>
> >>>>>>> Well, I don't think we need to give too many mount options for this fragmented
> >>>>>>> case. How about doing this for the large files only like this?
> >>>>>>
> >>>>>> Thread A write 512 pages			Thread B write 8 pages
> >>>>>>
> >>>>>> - writepages()
> >>>>>>  - mutex_lock(&sbi->writepages);
> >>>>>>   - writepage();
> >>>>>> ...
> >>>>>> 						- writepages()
> >>>>>> 						 - writepage()
> >>>>>> 						  ....
> >>>>>>   - writepage();
> >>>>>> ...
> >>>>>>  - mutex_unlock(&sbi->writepages);
> >>>>>>
> >>>>>> Above case will also cause fragmentation since we didn't serialize all
> >>>>>> concurrent IO with the lock.
> >>>>>>
> >>>>>> Do we need to consider such case?
> >>>>>
> >>>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
> >>>>> when considering starvation of Thread B.
> >>>>
> >>>> Yeah, but in reality, there would be more threads competing in same log header,
> >>>> so I worry that the effect of defragmenting will not so good as we expect,
> >>>> anyway, for benchmark, it's enough.
> >>>
> >>> Basically, I think this is not a benchmark issue. :) It just reveals the issue
> >>> much easily. Let me think about three cases:
> >>> 1) WB_SYNC_NONE & WB_SYNC_NONE
> >>>  -> can simply use mutex_lock
> >>>
> >>> 2) WB_SYNC_ALL & WB_SYNC_NONE
> >>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
> >>>     will skip writing blocks
> >>>
> >>> 3) WB_SYNC_ALL & WB_SYNC_ALL
> >>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
> >>>     starvation.
> >>>
> >>>
> >>> I've been testing the below.
> >>>
> >>> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
> >>>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
> >>>         mutex_lock(&sbi->writepages);
> >>>         locked = true;
> >>
> >> Just cover buffered IO? how about covering Direct IO and atomic write as well?
> > 
> > I'd expect direct IO does in-place-updates, and not sure whether we need to
> 
> For initial writes, they are not IPU.

It's a little bit different. DIO allocates blocks first in the bulk mode, and
submits bios later. So, it'd be more likely to allocate consecutive blocks by
get_data_block_dio, if user gave a big chunk to write. I'm more concerned about
lock contention, and don't think we have to serialize all the block allocation.

> 
> > add another lock contention between buffered or direct IO. Atomic writes
> > would be covered by ->min_seq_blocks.
> 
> Okay. :)
> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>> }
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>>>
> >>>>>> Thanks,
> >>>>>>
> >>>>>>>
> >>>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> >>>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
> >>>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >>>>>>>  sequential read
> >>>>>>>
> >>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>>>> to fix the drop in sequential read throughput.
> >>>>>>>
> >>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>>>> device: UFS
> >>>>>>>
> >>>>>>> Before -
> >>>>>>> read throughput: 185 MB/s
> >>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>>>
> >>>>>>> After -
> >>>>>>> read throughput: 758 MB/s
> >>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>>>
> >>>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> >>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>>>> ---
> >>>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >>>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
> >>>>>>>  fs/f2fs/f2fs.h                          |  2 ++
> >>>>>>>  fs/f2fs/segment.c                       |  1 +
> >>>>>>>  fs/f2fs/super.c                         |  1 +
> >>>>>>>  fs/f2fs/sysfs.c                         |  2 ++
> >>>>>>>  6 files changed, 24 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>>>> index 9b0123388f18..94a24aedcdb2 100644
> >>>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>>>> @@ -51,6 +51,14 @@ Description:
> >>>>>>>  		 Controls the dirty page count condition for the in-place-update
> >>>>>>>  		 policies.
> >>>>>>>  
> >>>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> >>>>>>> +Date:		August 2018
> >>>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>>>> +Description:
> >>>>>>> +		 Controls the dirty page count condition for batched sequential
> >>>>>>> +		 writes in ->writepages.
> >>>>>>> +
> >>>>>>> +
> >>>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >>>>>>>  Date:		March 2017
> >>>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>>>>> index 45f043ee48bd..f09231b1cc74 100644
> >>>>>>> --- a/fs/f2fs/data.c
> >>>>>>> +++ b/fs/f2fs/data.c
> >>>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>>>  	struct blk_plug plug;
> >>>>>>>  	int ret;
> >>>>>>> +	bool locked = false;
> >>>>>>>  
> >>>>>>>  	/* deal with chardevs and other special file */
> >>>>>>>  	if (!mapping->a_ops->writepage)
> >>>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >>>>>>>  		goto skip_write;
> >>>>>>>  
> >>>>>>> +	if (!S_ISDIR(inode->i_mode) &&
> >>>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> >>>>>>> +		mutex_lock(&sbi->writepages);
> >>>>>>> +		locked = true;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>>  	blk_start_plug(&plug);
> >>>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >>>>>>>  	blk_finish_plug(&plug);
> >>>>>>>  
> >>>>>>> +	if (locked)
> >>>>>>> +		mutex_unlock(&sbi->writepages);
> >>>>>>> +
> >>>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
> >>>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >>>>>>>  	/*
> >>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>>> index 375aa9f30cfa..098bdedc28bf 100644
> >>>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >>>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >>>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> >>>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >>>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >>>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >>>>>>>  
> >>>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >>>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >>>>>>>  	int valid_super_block;			/* valid super block no */
> >>>>>>>  	unsigned long s_flag;				/* flags for sbi */
> >>>>>>> +	struct mutex writepages;		/* mutex for writepages() */
> >>>>>>>  
> >>>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> >>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
> >>>>>>> --- a/fs/f2fs/segment.c
> >>>>>>> +++ b/fs/f2fs/segment.c
> >>>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >>>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >>>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >>>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> >>>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >>>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >>>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >>>>>>>  
> >>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>>>>> index be41dbd7b261..53d70b64fea1 100644
> >>>>>>> --- a/fs/f2fs/super.c
> >>>>>>> +++ b/fs/f2fs/super.c
> >>>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>>>>  	/* init f2fs-specific super block info */
> >>>>>>>  	sbi->valid_super_block = valid_super_block;
> >>>>>>>  	mutex_init(&sbi->gc_mutex);
> >>>>>>> +	mutex_init(&sbi->writepages);
> >>>>>>>  	mutex_init(&sbi->cp_mutex);
> >>>>>>>  	init_rwsem(&sbi->node_write);
> >>>>>>>  	init_rwsem(&sbi->node_change);
> >>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>>>>>> index cd2e030e47b8..81c0e5337443 100644
> >>>>>>> --- a/fs/f2fs/sysfs.c
> >>>>>>> +++ b/fs/f2fs/sysfs.c
> >>>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >>>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> >>>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >>>>>>>  	ATTR_LIST(ipu_policy),
> >>>>>>>  	ATTR_LIST(min_ipu_util),
> >>>>>>>  	ATTR_LIST(min_fsync_blocks),
> >>>>>>> +	ATTR_LIST(min_seq_blocks),
> >>>>>>>  	ATTR_LIST(min_hot_blocks),
> >>>>>>>  	ATTR_LIST(min_ssr_sections),
> >>>>>>>  	ATTR_LIST(max_victim_search),
> >>>>>>>
> >>>>>
> >>>>> .
> >>>>>
> >>>
> >>> .
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-16  1:34                       ` Jaegeuk Kim
@ 2018-08-16 11:39                         ` Chao Yu
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Yu @ 2018-08-16 11:39 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/16 9:34, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 10:15, Jaegeuk Kim wrote:
>>> On 08/15, Chao Yu wrote:
>>>> On 2018/8/15 1:28, Jaegeuk Kim wrote:
>>>>> On 08/14, Chao Yu wrote:
>>>>>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
>>>>>>> On 08/14, Chao Yu wrote:
>>>>>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>>>>>>>> On 08/13, Chao Yu wrote:
>>>>>>>>>> Hi Jaegeuk,
>>>>>>>>>>
>>>>>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>>>>>
>>>>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>>>>>> device: UFS
>>>>>>>>>>>
>>>>>>>>>>> Before -
>>>>>>>>>>> read throughput: 185 MB/s
>>>>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>>>>>
>>>>>>>>>>> After -
>>>>>>>>>>> read throughput: 758 MB/s
>>>>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>>>>
>>>>>>>>>> IMO, it only impact sequential read performance in a large file which may be
>>>>>>>>>> fragmented during multi-thread writing.
>>>>>>>>>>
>>>>>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>>>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>>>>>>>> data area writer.
>>>>>>>>>>
>>>>>>>>>> So how about adding a mount option to serialize writepage() for different type
>>>>>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>>>>>>>> HOT_DATA	1
>>>>>>>>>> WARM_DATA	2
>>>>>>>>>> COLD_DATA	4
>>>>>>>>>
>>>>>>>>> Well, I don't think we need to give too many mount options for this fragmented
>>>>>>>>> case. How about doing this for the large files only like this?
>>>>>>>>
>>>>>>>> Thread A write 512 pages			Thread B write 8 pages
>>>>>>>>
>>>>>>>> - writepages()
>>>>>>>>  - mutex_lock(&sbi->writepages);
>>>>>>>>   - writepage();
>>>>>>>> ...
>>>>>>>> 						- writepages()
>>>>>>>> 						 - writepage()
>>>>>>>> 						  ....
>>>>>>>>   - writepage();
>>>>>>>> ...
>>>>>>>>  - mutex_unlock(&sbi->writepages);
>>>>>>>>
>>>>>>>> Above case will also cause fragmentation since we didn't serialize all
>>>>>>>> concurrent IO with the lock.
>>>>>>>>
>>>>>>>> Do we need to consider such case?
>>>>>>>
>>>>>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
>>>>>>> when considering starvation of Thread B.
>>>>>>
>>>>>> Yeah, but in reality, there would be more threads competing in same log header,
>>>>>> so I worry that the effect of defragmenting will not so good as we expect,
>>>>>> anyway, for benchmark, it's enough.
>>>>>
>>>>> Basically, I think this is not a benchmark issue. :) It just reveals the issue
>>>>> much easily. Let me think about three cases:
>>>>> 1) WB_SYNC_NONE & WB_SYNC_NONE
>>>>>  -> can simply use mutex_lock
>>>>>
>>>>> 2) WB_SYNC_ALL & WB_SYNC_NONE
>>>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
>>>>>     will skip writing blocks
>>>>>
>>>>> 3) WB_SYNC_ALL & WB_SYNC_ALL
>>>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
>>>>>     starvation.
>>>>>
>>>>>
>>>>> I've been testing the below.
>>>>>
>>>>> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
>>>>>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
>>>>>         mutex_lock(&sbi->writepages);
>>>>>         locked = true;
>>>>
>>>> Just cover buffered IO? how about covering Direct IO and atomic write as well?
>>>
>>> I'd expect direct IO does in-place-updates, and not sure whether we need to
>>
>> For initial writes, they are not IPU.
> 
> It's a little bit different. DIO allocates blocks first in the bulk mode, and
> submits bios later. So, it'd be more likely to allocate consecutive blocks by
> get_data_block_dio, if user gave a big chunk to write. I'm more concerned about
> lock contention, and don't think we have to serialize all the block allocation.

IMO, that's a tradeoff between avoid fragmentation and improving concurrency.

For single kind of above demands, we can separate optimizing design with
configuration, enable it dynamically, but there is no single best choice to
adapt all demands.

Well, the design is okay for me, and could you send out the patch? :)

Thanks,


> 
>>
>>> add another lock contention between buffered or direct IO. Atomic writes
>>> would be covered by ->min_seq_blocks.
>>
>> Okay. :)
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>> }
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>>>
>>>>>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>>>>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>>>>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>>>>>>>  sequential read
>>>>>>>>>
>>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>>>
>>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>>>> device: UFS
>>>>>>>>>
>>>>>>>>> Before -
>>>>>>>>> read throughput: 185 MB/s
>>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>>>
>>>>>>>>> After -
>>>>>>>>> read throughput: 758 MB/s
>>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>>>
>>>>>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>>>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>>>> ---
>>>>>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>>>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>>>>>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>>>>>>>  fs/f2fs/segment.c                       |  1 +
>>>>>>>>>  fs/f2fs/super.c                         |  1 +
>>>>>>>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>>>>>>>  6 files changed, 24 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>>>> index 9b0123388f18..94a24aedcdb2 100644
>>>>>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>>>> @@ -51,6 +51,14 @@ Description:
>>>>>>>>>  		 Controls the dirty page count condition for the in-place-update
>>>>>>>>>  		 policies.
>>>>>>>>>  
>>>>>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>>>>>>>> +Date:		August 2018
>>>>>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>>>> +Description:
>>>>>>>>> +		 Controls the dirty page count condition for batched sequential
>>>>>>>>> +		 writes in ->writepages.
>>>>>>>>> +
>>>>>>>>> +
>>>>>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>>>>>>>  Date:		March 2017
>>>>>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>>> index 45f043ee48bd..f09231b1cc74 100644
>>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>>>>  	struct blk_plug plug;
>>>>>>>>>  	int ret;
>>>>>>>>> +	bool locked = false;
>>>>>>>>>  
>>>>>>>>>  	/* deal with chardevs and other special file */
>>>>>>>>>  	if (!mapping->a_ops->writepage)
>>>>>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>>>>  		goto skip_write;
>>>>>>>>>  
>>>>>>>>> +	if (!S_ISDIR(inode->i_mode) &&
>>>>>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>>>>>>>> +		mutex_lock(&sbi->writepages);
>>>>>>>>> +		locked = true;
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>>  	blk_start_plug(&plug);
>>>>>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>>>>>>>  	blk_finish_plug(&plug);
>>>>>>>>>  
>>>>>>>>> +	if (locked)
>>>>>>>>> +		mutex_unlock(&sbi->writepages);
>>>>>>>>> +
>>>>>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>>>>>>>  	/*
>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>>> index 375aa9f30cfa..098bdedc28bf 100644
>>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>>>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>>>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>>>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>>>>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>>>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>>>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>>>>>>>  
>>>>>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>>>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>>>>>>>  	int valid_super_block;			/* valid super block no */
>>>>>>>>>  	unsigned long s_flag;				/* flags for sbi */
>>>>>>>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>>>>>>>  
>>>>>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>>>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>>>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>>>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>>>>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>>>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>>>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>>>>>>>  
>>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>>> index be41dbd7b261..53d70b64fea1 100644
>>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>>>>  	/* init f2fs-specific super block info */
>>>>>>>>>  	sbi->valid_super_block = valid_super_block;
>>>>>>>>>  	mutex_init(&sbi->gc_mutex);
>>>>>>>>> +	mutex_init(&sbi->writepages);
>>>>>>>>>  	mutex_init(&sbi->cp_mutex);
>>>>>>>>>  	init_rwsem(&sbi->node_write);
>>>>>>>>>  	init_rwsem(&sbi->node_change);
>>>>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>>>>>> index cd2e030e47b8..81c0e5337443 100644
>>>>>>>>> --- a/fs/f2fs/sysfs.c
>>>>>>>>> +++ b/fs/f2fs/sysfs.c
>>>>>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>>>>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>>>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>>>>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>>>>>>>  	ATTR_LIST(ipu_policy),
>>>>>>>>>  	ATTR_LIST(min_ipu_util),
>>>>>>>>>  	ATTR_LIST(min_fsync_blocks),
>>>>>>>>> +	ATTR_LIST(min_seq_blocks),
>>>>>>>>>  	ATTR_LIST(min_hot_blocks),
>>>>>>>>>  	ATTR_LIST(min_ssr_sections),
>>>>>>>>>  	ATTR_LIST(max_victim_search),
>>>>>>>>>
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-10  2:37 [PATCH] f2fs: fix performance issue observed with multi-thread sequential read Jaegeuk Kim
2018-08-10  2:48 ` [PATCH v2] " Jaegeuk Kim
2018-08-10 18:56   ` [f2fs-dev] [PATCH v3] " Jaegeuk Kim
2018-08-13  2:20     ` Chao Yu
2018-08-13 20:11       ` Jaegeuk Kim
2018-08-14  3:49         ` Chao Yu
2018-08-14  4:04           ` Jaegeuk Kim
2018-08-14  6:18             ` Chao Yu
2018-08-14 17:28               ` Jaegeuk Kim
2018-08-15  1:52                 ` Chao Yu
2018-08-15  2:15                   ` Jaegeuk Kim
2018-08-15  3:44                     ` Chao Yu
2018-08-16  1:34                       ` Jaegeuk Kim
2018-08-16 11:39                         ` Chao Yu

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox