LKML Archive on lore.kernel.org
 help / Atom feed
* [PATCH] f2fs: fix performance issue observed with multi-thread sequential read
@ 2018-08-10  2:37 Jaegeuk Kim
  2018-08-10  2:48 ` [PATCH v2] " Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-10  2:37 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim, Sahitya Tummala

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c  | 14 ++++++++++++--
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/super.c |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..e042d050b87d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2157,15 +2158,24 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
-	if (wbc->sync_mode == WB_SYNC_ALL)
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		atomic_inc(&sbi->wb_sync_req[DATA]);
-	else if (atomic_read(&sbi->wb_sync_req[DATA]))
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	} else if (atomic_read(&sbi->wb_sync_req[DATA])) {
 		goto skip_write;
+	} else {
+		atomic_inc(&sbi->wb_sync_req[DATA]);
+		mutex_lock(&sbi->writepages);
+	}
 
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b1c127d8383b..29bbc55cbc0b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-10  2:37 [PATCH] f2fs: fix performance issue observed with multi-thread sequential read Jaegeuk Kim
@ 2018-08-10  2:48 ` " Jaegeuk Kim
  2018-08-10 18:56   ` [f2fs-dev] [PATCH v3] " Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-10  2:48 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Sahitya Tummala

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
 - quick code fix

 fs/f2fs/data.c  | 14 ++++++++++++--
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/super.c |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..6da143a27b3a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2157,15 +2158,24 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
-	if (wbc->sync_mode == WB_SYNC_ALL)
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		atomic_inc(&sbi->wb_sync_req[DATA]);
-	else if (atomic_read(&sbi->wb_sync_req[DATA]))
+		goto writepages_lock;
+	} else if (atomic_read(&sbi->wb_sync_req[DATA])) {
 		goto skip_write;
+	} else {
+writepages_lock:
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
 
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b1c127d8383b..29bbc55cbc0b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-10  2:48 ` [PATCH v2] " Jaegeuk Kim
@ 2018-08-10 18:56   ` " Jaegeuk Kim
  2018-08-13  2:20     ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-10 18:56 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Changelog from v2:
 - fix deadlock in f2fs_gc

 fs/f2fs/data.c  | 9 +++++++++
 fs/f2fs/f2fs.h  | 1 +
 fs/f2fs/super.c | 1 +
 3 files changed, 11 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..9f0149cdeed8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2162,10 +2163,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	else if (atomic_read(&sbi->wb_sync_req[DATA]))
 		goto skip_write;
 
+	if (!S_ISDIR(inode->i_mode)) {
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
+
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b1c127d8383b..29bbc55cbc0b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-10 18:56   ` [f2fs-dev] [PATCH v3] " Jaegeuk Kim
@ 2018-08-13  2:20     ` Chao Yu
  2018-08-13 20:11       ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-13  2:20 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

Hi Jaegeuk,

On 2018/8/11 2:56, Jaegeuk Kim wrote:
> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> to fix the drop in sequential read throughput.
> 
> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> device: UFS
> 
> Before -
> read throughput: 185 MB/s
> total read requests: 85177 (of these ~80000 are 4KB size requests).
> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> 
> After -
> read throughput: 758 MB/s
> total read requests: 2417 (of these ~2042 are 512KB reads).
> total write requests: 2701 (of these ~2034 requests are written in 512KB).

IMO, it only impacts sequential read performance in a large file which may be
fragmented during multi-thread writing.

In android environment, mostly, the large file should be cold type, such as apk,
mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
data area writer.

So how about adding a mount option to serialize writepages() for different types
of logs, e.g. in android, using serialize=4; by default, using serialize=7
HOT_DATA	1
WARM_DATA	2
COLD_DATA	4

Thanks,

> 
> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> Changelog from v2:
>  - fix deadlock in f2fs_gc
> 
>  fs/f2fs/data.c  | 9 +++++++++
>  fs/f2fs/f2fs.h  | 1 +
>  fs/f2fs/super.c | 1 +
>  3 files changed, 11 insertions(+)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 45f043ee48bd..9f0149cdeed8 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct blk_plug plug;
>  	int ret;
> +	bool locked = false;
>  
>  	/* deal with chardevs and other special file */
>  	if (!mapping->a_ops->writepage)
> @@ -2162,10 +2163,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>  		goto skip_write;
>  
> +	if (!S_ISDIR(inode->i_mode)) {
> +		mutex_lock(&sbi->writepages);
> +		locked = true;
> +	}
> +
>  	blk_start_plug(&plug);
>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>  	blk_finish_plug(&plug);
>  
> +	if (locked)
> +		mutex_unlock(&sbi->writepages);
> +
>  	if (wbc->sync_mode == WB_SYNC_ALL)
>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>  	/*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b1c127d8383b..29bbc55cbc0b 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1133,6 +1133,7 @@ struct f2fs_sb_info {
>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>  	int valid_super_block;			/* valid super block no */
>  	unsigned long s_flag;				/* flags for sbi */
> +	struct mutex writepages;		/* mutex for writepages() */
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index be41dbd7b261..53d70b64fea1 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	/* init f2fs-specific super block info */
>  	sbi->valid_super_block = valid_super_block;
>  	mutex_init(&sbi->gc_mutex);
> +	mutex_init(&sbi->writepages);
>  	mutex_init(&sbi->cp_mutex);
>  	init_rwsem(&sbi->node_write);
>  	init_rwsem(&sbi->node_change);
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-13  2:20     ` Chao Yu
@ 2018-08-13 20:11       ` Jaegeuk Kim
  2018-08-14  3:49         ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-13 20:11 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/13, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> > This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> > to fix the drop in sequential read throughput.
> > 
> > Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> > device: UFS
> > 
> > Before -
> > read throughput: 185 MB/s
> > total read requests: 85177 (of these ~80000 are 4KB size requests).
> > total write requests: 2546 (of these ~2208 requests are written in 512KB).
> > 
> > After -
> > read throughput: 758 MB/s
> > total read requests: 2417 (of these ~2042 are 512KB reads).
> > total write requests: 2701 (of these ~2034 requests are written in 512KB).
> 
> IMO, it only impact sequential read performance in a large file which may be
> fragmented during multi-thread writing.
> 
> In android environment, mostly, the large file should be cold type, such as apk,
> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> data area writer.
> 
> So how about adding a mount option to serialize writepage() for different type
> of log, e.g. in android, using serialize=4; by default, using serialize=7
> HOT_DATA	1
> WARM_DATA	2
> COLD_DATA	4

Well, I don't think we need to give too many mount options for this fragmented
case. How about doing this for the large files only like this?

From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 9 Aug 2018 17:53:34 -0700
Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
 sequential read

This reverts the commit - "b93f771 - f2fs: remove writepages lock"
to fix the drop in sequential read throughput.

Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
device: UFS

Before -
read throughput: 185 MB/s
total read requests: 85177 (of these ~80000 are 4KB size requests).
total write requests: 2546 (of these ~2208 requests are written in 512KB).

After -
read throughput: 758 MB/s
total read requests: 2417 (of these ~2042 are 512KB reads).
total write requests: 2701 (of these ~2034 requests are written in 512KB).

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
 fs/f2fs/data.c                          | 10 ++++++++++
 fs/f2fs/f2fs.h                          |  2 ++
 fs/f2fs/segment.c                       |  1 +
 fs/f2fs/super.c                         |  1 +
 fs/f2fs/sysfs.c                         |  2 ++
 6 files changed, 24 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 9b0123388f18..94a24aedcdb2 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,14 @@ Description:
 		 Controls the dirty page count condition for the in-place-update
 		 policies.
 
+What:		/sys/fs/f2fs/<disk>/min_seq_blocks
+Date:		August 2018
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+		 Controls the dirty page count condition for batched sequential
+		 writes in ->writepages.
+
+
 What:		/sys/fs/f2fs/<disk>/min_hot_blocks
 Date:		March 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45f043ee48bd..f09231b1cc74 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct blk_plug plug;
 	int ret;
+	bool locked = false;
 
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
@@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	else if (atomic_read(&sbi->wb_sync_req[DATA]))
 		goto skip_write;
 
+	if (!S_ISDIR(inode->i_mode) &&
+			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
+
 	blk_start_plug(&plug);
 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
 	blk_finish_plug(&plug);
 
+	if (locked)
+		mutex_unlock(&sbi->writepages);
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_dec(&sbi->wb_sync_req[DATA]);
 	/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 375aa9f30cfa..098bdedc28bf 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -913,6 +913,7 @@ struct f2fs_sm_info {
 	unsigned int ipu_policy;	/* in-place-update policy */
 	unsigned int min_ipu_util;	/* in-place-update threshold */
 	unsigned int min_fsync_blocks;	/* threshold for fsync */
+	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
 	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
 	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
 
@@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
 	struct rw_semaphore sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
+	struct mutex writepages;		/* mutex for writepages() */
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 63fc647f9ac2..ffea2d1303bd 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
 		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
 	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
 	sm_info->min_ssr_sections = reserved_sections(sbi);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index be41dbd7b261..53d70b64fea1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index cd2e030e47b8..81c0e5337443 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
@@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
 	ATTR_LIST(min_fsync_blocks),
+	ATTR_LIST(min_seq_blocks),
 	ATTR_LIST(min_hot_blocks),
 	ATTR_LIST(min_ssr_sections),
 	ATTR_LIST(max_victim_search),
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-13 20:11       ` Jaegeuk Kim
@ 2018-08-14  3:49         ` Chao Yu
  2018-08-14  4:04           ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-14  3:49 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/14 4:11, Jaegeuk Kim wrote:
> On 08/13, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>> to fix the drop in sequential read throughput.
>>>
>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>> device: UFS
>>>
>>> Before -
>>> read throughput: 185 MB/s
>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>
>>> After -
>>> read throughput: 758 MB/s
>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>
>> IMO, it only impact sequential read performance in a large file which may be
>> fragmented during multi-thread writing.
>>
>> In android environment, mostly, the large file should be cold type, such as apk,
>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>> data area writer.
>>
>> So how about adding a mount option to serialize writepage() for different type
>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>> HOT_DATA	1
>> WARM_DATA	2
>> COLD_DATA	4
> 
> Well, I don't think we need to give too many mount options for this fragmented
> case. How about doing this for the large files only like this?

Thread A write 512 pages			Thread B write 8 pages

- writepages()
 - mutex_lock(&sbi->writepages);
  - writepage();
...
						- writepages()
						 - writepage()
						  ....
  - writepage();
...
 - mutex_unlock(&sbi->writepages);

The above case will also cause fragmentation since we didn't serialize all
concurrent IO with the lock.

Do we need to consider such a case?

Thanks,

> 
>>From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Thu, 9 Aug 2018 17:53:34 -0700
> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>  sequential read
> 
> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> to fix the drop in sequential read throughput.
> 
> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> device: UFS
> 
> Before -
> read throughput: 185 MB/s
> total read requests: 85177 (of these ~80000 are 4KB size requests).
> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> 
> After -
> read throughput: 758 MB/s
> total read requests: 2417 (of these ~2042 are 512KB reads).
> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> 
> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>  fs/f2fs/data.c                          | 10 ++++++++++
>  fs/f2fs/f2fs.h                          |  2 ++
>  fs/f2fs/segment.c                       |  1 +
>  fs/f2fs/super.c                         |  1 +
>  fs/f2fs/sysfs.c                         |  2 ++
>  6 files changed, 24 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 9b0123388f18..94a24aedcdb2 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -51,6 +51,14 @@ Description:
>  		 Controls the dirty page count condition for the in-place-update
>  		 policies.
>  
> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> +Date:		August 2018
> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> +Description:
> +		 Controls the dirty page count condition for batched sequential
> +		 writes in ->writepages.
> +
> +
>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>  Date:		March 2017
>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 45f043ee48bd..f09231b1cc74 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct blk_plug plug;
>  	int ret;
> +	bool locked = false;
>  
>  	/* deal with chardevs and other special file */
>  	if (!mapping->a_ops->writepage)
> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>  		goto skip_write;
>  
> +	if (!S_ISDIR(inode->i_mode) &&
> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> +		mutex_lock(&sbi->writepages);
> +		locked = true;
> +	}
> +
>  	blk_start_plug(&plug);
>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>  	blk_finish_plug(&plug);
>  
> +	if (locked)
> +		mutex_unlock(&sbi->writepages);
> +
>  	if (wbc->sync_mode == WB_SYNC_ALL)
>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>  	/*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 375aa9f30cfa..098bdedc28bf 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>  	unsigned int ipu_policy;	/* in-place-update policy */
>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>  
> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>  	int valid_super_block;			/* valid super block no */
>  	unsigned long s_flag;				/* flags for sbi */
> +	struct mutex writepages;		/* mutex for writepages() */
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 63fc647f9ac2..ffea2d1303bd 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>  
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index be41dbd7b261..53d70b64fea1 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	/* init f2fs-specific super block info */
>  	sbi->valid_super_block = valid_super_block;
>  	mutex_init(&sbi->gc_mutex);
> +	mutex_init(&sbi->writepages);
>  	mutex_init(&sbi->cp_mutex);
>  	init_rwsem(&sbi->node_write);
>  	init_rwsem(&sbi->node_change);
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index cd2e030e47b8..81c0e5337443 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>  	ATTR_LIST(ipu_policy),
>  	ATTR_LIST(min_ipu_util),
>  	ATTR_LIST(min_fsync_blocks),
> +	ATTR_LIST(min_seq_blocks),
>  	ATTR_LIST(min_hot_blocks),
>  	ATTR_LIST(min_ssr_sections),
>  	ATTR_LIST(max_victim_search),
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14  3:49         ` Chao Yu
@ 2018-08-14  4:04           ` Jaegeuk Kim
  2018-08-14  6:18             ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-14  4:04 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/14, Chao Yu wrote:
> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> > On 08/13, Chao Yu wrote:
> >> Hi Jaegeuk,
> >>
> >> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>> to fix the drop in sequential read throughput.
> >>>
> >>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>> device: UFS
> >>>
> >>> Before -
> >>> read throughput: 185 MB/s
> >>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>
> >>> After -
> >>> read throughput: 758 MB/s
> >>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>
> >> IMO, it only impact sequential read performance in a large file which may be
> >> fragmented during multi-thread writing.
> >>
> >> In android environment, mostly, the large file should be cold type, such as apk,
> >> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >> data area writer.
> >>
> >> So how about adding a mount option to serialize writepage() for different type
> >> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >> HOT_DATA	1
> >> WARM_DATA	2
> >> COLD_DATA	4
> > 
> > Well, I don't think we need to give too many mount options for this fragmented
> > case. How about doing this for the large files only like this?
> 
> Thread A write 512 pages			Thread B write 8 pages
> 
> - writepages()
>  - mutex_lock(&sbi->writepages);
>   - writepage();
> ...
> 						- writepages()
> 						 - writepage()
> 						  ....
>   - writepage();
> ...
>  - mutex_unlock(&sbi->writepages);
> 
> Above case will also cause fragmentation since we didn't serialize all
> concurrent IO with the lock.
> 
> Do we need to consider such case?

We can simply allow 512 and 8 in the same segment, which would not be a big deal,
when considering starvation of Thread B.

> 
> Thanks,
> 
> > 
> >>From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> > From: Jaegeuk Kim <jaegeuk@kernel.org>
> > Date: Thu, 9 Aug 2018 17:53:34 -0700
> > Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >  sequential read
> > 
> > This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> > to fix the drop in sequential read throughput.
> > 
> > Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> > device: UFS
> > 
> > Before -
> > read throughput: 185 MB/s
> > total read requests: 85177 (of these ~80000 are 4KB size requests).
> > total write requests: 2546 (of these ~2208 requests are written in 512KB).
> > 
> > After -
> > read throughput: 758 MB/s
> > total read requests: 2417 (of these ~2042 are 512KB reads).
> > total write requests: 2701 (of these ~2034 requests are written in 512KB).
> > 
> > Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >  fs/f2fs/data.c                          | 10 ++++++++++
> >  fs/f2fs/f2fs.h                          |  2 ++
> >  fs/f2fs/segment.c                       |  1 +
> >  fs/f2fs/super.c                         |  1 +
> >  fs/f2fs/sysfs.c                         |  2 ++
> >  6 files changed, 24 insertions(+)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index 9b0123388f18..94a24aedcdb2 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -51,6 +51,14 @@ Description:
> >  		 Controls the dirty page count condition for the in-place-update
> >  		 policies.
> >  
> > +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> > +Date:		August 2018
> > +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > +Description:
> > +		 Controls the dirty page count condition for batched sequential
> > +		 writes in ->writepages.
> > +
> > +
> >  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >  Date:		March 2017
> >  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 45f043ee48bd..f09231b1cc74 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >  	struct blk_plug plug;
> >  	int ret;
> > +	bool locked = false;
> >  
> >  	/* deal with chardevs and other special file */
> >  	if (!mapping->a_ops->writepage)
> > @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >  		goto skip_write;
> >  
> > +	if (!S_ISDIR(inode->i_mode) &&
> > +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> > +		mutex_lock(&sbi->writepages);
> > +		locked = true;
> > +	}
> > +
> >  	blk_start_plug(&plug);
> >  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >  	blk_finish_plug(&plug);
> >  
> > +	if (locked)
> > +		mutex_unlock(&sbi->writepages);
> > +
> >  	if (wbc->sync_mode == WB_SYNC_ALL)
> >  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >  	/*
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 375aa9f30cfa..098bdedc28bf 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >  	unsigned int ipu_policy;	/* in-place-update policy */
> >  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> > +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >  
> > @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >  	int valid_super_block;			/* valid super block no */
> >  	unsigned long s_flag;				/* flags for sbi */
> > +	struct mutex writepages;		/* mutex for writepages() */
> >  
> >  #ifdef CONFIG_BLK_DEV_ZONED
> >  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 63fc647f9ac2..ffea2d1303bd 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> > +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >  
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index be41dbd7b261..53d70b64fea1 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >  	/* init f2fs-specific super block info */
> >  	sbi->valid_super_block = valid_super_block;
> >  	mutex_init(&sbi->gc_mutex);
> > +	mutex_init(&sbi->writepages);
> >  	mutex_init(&sbi->cp_mutex);
> >  	init_rwsem(&sbi->node_write);
> >  	init_rwsem(&sbi->node_change);
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index cd2e030e47b8..81c0e5337443 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> > +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> > @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >  	ATTR_LIST(ipu_policy),
> >  	ATTR_LIST(min_ipu_util),
> >  	ATTR_LIST(min_fsync_blocks),
> > +	ATTR_LIST(min_seq_blocks),
> >  	ATTR_LIST(min_hot_blocks),
> >  	ATTR_LIST(min_ssr_sections),
> >  	ATTR_LIST(max_victim_search),
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14  4:04           ` Jaegeuk Kim
@ 2018-08-14  6:18             ` Chao Yu
  2018-08-14 17:28               ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-14  6:18 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/14 12:04, Jaegeuk Kim wrote:
> On 08/14, Chao Yu wrote:
>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>> On 08/13, Chao Yu wrote:
>>>> Hi Jaegeuk,
>>>>
>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>> to fix the drop in sequential read throughput.
>>>>>
>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>> device: UFS
>>>>>
>>>>> Before -
>>>>> read throughput: 185 MB/s
>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>
>>>>> After -
>>>>> read throughput: 758 MB/s
>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>
>>>> IMO, it only impact sequential read performance in a large file which may be
>>>> fragmented during multi-thread writing.
>>>>
>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>> data area writer.
>>>>
>>>> So how about adding a mount option to serialize writepage() for different type
>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>> HOT_DATA	1
>>>> WARM_DATA	2
>>>> COLD_DATA	4
>>>
>>> Well, I don't think we need to give too many mount options for this fragmented
>>> case. How about doing this for the large files only like this?
>>
>> Thread A write 512 pages			Thread B write 8 pages
>>
>> - writepages()
>>  - mutex_lock(&sbi->writepages);
>>   - writepage();
>> ...
>> 						- writepages()
>> 						 - writepage()
>> 						  ....
>>   - writepage();
>> ...
>>  - mutex_unlock(&sbi->writepages);
>>
>> Above case will also cause fragmentation since we didn't serialize all
>> concurrent IO with the lock.
>>
>> Do we need to consider such case?
> 
> We can simply allow 512 and 8 in the same segment, which would not a big deal,
> when considering starvation of Thread B.

Yeah, but in reality, there would be more threads competing on the same log header,
so I worry that the effect of defragmenting will not be as good as we expect;
anyway, for a benchmark, it's enough.

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>  sequential read
>>>
>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>> to fix the drop in sequential read throughput.
>>>
>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>> device: UFS
>>>
>>> Before -
>>> read throughput: 185 MB/s
>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>
>>> After -
>>> read throughput: 758 MB/s
>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>
>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>  fs/f2fs/segment.c                       |  1 +
>>>  fs/f2fs/super.c                         |  1 +
>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>  6 files changed, 24 insertions(+)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> index 9b0123388f18..94a24aedcdb2 100644
>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> @@ -51,6 +51,14 @@ Description:
>>>  		 Controls the dirty page count condition for the in-place-update
>>>  		 policies.
>>>  
>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>> +Date:		August 2018
>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> +Description:
>>> +		 Controls the dirty page count condition for batched sequential
>>> +		 writes in ->writepages.
>>> +
>>> +
>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>  Date:		March 2017
>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>> index 45f043ee48bd..f09231b1cc74 100644
>>> --- a/fs/f2fs/data.c
>>> +++ b/fs/f2fs/data.c
>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>  	struct blk_plug plug;
>>>  	int ret;
>>> +	bool locked = false;
>>>  
>>>  	/* deal with chardevs and other special file */
>>>  	if (!mapping->a_ops->writepage)
>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>  		goto skip_write;
>>>  
>>> +	if (!S_ISDIR(inode->i_mode) &&
>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>> +		mutex_lock(&sbi->writepages);
>>> +		locked = true;
>>> +	}
>>> +
>>>  	blk_start_plug(&plug);
>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>  	blk_finish_plug(&plug);
>>>  
>>> +	if (locked)
>>> +		mutex_unlock(&sbi->writepages);
>>> +
>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>  	/*
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 375aa9f30cfa..098bdedc28bf 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>  
>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>  	int valid_super_block;			/* valid super block no */
>>>  	unsigned long s_flag;				/* flags for sbi */
>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>  
>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>  
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index be41dbd7b261..53d70b64fea1 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>  	/* init f2fs-specific super block info */
>>>  	sbi->valid_super_block = valid_super_block;
>>>  	mutex_init(&sbi->gc_mutex);
>>> +	mutex_init(&sbi->writepages);
>>>  	mutex_init(&sbi->cp_mutex);
>>>  	init_rwsem(&sbi->node_write);
>>>  	init_rwsem(&sbi->node_change);
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index cd2e030e47b8..81c0e5337443 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>  	ATTR_LIST(ipu_policy),
>>>  	ATTR_LIST(min_ipu_util),
>>>  	ATTR_LIST(min_fsync_blocks),
>>> +	ATTR_LIST(min_seq_blocks),
>>>  	ATTR_LIST(min_hot_blocks),
>>>  	ATTR_LIST(min_ssr_sections),
>>>  	ATTR_LIST(max_victim_search),
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14  6:18             ` Chao Yu
@ 2018-08-14 17:28               ` Jaegeuk Kim
  2018-08-15  1:52                 ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-14 17:28 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/14, Chao Yu wrote:
> On 2018/8/14 12:04, Jaegeuk Kim wrote:
> > On 08/14, Chao Yu wrote:
> >> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> >>> On 08/13, Chao Yu wrote:
> >>>> Hi Jaegeuk,
> >>>>
> >>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>> to fix the drop in sequential read throughput.
> >>>>>
> >>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>> device: UFS
> >>>>>
> >>>>> Before -
> >>>>> read throughput: 185 MB/s
> >>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>
> >>>>> After -
> >>>>> read throughput: 758 MB/s
> >>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>
> >>>> IMO, it only impact sequential read performance in a large file which may be
> >>>> fragmented during multi-thread writing.
> >>>>
> >>>> In android environment, mostly, the large file should be cold type, such as apk,
> >>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >>>> data area writer.
> >>>>
> >>>> So how about adding a mount option to serialize writepage() for different type
> >>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >>>> HOT_DATA	1
> >>>> WARM_DATA	2
> >>>> COLD_DATA	4
> >>>
> >>> Well, I don't think we need to give too many mount options for this fragmented
> >>> case. How about doing this for the large files only like this?
> >>
> >> Thread A write 512 pages			Thread B write 8 pages
> >>
> >> - writepages()
> >>  - mutex_lock(&sbi->writepages);
> >>   - writepage();
> >> ...
> >> 						- writepages()
> >> 						 - writepage()
> >> 						  ....
> >>   - writepage();
> >> ...
> >>  - mutex_unlock(&sbi->writepages);
> >>
> >> Above case will also cause fragmentation since we didn't serialize all
> >> concurrent IO with the lock.
> >>
> >> Do we need to consider such case?
> > 
> > We can simply allow 512 and 8 in the same segment, which would not a big deal,
> > when considering starvation of Thread B.
> 
> Yeah, but in reality, there would be more threads competing in same log header,
> so I worry that the effect of defragmenting will not so good as we expect,
> anyway, for benchmark, it's enough.

Basically, I think this is not a benchmark issue. :) It just reveals the issue
much more easily. Let me think about three cases:
1) WB_SYNC_NONE & WB_SYNC_NONE
 -> can simply use mutex_lock

2) WB_SYNC_ALL & WB_SYNC_NONE
 -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
    will skip writing blocks

3) WB_SYNC_ALL & WB_SYNC_ALL
 -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
    starvation.


I've been testing the below.

if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
                get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
        mutex_lock(&sbi->writepages);
        locked = true;
}

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> >>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> Date: Thu, 9 Aug 2018 17:53:34 -0700
> >>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >>>  sequential read
> >>>
> >>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>> to fix the drop in sequential read throughput.
> >>>
> >>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>> device: UFS
> >>>
> >>> Before -
> >>> read throughput: 185 MB/s
> >>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>
> >>> After -
> >>> read throughput: 758 MB/s
> >>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>
> >>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> >>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> ---
> >>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >>>  fs/f2fs/data.c                          | 10 ++++++++++
> >>>  fs/f2fs/f2fs.h                          |  2 ++
> >>>  fs/f2fs/segment.c                       |  1 +
> >>>  fs/f2fs/super.c                         |  1 +
> >>>  fs/f2fs/sysfs.c                         |  2 ++
> >>>  6 files changed, 24 insertions(+)
> >>>
> >>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> index 9b0123388f18..94a24aedcdb2 100644
> >>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> @@ -51,6 +51,14 @@ Description:
> >>>  		 Controls the dirty page count condition for the in-place-update
> >>>  		 policies.
> >>>  
> >>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> >>> +Date:		August 2018
> >>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>> +Description:
> >>> +		 Controls the dirty page count condition for batched sequential
> >>> +		 writes in ->writepages.
> >>> +
> >>> +
> >>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >>>  Date:		March 2017
> >>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>> index 45f043ee48bd..f09231b1cc74 100644
> >>> --- a/fs/f2fs/data.c
> >>> +++ b/fs/f2fs/data.c
> >>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>  	struct blk_plug plug;
> >>>  	int ret;
> >>> +	bool locked = false;
> >>>  
> >>>  	/* deal with chardevs and other special file */
> >>>  	if (!mapping->a_ops->writepage)
> >>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >>>  		goto skip_write;
> >>>  
> >>> +	if (!S_ISDIR(inode->i_mode) &&
> >>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> >>> +		mutex_lock(&sbi->writepages);
> >>> +		locked = true;
> >>> +	}
> >>> +
> >>>  	blk_start_plug(&plug);
> >>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >>>  	blk_finish_plug(&plug);
> >>>  
> >>> +	if (locked)
> >>> +		mutex_unlock(&sbi->writepages);
> >>> +
> >>>  	if (wbc->sync_mode == WB_SYNC_ALL)
> >>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >>>  	/*
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index 375aa9f30cfa..098bdedc28bf 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> >>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >>>  
> >>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >>>  	int valid_super_block;			/* valid super block no */
> >>>  	unsigned long s_flag;				/* flags for sbi */
> >>> +	struct mutex writepages;		/* mutex for writepages() */
> >>>  
> >>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 63fc647f9ac2..ffea2d1303bd 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> >>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >>>  
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index be41dbd7b261..53d70b64fea1 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>  	/* init f2fs-specific super block info */
> >>>  	sbi->valid_super_block = valid_super_block;
> >>>  	mutex_init(&sbi->gc_mutex);
> >>> +	mutex_init(&sbi->writepages);
> >>>  	mutex_init(&sbi->cp_mutex);
> >>>  	init_rwsem(&sbi->node_write);
> >>>  	init_rwsem(&sbi->node_change);
> >>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>> index cd2e030e47b8..81c0e5337443 100644
> >>> --- a/fs/f2fs/sysfs.c
> >>> +++ b/fs/f2fs/sysfs.c
> >>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> >>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >>>  	ATTR_LIST(ipu_policy),
> >>>  	ATTR_LIST(min_ipu_util),
> >>>  	ATTR_LIST(min_fsync_blocks),
> >>> +	ATTR_LIST(min_seq_blocks),
> >>>  	ATTR_LIST(min_hot_blocks),
> >>>  	ATTR_LIST(min_ssr_sections),
> >>>  	ATTR_LIST(max_victim_search),
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-14 17:28               ` Jaegeuk Kim
@ 2018-08-15  1:52                 ` Chao Yu
  2018-08-15  2:15                   ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-15  1:52 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/15 1:28, Jaegeuk Kim wrote:
> On 08/14, Chao Yu wrote:
>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
>>> On 08/14, Chao Yu wrote:
>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>>>> On 08/13, Chao Yu wrote:
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>
>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>> device: UFS
>>>>>>>
>>>>>>> Before -
>>>>>>> read throughput: 185 MB/s
>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>
>>>>>>> After -
>>>>>>> read throughput: 758 MB/s
>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>
>>>>>> IMO, it only impact sequential read performance in a large file which may be
>>>>>> fragmented during multi-thread writing.
>>>>>>
>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>>>> data area writer.
>>>>>>
>>>>>> So how about adding a mount option to serialize writepage() for different type
>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>>>> HOT_DATA	1
>>>>>> WARM_DATA	2
>>>>>> COLD_DATA	4
>>>>>
>>>>> Well, I don't think we need to give too many mount options for this fragmented
>>>>> case. How about doing this for the large files only like this?
>>>>
>>>> Thread A write 512 pages			Thread B write 8 pages
>>>>
>>>> - writepages()
>>>>  - mutex_lock(&sbi->writepages);
>>>>   - writepage();
>>>> ...
>>>> 						- writepages()
>>>> 						 - writepage()
>>>> 						  ....
>>>>   - writepage();
>>>> ...
>>>>  - mutex_unlock(&sbi->writepages);
>>>>
>>>> Above case will also cause fragmentation since we didn't serialize all
>>>> concurrent IO with the lock.
>>>>
>>>> Do we need to consider such case?
>>>
>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
>>> when considering starvation of Thread B.
>>
>> Yeah, but in reality, there would be more threads competing in same log header,
>> so I worry that the effect of defragmenting will not so good as we expect,
>> anyway, for benchmark, it's enough.
> 
> Basically, I think this is not a benchmark issue. :) It just reveals the issue
> much easily. Let me think about three cases:
> 1) WB_SYNC_NONE & WB_SYNC_NONE
>  -> can simply use mutex_lock
> 
> 2) WB_SYNC_ALL & WB_SYNC_NONE
>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
>     will skip writing blocks
> 
> 3) WB_SYNC_ALL & WB_SYNC_ALL
>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
>     starvation.
> 
> 
> I've been testing the below.
> 
> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
>         mutex_lock(&sbi->writepages);
>         locked = true;

Just cover buffered IO? how about covering Direct IO and atomic write as well?

Thanks,

> }
> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>>>  sequential read
>>>>>
>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>> to fix the drop in sequential read throughput.
>>>>>
>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>> device: UFS
>>>>>
>>>>> Before -
>>>>> read throughput: 185 MB/s
>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>
>>>>> After -
>>>>> read throughput: 758 MB/s
>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>
>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>> ---
>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>>>  fs/f2fs/segment.c                       |  1 +
>>>>>  fs/f2fs/super.c                         |  1 +
>>>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>>>  6 files changed, 24 insertions(+)
>>>>>
>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>> index 9b0123388f18..94a24aedcdb2 100644
>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>> @@ -51,6 +51,14 @@ Description:
>>>>>  		 Controls the dirty page count condition for the in-place-update
>>>>>  		 policies.
>>>>>  
>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>>>> +Date:		August 2018
>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>> +Description:
>>>>> +		 Controls the dirty page count condition for batched sequential
>>>>> +		 writes in ->writepages.
>>>>> +
>>>>> +
>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>>>  Date:		March 2017
>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>> index 45f043ee48bd..f09231b1cc74 100644
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>  	struct blk_plug plug;
>>>>>  	int ret;
>>>>> +	bool locked = false;
>>>>>  
>>>>>  	/* deal with chardevs and other special file */
>>>>>  	if (!mapping->a_ops->writepage)
>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>  		goto skip_write;
>>>>>  
>>>>> +	if (!S_ISDIR(inode->i_mode) &&
>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>>>> +		mutex_lock(&sbi->writepages);
>>>>> +		locked = true;
>>>>> +	}
>>>>> +
>>>>>  	blk_start_plug(&plug);
>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>>>  	blk_finish_plug(&plug);
>>>>>  
>>>>> +	if (locked)
>>>>> +		mutex_unlock(&sbi->writepages);
>>>>> +
>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>>>  	/*
>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>> index 375aa9f30cfa..098bdedc28bf 100644
>>>>> --- a/fs/f2fs/f2fs.h
>>>>> +++ b/fs/f2fs/f2fs.h
>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>>>  
>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>>>  	int valid_super_block;			/* valid super block no */
>>>>>  	unsigned long s_flag;				/* flags for sbi */
>>>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>>>  
>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>>>  
>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>> index be41dbd7b261..53d70b64fea1 100644
>>>>> --- a/fs/f2fs/super.c
>>>>> +++ b/fs/f2fs/super.c
>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>  	/* init f2fs-specific super block info */
>>>>>  	sbi->valid_super_block = valid_super_block;
>>>>>  	mutex_init(&sbi->gc_mutex);
>>>>> +	mutex_init(&sbi->writepages);
>>>>>  	mutex_init(&sbi->cp_mutex);
>>>>>  	init_rwsem(&sbi->node_write);
>>>>>  	init_rwsem(&sbi->node_change);
>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>> index cd2e030e47b8..81c0e5337443 100644
>>>>> --- a/fs/f2fs/sysfs.c
>>>>> +++ b/fs/f2fs/sysfs.c
>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>>>  	ATTR_LIST(ipu_policy),
>>>>>  	ATTR_LIST(min_ipu_util),
>>>>>  	ATTR_LIST(min_fsync_blocks),
>>>>> +	ATTR_LIST(min_seq_blocks),
>>>>>  	ATTR_LIST(min_hot_blocks),
>>>>>  	ATTR_LIST(min_ssr_sections),
>>>>>  	ATTR_LIST(max_victim_search),
>>>>>
>>>
>>> .
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-15  1:52                 ` Chao Yu
@ 2018-08-15  2:15                   ` Jaegeuk Kim
  2018-08-15  3:44                     ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-15  2:15 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/15, Chao Yu wrote:
> On 2018/8/15 1:28, Jaegeuk Kim wrote:
> > On 08/14, Chao Yu wrote:
> >> On 2018/8/14 12:04, Jaegeuk Kim wrote:
> >>> On 08/14, Chao Yu wrote:
> >>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> >>>>> On 08/13, Chao Yu wrote:
> >>>>>> Hi Jaegeuk,
> >>>>>>
> >>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>>>> to fix the drop in sequential read throughput.
> >>>>>>>
> >>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>>>> device: UFS
> >>>>>>>
> >>>>>>> Before -
> >>>>>>> read throughput: 185 MB/s
> >>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>>>
> >>>>>>> After -
> >>>>>>> read throughput: 758 MB/s
> >>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>>
> >>>>>> IMO, it only impact sequential read performance in a large file which may be
> >>>>>> fragmented during multi-thread writing.
> >>>>>>
> >>>>>> In android environment, mostly, the large file should be cold type, such as apk,
> >>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >>>>>> data area writer.
> >>>>>>
> >>>>>> So how about adding a mount option to serialize writepage() for different type
> >>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >>>>>> HOT_DATA	1
> >>>>>> WARM_DATA	2
> >>>>>> COLD_DATA	4
> >>>>>
> >>>>> Well, I don't think we need to give too many mount options for this fragmented
> >>>>> case. How about doing this for the large files only like this?
> >>>>
> >>>> Thread A write 512 pages			Thread B write 8 pages
> >>>>
> >>>> - writepages()
> >>>>  - mutex_lock(&sbi->writepages);
> >>>>   - writepage();
> >>>> ...
> >>>> 						- writepages()
> >>>> 						 - writepage()
> >>>> 						  ....
> >>>>   - writepage();
> >>>> ...
> >>>>  - mutex_unlock(&sbi->writepages);
> >>>>
> >>>> Above case will also cause fragmentation since we didn't serialize all
> >>>> concurrent IO with the lock.
> >>>>
> >>>> Do we need to consider such case?
> >>>
> >>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
> >>> when considering starvation of Thread B.
> >>
> >> Yeah, but in reality, there would be more threads competing in same log header,
> >> so I worry that the effect of defragmenting will not so good as we expect,
> >> anyway, for benchmark, it's enough.
> > 
> > Basically, I think this is not a benchmark issue. :) It just reveals the issue
> > much easily. Let me think about three cases:
> > 1) WB_SYNC_NONE & WB_SYNC_NONE
> >  -> can simply use mutex_lock
> > 
> > 2) WB_SYNC_ALL & WB_SYNC_NONE
> >  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
> >     will skip writing blocks
> > 
> > 3) WB_SYNC_ALL & WB_SYNC_ALL
> >  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
> >     starvation.
> > 
> > 
> > I've been testing the below.
> > 
> > if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
> >                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
> >         mutex_lock(&sbi->writepages);
> >         locked = true;
> 
> Just cover buffered IO? how about covering Direct IO and atomic write as well?

I'd expect direct IO does in-place-updates, and not sure whether we need to
add another lock contention between buffered or direct IO. Atomic writes
would be covered by ->min_seq_blocks.

> 
> Thanks,
> 
> > }
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> >>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
> >>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >>>>>  sequential read
> >>>>>
> >>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>> to fix the drop in sequential read throughput.
> >>>>>
> >>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>> device: UFS
> >>>>>
> >>>>> Before -
> >>>>> read throughput: 185 MB/s
> >>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>
> >>>>> After -
> >>>>> read throughput: 758 MB/s
> >>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>
> >>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> >>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>> ---
> >>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >>>>>  fs/f2fs/data.c                          | 10 ++++++++++
> >>>>>  fs/f2fs/f2fs.h                          |  2 ++
> >>>>>  fs/f2fs/segment.c                       |  1 +
> >>>>>  fs/f2fs/super.c                         |  1 +
> >>>>>  fs/f2fs/sysfs.c                         |  2 ++
> >>>>>  6 files changed, 24 insertions(+)
> >>>>>
> >>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>> index 9b0123388f18..94a24aedcdb2 100644
> >>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>> @@ -51,6 +51,14 @@ Description:
> >>>>>  		 Controls the dirty page count condition for the in-place-update
> >>>>>  		 policies.
> >>>>>  
> >>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> >>>>> +Date:		August 2018
> >>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>> +Description:
> >>>>> +		 Controls the dirty page count condition for batched sequential
> >>>>> +		 writes in ->writepages.
> >>>>> +
> >>>>> +
> >>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >>>>>  Date:		March 2017
> >>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>>> index 45f043ee48bd..f09231b1cc74 100644
> >>>>> --- a/fs/f2fs/data.c
> >>>>> +++ b/fs/f2fs/data.c
> >>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>  	struct blk_plug plug;
> >>>>>  	int ret;
> >>>>> +	bool locked = false;
> >>>>>  
> >>>>>  	/* deal with chardevs and other special file */
> >>>>>  	if (!mapping->a_ops->writepage)
> >>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >>>>>  		goto skip_write;
> >>>>>  
> >>>>> +	if (!S_ISDIR(inode->i_mode) &&
> >>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> >>>>> +		mutex_lock(&sbi->writepages);
> >>>>> +		locked = true;
> >>>>> +	}
> >>>>> +
> >>>>>  	blk_start_plug(&plug);
> >>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >>>>>  	blk_finish_plug(&plug);
> >>>>>  
> >>>>> +	if (locked)
> >>>>> +		mutex_unlock(&sbi->writepages);
> >>>>> +
> >>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
> >>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >>>>>  	/*
> >>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>> index 375aa9f30cfa..098bdedc28bf 100644
> >>>>> --- a/fs/f2fs/f2fs.h
> >>>>> +++ b/fs/f2fs/f2fs.h
> >>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> >>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >>>>>  
> >>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >>>>>  	int valid_super_block;			/* valid super block no */
> >>>>>  	unsigned long s_flag;				/* flags for sbi */
> >>>>> +	struct mutex writepages;		/* mutex for writepages() */
> >>>>>  
> >>>>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>> index 63fc647f9ac2..ffea2d1303bd 100644
> >>>>> --- a/fs/f2fs/segment.c
> >>>>> +++ b/fs/f2fs/segment.c
> >>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> >>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >>>>>  
> >>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>>> index be41dbd7b261..53d70b64fea1 100644
> >>>>> --- a/fs/f2fs/super.c
> >>>>> +++ b/fs/f2fs/super.c
> >>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>>  	/* init f2fs-specific super block info */
> >>>>>  	sbi->valid_super_block = valid_super_block;
> >>>>>  	mutex_init(&sbi->gc_mutex);
> >>>>> +	mutex_init(&sbi->writepages);
> >>>>>  	mutex_init(&sbi->cp_mutex);
> >>>>>  	init_rwsem(&sbi->node_write);
> >>>>>  	init_rwsem(&sbi->node_change);
> >>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>>>> index cd2e030e47b8..81c0e5337443 100644
> >>>>> --- a/fs/f2fs/sysfs.c
> >>>>> +++ b/fs/f2fs/sysfs.c
> >>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> >>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >>>>>  	ATTR_LIST(ipu_policy),
> >>>>>  	ATTR_LIST(min_ipu_util),
> >>>>>  	ATTR_LIST(min_fsync_blocks),
> >>>>> +	ATTR_LIST(min_seq_blocks),
> >>>>>  	ATTR_LIST(min_hot_blocks),
> >>>>>  	ATTR_LIST(min_ssr_sections),
> >>>>>  	ATTR_LIST(max_victim_search),
> >>>>>
> >>>
> >>> .
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-15  2:15                   ` Jaegeuk Kim
@ 2018-08-15  3:44                     ` Chao Yu
  2018-08-16  1:34                       ` Jaegeuk Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Chao Yu @ 2018-08-15  3:44 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/15 10:15, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 1:28, Jaegeuk Kim wrote:
>>> On 08/14, Chao Yu wrote:
>>>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
>>>>> On 08/14, Chao Yu wrote:
>>>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>>>>>> On 08/13, Chao Yu wrote:
>>>>>>>> Hi Jaegeuk,
>>>>>>>>
>>>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>>>
>>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>>>> device: UFS
>>>>>>>>>
>>>>>>>>> Before -
>>>>>>>>> read throughput: 185 MB/s
>>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>>>
>>>>>>>>> After -
>>>>>>>>> read throughput: 758 MB/s
>>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>>
>>>>>>>> IMO, it only impact sequential read performance in a large file which may be
>>>>>>>> fragmented during multi-thread writing.
>>>>>>>>
>>>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>>>>>> data area writer.
>>>>>>>>
>>>>>>>> So how about adding a mount option to serialize writepage() for different type
>>>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>>>>>> HOT_DATA	1
>>>>>>>> WARM_DATA	2
>>>>>>>> COLD_DATA	4
>>>>>>>
>>>>>>> Well, I don't think we need to give too many mount options for this fragmented
>>>>>>> case. How about doing this for the large files only like this?
>>>>>>
>>>>>> Thread A write 512 pages			Thread B write 8 pages
>>>>>>
>>>>>> - writepages()
>>>>>>  - mutex_lock(&sbi->writepages);
>>>>>>   - writepage();
>>>>>> ...
>>>>>> 						- writepages()
>>>>>> 						 - writepage()
>>>>>> 						  ....
>>>>>>   - writepage();
>>>>>> ...
>>>>>>  - mutex_unlock(&sbi->writepages);
>>>>>>
>>>>>> Above case will also cause fragmentation since we didn't serialize all
>>>>>> concurrent IO with the lock.
>>>>>>
>>>>>> Do we need to consider such case?
>>>>>
>>>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
>>>>> when considering starvation of Thread B.
>>>>
>>>> Yeah, but in reality, there would be more threads competing in same log header,
>>>> so I worry that the effect of defragmenting will not so good as we expect,
>>>> anyway, for benchmark, it's enough.
>>>
>>> Basically, I think this is not a benchmark issue. :) It just reveals the issue
>>> much easily. Let me think about three cases:
>>> 1) WB_SYNC_NONE & WB_SYNC_NONE
>>>  -> can simply use mutex_lock
>>>
>>> 2) WB_SYNC_ALL & WB_SYNC_NONE
>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
>>>     will skip writing blocks
>>>
>>> 3) WB_SYNC_ALL & WB_SYNC_ALL
>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
>>>     starvation.
>>>
>>>
>>> I've been testing the below.
>>>
>>> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
>>>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
>>>         mutex_lock(&sbi->writepages);
>>>         locked = true;
>>
>> Just cover buffered IO? how about covering Direct IO and atomic write as well?
> 
> I'd expect direct IO does in-place-updates, and not sure whether we need to

For initial writes, they are not IPU.

> add another lock contention between buffered or direct IO. Atomic writes
> would be covered by ->min_seq_blocks.

Okay. :)

Thanks,

> 
>>
>> Thanks,
>>
>>> }
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>>>>>  sequential read
>>>>>>>
>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>
>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>> device: UFS
>>>>>>>
>>>>>>> Before -
>>>>>>> read throughput: 185 MB/s
>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>
>>>>>>> After -
>>>>>>> read throughput: 758 MB/s
>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>
>>>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>> ---
>>>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>>>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>>>>>  fs/f2fs/segment.c                       |  1 +
>>>>>>>  fs/f2fs/super.c                         |  1 +
>>>>>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>>>>>  6 files changed, 24 insertions(+)
>>>>>>>
>>>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>> index 9b0123388f18..94a24aedcdb2 100644
>>>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>> @@ -51,6 +51,14 @@ Description:
>>>>>>>  		 Controls the dirty page count condition for the in-place-update
>>>>>>>  		 policies.
>>>>>>>  
>>>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>>>>>> +Date:		August 2018
>>>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>> +Description:
>>>>>>> +		 Controls the dirty page count condition for batched sequential
>>>>>>> +		 writes in ->writepages.
>>>>>>> +
>>>>>>> +
>>>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>>>>>  Date:		March 2017
>>>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>> index 45f043ee48bd..f09231b1cc74 100644
>>>>>>> --- a/fs/f2fs/data.c
>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>>  	struct blk_plug plug;
>>>>>>>  	int ret;
>>>>>>> +	bool locked = false;
>>>>>>>  
>>>>>>>  	/* deal with chardevs and other special file */
>>>>>>>  	if (!mapping->a_ops->writepage)
>>>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>>  		goto skip_write;
>>>>>>>  
>>>>>>> +	if (!S_ISDIR(inode->i_mode) &&
>>>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>>>>>> +		mutex_lock(&sbi->writepages);
>>>>>>> +		locked = true;
>>>>>>> +	}
>>>>>>> +
>>>>>>>  	blk_start_plug(&plug);
>>>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>>>>>  	blk_finish_plug(&plug);
>>>>>>>  
>>>>>>> +	if (locked)
>>>>>>> +		mutex_unlock(&sbi->writepages);
>>>>>>> +
>>>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>>>>>  	/*
>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>> index 375aa9f30cfa..098bdedc28bf 100644
>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>>>>>  
>>>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>>>>>  	int valid_super_block;			/* valid super block no */
>>>>>>>  	unsigned long s_flag;				/* flags for sbi */
>>>>>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>>>>>  
>>>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>>>>>  
>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>> index be41dbd7b261..53d70b64fea1 100644
>>>>>>> --- a/fs/f2fs/super.c
>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>>  	/* init f2fs-specific super block info */
>>>>>>>  	sbi->valid_super_block = valid_super_block;
>>>>>>>  	mutex_init(&sbi->gc_mutex);
>>>>>>> +	mutex_init(&sbi->writepages);
>>>>>>>  	mutex_init(&sbi->cp_mutex);
>>>>>>>  	init_rwsem(&sbi->node_write);
>>>>>>>  	init_rwsem(&sbi->node_change);
>>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>>>> index cd2e030e47b8..81c0e5337443 100644
>>>>>>> --- a/fs/f2fs/sysfs.c
>>>>>>> +++ b/fs/f2fs/sysfs.c
>>>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>>>>>  	ATTR_LIST(ipu_policy),
>>>>>>>  	ATTR_LIST(min_ipu_util),
>>>>>>>  	ATTR_LIST(min_fsync_blocks),
>>>>>>> +	ATTR_LIST(min_seq_blocks),
>>>>>>>  	ATTR_LIST(min_hot_blocks),
>>>>>>>  	ATTR_LIST(min_ssr_sections),
>>>>>>>  	ATTR_LIST(max_victim_search),
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-15  3:44                     ` Chao Yu
@ 2018-08-16  1:34                       ` Jaegeuk Kim
  2018-08-16 11:39                         ` Chao Yu
  0 siblings, 1 reply; 14+ messages in thread
From: Jaegeuk Kim @ 2018-08-16  1:34 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/15, Chao Yu wrote:
> On 2018/8/15 10:15, Jaegeuk Kim wrote:
> > On 08/15, Chao Yu wrote:
> >> On 2018/8/15 1:28, Jaegeuk Kim wrote:
> >>> On 08/14, Chao Yu wrote:
> >>>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
> >>>>> On 08/14, Chao Yu wrote:
> >>>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
> >>>>>>> On 08/13, Chao Yu wrote:
> >>>>>>>> Hi Jaegeuk,
> >>>>>>>>
> >>>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
> >>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>>>>>> to fix the drop in sequential read throughput.
> >>>>>>>>>
> >>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>>>>>> device: UFS
> >>>>>>>>>
> >>>>>>>>> Before -
> >>>>>>>>> read throughput: 185 MB/s
> >>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>>>>>
> >>>>>>>>> After -
> >>>>>>>>> read throughput: 758 MB/s
> >>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>>>>
> >>>>>>>> IMO, it only impact sequential read performance in a large file which may be
> >>>>>>>> fragmented during multi-thread writing.
> >>>>>>>>
> >>>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
> >>>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
> >>>>>>>> data area writer.
> >>>>>>>>
> >>>>>>>> So how about adding a mount option to serialize writepage() for different type
> >>>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
> >>>>>>>> HOT_DATA	1
> >>>>>>>> WARM_DATA	2
> >>>>>>>> COLD_DATA	4
> >>>>>>>
> >>>>>>> Well, I don't think we need to give too many mount options for this fragmented
> >>>>>>> case. How about doing this for the large files only like this?
> >>>>>>
> >>>>>> Thread A write 512 pages			Thread B write 8 pages
> >>>>>>
> >>>>>> - writepages()
> >>>>>>  - mutex_lock(&sbi->writepages);
> >>>>>>   - writepage();
> >>>>>> ...
> >>>>>> 						- writepages()
> >>>>>> 						 - writepage()
> >>>>>> 						  ....
> >>>>>>   - writepage();
> >>>>>> ...
> >>>>>>  - mutex_unlock(&sbi->writepages);
> >>>>>>
> >>>>>> Above case will also cause fragmentation since we didn't serialize all
> >>>>>> concurrent IO with the lock.
> >>>>>>
> >>>>>> Do we need to consider such case?
> >>>>>
> >>>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
> >>>>> when considering starvation of Thread B.
> >>>>
> >>>> Yeah, but in reality, there would be more threads competing in same log header,
> >>>> so I worry that the effect of defragmenting will not so good as we expect,
> >>>> anyway, for benchmark, it's enough.
> >>>
> >>> Basically, I think this is not a benchmark issue. :) It just reveals the issue
> >>> much easily. Let me think about three cases:
> >>> 1) WB_SYNC_NONE & WB_SYNC_NONE
> >>>  -> can simply use mutex_lock
> >>>
> >>> 2) WB_SYNC_ALL & WB_SYNC_NONE
> >>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
> >>>     will skip writing blocks
> >>>
> >>> 3) WB_SYNC_ALL & WB_SYNC_ALL
> >>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
> >>>     starvation.
> >>>
> >>>
> >>> I've been testing the below.
> >>>
> >>> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
> >>>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
> >>>         mutex_lock(&sbi->writepages);
> >>>         locked = true;
> >>
> >> Just cover buffered IO? how about covering Direct IO and atomic write as well?
> > 
> > I'd expect direct IO does in-place-updates, and not sure whether we need to
> 
> For initial writes, they are not IPU.

It's a little bit different. DIO allocates blocks first in the bulk mode, and
submits bios later. So, it'd be more likely to allocate consecutive blocks by
get_data_block_dio, if user gave a big chunk to write. I'm more concerned about
lock contention, and don't think we have to serialize all the block allocation.

> 
> > add another lock contention between buffered or direct IO. Atomic writes
> > would be covered by ->min_seq_blocks.
> 
> Okay. :)
> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>> }
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>>>
> >>>>>> Thanks,
> >>>>>>
> >>>>>>>
> >>>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
> >>>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
> >>>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
> >>>>>>>  sequential read
> >>>>>>>
> >>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
> >>>>>>> to fix the drop in sequential read throughput.
> >>>>>>>
> >>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
> >>>>>>> device: UFS
> >>>>>>>
> >>>>>>> Before -
> >>>>>>> read throughput: 185 MB/s
> >>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
> >>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
> >>>>>>>
> >>>>>>> After -
> >>>>>>> read throughput: 758 MB/s
> >>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
> >>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
> >>>>>>>
> >>>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> >>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>>>> ---
> >>>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
> >>>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
> >>>>>>>  fs/f2fs/f2fs.h                          |  2 ++
> >>>>>>>  fs/f2fs/segment.c                       |  1 +
> >>>>>>>  fs/f2fs/super.c                         |  1 +
> >>>>>>>  fs/f2fs/sysfs.c                         |  2 ++
> >>>>>>>  6 files changed, 24 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>>>> index 9b0123388f18..94a24aedcdb2 100644
> >>>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>>>>>> @@ -51,6 +51,14 @@ Description:
> >>>>>>>  		 Controls the dirty page count condition for the in-place-update
> >>>>>>>  		 policies.
> >>>>>>>  
> >>>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
> >>>>>>> +Date:		August 2018
> >>>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>>>> +Description:
> >>>>>>> +		 Controls the dirty page count condition for batched sequential
> >>>>>>> +		 writes in ->writepages.
> >>>>>>> +
> >>>>>>> +
> >>>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
> >>>>>>>  Date:		March 2017
> >>>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>>>>> index 45f043ee48bd..f09231b1cc74 100644
> >>>>>>> --- a/fs/f2fs/data.c
> >>>>>>> +++ b/fs/f2fs/data.c
> >>>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>>>  	struct blk_plug plug;
> >>>>>>>  	int ret;
> >>>>>>> +	bool locked = false;
> >>>>>>>  
> >>>>>>>  	/* deal with chardevs and other special file */
> >>>>>>>  	if (!mapping->a_ops->writepage)
> >>>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
> >>>>>>>  		goto skip_write;
> >>>>>>>  
> >>>>>>> +	if (!S_ISDIR(inode->i_mode) &&
> >>>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
> >>>>>>> +		mutex_lock(&sbi->writepages);
> >>>>>>> +		locked = true;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>>  	blk_start_plug(&plug);
> >>>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
> >>>>>>>  	blk_finish_plug(&plug);
> >>>>>>>  
> >>>>>>> +	if (locked)
> >>>>>>> +		mutex_unlock(&sbi->writepages);
> >>>>>>> +
> >>>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
> >>>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
> >>>>>>>  	/*
> >>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>>> index 375aa9f30cfa..098bdedc28bf 100644
> >>>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
> >>>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
> >>>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
> >>>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
> >>>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
> >>>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
> >>>>>>>  
> >>>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
> >>>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
> >>>>>>>  	int valid_super_block;			/* valid super block no */
> >>>>>>>  	unsigned long s_flag;				/* flags for sbi */
> >>>>>>> +	struct mutex writepages;		/* mutex for writepages() */
> >>>>>>>  
> >>>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> >>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
> >>>>>>> --- a/fs/f2fs/segment.c
> >>>>>>> +++ b/fs/f2fs/segment.c
> >>>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
> >>>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> >>>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> >>>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> >>>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
> >>>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >>>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
> >>>>>>>  
> >>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>>>>> index be41dbd7b261..53d70b64fea1 100644
> >>>>>>> --- a/fs/f2fs/super.c
> >>>>>>> +++ b/fs/f2fs/super.c
> >>>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>>>>  	/* init f2fs-specific super block info */
> >>>>>>>  	sbi->valid_super_block = valid_super_block;
> >>>>>>>  	mutex_init(&sbi->gc_mutex);
> >>>>>>> +	mutex_init(&sbi->writepages);
> >>>>>>>  	mutex_init(&sbi->cp_mutex);
> >>>>>>>  	init_rwsem(&sbi->node_write);
> >>>>>>>  	init_rwsem(&sbi->node_change);
> >>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>>>>>> index cd2e030e47b8..81c0e5337443 100644
> >>>>>>> --- a/fs/f2fs/sysfs.c
> >>>>>>> +++ b/fs/f2fs/sysfs.c
> >>>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> >>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
> >>>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> >>>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
> >>>>>>>  	ATTR_LIST(ipu_policy),
> >>>>>>>  	ATTR_LIST(min_ipu_util),
> >>>>>>>  	ATTR_LIST(min_fsync_blocks),
> >>>>>>> +	ATTR_LIST(min_seq_blocks),
> >>>>>>>  	ATTR_LIST(min_hot_blocks),
> >>>>>>>  	ATTR_LIST(min_ssr_sections),
> >>>>>>>  	ATTR_LIST(max_victim_search),
> >>>>>>>
> >>>>>
> >>>>> .
> >>>>>
> >>>
> >>> .
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: fix performance issue observed with multi-thread sequential read
  2018-08-16  1:34                       ` Jaegeuk Kim
@ 2018-08-16 11:39                         ` Chao Yu
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Yu @ 2018-08-16 11:39 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018/8/16 9:34, Jaegeuk Kim wrote:
> On 08/15, Chao Yu wrote:
>> On 2018/8/15 10:15, Jaegeuk Kim wrote:
>>> On 08/15, Chao Yu wrote:
>>>> On 2018/8/15 1:28, Jaegeuk Kim wrote:
>>>>> On 08/14, Chao Yu wrote:
>>>>>> On 2018/8/14 12:04, Jaegeuk Kim wrote:
>>>>>>> On 08/14, Chao Yu wrote:
>>>>>>>> On 2018/8/14 4:11, Jaegeuk Kim wrote:
>>>>>>>>> On 08/13, Chao Yu wrote:
>>>>>>>>>> Hi Jaegeuk,
>>>>>>>>>>
>>>>>>>>>> On 2018/8/11 2:56, Jaegeuk Kim wrote:
>>>>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>>>>>
>>>>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>>>>>> device: UFS
>>>>>>>>>>>
>>>>>>>>>>> Before -
>>>>>>>>>>> read throughput: 185 MB/s
>>>>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>>>>>
>>>>>>>>>>> After -
>>>>>>>>>>> read throughput: 758 MB/s
>>>>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>>>>
>>>>>>>>>> IMO, it only impact sequential read performance in a large file which may be
>>>>>>>>>> fragmented during multi-thread writing.
>>>>>>>>>>
>>>>>>>>>> In android environment, mostly, the large file should be cold type, such as apk,
>>>>>>>>>> mp3, rmvb, jpeg..., so I think we only need to serialize writepages() for cold
>>>>>>>>>> data area writer.
>>>>>>>>>>
>>>>>>>>>> So how about adding a mount option to serialize writepage() for different type
>>>>>>>>>> of log, e.g. in android, using serialize=4; by default, using serialize=7
>>>>>>>>>> HOT_DATA	1
>>>>>>>>>> WARM_DATA	2
>>>>>>>>>> COLD_DATA	4
>>>>>>>>>
>>>>>>>>> Well, I don't think we need to give too many mount options for this fragmented
>>>>>>>>> case. How about doing this for the large files only like this?
>>>>>>>>
>>>>>>>> Thread A write 512 pages			Thread B write 8 pages
>>>>>>>>
>>>>>>>> - writepages()
>>>>>>>>  - mutex_lock(&sbi->writepages);
>>>>>>>>   - writepage();
>>>>>>>> ...
>>>>>>>> 						- writepages()
>>>>>>>> 						 - writepage()
>>>>>>>> 						  ....
>>>>>>>>   - writepage();
>>>>>>>> ...
>>>>>>>>  - mutex_unlock(&sbi->writepages);
>>>>>>>>
>>>>>>>> Above case will also cause fragmentation since we didn't serialize all
>>>>>>>> concurrent IO with the lock.
>>>>>>>>
>>>>>>>> Do we need to consider such case?
>>>>>>>
>>>>>>> We can simply allow 512 and 8 in the same segment, which would not a big deal,
>>>>>>> when considering starvation of Thread B.
>>>>>>
>>>>>> Yeah, but in reality, there would be more threads competing in same log header,
>>>>>> so I worry that the effect of defragmenting will not so good as we expect,
>>>>>> anyway, for benchmark, it's enough.
>>>>>
>>>>> Basically, I think this is not a benchmark issue. :) It just reveals the issue
>>>>> much easily. Let me think about three cases:
>>>>> 1) WB_SYNC_NONE & WB_SYNC_NONE
>>>>>  -> can simply use mutex_lock
>>>>>
>>>>> 2) WB_SYNC_ALL & WB_SYNC_NONE
>>>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, while WB_SYNC_NONE
>>>>>     will skip writing blocks
>>>>>
>>>>> 3) WB_SYNC_ALL & WB_SYNC_ALL
>>>>>  -> can use mutex_lock on WB_SYNC_ALL having >512 blocks, in order to avoid
>>>>>     starvation.
>>>>>
>>>>>
>>>>> I've been testing the below.
>>>>>
>>>>> if (!S_ISDIR(inode->i_mode) && (wbc->sync_mode != WB_SYNC_ALL ||
>>>>>                 get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks)) {
>>>>>         mutex_lock(&sbi->writepages);
>>>>>         locked = true;
>>>>
>>>> Just cover buffered IO? how about covering Direct IO and atomic write as well?
>>>
>>> I'd expect direct IO does in-place-updates, and not sure whether we need to
>>
>> For initial writes, they are not IPU.
> 
> It's a little bit different. DIO allocates blocks first in the bulk mode, and
> submits bios later. So, it'd be more likely to allocate consecutive blocks by
> get_data_block_dio, if user gave a big chunk to write. I'm more concerned about
> lock contention, and don't think we have to serialize all the block allocation.

IMO, that's a tradeoff between avoid fragmentation and improving concurrency.

For single kind of above demands, we can separate optimizing design with
configuration, enable it dynamically, but there is no single best choice to
adapt all demands.

Well, the design is okay for me, and could you send out the patch? :)

Thanks,


> 
>>
>>> add another lock contention between buffered or direct IO. Atomic writes
>>> would be covered by ->min_seq_blocks.
>>
>> Okay. :)
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>> }
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>>>
>>>>>>>>> >From 4fea0b6e4da8512a72dd52afc7a51beb35966ad9 Mon Sep 17 00:00:00 2001
>>>>>>>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>>>> Date: Thu, 9 Aug 2018 17:53:34 -0700
>>>>>>>>> Subject: [PATCH] f2fs: fix performance issue observed with multi-thread
>>>>>>>>>  sequential read
>>>>>>>>>
>>>>>>>>> This reverts the commit - "b93f771 - f2fs: remove writepages lock"
>>>>>>>>> to fix the drop in sequential read throughput.
>>>>>>>>>
>>>>>>>>> Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L
>>>>>>>>> device: UFS
>>>>>>>>>
>>>>>>>>> Before -
>>>>>>>>> read throughput: 185 MB/s
>>>>>>>>> total read requests: 85177 (of these ~80000 are 4KB size requests).
>>>>>>>>> total write requests: 2546 (of these ~2208 requests are written in 512KB).
>>>>>>>>>
>>>>>>>>> After -
>>>>>>>>> read throughput: 758 MB/s
>>>>>>>>> total read requests: 2417 (of these ~2042 are 512KB reads).
>>>>>>>>> total write requests: 2701 (of these ~2034 requests are written in 512KB).
>>>>>>>>>
>>>>>>>>> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
>>>>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>>>> ---
>>>>>>>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++++++
>>>>>>>>>  fs/f2fs/data.c                          | 10 ++++++++++
>>>>>>>>>  fs/f2fs/f2fs.h                          |  2 ++
>>>>>>>>>  fs/f2fs/segment.c                       |  1 +
>>>>>>>>>  fs/f2fs/super.c                         |  1 +
>>>>>>>>>  fs/f2fs/sysfs.c                         |  2 ++
>>>>>>>>>  6 files changed, 24 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>>>> index 9b0123388f18..94a24aedcdb2 100644
>>>>>>>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>>>>>>>> @@ -51,6 +51,14 @@ Description:
>>>>>>>>>  		 Controls the dirty page count condition for the in-place-update
>>>>>>>>>  		 policies.
>>>>>>>>>  
>>>>>>>>> +What:		/sys/fs/f2fs/<disk>/min_seq_blocks
>>>>>>>>> +Date:		August 2018
>>>>>>>>> +Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>>>> +Description:
>>>>>>>>> +		 Controls the dirty page count condition for batched sequential
>>>>>>>>> +		 writes in ->writepages.
>>>>>>>>> +
>>>>>>>>> +
>>>>>>>>>  What:		/sys/fs/f2fs/<disk>/min_hot_blocks
>>>>>>>>>  Date:		March 2017
>>>>>>>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>>> index 45f043ee48bd..f09231b1cc74 100644
>>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>>> @@ -2132,6 +2132,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>>>>  	struct blk_plug plug;
>>>>>>>>>  	int ret;
>>>>>>>>> +	bool locked = false;
>>>>>>>>>  
>>>>>>>>>  	/* deal with chardevs and other special file */
>>>>>>>>>  	if (!mapping->a_ops->writepage)
>>>>>>>>> @@ -2162,10 +2163,19 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>>>  	else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>>>>  		goto skip_write;
>>>>>>>>>  
>>>>>>>>> +	if (!S_ISDIR(inode->i_mode) &&
>>>>>>>>> +			get_dirty_pages(inode) <= SM_I(sbi)->min_seq_blocks) {
>>>>>>>>> +		mutex_lock(&sbi->writepages);
>>>>>>>>> +		locked = true;
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>>  	blk_start_plug(&plug);
>>>>>>>>>  	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
>>>>>>>>>  	blk_finish_plug(&plug);
>>>>>>>>>  
>>>>>>>>> +	if (locked)
>>>>>>>>> +		mutex_unlock(&sbi->writepages);
>>>>>>>>> +
>>>>>>>>>  	if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>>>  		atomic_dec(&sbi->wb_sync_req[DATA]);
>>>>>>>>>  	/*
>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>>> index 375aa9f30cfa..098bdedc28bf 100644
>>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>>> @@ -913,6 +913,7 @@ struct f2fs_sm_info {
>>>>>>>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>>>>>>>>  	unsigned int min_ipu_util;	/* in-place-update threshold */
>>>>>>>>>  	unsigned int min_fsync_blocks;	/* threshold for fsync */
>>>>>>>>> +	unsigned int min_seq_blocks;	/* threshold for sequential blocks */
>>>>>>>>>  	unsigned int min_hot_blocks;	/* threshold for hot block allocation */
>>>>>>>>>  	unsigned int min_ssr_sections;	/* threshold to trigger SSR allocation */
>>>>>>>>>  
>>>>>>>>> @@ -1133,6 +1134,7 @@ struct f2fs_sb_info {
>>>>>>>>>  	struct rw_semaphore sb_lock;		/* lock for raw super block */
>>>>>>>>>  	int valid_super_block;			/* valid super block no */
>>>>>>>>>  	unsigned long s_flag;				/* flags for sbi */
>>>>>>>>> +	struct mutex writepages;		/* mutex for writepages() */
>>>>>>>>>  
>>>>>>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>>>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>>> index 63fc647f9ac2..ffea2d1303bd 100644
>>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>>> @@ -4131,6 +4131,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
>>>>>>>>>  		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
>>>>>>>>>  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
>>>>>>>>>  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
>>>>>>>>> +	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
>>>>>>>>>  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>>>>>>>>>  	sm_info->min_ssr_sections = reserved_sections(sbi);
>>>>>>>>>  
>>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>>> index be41dbd7b261..53d70b64fea1 100644
>>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>>> @@ -2842,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>>>>  	/* init f2fs-specific super block info */
>>>>>>>>>  	sbi->valid_super_block = valid_super_block;
>>>>>>>>>  	mutex_init(&sbi->gc_mutex);
>>>>>>>>> +	mutex_init(&sbi->writepages);
>>>>>>>>>  	mutex_init(&sbi->cp_mutex);
>>>>>>>>>  	init_rwsem(&sbi->node_write);
>>>>>>>>>  	init_rwsem(&sbi->node_change);
>>>>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>>>>>> index cd2e030e47b8..81c0e5337443 100644
>>>>>>>>> --- a/fs/f2fs/sysfs.c
>>>>>>>>> +++ b/fs/f2fs/sysfs.c
>>>>>>>>> @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>>>>>>>> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
>>>>>>>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
>>>>>>>>>  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
>>>>>>>>> @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
>>>>>>>>>  	ATTR_LIST(ipu_policy),
>>>>>>>>>  	ATTR_LIST(min_ipu_util),
>>>>>>>>>  	ATTR_LIST(min_fsync_blocks),
>>>>>>>>> +	ATTR_LIST(min_seq_blocks),
>>>>>>>>>  	ATTR_LIST(min_hot_blocks),
>>>>>>>>>  	ATTR_LIST(min_ssr_sections),
>>>>>>>>>  	ATTR_LIST(max_victim_search),
>>>>>>>>>
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-10  2:37 [PATCH] f2fs: fix performance issue observed with multi-thread sequential read Jaegeuk Kim
2018-08-10  2:48 ` [PATCH v2] " Jaegeuk Kim
2018-08-10 18:56   ` [f2fs-dev] [PATCH v3] " Jaegeuk Kim
2018-08-13  2:20     ` Chao Yu
2018-08-13 20:11       ` Jaegeuk Kim
2018-08-14  3:49         ` Chao Yu
2018-08-14  4:04           ` Jaegeuk Kim
2018-08-14  6:18             ` Chao Yu
2018-08-14 17:28               ` Jaegeuk Kim
2018-08-15  1:52                 ` Chao Yu
2018-08-15  2:15                   ` Jaegeuk Kim
2018-08-15  3:44                     ` Chao Yu
2018-08-16  1:34                       ` Jaegeuk Kim
2018-08-16 11:39                         ` Chao Yu

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox