linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] f2fs: bug fix and improvement
@ 2019-03-14 16:37 Damien Le Moal
  2019-03-14 16:37 ` [PATCH 1/2] f2fs: Fix use of number of devices Damien Le Moal
  2019-03-14 16:37 ` [PATCH 2/2] f2fs: Reduce zoned block device memory usage Damien Le Moal
  0 siblings, 2 replies; 7+ messages in thread
From: Damien Le Moal @ 2019-03-14 16:37 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

Small 2 patch series to fix a bug with single zoned block device volume
handling and reduce memory usage when zoned block devices are used.

Damien Le Moal (2):
  f2fs: Fix use of number of devices
  f2fs: Reduce zoned block device memory usage

 fs/f2fs/data.c    | 17 ++++++++++------
 fs/f2fs/f2fs.h    | 27 ++++++++++++++++----------
 fs/f2fs/file.c    |  2 +-
 fs/f2fs/gc.c      |  2 +-
 fs/f2fs/segment.c | 49 ++++++++++++++++++++++-------------------------
 fs/f2fs/super.c   | 13 ++++++++-----
 6 files changed, 61 insertions(+), 49 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] f2fs: Fix use of number of devices
  2019-03-14 16:37 [PATCH 0/2] f2fs: bug fix and improvement Damien Le Moal
@ 2019-03-14 16:37 ` Damien Le Moal
  2019-03-15  6:33   ` Chao Yu
  2019-03-14 16:37 ` [PATCH 2/2] f2fs: Reduce zoned block device memory usage Damien Le Moal
  1 sibling, 1 reply; 7+ messages in thread
From: Damien Le Moal @ 2019-03-14 16:37 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

For a single device mount using a zoned block device, the zone
information for the device is stored in the sbi->devs single entry
array and sbi->s_ndevs is set to 1. This differs from a single device
mount using a regular block device which does not allocate sbi->devs
and sets sbi->s_ndevs to 0.

However, sbi->s_devs == 0 condition is used throughout the code to
differentiate a single device mount from a multi-device mount where
sbi->s_ndevs is always larger than 1. This results in problems with
single zoned block device volumes as these are treated as multi-device
mounts but do not have the start_blk and end_blk information set. One
of the problem observed is skipping of zone discard issuing resulting in
write commands being issued to full zones or unaligned to a zone write
pointer.

Fix this problem by simply treating the cases sbi->s_ndevs == 0 (single
regular block device mount) and sbi->s_ndevs == 1 (single zoned block
device mount) in the same manner. This is done by introducing the
helper function f2fs_is_multi_device() and using thius helper in place
of direct tests of sbi->s_ndevs value, improving code readability.

Fixes: 7bb3a371d199 ("f2fs: Fix zoned block device support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 fs/f2fs/data.c    | 17 +++++++++++------
 fs/f2fs/f2fs.h    | 13 ++++++++++++-
 fs/f2fs/file.c    |  2 +-
 fs/f2fs/gc.c      |  2 +-
 fs/f2fs/segment.c | 13 +++++++------
 5 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 568e1d09eb48..91dd8407ba86 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -220,12 +220,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 	struct block_device *bdev = sbi->sb->s_bdev;
 	int i;
 
-	for (i = 0; i < sbi->s_ndevs; i++) {
-		if (FDEV(i).start_blk <= blk_addr &&
-					FDEV(i).end_blk >= blk_addr) {
-			blk_addr -= FDEV(i).start_blk;
-			bdev = FDEV(i).bdev;
-			break;
+	if (f2fs_is_multi_device(sbi)) {
+		for (i = 0; i < sbi->s_ndevs; i++) {
+			if (FDEV(i).start_blk <= blk_addr &&
+			    FDEV(i).end_blk >= blk_addr) {
+				blk_addr -= FDEV(i).start_blk;
+				bdev = FDEV(i).bdev;
+				break;
+			}
 		}
 	}
 	if (bio) {
@@ -239,6 +241,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 {
 	int i;
 
+	if (!f2fs_is_multi_device(sbi))
+		return 0;
+
 	for (i = 0; i < sbi->s_ndevs; i++)
 		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 			return i;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7ea5c9cede37..073f450af346 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1364,6 +1364,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
 }
 #endif
 
+/*
+ * Test if the current moun t super block is a multi-device volume.
+ * For single device regular disks, sbi->s_ndevs is 0.
+ * For single device zoned disks, sbi->s_ndevs is 1.
+ * For multi-device mounts, sbi->s_ndevs is always 2 or more.
+ */
+static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
+{
+	return sbi->s_ndevs > 1;
+}
+
 /* For write statistics. Suppose sector size is 512 bytes,
  * and the return value is in kbytes. s is of struct f2fs_sb_info.
  */
@@ -3586,7 +3597,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
 
 	if (f2fs_post_read_required(inode))
 		return true;
-	if (sbi->s_ndevs)
+	if (f2fs_is_multi_device(sbi))
 		return true;
 	/*
 	 * for blkzoned device, fallback direct IO to buffered IO, so
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ba5954f41e14..1e3a78bf7a66 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2571,7 +2571,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 							sizeof(range)))
 		return -EFAULT;
 
-	if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
+	if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
 			__is_large_section(sbi)) {
 		f2fs_msg(sbi->sb, KERN_WARNING,
 			"Can't flush %u in %d for segs_per_sec %u != 1\n",
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 195cf0f9d9ef..ab764bd106de 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1346,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
 	sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
 
 	/* give warm/cold data area from slower device */
-	if (sbi->s_ndevs && !__is_large_section(sbi))
+	if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
 		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
 				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9b79056d705d..d8f531b33350 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -560,7 +560,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
 	int ret = 0;
 	int i;
 
-	if (!sbi->s_ndevs)
+	if (!f2fs_is_multi_device(sbi))
 		return __submit_flush_wait(sbi, sbi->sb->s_bdev);
 
 	for (i = 0; i < sbi->s_ndevs; i++) {
@@ -628,7 +628,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
 		return ret;
 	}
 
-	if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
+	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+	    f2fs_is_multi_device(sbi)) {
 		ret = submit_flush_wait(sbi, ino);
 		atomic_dec(&fcc->queued_flush);
 
@@ -734,7 +735,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
 {
 	int ret = 0, i;
 
-	if (!sbi->s_ndevs)
+	if (!f2fs_is_multi_device(sbi))
 		return 0;
 
 	for (i = 1; i < sbi->s_ndevs; i++) {
@@ -1343,7 +1344,7 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
 
 	trace_f2fs_queue_discard(bdev, blkstart, blklen);
 
-	if (sbi->s_ndevs) {
+	if (f2fs_is_multi_device(sbi)) {
 		int devi = f2fs_target_device_index(sbi, blkstart);
 
 		blkstart -= FDEV(devi).start_blk;
@@ -1698,7 +1699,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 	block_t lblkstart = blkstart;
 	int devi = 0;
 
-	if (sbi->s_ndevs) {
+	if (f2fs_is_multi_device(sbi)) {
 		devi = f2fs_target_device_index(sbi, blkstart);
 		blkstart -= FDEV(devi).start_blk;
 	}
@@ -3055,7 +3056,7 @@ static void update_device_state(struct f2fs_io_info *fio)
 	struct f2fs_sb_info *sbi = fio->sbi;
 	unsigned int devidx;
 
-	if (!sbi->s_ndevs)
+	if (!f2fs_is_multi_device(sbi))
 		return;
 
 	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] f2fs: Reduce zoned block device memory usage
  2019-03-14 16:37 [PATCH 0/2] f2fs: bug fix and improvement Damien Le Moal
  2019-03-14 16:37 ` [PATCH 1/2] f2fs: Fix use of number of devices Damien Le Moal
@ 2019-03-14 16:37 ` Damien Le Moal
  2019-03-15  6:43   ` Chao Yu
  1 sibling, 1 reply; 7+ messages in thread
From: Damien Le Moal @ 2019-03-14 16:37 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

For zoned block devices, an array of zone types for each device is
allocated and initialized in order to determine if a section is stored
on a sequential zone (zone reset needed) or a conventional zone (no
zone reset needed and regular discard applies). Considering this usage,
the zone types stored in memory can be replaced with a bitmap to
indicate an equivalent information, that is, if a zone is sequential or
not. This reduces the memory usage for each zoned device by roughly 8:
on a 14TB disk with zones of 256 MB, the zone type array consumes
13x4KB pages while the bitmap uses only 2x4KB pages.

This patch changes the f2fs_dev_info structure blkz_type field to the
bitmap blkz_seq. Access to this bitmap is done using the helper
function f2fs_blkz_is_seq(), which is a rewrite of the function
get_blkz_type().

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 fs/f2fs/f2fs.h    | 14 +++++---------
 fs/f2fs/segment.c | 36 ++++++++++++++++--------------------
 fs/f2fs/super.c   | 13 ++++++++-----
 3 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 073f450af346..1a4a07e3ce05 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1066,8 +1066,8 @@ struct f2fs_dev_info {
 	block_t start_blk;
 	block_t end_blk;
 #ifdef CONFIG_BLK_DEV_ZONED
-	unsigned int nr_blkz;			/* Total number of zones */
-	u8 *blkz_type;				/* Array of zones type */
+	unsigned int nr_blkz;		/* Total number of zones */
+	unsigned long *blkz_seq;	/* Bitmap indicating sequential zones */
 #endif
 };
 
@@ -3513,16 +3513,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
 F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
 
 #ifdef CONFIG_BLK_DEV_ZONED
-static inline int get_blkz_type(struct f2fs_sb_info *sbi,
-			struct block_device *bdev, block_t blkaddr)
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+				    block_t blkaddr)
 {
 	unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
-	int i;
 
-	for (i = 0; i < sbi->s_ndevs; i++)
-		if (FDEV(i).bdev == bdev)
-			return FDEV(i).blkz_type[zno];
-	return -EINVAL;
+	return test_bit(zno, FDEV(devi).blkz_seq);
 }
 #endif
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d8f531b33350..f40148b735d7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1701,40 +1701,36 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 
 	if (f2fs_is_multi_device(sbi)) {
 		devi = f2fs_target_device_index(sbi, blkstart);
+		if (blkstart < FDEV(devi).start_blk ||
+		    blkstart > FDEV(devi).end_blk) {
+			f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
+				 blkstart);
+			return -EIO;
+		}
 		blkstart -= FDEV(devi).start_blk;
 	}
 
-	/*
-	 * We need to know the type of the zone: for conventional zones,
-	 * use regular discard if the drive supports it. For sequential
-	 * zones, reset the zone write pointer.
-	 */
-	switch (get_blkz_type(sbi, bdev, blkstart)) {
-
-	case BLK_ZONE_TYPE_CONVENTIONAL:
-		if (!blk_queue_discard(bdev_get_queue(bdev)))
-			return 0;
-		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
-	case BLK_ZONE_TYPE_SEQWRITE_REQ:
-	case BLK_ZONE_TYPE_SEQWRITE_PREF:
+	/* For sequential zones, reset the zone write pointer */
+	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
 		sector = SECTOR_FROM_BLOCK(blkstart);
 		nr_sects = SECTOR_FROM_BLOCK(blklen);
 
 		if (sector & (bdev_zone_sectors(bdev) - 1) ||
 				nr_sects != bdev_zone_sectors(bdev)) {
-			f2fs_msg(sbi->sb, KERN_INFO,
-				"(%d) %s: Unaligned discard attempted (block %x + %x)",
+			f2fs_msg(sbi->sb, KERN_ERR,
+				"(%d) %s: Unaligned zone reset attempted (block %x + %x)",
 				devi, sbi->s_ndevs ? FDEV(devi).path: "",
 				blkstart, blklen);
 			return -EIO;
 		}
 		trace_f2fs_issue_reset_zone(bdev, blkstart);
-		return blkdev_reset_zones(bdev, sector,
-					  nr_sects, GFP_NOFS);
-	default:
-		/* Unknown zone type: broken device ? */
-		return -EIO;
+		return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
 	}
+
+	/* For conventional zones, use regular discard if supported */
+	if (!blk_queue_discard(bdev_get_queue(bdev)))
+		return 0;
+	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
 }
 #endif
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index d1ccc52afc93..8d0caf4c5f2b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1017,7 +1017,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 	for (i = 0; i < sbi->s_ndevs; i++) {
 		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
 #ifdef CONFIG_BLK_DEV_ZONED
-		kvfree(FDEV(i).blkz_type);
+		kvfree(FDEV(i).blkz_seq);
 #endif
 	}
 	kvfree(sbi->devs);
@@ -2765,9 +2765,11 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
 		FDEV(devi).nr_blkz++;
 
-	FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
-								GFP_KERNEL);
-	if (!FDEV(devi).blkz_type)
+	FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+					BITS_TO_LONGS(FDEV(devi).nr_blkz)
+					* sizeof(unsigned long),
+					GFP_KERNEL);
+	if (!FDEV(devi).blkz_seq)
 		return -ENOMEM;
 
 #define F2FS_REPORT_NR_ZONES   4096
@@ -2794,7 +2796,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 		}
 
 		for (i = 0; i < nr_zones; i++) {
-			FDEV(devi).blkz_type[n] = zones[i].type;
+			if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+				set_bit(n, FDEV(devi).blkz_seq);
 			sector += zones[i].len;
 			n++;
 		}
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] f2fs: Fix use of number of devices
  2019-03-14 16:37 ` [PATCH 1/2] f2fs: Fix use of number of devices Damien Le Moal
@ 2019-03-15  6:33   ` Chao Yu
  0 siblings, 0 replies; 7+ messages in thread
From: Chao Yu @ 2019-03-15  6:33 UTC (permalink / raw)
  To: Damien Le Moal, Jaegeuk Kim, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

On 2019/3/15 0:37, Damien Le Moal wrote:
> For a single device mount using a zoned block device, the zone
> information for the device is stored in the sbi->devs single entry
> array and sbi->s_ndevs is set to 1. This differs from a single device
> mount using a regular block device which does not allocate sbi->devs
> and sets sbi->s_ndevs to 0.
> 
> However, sbi->s_devs == 0 condition is used throughout the code to
> differentiate a single device mount from a multi-device mount where
> sbi->s_ndevs is always larger than 1. This results in problems with
> single zoned block device volumes as these are treated as multi-device
> mounts but do not have the start_blk and end_blk information set. One
> of the problem observed is skipping of zone discard issuing resulting in
> write commands being issued to full zones or unaligned to a zone write
> pointer.
> 
> Fix this problem by simply treating the cases sbi->s_ndevs == 0 (single
> regular block device mount) and sbi->s_ndevs == 1 (single zoned block
> device mount) in the same manner. This is done by introducing the
> helper function f2fs_is_multi_device() and using thius helper in place
                                                   ^^^^^
this

> of direct tests of sbi->s_ndevs value, improving code readability.
> 
> Fixes: 7bb3a371d199 ("f2fs: Fix zoned block device support")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> ---
>  fs/f2fs/data.c    | 17 +++++++++++------
>  fs/f2fs/f2fs.h    | 13 ++++++++++++-
>  fs/f2fs/file.c    |  2 +-
>  fs/f2fs/gc.c      |  2 +-
>  fs/f2fs/segment.c | 13 +++++++------
>  5 files changed, 32 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 568e1d09eb48..91dd8407ba86 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -220,12 +220,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
>  	struct block_device *bdev = sbi->sb->s_bdev;
>  	int i;
>  
> -	for (i = 0; i < sbi->s_ndevs; i++) {
> -		if (FDEV(i).start_blk <= blk_addr &&
> -					FDEV(i).end_blk >= blk_addr) {
> -			blk_addr -= FDEV(i).start_blk;
> -			bdev = FDEV(i).bdev;
> -			break;
> +	if (f2fs_is_multi_device(sbi)) {
> +		for (i = 0; i < sbi->s_ndevs; i++) {
> +			if (FDEV(i).start_blk <= blk_addr &&
> +			    FDEV(i).end_blk >= blk_addr) {
> +				blk_addr -= FDEV(i).start_blk;
> +				bdev = FDEV(i).bdev;
> +				break;
> +			}
>  		}
>  	}
>  	if (bio) {
> @@ -239,6 +241,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
>  {
>  	int i;
>  
> +	if (!f2fs_is_multi_device(sbi))
> +		return 0;
> +
>  	for (i = 0; i < sbi->s_ndevs; i++)
>  		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
>  			return i;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7ea5c9cede37..073f450af346 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1364,6 +1364,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
>  }
>  #endif
>  
> +/*
> + * Test if the current moun t super block is a multi-device volume.
                          ^^^^^^
mounted?

Other part looks good to me. :)

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

> + * For single device regular disks, sbi->s_ndevs is 0.
> + * For single device zoned disks, sbi->s_ndevs is 1.
> + * For multi-device mounts, sbi->s_ndevs is always 2 or more.
> + */
> +static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
> +{
> +	return sbi->s_ndevs > 1;
> +}
> +
>  /* For write statistics. Suppose sector size is 512 bytes,
>   * and the return value is in kbytes. s is of struct f2fs_sb_info.
>   */
> @@ -3586,7 +3597,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
>  
>  	if (f2fs_post_read_required(inode))
>  		return true;
> -	if (sbi->s_ndevs)
> +	if (f2fs_is_multi_device(sbi))
>  		return true;
>  	/*
>  	 * for blkzoned device, fallback direct IO to buffered IO, so
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index ba5954f41e14..1e3a78bf7a66 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2571,7 +2571,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
>  							sizeof(range)))
>  		return -EFAULT;
>  
> -	if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
> +	if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
>  			__is_large_section(sbi)) {
>  		f2fs_msg(sbi->sb, KERN_WARNING,
>  			"Can't flush %u in %d for segs_per_sec %u != 1\n",
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 195cf0f9d9ef..ab764bd106de 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1346,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
>  	sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
>  
>  	/* give warm/cold data area from slower device */
> -	if (sbi->s_ndevs && !__is_large_section(sbi))
> +	if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
>  		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
>  				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
>  }
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 9b79056d705d..d8f531b33350 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -560,7 +560,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
>  	int ret = 0;
>  	int i;
>  
> -	if (!sbi->s_ndevs)
> +	if (!f2fs_is_multi_device(sbi))
>  		return __submit_flush_wait(sbi, sbi->sb->s_bdev);
>  
>  	for (i = 0; i < sbi->s_ndevs; i++) {
> @@ -628,7 +628,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
>  		return ret;
>  	}
>  
> -	if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
> +	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
> +	    f2fs_is_multi_device(sbi)) {
>  		ret = submit_flush_wait(sbi, ino);
>  		atomic_dec(&fcc->queued_flush);
>  
> @@ -734,7 +735,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
>  {
>  	int ret = 0, i;
>  
> -	if (!sbi->s_ndevs)
> +	if (!f2fs_is_multi_device(sbi))
>  		return 0;
>  
>  	for (i = 1; i < sbi->s_ndevs; i++) {
> @@ -1343,7 +1344,7 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
>  
>  	trace_f2fs_queue_discard(bdev, blkstart, blklen);
>  
> -	if (sbi->s_ndevs) {
> +	if (f2fs_is_multi_device(sbi)) {
>  		int devi = f2fs_target_device_index(sbi, blkstart);
>  
>  		blkstart -= FDEV(devi).start_blk;
> @@ -1698,7 +1699,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
>  	block_t lblkstart = blkstart;
>  	int devi = 0;
>  
> -	if (sbi->s_ndevs) {
> +	if (f2fs_is_multi_device(sbi)) {
>  		devi = f2fs_target_device_index(sbi, blkstart);
>  		blkstart -= FDEV(devi).start_blk;
>  	}
> @@ -3055,7 +3056,7 @@ static void update_device_state(struct f2fs_io_info *fio)
>  	struct f2fs_sb_info *sbi = fio->sbi;
>  	unsigned int devidx;
>  
> -	if (!sbi->s_ndevs)
> +	if (!f2fs_is_multi_device(sbi))
>  		return;
>  
>  	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] f2fs: Reduce zoned block device memory usage
  2019-03-14 16:37 ` [PATCH 2/2] f2fs: Reduce zoned block device memory usage Damien Le Moal
@ 2019-03-15  6:43   ` Chao Yu
  2019-03-15 17:59     ` Damien Le Moal
  2019-03-15 18:20     ` Damien Le Moal
  0 siblings, 2 replies; 7+ messages in thread
From: Chao Yu @ 2019-03-15  6:43 UTC (permalink / raw)
  To: Damien Le Moal, Jaegeuk Kim, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

On 2019/3/15 0:37, Damien Le Moal wrote:
> For zoned block devices, an array of zone types for each device is
> allocated and initialized in order to determine if a section is stored
> on a sequential zone (zone reset needed) or a conventional zone (no
> zone reset needed and regular discard applies). Considering this usage,
> the zone types stored in memory can be replaced with a bitmap to
> indicate an equivalent information, that is, if a zone is sequential or
> not. This reduces the memory usage for each zoned device by roughly 8:
> on a 14TB disk with zones of 256 MB, the zone type array consumes
> 13x4KB pages while the bitmap uses only 2x4KB pages.
> 
> This patch changes the f2fs_dev_info structure blkz_type field to the
> bitmap blkz_seq. Access to this bitmap is done using the helper
> function f2fs_blkz_is_seq(), which is a rewrite of the function
> get_blkz_type().
> 
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> ---
>  fs/f2fs/f2fs.h    | 14 +++++---------
>  fs/f2fs/segment.c | 36 ++++++++++++++++--------------------
>  fs/f2fs/super.c   | 13 ++++++++-----
>  3 files changed, 29 insertions(+), 34 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 073f450af346..1a4a07e3ce05 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1066,8 +1066,8 @@ struct f2fs_dev_info {
>  	block_t start_blk;
>  	block_t end_blk;
>  #ifdef CONFIG_BLK_DEV_ZONED
> -	unsigned int nr_blkz;			/* Total number of zones */
> -	u8 *blkz_type;				/* Array of zones type */
> +	unsigned int nr_blkz;		/* Total number of zones */
> +	unsigned long *blkz_seq;	/* Bitmap indicating sequential zones */
>  #endif
>  };
>  
> @@ -3513,16 +3513,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
>  F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
> -static inline int get_blkz_type(struct f2fs_sb_info *sbi,
> -			struct block_device *bdev, block_t blkaddr)
> +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
> +				    block_t blkaddr)
>  {
>  	unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
> -	int i;
>  
> -	for (i = 0; i < sbi->s_ndevs; i++)
> -		if (FDEV(i).bdev == bdev)
> -			return FDEV(i).blkz_type[zno];
> -	return -EINVAL;
> +	return test_bit(zno, FDEV(devi).blkz_seq);
>  }
>  #endif
>  
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index d8f531b33350..f40148b735d7 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1701,40 +1701,36 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
>  
>  	if (f2fs_is_multi_device(sbi)) {
>  		devi = f2fs_target_device_index(sbi, blkstart);
> +		if (blkstart < FDEV(devi).start_blk ||
> +		    blkstart > FDEV(devi).end_blk) {
> +			f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
> +				 blkstart);
> +			return -EIO;
> +		}
>  		blkstart -= FDEV(devi).start_blk;
>  	}
>  
> -	/*
> -	 * We need to know the type of the zone: for conventional zones,
> -	 * use regular discard if the drive supports it. For sequential
> -	 * zones, reset the zone write pointer.
> -	 */
> -	switch (get_blkz_type(sbi, bdev, blkstart)) {
> -
> -	case BLK_ZONE_TYPE_CONVENTIONAL:
> -		if (!blk_queue_discard(bdev_get_queue(bdev)))
> -			return 0;
> -		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
> -	case BLK_ZONE_TYPE_SEQWRITE_REQ:
> -	case BLK_ZONE_TYPE_SEQWRITE_PREF:
> +	/* For sequential zones, reset the zone write pointer */
> +	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
>  		sector = SECTOR_FROM_BLOCK(blkstart);
>  		nr_sects = SECTOR_FROM_BLOCK(blklen);
>  
>  		if (sector & (bdev_zone_sectors(bdev) - 1) ||
>  				nr_sects != bdev_zone_sectors(bdev)) {
> -			f2fs_msg(sbi->sb, KERN_INFO,
> -				"(%d) %s: Unaligned discard attempted (block %x + %x)",
> +			f2fs_msg(sbi->sb, KERN_ERR,
> +				"(%d) %s: Unaligned zone reset attempted (block %x + %x)",
>  				devi, sbi->s_ndevs ? FDEV(devi).path: "",
>  				blkstart, blklen);
>  			return -EIO;
>  		}
>  		trace_f2fs_issue_reset_zone(bdev, blkstart);
> -		return blkdev_reset_zones(bdev, sector,
> -					  nr_sects, GFP_NOFS);
> -	default:
> -		/* Unknown zone type: broken device ? */
> -		return -EIO;
> +		return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
>  	}
> +
> +	/* For conventional zones, use regular discard if supported */
> +	if (!blk_queue_discard(bdev_get_queue(bdev)))

if (!f2fs_hw_support_discard())

Otherwise, it looks good to me.

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

> +		return 0;
> +	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
>  }
>  #endif
>  
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index d1ccc52afc93..8d0caf4c5f2b 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1017,7 +1017,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
>  	for (i = 0; i < sbi->s_ndevs; i++) {
>  		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
>  #ifdef CONFIG_BLK_DEV_ZONED
> -		kvfree(FDEV(i).blkz_type);
> +		kvfree(FDEV(i).blkz_seq);
>  #endif
>  	}
>  	kvfree(sbi->devs);
> @@ -2765,9 +2765,11 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>  	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
>  		FDEV(devi).nr_blkz++;
>  
> -	FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
> -								GFP_KERNEL);
> -	if (!FDEV(devi).blkz_type)
> +	FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
> +					BITS_TO_LONGS(FDEV(devi).nr_blkz)
> +					* sizeof(unsigned long),
> +					GFP_KERNEL);
> +	if (!FDEV(devi).blkz_seq)
>  		return -ENOMEM;
>  
>  #define F2FS_REPORT_NR_ZONES   4096
> @@ -2794,7 +2796,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>  		}
>  
>  		for (i = 0; i < nr_zones; i++) {
> -			FDEV(devi).blkz_type[n] = zones[i].type;
> +			if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
> +				set_bit(n, FDEV(devi).blkz_seq);
>  			sector += zones[i].len;
>  			n++;
>  		}
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] f2fs: Reduce zoned block device memory usage
  2019-03-15  6:43   ` Chao Yu
@ 2019-03-15 17:59     ` Damien Le Moal
  2019-03-15 18:20     ` Damien Le Moal
  1 sibling, 0 replies; 7+ messages in thread
From: Damien Le Moal @ 2019-03-15 17:59 UTC (permalink / raw)
  To: Chao Yu, Jaegeuk Kim, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

On 2019/03/14 23:43, Chao Yu wrote:
> On 2019/3/15 0:37, Damien Le Moal wrote:
>> For zoned block devices, an array of zone types for each device is
>> allocated and initialized in order to determine if a section is stored
>> on a sequential zone (zone reset needed) or a conventional zone (no
>> zone reset needed and regular discard applies). Considering this usage,
>> the zone types stored in memory can be replaced with a bitmap to
>> indicate an equivalent information, that is, if a zone is sequential or
>> not. This reduces the memory usage for each zoned device by roughly 8:
>> on a 14TB disk with zones of 256 MB, the zone type array consumes
>> 13x4KB pages while the bitmap uses only 2x4KB pages.
>>
>> This patch changes the f2fs_dev_info structure blkz_type field to the
>> bitmap blkz_seq. Access to this bitmap is done using the helper
>> function f2fs_blkz_is_seq(), which is a rewrite of the function
>> get_blkz_type().
>>
>> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
>> ---
>>  fs/f2fs/f2fs.h    | 14 +++++---------
>>  fs/f2fs/segment.c | 36 ++++++++++++++++--------------------
>>  fs/f2fs/super.c   | 13 ++++++++-----
>>  3 files changed, 29 insertions(+), 34 deletions(-)
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 073f450af346..1a4a07e3ce05 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1066,8 +1066,8 @@ struct f2fs_dev_info {
>>  	block_t start_blk;
>>  	block_t end_blk;
>>  #ifdef CONFIG_BLK_DEV_ZONED
>> -	unsigned int nr_blkz;			/* Total number of zones */
>> -	u8 *blkz_type;				/* Array of zones type */
>> +	unsigned int nr_blkz;		/* Total number of zones */
>> +	unsigned long *blkz_seq;	/* Bitmap indicating sequential zones */
>>  #endif
>>  };
>>  
>> @@ -3513,16 +3513,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
>>  F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
>>  
>>  #ifdef CONFIG_BLK_DEV_ZONED
>> -static inline int get_blkz_type(struct f2fs_sb_info *sbi,
>> -			struct block_device *bdev, block_t blkaddr)
>> +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>> +				    block_t blkaddr)
>>  {
>>  	unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
>> -	int i;
>>  
>> -	for (i = 0; i < sbi->s_ndevs; i++)
>> -		if (FDEV(i).bdev == bdev)
>> -			return FDEV(i).blkz_type[zno];
>> -	return -EINVAL;
>> +	return test_bit(zno, FDEV(devi).blkz_seq);
>>  }
>>  #endif
>>  
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index d8f531b33350..f40148b735d7 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1701,40 +1701,36 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
>>  
>>  	if (f2fs_is_multi_device(sbi)) {
>>  		devi = f2fs_target_device_index(sbi, blkstart);
>> +		if (blkstart < FDEV(devi).start_blk ||
>> +		    blkstart > FDEV(devi).end_blk) {
>> +			f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
>> +				 blkstart);
>> +			return -EIO;
>> +		}
>>  		blkstart -= FDEV(devi).start_blk;
>>  	}
>>  
>> -	/*
>> -	 * We need to know the type of the zone: for conventional zones,
>> -	 * use regular discard if the drive supports it. For sequential
>> -	 * zones, reset the zone write pointer.
>> -	 */
>> -	switch (get_blkz_type(sbi, bdev, blkstart)) {
>> -
>> -	case BLK_ZONE_TYPE_CONVENTIONAL:
>> -		if (!blk_queue_discard(bdev_get_queue(bdev)))
>> -			return 0;
>> -		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
>> -	case BLK_ZONE_TYPE_SEQWRITE_REQ:
>> -	case BLK_ZONE_TYPE_SEQWRITE_PREF:
>> +	/* For sequential zones, reset the zone write pointer */
>> +	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
>>  		sector = SECTOR_FROM_BLOCK(blkstart);
>>  		nr_sects = SECTOR_FROM_BLOCK(blklen);
>>  
>>  		if (sector & (bdev_zone_sectors(bdev) - 1) ||
>>  				nr_sects != bdev_zone_sectors(bdev)) {
>> -			f2fs_msg(sbi->sb, KERN_INFO,
>> -				"(%d) %s: Unaligned discard attempted (block %x + %x)",
>> +			f2fs_msg(sbi->sb, KERN_ERR,
>> +				"(%d) %s: Unaligned zone reset attempted (block %x + %x)",
>>  				devi, sbi->s_ndevs ? FDEV(devi).path: "",
>>  				blkstart, blklen);
>>  			return -EIO;
>>  		}
>>  		trace_f2fs_issue_reset_zone(bdev, blkstart);
>> -		return blkdev_reset_zones(bdev, sector,
>> -					  nr_sects, GFP_NOFS);
>> -	default:
>> -		/* Unknown zone type: broken device ? */
>> -		return -EIO;
>> +		return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
>>  	}
>> +
>> +	/* For conventional zones, use regular discard if supported */
>> +	if (!blk_queue_discard(bdev_get_queue(bdev)))
> 
> if (!f2fs_hw_support_discard())

Are you sure about this one ? f2fs_hw_support_discard(sbi) will check discard
for the first disk only (sbi->sb->s_bdev), but __f2fs_issue_discard_zone() may
be called by __issue_discard_async() for another bdev in the case of a
multi-device mount.


> 
> Otherwise, it looks good to me.
> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> 
> Thanks,
> 
>> +		return 0;
>> +	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
>>  }
>>  #endif
>>  
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index d1ccc52afc93..8d0caf4c5f2b 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -1017,7 +1017,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
>>  	for (i = 0; i < sbi->s_ndevs; i++) {
>>  		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
>>  #ifdef CONFIG_BLK_DEV_ZONED
>> -		kvfree(FDEV(i).blkz_type);
>> +		kvfree(FDEV(i).blkz_seq);
>>  #endif
>>  	}
>>  	kvfree(sbi->devs);
>> @@ -2765,9 +2765,11 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>  	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
>>  		FDEV(devi).nr_blkz++;
>>  
>> -	FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
>> -								GFP_KERNEL);
>> -	if (!FDEV(devi).blkz_type)
>> +	FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
>> +					BITS_TO_LONGS(FDEV(devi).nr_blkz)
>> +					* sizeof(unsigned long),
>> +					GFP_KERNEL);
>> +	if (!FDEV(devi).blkz_seq)
>>  		return -ENOMEM;
>>  
>>  #define F2FS_REPORT_NR_ZONES   4096
>> @@ -2794,7 +2796,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>  		}
>>  
>>  		for (i = 0; i < nr_zones; i++) {
>> -			FDEV(devi).blkz_type[n] = zones[i].type;
>> +			if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
>> +				set_bit(n, FDEV(devi).blkz_seq);
>>  			sector += zones[i].len;
>>  			n++;
>>  		}
>>
> 
> 


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] f2fs: Reduce zoned block device memory usage
  2019-03-15  6:43   ` Chao Yu
  2019-03-15 17:59     ` Damien Le Moal
@ 2019-03-15 18:20     ` Damien Le Moal
  1 sibling, 0 replies; 7+ messages in thread
From: Damien Le Moal @ 2019-03-15 18:20 UTC (permalink / raw)
  To: Chao Yu, Jaegeuk Kim, linux-f2fs-devel
  Cc: linux-fsdevel, Matias Bjorling, Masato Suzuki

On 2019/03/14 23:43, Chao Yu wrote:
> On 2019/3/15 0:37, Damien Le Moal wrote:
>> For zoned block devices, an array of zone types for each device is
>> allocated and initialized in order to determine if a section is stored
>> on a sequential zone (zone reset needed) or a conventional zone (no
>> zone reset needed and regular discard applies). Considering this usage,
>> the zone types stored in memory can be replaced with a bitmap to
>> indicate an equivalent information, that is, if a zone is sequential or
>> not. This reduces the memory usage for each zoned device by roughly 8:
>> on a 14TB disk with zones of 256 MB, the zone type array consumes
>> 13x4KB pages while the bitmap uses only 2x4KB pages.
>>
>> This patch changes the f2fs_dev_info structure blkz_type field to the
>> bitmap blkz_seq. Access to this bitmap is done using the helper
>> function f2fs_blkz_is_seq(), which is a rewrite of the function
>> get_blkz_type().
>>
>> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
>> ---
>>  fs/f2fs/f2fs.h    | 14 +++++---------
>>  fs/f2fs/segment.c | 36 ++++++++++++++++--------------------
>>  fs/f2fs/super.c   | 13 ++++++++-----
>>  3 files changed, 29 insertions(+), 34 deletions(-)
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 073f450af346..1a4a07e3ce05 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1066,8 +1066,8 @@ struct f2fs_dev_info {
>>  	block_t start_blk;
>>  	block_t end_blk;
>>  #ifdef CONFIG_BLK_DEV_ZONED
>> -	unsigned int nr_blkz;			/* Total number of zones */
>> -	u8 *blkz_type;				/* Array of zones type */
>> +	unsigned int nr_blkz;		/* Total number of zones */
>> +	unsigned long *blkz_seq;	/* Bitmap indicating sequential zones */
>>  #endif
>>  };
>>  
>> @@ -3513,16 +3513,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
>>  F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
>>  
>>  #ifdef CONFIG_BLK_DEV_ZONED
>> -static inline int get_blkz_type(struct f2fs_sb_info *sbi,
>> -			struct block_device *bdev, block_t blkaddr)
>> +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>> +				    block_t blkaddr)
>>  {
>>  	unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
>> -	int i;
>>  
>> -	for (i = 0; i < sbi->s_ndevs; i++)
>> -		if (FDEV(i).bdev == bdev)
>> -			return FDEV(i).blkz_type[zno];
>> -	return -EINVAL;
>> +	return test_bit(zno, FDEV(devi).blkz_seq);
>>  }
>>  #endif
>>  
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index d8f531b33350..f40148b735d7 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1701,40 +1701,36 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
>>  
>>  	if (f2fs_is_multi_device(sbi)) {
>>  		devi = f2fs_target_device_index(sbi, blkstart);
>> +		if (blkstart < FDEV(devi).start_blk ||
>> +		    blkstart > FDEV(devi).end_blk) {
>> +			f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
>> +				 blkstart);
>> +			return -EIO;
>> +		}
>>  		blkstart -= FDEV(devi).start_blk;
>>  	}
>>  
>> -	/*
>> -	 * We need to know the type of the zone: for conventional zones,
>> -	 * use regular discard if the drive supports it. For sequential
>> -	 * zones, reset the zone write pointer.
>> -	 */
>> -	switch (get_blkz_type(sbi, bdev, blkstart)) {
>> -
>> -	case BLK_ZONE_TYPE_CONVENTIONAL:
>> -		if (!blk_queue_discard(bdev_get_queue(bdev)))
>> -			return 0;
>> -		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
>> -	case BLK_ZONE_TYPE_SEQWRITE_REQ:
>> -	case BLK_ZONE_TYPE_SEQWRITE_PREF:
>> +	/* For sequential zones, reset the zone write pointer */
>> +	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
>>  		sector = SECTOR_FROM_BLOCK(blkstart);
>>  		nr_sects = SECTOR_FROM_BLOCK(blklen);
>>  
>>  		if (sector & (bdev_zone_sectors(bdev) - 1) ||
>>  				nr_sects != bdev_zone_sectors(bdev)) {
>> -			f2fs_msg(sbi->sb, KERN_INFO,
>> -				"(%d) %s: Unaligned discard attempted (block %x + %x)",
>> +			f2fs_msg(sbi->sb, KERN_ERR,
>> +				"(%d) %s: Unaligned zone reset attempted (block %x + %x)",
>>  				devi, sbi->s_ndevs ? FDEV(devi).path: "",
>>  				blkstart, blklen);
>>  			return -EIO;
>>  		}
>>  		trace_f2fs_issue_reset_zone(bdev, blkstart);
>> -		return blkdev_reset_zones(bdev, sector,
>> -					  nr_sects, GFP_NOFS);
>> -	default:
>> -		/* Unknown zone type: broken device ? */
>> -		return -EIO;
>> +		return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
>>  	}
>> +
>> +	/* For conventional zones, use regular discard if supported */
>> +	if (!blk_queue_discard(bdev_get_queue(bdev)))
> 
> if (!f2fs_hw_support_discard())

Looking more into this, it turns out that f2fs_sb_has_blkzoned(),
f2fs_hw_should_discard() and f2fs_hw_support_discard() all behave the same and
look directly at the information attached to sbi, which excludes a per device
granularity for a multi-device mount. This is a problem for multi-device mounts
combining zoned and regular devices, which is possible and working now, but not
really handled safely I think. I.e. all segments on all disks should be the same
size, so all zoned devices should have the same zone size and that size used
also for the regular disks segments.

We may need additional patches to do all this safely. Combining regular and
zoned disks is a needed feature as that allows supporting zoned disks without
conventional zones: the fixed location metadata go onto a first regular device
(partition) of the mount and the sequential zone only disk can be added as a
second disk of the volume.

Thoughts ?

> 
> Otherwise, it looks good to me.
> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> 
> Thanks,
> 
>> +		return 0;
>> +	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
>>  }
>>  #endif
>>  
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index d1ccc52afc93..8d0caf4c5f2b 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -1017,7 +1017,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
>>  	for (i = 0; i < sbi->s_ndevs; i++) {
>>  		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
>>  #ifdef CONFIG_BLK_DEV_ZONED
>> -		kvfree(FDEV(i).blkz_type);
>> +		kvfree(FDEV(i).blkz_seq);
>>  #endif
>>  	}
>>  	kvfree(sbi->devs);
>> @@ -2765,9 +2765,11 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>  	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
>>  		FDEV(devi).nr_blkz++;
>>  
>> -	FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
>> -								GFP_KERNEL);
>> -	if (!FDEV(devi).blkz_type)
>> +	FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
>> +					BITS_TO_LONGS(FDEV(devi).nr_blkz)
>> +					* sizeof(unsigned long),
>> +					GFP_KERNEL);
>> +	if (!FDEV(devi).blkz_seq)
>>  		return -ENOMEM;
>>  
>>  #define F2FS_REPORT_NR_ZONES   4096
>> @@ -2794,7 +2796,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>  		}
>>  
>>  		for (i = 0; i < nr_zones; i++) {
>> -			FDEV(devi).blkz_type[n] = zones[i].type;
>> +			if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
>> +				set_bit(n, FDEV(devi).blkz_seq);
>>  			sector += zones[i].len;
>>  			n++;
>>  		}
>>
> 
> 


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-03-15 18:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-14 16:37 [PATCH 0/2] f2fs: bug fix and improvement Damien Le Moal
2019-03-14 16:37 ` [PATCH 1/2] f2fs: Fix use of number of devices Damien Le Moal
2019-03-15  6:33   ` Chao Yu
2019-03-14 16:37 ` [PATCH 2/2] f2fs: Reduce zoned block device memory usage Damien Le Moal
2019-03-15  6:43   ` Chao Yu
2019-03-15 17:59     ` Damien Le Moal
2019-03-15 18:20     ` Damien Le Moal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).