linux-f2fs-devel.lists.sourceforge.net archive mirror
 help / color / mirror / Atom feed
* [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
@ 2024-04-07 13:58 Chao Yu
  2024-04-10 18:26 ` Daeho Jeong
  2024-04-12 21:11 ` Jaegeuk Kim
  0 siblings, 2 replies; 10+ messages in thread
From: Chao Yu @ 2024-04-07 13:58 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

max open zone may be larger than log header number of f2fs, for
such case, it doesn't need to wait last IO in previous zone, let's
introduce available_open_zone semaphore, and reduce it once we
submit first write IO in a zone, and increase it after completion
of last IO in the zone.

Cc: Daeho Jeong <daeho43@gmail.com>
Signed-off-by: Chao Yu <chao@kernel.org>
---
v3:
- avoid race condition in between __submit_merged_bio()
and __allocate_new_segment().
 fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
 fs/f2fs/f2fs.h    |  34 ++++++++++++---
 fs/f2fs/iostat.c  |   7 ++++
 fs/f2fs/iostat.h  |   2 +
 fs/f2fs/segment.c |  43 ++++++++++++++++---
 fs/f2fs/segment.h |  12 +++++-
 fs/f2fs/super.c   |   2 +
 7 files changed, 156 insertions(+), 49 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0d88649c60a5..18a4ac0a06bc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
 #ifdef CONFIG_BLK_DEV_ZONED
 static void f2fs_zone_write_end_io(struct bio *bio)
 {
-	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
+	struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
 
-	bio->bi_private = io->bi_private;
-	complete(&io->zone_wait);
 	f2fs_write_end_io(bio);
+	up(&sbi->available_open_zones);
 }
 #endif
 
@@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
 	if (!io->bio)
 		return;
 
+#ifdef CONFIG_BLK_DEV_ZONED
+	if (io->open_zone) {
+		/*
+		 * if there is no open zone, it will wait for last IO in
+		 * previous zone before submitting new IO.
+		 */
+		down(&fio->sbi->available_open_zones);
+		io->open_zone = false;
+		io->zone_openned = true;
+	}
+
+	if (io->close_zone) {
+		io->bio->bi_end_io = f2fs_zone_write_end_io;
+		io->zone_openned = false;
+		io->close_zone = false;
+	}
+#endif
+
 	if (is_read_io(fio->op)) {
 		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
@@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
 			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
 			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
 #ifdef CONFIG_BLK_DEV_ZONED
-			init_completion(&sbi->write_io[i][j].zone_wait);
-			sbi->write_io[i][j].zone_pending_bio = NULL;
-			sbi->write_io[i][j].bi_private = NULL;
+			sbi->write_io[i][j].open_zone = false;
+			sbi->write_io[i][j].zone_openned = false;
+			sbi->write_io[i][j].close_zone = false;
 #endif
 		}
 	}
@@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 	f2fs_up_write(&io->io_rwsem);
 }
 
+void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+	struct f2fs_bio_info *io;
+
+	if (!f2fs_sb_has_blkzoned(sbi))
+		return;
+
+	io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
+
+	f2fs_down_write(&io->io_rwsem);
+	if (io->zone_openned) {
+		if (io->bio) {
+			io->close_zone = true;
+			__submit_merged_bio(io);
+		} else if (io->zone_openned) {
+			up(&sbi->available_open_zones);
+			io->zone_openned = false;
+		}
+	}
+	f2fs_up_write(&io->io_rwsem);
+#endif
+
+}
+
 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, struct page *page,
 				nid_t ino, enum page_type type, bool force)
@@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
+static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
+					block_t blkaddr, bool start)
 {
-	int devi = 0;
+	if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
+		return false;
+
+	if (start)
+		return (blkaddr % sbi->blocks_per_blkz) == 0;
+	return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
 
-	if (f2fs_is_multi_device(sbi)) {
-		devi = f2fs_target_device_index(sbi, blkaddr);
-		if (blkaddr < FDEV(devi).start_blk ||
-		    blkaddr > FDEV(devi).end_blk) {
-			f2fs_err(sbi, "Invalid block %x", blkaddr);
-			return false;
-		}
-		blkaddr -= FDEV(devi).start_blk;
-	}
-	return bdev_is_zoned(FDEV(devi).bdev) &&
-		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
-		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
 }
 #endif
 
@@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
 	struct page *bio_page;
 	enum count_type type;
+#ifdef CONFIG_BLK_DEV_ZONED
+	bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
+#endif
 
 	f2fs_bug_on(sbi, is_read_io(fio->op));
 
 	f2fs_down_write(&io->io_rwsem);
 next:
-#ifdef CONFIG_BLK_DEV_ZONED
-	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
-		wait_for_completion_io(&io->zone_wait);
-		bio_put(io->zone_pending_bio);
-		io->zone_pending_bio = NULL;
-		io->bi_private = NULL;
-	}
-#endif
-
 	if (fio->in_list) {
 		spin_lock(&io->io_lock);
 		if (list_empty(&io->io_list)) {
@@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
 	inc_page_count(sbi, type);
 
+#ifdef CONFIG_BLK_DEV_ZONED
+	if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
+		io->open_zone = true;
+#endif
+
 	if (io->bio &&
 	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
 			      fio->new_blkaddr) ||
@@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 	io->last_block_in_bio = fio->new_blkaddr;
 
 	trace_f2fs_submit_page_write(fio->page, fio);
+
 #ifdef CONFIG_BLK_DEV_ZONED
-	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
-			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
-		bio_get(io->bio);
-		reinit_completion(&io->zone_wait);
-		io->bi_private = io->bio->bi_private;
-		io->bio->bi_private = io;
-		io->bio->bi_end_io = f2fs_zone_write_end_io;
-		io->zone_pending_bio = io->bio;
+	if (blkzoned &&
+		is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
+		io->close_zone = true;
 		__submit_merged_bio(io);
 	}
 #endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 694f8a52cb84..8a26530cf4fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
 	struct bio *bio;		/* bios to merge */
 	sector_t last_block_in_bio;	/* last block number */
 	struct f2fs_io_info fio;	/* store buffered io info. */
-#ifdef CONFIG_BLK_DEV_ZONED
-	struct completion zone_wait;	/* condition value for the previous open zone to close */
-	struct bio *zone_pending_bio;	/* pending bio for the previous zone */
-	void *bi_private;		/* previous bi_private for pending bio */
-#endif
 	struct f2fs_rwsem io_rwsem;	/* blocking op for bio */
 	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
 	struct list_head io_list;	/* track fios */
 	struct list_head bio_list;	/* bio entry list head */
 	struct f2fs_rwsem bio_list_lock;	/* lock to protect bio entry list */
+#ifdef CONFIG_BLK_DEV_ZONED
+	bool open_zone;			/* open a zone */
+	bool zone_openned;		/* zone has been openned */
+	bool close_zone;		/* close a zone */
+#endif
 };
 
 #define FDEV(i)				(sbi->devs[i])
@@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
 	unsigned int max_open_zones;		/* max open zone resources of the zoned device */
+	struct semaphore available_open_zones;	/* available open zones */
 #endif
 
 	/* for node-related operations */
@@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
 void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
 			  enum page_type type);
 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
+void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, struct page *page,
@@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
 
 	return test_bit(zno, FDEV(devi).blkz_seq);
 }
+
+static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
+							block_t blkaddr)
+{
+	int devi = 0;
+
+	if (f2fs_is_multi_device(sbi)) {
+		devi = f2fs_target_device_index(sbi, blkaddr);
+		if (blkaddr < FDEV(devi).start_blk ||
+		    blkaddr > FDEV(devi).end_blk) {
+			f2fs_err(sbi, "Invalid block %x", blkaddr);
+			return false;
+		}
+		blkaddr -= FDEV(devi).start_blk;
+	}
+
+	if (!bdev_is_zoned(FDEV(devi).bdev) ||
+		!f2fs_blkz_is_seq(sbi, devi, blkaddr))
+		return false;
+
+	return true;
+}
 #endif
 
 static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index f8703038e1d8..a8626e297876 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
 	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
 }
 
+void *iostat_get_bio_private(struct bio *bio)
+{
+	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+	return iostat_ctx->sbi;
+}
+
 void iostat_update_and_unbind_ctx(struct bio *bio)
 {
 	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
index eb99d05cf272..9006c3d41590 100644
--- a/fs/f2fs/iostat.h
+++ b/fs/f2fs/iostat.h
@@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
 	return iostat_ctx->post_read_ctx;
 }
 
+extern void *iostat_get_bio_private(struct bio *bio);
 extern void iostat_update_and_unbind_ctx(struct bio *bio);
 extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
 		struct bio *bio, struct bio_post_read_ctx *ctx);
@@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
 #else
 static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
 		enum iostat_type type, unsigned long long io_bytes) {}
+static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
 static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
 static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
 		struct bio *bio, struct bio_post_read_ctx *ctx) {}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4fd76e867e0a..4a3cf2888faf 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
 		return err;
 	stat_inc_seg_type(sbi, curseg);
 	locate_dirty_segment(sbi, old_segno);
+
+	f2fs_blkzoned_submit_merged_write(sbi, type);
+
 	return 0;
 }
 
@@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 		f2fs_bug_on(fio->sbi, true);
 	}
 
-	if (IS_HOT(type))
-		fio->temp = HOT;
-	else if (IS_WARM(type))
-		fio->temp = WARM;
-	else
-		fio->temp = COLD;
+	fio->temp = type_to_temp(type);
 	return type;
 }
 
@@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_BLK_DEV_ZONED
+	if (f2fs_sb_has_blkzoned(sbi)) {
+		for (type = 0; type < NR_PERSISTENT_LOG; type++) {
+			struct curseg_info *curseg = CURSEG_I(sbi, type);
+			enum page_type ptype;
+			enum temp_type temp;
+
+			if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
+				continue;
+
+			if (!f2fs_blkaddr_in_seqzone(sbi,
+					START_BLOCK(sbi, curseg->segno)))
+				continue;
+
+			ptype = PAGE_TYPE(type);
+			temp = type_to_temp(type);
+			down(&sbi->available_open_zones);
+			sbi->write_io[ptype][temp].zone_openned = true;
+		}
+	}
+#endif
 	return 0;
 }
 
@@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
 		kfree(array[i].sum_blk);
 		kfree(array[i].journal);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+		if (f2fs_sb_has_blkzoned(sbi)) {
+			enum page_type ptype = PAGE_TYPE(i);
+			enum temp_type temp = type_to_temp(i);
+
+			if (sbi->write_io[ptype][temp].zone_openned) {
+				up(&sbi->available_open_zones);
+				sbi->write_io[ptype][temp].zone_openned = false;
+			}
+		}
+#endif
 	}
 	kfree(array);
 }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e1c0f418aa11..855978ca869f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -24,7 +24,8 @@
 
 #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
 #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
-#define SE_PAGETYPE(se)	((IS_NODESEG((se)->type) ? NODE : DATA))
+#define PAGE_TYPE(t)	(IS_NODESEG(t) ? NODE : DATA)
+#define SE_PAGETYPE(se)	(PAGE_TYPE((se)->type))
 
 static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
 						unsigned short seg_type)
@@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
 			return GET_SEGNO(sbi, FDEV(devi).start_blk);
 	return 0;
 }
+
+static inline enum temp_type type_to_temp(int type)
+{
+	if (IS_HOT(type))
+		return HOT;
+	else if (IS_WARM(type))
+		return WARM;
+	return COLD;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fdf358c7f808..954baa6c100d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 				sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
 			return -EINVAL;
 		}
+
+		sema_init(&sbi->available_open_zones, sbi->max_open_zones);
 	}
 
 	zone_sectors = bdev_zone_sectors(bdev);
-- 
2.40.1



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-07 13:58 [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone Chao Yu
@ 2024-04-10 18:26 ` Daeho Jeong
  2024-04-12 21:11 ` Jaegeuk Kim
  1 sibling, 0 replies; 10+ messages in thread
From: Daeho Jeong @ 2024-04-10 18:26 UTC (permalink / raw)
  To: Chao Yu; +Cc: jaegeuk, linux-kernel, linux-f2fs-devel

On Sun, Apr 7, 2024 at 6:58 AM Chao Yu <chao@kernel.org> wrote:
>
> max open zone may be larger than log header number of f2fs, for
> such case, it doesn't need to wait last IO in previous zone, let's
> introduce available_open_zone semaphore, and reduce it once we
> submit first write IO in a zone, and increase it after completion
> of last IO in the zone.
>
> Cc: Daeho Jeong <daeho43@gmail.com>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
> v3:
> - avoid race condition in between __submit_merged_bio()
> and __allocate_new_segment().
>  fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>  fs/f2fs/f2fs.h    |  34 ++++++++++++---
>  fs/f2fs/iostat.c  |   7 ++++
>  fs/f2fs/iostat.h  |   2 +
>  fs/f2fs/segment.c |  43 ++++++++++++++++---
>  fs/f2fs/segment.h |  12 +++++-
>  fs/f2fs/super.c   |   2 +
>  7 files changed, 156 insertions(+), 49 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 0d88649c60a5..18a4ac0a06bc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>  #ifdef CONFIG_BLK_DEV_ZONED
>  static void f2fs_zone_write_end_io(struct bio *bio)
>  {
> -       struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> +       struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>
> -       bio->bi_private = io->bi_private;
> -       complete(&io->zone_wait);
>         f2fs_write_end_io(bio);
> +       up(&sbi->available_open_zones);
>  }
>  #endif
>
> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>         if (!io->bio)
>                 return;
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       if (io->open_zone) {
> +               /*
> +                * if there is no open zone, it will wait for last IO in
> +                * previous zone before submitting new IO.
> +                */
> +               down(&fio->sbi->available_open_zones);
> +               io->open_zone = false;
> +               io->zone_openned = true;
> +       }
> +
> +       if (io->close_zone) {
> +               io->bio->bi_end_io = f2fs_zone_write_end_io;
> +               io->zone_openned = false;
> +               io->close_zone = false;
> +       }
> +#endif
> +
>         if (is_read_io(fio->op)) {
>                 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>                 f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>                         INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>                         init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>  #ifdef CONFIG_BLK_DEV_ZONED
> -                       init_completion(&sbi->write_io[i][j].zone_wait);
> -                       sbi->write_io[i][j].zone_pending_bio = NULL;
> -                       sbi->write_io[i][j].bi_private = NULL;
> +                       sbi->write_io[i][j].open_zone = false;
> +                       sbi->write_io[i][j].zone_openned = false;
> +                       sbi->write_io[i][j].close_zone = false;
>  #endif
>                 }
>         }
> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>         f2fs_up_write(&io->io_rwsem);
>  }
>
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> +{
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       struct f2fs_bio_info *io;
> +
> +       if (!f2fs_sb_has_blkzoned(sbi))
> +               return;
> +
> +       io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> +
> +       f2fs_down_write(&io->io_rwsem);
> +       if (io->zone_openned) {
> +               if (io->bio) {
> +                       io->close_zone = true;
> +                       __submit_merged_bio(io);
> +               } else if (io->zone_openned) {

We already checked if this value is true above.

Thanks,

> +                       up(&sbi->available_open_zones);
> +                       io->zone_openned = false;
> +               }
> +       }
> +       f2fs_up_write(&io->io_rwsem);
> +#endif
> +
> +}
> +
>  static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>                                 struct inode *inode, struct page *page,
>                                 nid_t ino, enum page_type type, bool force)
> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>  }
>
>  #ifdef CONFIG_BLK_DEV_ZONED
> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> +                                       block_t blkaddr, bool start)
>  {
> -       int devi = 0;
> +       if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> +               return false;
> +
> +       if (start)
> +               return (blkaddr % sbi->blocks_per_blkz) == 0;
> +       return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>
> -       if (f2fs_is_multi_device(sbi)) {
> -               devi = f2fs_target_device_index(sbi, blkaddr);
> -               if (blkaddr < FDEV(devi).start_blk ||
> -                   blkaddr > FDEV(devi).end_blk) {
> -                       f2fs_err(sbi, "Invalid block %x", blkaddr);
> -                       return false;
> -               }
> -               blkaddr -= FDEV(devi).start_blk;
> -       }
> -       return bdev_is_zoned(FDEV(devi).bdev) &&
> -               f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> -               (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>  }
>  #endif
>
> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>         struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>         struct page *bio_page;
>         enum count_type type;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> +#endif
>
>         f2fs_bug_on(sbi, is_read_io(fio->op));
>
>         f2fs_down_write(&io->io_rwsem);
>  next:
> -#ifdef CONFIG_BLK_DEV_ZONED
> -       if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> -               wait_for_completion_io(&io->zone_wait);
> -               bio_put(io->zone_pending_bio);
> -               io->zone_pending_bio = NULL;
> -               io->bi_private = NULL;
> -       }
> -#endif
> -
>         if (fio->in_list) {
>                 spin_lock(&io->io_lock);
>                 if (list_empty(&io->io_list)) {
> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>         type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>         inc_page_count(sbi, type);
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> +               io->open_zone = true;
> +#endif
> +
>         if (io->bio &&
>             (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>                               fio->new_blkaddr) ||
> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>         io->last_block_in_bio = fio->new_blkaddr;
>
>         trace_f2fs_submit_page_write(fio->page, fio);
> +
>  #ifdef CONFIG_BLK_DEV_ZONED
> -       if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> -                       is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> -               bio_get(io->bio);
> -               reinit_completion(&io->zone_wait);
> -               io->bi_private = io->bio->bi_private;
> -               io->bio->bi_private = io;
> -               io->bio->bi_end_io = f2fs_zone_write_end_io;
> -               io->zone_pending_bio = io->bio;
> +       if (blkzoned &&
> +               is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> +               io->close_zone = true;
>                 __submit_merged_bio(io);
>         }
>  #endif
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 694f8a52cb84..8a26530cf4fb 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>         struct bio *bio;                /* bios to merge */
>         sector_t last_block_in_bio;     /* last block number */
>         struct f2fs_io_info fio;        /* store buffered io info. */
> -#ifdef CONFIG_BLK_DEV_ZONED
> -       struct completion zone_wait;    /* condition value for the previous open zone to close */
> -       struct bio *zone_pending_bio;   /* pending bio for the previous zone */
> -       void *bi_private;               /* previous bi_private for pending bio */
> -#endif
>         struct f2fs_rwsem io_rwsem;     /* blocking op for bio */
>         spinlock_t io_lock;             /* serialize DATA/NODE IOs */
>         struct list_head io_list;       /* track fios */
>         struct list_head bio_list;      /* bio entry list head */
>         struct f2fs_rwsem bio_list_lock;        /* lock to protect bio entry list */
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       bool open_zone;                 /* open a zone */
> +       bool zone_openned;              /* zone has been openned */
> +       bool close_zone;                /* close a zone */
> +#endif
>  };
>
>  #define FDEV(i)                                (sbi->devs[i])
> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>  #ifdef CONFIG_BLK_DEV_ZONED
>         unsigned int blocks_per_blkz;           /* F2FS blocks per zone */
>         unsigned int max_open_zones;            /* max open zone resources of the zoned device */
> +       struct semaphore available_open_zones;  /* available open zones */
>  #endif
>
>         /* for node-related operations */
> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>  void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>                           enum page_type type);
>  int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>  void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>  void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>                                 struct inode *inode, struct page *page,
> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>
>         return test_bit(zno, FDEV(devi).blkz_seq);
>  }
> +
> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> +                                                       block_t blkaddr)
> +{
> +       int devi = 0;
> +
> +       if (f2fs_is_multi_device(sbi)) {
> +               devi = f2fs_target_device_index(sbi, blkaddr);
> +               if (blkaddr < FDEV(devi).start_blk ||
> +                   blkaddr > FDEV(devi).end_blk) {
> +                       f2fs_err(sbi, "Invalid block %x", blkaddr);
> +                       return false;
> +               }
> +               blkaddr -= FDEV(devi).start_blk;
> +       }
> +
> +       if (!bdev_is_zoned(FDEV(devi).bdev) ||
> +               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> +               return false;
> +
> +       return true;
> +}
>  #endif
>
>  static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index f8703038e1d8..a8626e297876 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>         spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>  }
>
> +void *iostat_get_bio_private(struct bio *bio)
> +{
> +       struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> +       return iostat_ctx->sbi;
> +}
> +
>  void iostat_update_and_unbind_ctx(struct bio *bio)
>  {
>         struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> index eb99d05cf272..9006c3d41590 100644
> --- a/fs/f2fs/iostat.h
> +++ b/fs/f2fs/iostat.h
> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>         return iostat_ctx->post_read_ctx;
>  }
>
> +extern void *iostat_get_bio_private(struct bio *bio);
>  extern void iostat_update_and_unbind_ctx(struct bio *bio);
>  extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>                 struct bio *bio, struct bio_post_read_ctx *ctx);
> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>  #else
>  static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>                 enum iostat_type type, unsigned long long io_bytes) {}
> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>  static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>  static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>                 struct bio *bio, struct bio_post_read_ctx *ctx) {}
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4fd76e867e0a..4a3cf2888faf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>                 return err;
>         stat_inc_seg_type(sbi, curseg);
>         locate_dirty_segment(sbi, old_segno);
> +
> +       f2fs_blkzoned_submit_merged_write(sbi, type);
> +
>         return 0;
>  }
>
> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>                 f2fs_bug_on(fio->sbi, true);
>         }
>
> -       if (IS_HOT(type))
> -               fio->temp = HOT;
> -       else if (IS_WARM(type))
> -               fio->temp = WARM;
> -       else
> -               fio->temp = COLD;
> +       fio->temp = type_to_temp(type);
>         return type;
>  }
>
> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>                 return -EINVAL;
>         }
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> +       if (f2fs_sb_has_blkzoned(sbi)) {
> +               for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> +                       struct curseg_info *curseg = CURSEG_I(sbi, type);
> +                       enum page_type ptype;
> +                       enum temp_type temp;
> +
> +                       if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> +                               continue;
> +
> +                       if (!f2fs_blkaddr_in_seqzone(sbi,
> +                                       START_BLOCK(sbi, curseg->segno)))
> +                               continue;
> +
> +                       ptype = PAGE_TYPE(type);
> +                       temp = type_to_temp(type);
> +                       down(&sbi->available_open_zones);
> +                       sbi->write_io[ptype][temp].zone_openned = true;
> +               }
> +       }
> +#endif
>         return 0;
>  }
>
> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>         for (i = 0; i < NR_CURSEG_TYPE; i++) {
>                 kfree(array[i].sum_blk);
>                 kfree(array[i].journal);
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> +               if (f2fs_sb_has_blkzoned(sbi)) {
> +                       enum page_type ptype = PAGE_TYPE(i);
> +                       enum temp_type temp = type_to_temp(i);
> +
> +                       if (sbi->write_io[ptype][temp].zone_openned) {
> +                               up(&sbi->available_open_zones);
> +                               sbi->write_io[ptype][temp].zone_openned = false;
> +                       }
> +               }
> +#endif
>         }
>         kfree(array);
>  }
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e1c0f418aa11..855978ca869f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -24,7 +24,8 @@
>
>  #define IS_DATASEG(t)  ((t) <= CURSEG_COLD_DATA)
>  #define IS_NODESEG(t)  ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> -#define SE_PAGETYPE(se)        ((IS_NODESEG((se)->type) ? NODE : DATA))
> +#define PAGE_TYPE(t)   (IS_NODESEG(t) ? NODE : DATA)
> +#define SE_PAGETYPE(se)        (PAGE_TYPE((se)->type))
>
>  static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>                                                 unsigned short seg_type)
> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>                         return GET_SEGNO(sbi, FDEV(devi).start_blk);
>         return 0;
>  }
> +
> +static inline enum temp_type type_to_temp(int type)
> +{
> +       if (IS_HOT(type))
> +               return HOT;
> +       else if (IS_WARM(type))
> +               return WARM;
> +       return COLD;
> +}
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fdf358c7f808..954baa6c100d 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>                                 sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>                         return -EINVAL;
>                 }
> +
> +               sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>         }
>
>         zone_sectors = bdev_zone_sectors(bdev);
> --
> 2.40.1
>


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-07 13:58 [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone Chao Yu
  2024-04-10 18:26 ` Daeho Jeong
@ 2024-04-12 21:11 ` Jaegeuk Kim
  2024-04-13  1:33   ` Chao Yu
  1 sibling, 1 reply; 10+ messages in thread
From: Jaegeuk Kim @ 2024-04-12 21:11 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 04/07, Chao Yu wrote:
> max open zone may be larger than log header number of f2fs, for
> such case, it doesn't need to wait last IO in previous zone, let's
> introduce available_open_zone semaphore, and reduce it once we
> submit first write IO in a zone, and increase it after completion
> of last IO in the zone.
> 
> Cc: Daeho Jeong <daeho43@gmail.com>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
> v3:
> - avoid race condition in between __submit_merged_bio()
> and __allocate_new_segment().
>  fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>  fs/f2fs/f2fs.h    |  34 ++++++++++++---
>  fs/f2fs/iostat.c  |   7 ++++
>  fs/f2fs/iostat.h  |   2 +
>  fs/f2fs/segment.c |  43 ++++++++++++++++---
>  fs/f2fs/segment.h |  12 +++++-
>  fs/f2fs/super.c   |   2 +
>  7 files changed, 156 insertions(+), 49 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 0d88649c60a5..18a4ac0a06bc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>  #ifdef CONFIG_BLK_DEV_ZONED
>  static void f2fs_zone_write_end_io(struct bio *bio)
>  {
> -	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> +	struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>  
> -	bio->bi_private = io->bi_private;
> -	complete(&io->zone_wait);
>  	f2fs_write_end_io(bio);
> +	up(&sbi->available_open_zones);
>  }
>  #endif
>  
> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>  	if (!io->bio)
>  		return;
>  
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	if (io->open_zone) {
> +		/*
> +		 * if there is no open zone, it will wait for last IO in
> +		 * previous zone before submitting new IO.
> +		 */
> +		down(&fio->sbi->available_open_zones);
> +		io->open_zone = false;
> +		io->zone_openned = true;
> +	}
> +
> +	if (io->close_zone) {
> +		io->bio->bi_end_io = f2fs_zone_write_end_io;
> +		io->zone_openned = false;
> +		io->close_zone = false;
> +	}
> +#endif
> +
>  	if (is_read_io(fio->op)) {
>  		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>  		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>  			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>  			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>  #ifdef CONFIG_BLK_DEV_ZONED
> -			init_completion(&sbi->write_io[i][j].zone_wait);
> -			sbi->write_io[i][j].zone_pending_bio = NULL;
> -			sbi->write_io[i][j].bi_private = NULL;
> +			sbi->write_io[i][j].open_zone = false;
> +			sbi->write_io[i][j].zone_openned = false;
> +			sbi->write_io[i][j].close_zone = false;
>  #endif
>  		}
>  	}
> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>  	f2fs_up_write(&io->io_rwsem);
>  }
>  
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> +{
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	struct f2fs_bio_info *io;
> +
> +	if (!f2fs_sb_has_blkzoned(sbi))
> +		return;
> +
> +	io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> +
> +	f2fs_down_write(&io->io_rwsem);
> +	if (io->zone_openned) {
> +		if (io->bio) {
> +			io->close_zone = true;
> +			__submit_merged_bio(io);
> +		} else if (io->zone_openned) {
> +			up(&sbi->available_open_zones);
> +			io->zone_openned = false;
> +		}
> +	}
> +	f2fs_up_write(&io->io_rwsem);
> +#endif
> +
> +}
> +
>  static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>  				struct inode *inode, struct page *page,
>  				nid_t ino, enum page_type type, bool force)
> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>  }
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> +					block_t blkaddr, bool start)
>  {
> -	int devi = 0;
> +	if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> +		return false;
> +
> +	if (start)
> +		return (blkaddr % sbi->blocks_per_blkz) == 0;
> +	return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>  
> -	if (f2fs_is_multi_device(sbi)) {
> -		devi = f2fs_target_device_index(sbi, blkaddr);
> -		if (blkaddr < FDEV(devi).start_blk ||
> -		    blkaddr > FDEV(devi).end_blk) {
> -			f2fs_err(sbi, "Invalid block %x", blkaddr);
> -			return false;
> -		}
> -		blkaddr -= FDEV(devi).start_blk;
> -	}
> -	return bdev_is_zoned(FDEV(devi).bdev) &&
> -		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> -		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>  }
>  #endif
>  
> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>  	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>  	struct page *bio_page;
>  	enum count_type type;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> +#endif
>  
>  	f2fs_bug_on(sbi, is_read_io(fio->op));
>  
>  	f2fs_down_write(&io->io_rwsem);
>  next:
> -#ifdef CONFIG_BLK_DEV_ZONED
> -	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> -		wait_for_completion_io(&io->zone_wait);
> -		bio_put(io->zone_pending_bio);
> -		io->zone_pending_bio = NULL;
> -		io->bi_private = NULL;
> -	}
> -#endif
> -
>  	if (fio->in_list) {
>  		spin_lock(&io->io_lock);
>  		if (list_empty(&io->io_list)) {
> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>  	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>  	inc_page_count(sbi, type);
>  
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> +		io->open_zone = true;
> +#endif
> +
>  	if (io->bio &&
>  	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>  			      fio->new_blkaddr) ||
> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>  	io->last_block_in_bio = fio->new_blkaddr;
>  
>  	trace_f2fs_submit_page_write(fio->page, fio);
> +
>  #ifdef CONFIG_BLK_DEV_ZONED
> -	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> -			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> -		bio_get(io->bio);
> -		reinit_completion(&io->zone_wait);
> -		io->bi_private = io->bio->bi_private;
> -		io->bio->bi_private = io;
> -		io->bio->bi_end_io = f2fs_zone_write_end_io;
> -		io->zone_pending_bio = io->bio;
> +	if (blkzoned &&
> +		is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> +		io->close_zone = true;
>  		__submit_merged_bio(io);
>  	}
>  #endif
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 694f8a52cb84..8a26530cf4fb 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>  	struct bio *bio;		/* bios to merge */
>  	sector_t last_block_in_bio;	/* last block number */
>  	struct f2fs_io_info fio;	/* store buffered io info. */
> -#ifdef CONFIG_BLK_DEV_ZONED
> -	struct completion zone_wait;	/* condition value for the previous open zone to close */
> -	struct bio *zone_pending_bio;	/* pending bio for the previous zone */
> -	void *bi_private;		/* previous bi_private for pending bio */
> -#endif
>  	struct f2fs_rwsem io_rwsem;	/* blocking op for bio */
>  	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
>  	struct list_head io_list;	/* track fios */
>  	struct list_head bio_list;	/* bio entry list head */
>  	struct f2fs_rwsem bio_list_lock;	/* lock to protect bio entry list */
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	bool open_zone;			/* open a zone */
> +	bool zone_openned;		/* zone has been openned */
> +	bool close_zone;		/* close a zone */
> +#endif
>  };
>  
>  #define FDEV(i)				(sbi->devs[i])
> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>  	unsigned int max_open_zones;		/* max open zone resources of the zoned device */
> +	struct semaphore available_open_zones;	/* available open zones */
>  #endif
>  
>  	/* for node-related operations */
> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>  void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>  			  enum page_type type);
>  int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>  void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>  void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>  				struct inode *inode, struct page *page,
> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>  
>  	return test_bit(zno, FDEV(devi).blkz_seq);
>  }
> +
> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> +							block_t blkaddr)
> +{
> +	int devi = 0;
> +
> +	if (f2fs_is_multi_device(sbi)) {
> +		devi = f2fs_target_device_index(sbi, blkaddr);
> +		if (blkaddr < FDEV(devi).start_blk ||
> +		    blkaddr > FDEV(devi).end_blk) {
> +			f2fs_err(sbi, "Invalid block %x", blkaddr);
> +			return false;
> +		}
> +		blkaddr -= FDEV(devi).start_blk;
> +	}
> +
> +	if (!bdev_is_zoned(FDEV(devi).bdev) ||
> +		!f2fs_blkz_is_seq(sbi, devi, blkaddr))
> +		return false;
> +
> +	return true;

Applied as below.

@@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
                blkaddr -= FDEV(devi).start_blk;
        }

-       if (!bdev_is_zoned(FDEV(devi).bdev) ||
-               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
-               return false;
-
-       return true;
+       return bdev_is_zoned(FDEV(devi).bdev) &&
+               f2fs_blkz_is_seq(sbi, devi, blkaddr);
 }
 #endif

> +}
>  #endif
>  
>  static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index f8703038e1d8..a8626e297876 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>  	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>  }
>  
> +void *iostat_get_bio_private(struct bio *bio)
> +{
> +	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> +	return iostat_ctx->sbi;
> +}
> +
>  void iostat_update_and_unbind_ctx(struct bio *bio)
>  {
>  	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> index eb99d05cf272..9006c3d41590 100644
> --- a/fs/f2fs/iostat.h
> +++ b/fs/f2fs/iostat.h
> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>  	return iostat_ctx->post_read_ctx;
>  }
>  
> +extern void *iostat_get_bio_private(struct bio *bio);
>  extern void iostat_update_and_unbind_ctx(struct bio *bio);
>  extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>  		struct bio *bio, struct bio_post_read_ctx *ctx);
> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>  #else
>  static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>  		enum iostat_type type, unsigned long long io_bytes) {}
> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>  static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>  static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>  		struct bio *bio, struct bio_post_read_ctx *ctx) {}
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4fd76e867e0a..4a3cf2888faf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>  		return err;
>  	stat_inc_seg_type(sbi, curseg);
>  	locate_dirty_segment(sbi, old_segno);
> +
> +	f2fs_blkzoned_submit_merged_write(sbi, type);
> +
>  	return 0;
>  }
>  
> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>  		f2fs_bug_on(fio->sbi, true);
>  	}
>  
> -	if (IS_HOT(type))
> -		fio->temp = HOT;
> -	else if (IS_WARM(type))
> -		fio->temp = WARM;
> -	else
> -		fio->temp = COLD;
> +	fio->temp = type_to_temp(type);
>  	return type;
>  }
>  
> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>  		return -EINVAL;
>  	}
>  
> +#ifdef CONFIG_BLK_DEV_ZONED
> +	if (f2fs_sb_has_blkzoned(sbi)) {
> +		for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> +			struct curseg_info *curseg = CURSEG_I(sbi, type);
> +			enum page_type ptype;
> +			enum temp_type temp;
> +
> +			if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> +				continue;
> +
> +			if (!f2fs_blkaddr_in_seqzone(sbi,
> +					START_BLOCK(sbi, curseg->segno)))
> +				continue;
> +
> +			ptype = PAGE_TYPE(type);
> +			temp = type_to_temp(type);
> +			down(&sbi->available_open_zones);
> +			sbi->write_io[ptype][temp].zone_openned = true;
> +		}
> +	}
> +#endif
>  	return 0;
>  }
>  
> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>  	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>  		kfree(array[i].sum_blk);
>  		kfree(array[i].journal);
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> +		if (f2fs_sb_has_blkzoned(sbi)) {
> +			enum page_type ptype = PAGE_TYPE(i);
> +			enum temp_type temp = type_to_temp(i);
> +
> +			if (sbi->write_io[ptype][temp].zone_openned) {
> +				up(&sbi->available_open_zones);
> +				sbi->write_io[ptype][temp].zone_openned = false;
> +			}
> +		}
> +#endif
>  	}
>  	kfree(array);
>  }
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e1c0f418aa11..855978ca869f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -24,7 +24,8 @@
>  
>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>  #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> -#define SE_PAGETYPE(se)	((IS_NODESEG((se)->type) ? NODE : DATA))
> +#define PAGE_TYPE(t)	(IS_NODESEG(t) ? NODE : DATA)
> +#define SE_PAGETYPE(se)	(PAGE_TYPE((se)->type))
>  
>  static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>  						unsigned short seg_type)
> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>  			return GET_SEGNO(sbi, FDEV(devi).start_blk);
>  	return 0;
>  }
> +
> +static inline enum temp_type type_to_temp(int type)
> +{
> +	if (IS_HOT(type))
> +		return HOT;
> +	else if (IS_WARM(type))
> +		return WARM;
> +	return COLD;
> +}
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fdf358c7f808..954baa6c100d 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>  				sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>  			return -EINVAL;
>  		}
> +
> +		sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>  	}
>  
>  	zone_sectors = bdev_zone_sectors(bdev);
> -- 
> 2.40.1


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-12 21:11 ` Jaegeuk Kim
@ 2024-04-13  1:33   ` Chao Yu
  2024-04-14 15:19     ` Jaegeuk Kim
  0 siblings, 1 reply; 10+ messages in thread
From: Chao Yu @ 2024-04-13  1:33 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2024/4/13 5:11, Jaegeuk Kim wrote:
> On 04/07, Chao Yu wrote:
>> max open zone may be larger than log header number of f2fs, for
>> such case, it doesn't need to wait last IO in previous zone, let's
>> introduce available_open_zone semaphore, and reduce it once we
>> submit first write IO in a zone, and increase it after completion
>> of last IO in the zone.
>>
>> Cc: Daeho Jeong <daeho43@gmail.com>
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> ---
>> v3:
>> - avoid race condition in between __submit_merged_bio()
>> and __allocate_new_segment().
>>   fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>   fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>   fs/f2fs/iostat.c  |   7 ++++
>>   fs/f2fs/iostat.h  |   2 +
>>   fs/f2fs/segment.c |  43 ++++++++++++++++---
>>   fs/f2fs/segment.h |  12 +++++-
>>   fs/f2fs/super.c   |   2 +
>>   7 files changed, 156 insertions(+), 49 deletions(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 0d88649c60a5..18a4ac0a06bc 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>   #ifdef CONFIG_BLK_DEV_ZONED
>>   static void f2fs_zone_write_end_io(struct bio *bio)
>>   {
>> -	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>> +	struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>   
>> -	bio->bi_private = io->bi_private;
>> -	complete(&io->zone_wait);
>>   	f2fs_write_end_io(bio);
>> +	up(&sbi->available_open_zones);
>>   }
>>   #endif
>>   
>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>   	if (!io->bio)
>>   		return;
>>   
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	if (io->open_zone) {
>> +		/*
>> +		 * if there is no open zone, it will wait for last IO in
>> +		 * previous zone before submitting new IO.
>> +		 */
>> +		down(&fio->sbi->available_open_zones);
>> +		io->open_zone = false;
>> +		io->zone_openned = true;
>> +	}
>> +
>> +	if (io->close_zone) {
>> +		io->bio->bi_end_io = f2fs_zone_write_end_io;
>> +		io->zone_openned = false;
>> +		io->close_zone = false;
>> +	}
>> +#endif
>> +
>>   	if (is_read_io(fio->op)) {
>>   		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>   		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>   			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>   			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>   #ifdef CONFIG_BLK_DEV_ZONED
>> -			init_completion(&sbi->write_io[i][j].zone_wait);
>> -			sbi->write_io[i][j].zone_pending_bio = NULL;
>> -			sbi->write_io[i][j].bi_private = NULL;
>> +			sbi->write_io[i][j].open_zone = false;
>> +			sbi->write_io[i][j].zone_openned = false;
>> +			sbi->write_io[i][j].close_zone = false;
>>   #endif
>>   		}
>>   	}
>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>   	f2fs_up_write(&io->io_rwsem);
>>   }
>>   
>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>> +{
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	struct f2fs_bio_info *io;
>> +
>> +	if (!f2fs_sb_has_blkzoned(sbi))
>> +		return;
>> +
>> +	io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>> +
>> +	f2fs_down_write(&io->io_rwsem);
>> +	if (io->zone_openned) {
>> +		if (io->bio) {
>> +			io->close_zone = true;
>> +			__submit_merged_bio(io);
>> +		} else if (io->zone_openned) {
>> +			up(&sbi->available_open_zones);
>> +			io->zone_openned = false;
>> +		}
>> +	}
>> +	f2fs_up_write(&io->io_rwsem);
>> +#endif
>> +
>> +}
>> +
>>   static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>   				struct inode *inode, struct page *page,
>>   				nid_t ino, enum page_type type, bool force)
>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>   }
>>   
>>   #ifdef CONFIG_BLK_DEV_ZONED
>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>> +					block_t blkaddr, bool start)
>>   {
>> -	int devi = 0;
>> +	if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>> +		return false;
>> +
>> +	if (start)
>> +		return (blkaddr % sbi->blocks_per_blkz) == 0;
>> +	return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>   
>> -	if (f2fs_is_multi_device(sbi)) {
>> -		devi = f2fs_target_device_index(sbi, blkaddr);
>> -		if (blkaddr < FDEV(devi).start_blk ||
>> -		    blkaddr > FDEV(devi).end_blk) {
>> -			f2fs_err(sbi, "Invalid block %x", blkaddr);
>> -			return false;
>> -		}
>> -		blkaddr -= FDEV(devi).start_blk;
>> -	}
>> -	return bdev_is_zoned(FDEV(devi).bdev) &&
>> -		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>> -		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>   }
>>   #endif
>>   
>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>   	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>   	struct page *bio_page;
>>   	enum count_type type;
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>> +#endif
>>   
>>   	f2fs_bug_on(sbi, is_read_io(fio->op));
>>   
>>   	f2fs_down_write(&io->io_rwsem);
>>   next:
>> -#ifdef CONFIG_BLK_DEV_ZONED
>> -	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>> -		wait_for_completion_io(&io->zone_wait);
>> -		bio_put(io->zone_pending_bio);
>> -		io->zone_pending_bio = NULL;
>> -		io->bi_private = NULL;
>> -	}
>> -#endif
>> -
>>   	if (fio->in_list) {
>>   		spin_lock(&io->io_lock);
>>   		if (list_empty(&io->io_list)) {
>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>   	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>   	inc_page_count(sbi, type);
>>   
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>> +		io->open_zone = true;
>> +#endif
>> +
>>   	if (io->bio &&
>>   	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>   			      fio->new_blkaddr) ||
>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>   	io->last_block_in_bio = fio->new_blkaddr;
>>   
>>   	trace_f2fs_submit_page_write(fio->page, fio);
>> +
>>   #ifdef CONFIG_BLK_DEV_ZONED
>> -	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>> -			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>> -		bio_get(io->bio);
>> -		reinit_completion(&io->zone_wait);
>> -		io->bi_private = io->bio->bi_private;
>> -		io->bio->bi_private = io;
>> -		io->bio->bi_end_io = f2fs_zone_write_end_io;
>> -		io->zone_pending_bio = io->bio;
>> +	if (blkzoned &&
>> +		is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>> +		io->close_zone = true;
>>   		__submit_merged_bio(io);
>>   	}
>>   #endif
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 694f8a52cb84..8a26530cf4fb 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>   	struct bio *bio;		/* bios to merge */
>>   	sector_t last_block_in_bio;	/* last block number */
>>   	struct f2fs_io_info fio;	/* store buffered io info. */
>> -#ifdef CONFIG_BLK_DEV_ZONED
>> -	struct completion zone_wait;	/* condition value for the previous open zone to close */
>> -	struct bio *zone_pending_bio;	/* pending bio for the previous zone */
>> -	void *bi_private;		/* previous bi_private for pending bio */
>> -#endif
>>   	struct f2fs_rwsem io_rwsem;	/* blocking op for bio */
>>   	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
>>   	struct list_head io_list;	/* track fios */
>>   	struct list_head bio_list;	/* bio entry list head */
>>   	struct f2fs_rwsem bio_list_lock;	/* lock to protect bio entry list */
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	bool open_zone;			/* open a zone */
>> +	bool zone_openned;		/* zone has been openned */
>> +	bool close_zone;		/* close a zone */
>> +#endif
>>   };
>>   
>>   #define FDEV(i)				(sbi->devs[i])
>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>   #ifdef CONFIG_BLK_DEV_ZONED
>>   	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>   	unsigned int max_open_zones;		/* max open zone resources of the zoned device */
>> +	struct semaphore available_open_zones;	/* available open zones */
>>   #endif
>>   
>>   	/* for node-related operations */
>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>   void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>   			  enum page_type type);
>>   int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>   void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>   void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>   				struct inode *inode, struct page *page,
>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>   
>>   	return test_bit(zno, FDEV(devi).blkz_seq);
>>   }
>> +
>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>> +							block_t blkaddr)
>> +{
>> +	int devi = 0;
>> +
>> +	if (f2fs_is_multi_device(sbi)) {
>> +		devi = f2fs_target_device_index(sbi, blkaddr);
>> +		if (blkaddr < FDEV(devi).start_blk ||
>> +		    blkaddr > FDEV(devi).end_blk) {
>> +			f2fs_err(sbi, "Invalid block %x", blkaddr);
>> +			return false;
>> +		}
>> +		blkaddr -= FDEV(devi).start_blk;
>> +	}
>> +
>> +	if (!bdev_is_zoned(FDEV(devi).bdev) ||
>> +		!f2fs_blkz_is_seq(sbi, devi, blkaddr))
>> +		return false;
>> +
>> +	return true;
> 
> Applied as below.
> 
> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>                  blkaddr -= FDEV(devi).start_blk;
>          }
> 
> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> -               return false;
> -
> -       return true;
> +       return bdev_is_zoned(FDEV(devi).bdev) &&
> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);

Looks good, thank you for cleanup.

Thanks,

>   }
>   #endif
> 
>> +}
>>   #endif
>>   
>>   static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>> index f8703038e1d8..a8626e297876 100644
>> --- a/fs/f2fs/iostat.c
>> +++ b/fs/f2fs/iostat.c
>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>   	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>   }
>>   
>> +void *iostat_get_bio_private(struct bio *bio)
>> +{
>> +	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>> +
>> +	return iostat_ctx->sbi;
>> +}
>> +
>>   void iostat_update_and_unbind_ctx(struct bio *bio)
>>   {
>>   	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>> index eb99d05cf272..9006c3d41590 100644
>> --- a/fs/f2fs/iostat.h
>> +++ b/fs/f2fs/iostat.h
>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>   	return iostat_ctx->post_read_ctx;
>>   }
>>   
>> +extern void *iostat_get_bio_private(struct bio *bio);
>>   extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>   extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>   		struct bio *bio, struct bio_post_read_ctx *ctx);
>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>   #else
>>   static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>   		enum iostat_type type, unsigned long long io_bytes) {}
>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>   static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>   static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>   		struct bio *bio, struct bio_post_read_ctx *ctx) {}
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 4fd76e867e0a..4a3cf2888faf 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>   		return err;
>>   	stat_inc_seg_type(sbi, curseg);
>>   	locate_dirty_segment(sbi, old_segno);
>> +
>> +	f2fs_blkzoned_submit_merged_write(sbi, type);
>> +
>>   	return 0;
>>   }
>>   
>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>   		f2fs_bug_on(fio->sbi, true);
>>   	}
>>   
>> -	if (IS_HOT(type))
>> -		fio->temp = HOT;
>> -	else if (IS_WARM(type))
>> -		fio->temp = WARM;
>> -	else
>> -		fio->temp = COLD;
>> +	fio->temp = type_to_temp(type);
>>   	return type;
>>   }
>>   
>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>   		return -EINVAL;
>>   	}
>>   
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	if (f2fs_sb_has_blkzoned(sbi)) {
>> +		for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>> +			struct curseg_info *curseg = CURSEG_I(sbi, type);
>> +			enum page_type ptype;
>> +			enum temp_type temp;
>> +
>> +			if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>> +				continue;
>> +
>> +			if (!f2fs_blkaddr_in_seqzone(sbi,
>> +					START_BLOCK(sbi, curseg->segno)))
>> +				continue;
>> +
>> +			ptype = PAGE_TYPE(type);
>> +			temp = type_to_temp(type);
>> +			down(&sbi->available_open_zones);
>> +			sbi->write_io[ptype][temp].zone_openned = true;
>> +		}
>> +	}
>> +#endif
>>   	return 0;
>>   }
>>   
>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>   	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>   		kfree(array[i].sum_blk);
>>   		kfree(array[i].journal);
>> +
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +		if (f2fs_sb_has_blkzoned(sbi)) {
>> +			enum page_type ptype = PAGE_TYPE(i);
>> +			enum temp_type temp = type_to_temp(i);
>> +
>> +			if (sbi->write_io[ptype][temp].zone_openned) {
>> +				up(&sbi->available_open_zones);
>> +				sbi->write_io[ptype][temp].zone_openned = false;
>> +			}
>> +		}
>> +#endif
>>   	}
>>   	kfree(array);
>>   }
>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>> index e1c0f418aa11..855978ca869f 100644
>> --- a/fs/f2fs/segment.h
>> +++ b/fs/f2fs/segment.h
>> @@ -24,7 +24,8 @@
>>   
>>   #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>   #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>> -#define SE_PAGETYPE(se)	((IS_NODESEG((se)->type) ? NODE : DATA))
>> +#define PAGE_TYPE(t)	(IS_NODESEG(t) ? NODE : DATA)
>> +#define SE_PAGETYPE(se)	(PAGE_TYPE((se)->type))
>>   
>>   static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>   						unsigned short seg_type)
>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>   			return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>   	return 0;
>>   }
>> +
>> +static inline enum temp_type type_to_temp(int type)
>> +{
>> +	if (IS_HOT(type))
>> +		return HOT;
>> +	else if (IS_WARM(type))
>> +		return WARM;
>> +	return COLD;
>> +}
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index fdf358c7f808..954baa6c100d 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>   				sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>   			return -EINVAL;
>>   		}
>> +
>> +		sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>   	}
>>   
>>   	zone_sectors = bdev_zone_sectors(bdev);
>> -- 
>> 2.40.1


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-13  1:33   ` Chao Yu
@ 2024-04-14 15:19     ` Jaegeuk Kim
  2024-04-15  3:26       ` Chao Yu
  0 siblings, 1 reply; 10+ messages in thread
From: Jaegeuk Kim @ 2024-04-14 15:19 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

It seems this caused kernel hang. Chao, have you tested this patch enough?

On 04/13, Chao Yu wrote:
> On 2024/4/13 5:11, Jaegeuk Kim wrote:
> > On 04/07, Chao Yu wrote:
> > > max open zone may be larger than log header number of f2fs, for
> > > such case, it doesn't need to wait last IO in previous zone, let's
> > > introduce available_open_zone semaphore, and reduce it once we
> > > submit first write IO in a zone, and increase it after completion
> > > of last IO in the zone.
> > > 
> > > Cc: Daeho Jeong <daeho43@gmail.com>
> > > Signed-off-by: Chao Yu <chao@kernel.org>
> > > ---
> > > v3:
> > > - avoid race condition in between __submit_merged_bio()
> > > and __allocate_new_segment().
> > >   fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
> > >   fs/f2fs/f2fs.h    |  34 ++++++++++++---
> > >   fs/f2fs/iostat.c  |   7 ++++
> > >   fs/f2fs/iostat.h  |   2 +
> > >   fs/f2fs/segment.c |  43 ++++++++++++++++---
> > >   fs/f2fs/segment.h |  12 +++++-
> > >   fs/f2fs/super.c   |   2 +
> > >   7 files changed, 156 insertions(+), 49 deletions(-)
> > > 
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 0d88649c60a5..18a4ac0a06bc 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
> > >   #ifdef CONFIG_BLK_DEV_ZONED
> > >   static void f2fs_zone_write_end_io(struct bio *bio)
> > >   {
> > > -	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> > > +	struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
> > > -	bio->bi_private = io->bi_private;
> > > -	complete(&io->zone_wait);
> > >   	f2fs_write_end_io(bio);
> > > +	up(&sbi->available_open_zones);
> > >   }
> > >   #endif
> > > @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
> > >   	if (!io->bio)
> > >   		return;
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +	if (io->open_zone) {
> > > +		/*
> > > +		 * if there is no open zone, it will wait for last IO in
> > > +		 * previous zone before submitting new IO.
> > > +		 */
> > > +		down(&fio->sbi->available_open_zones);
> > > +		io->open_zone = false;
> > > +		io->zone_openned = true;
> > > +	}
> > > +
> > > +	if (io->close_zone) {
> > > +		io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > +		io->zone_openned = false;
> > > +		io->close_zone = false;
> > > +	}
> > > +#endif
> > > +
> > >   	if (is_read_io(fio->op)) {
> > >   		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
> > >   		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> > > @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
> > >   			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
> > >   			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
> > >   #ifdef CONFIG_BLK_DEV_ZONED
> > > -			init_completion(&sbi->write_io[i][j].zone_wait);
> > > -			sbi->write_io[i][j].zone_pending_bio = NULL;
> > > -			sbi->write_io[i][j].bi_private = NULL;
> > > +			sbi->write_io[i][j].open_zone = false;
> > > +			sbi->write_io[i][j].zone_openned = false;
> > > +			sbi->write_io[i][j].close_zone = false;
> > >   #endif
> > >   		}
> > >   	}
> > > @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
> > >   	f2fs_up_write(&io->io_rwsem);
> > >   }
> > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> > > +{
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +	struct f2fs_bio_info *io;
> > > +
> > > +	if (!f2fs_sb_has_blkzoned(sbi))
> > > +		return;
> > > +
> > > +	io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> > > +
> > > +	f2fs_down_write(&io->io_rwsem);
> > > +	if (io->zone_openned) {
> > > +		if (io->bio) {
> > > +			io->close_zone = true;
> > > +			__submit_merged_bio(io);
> > > +		} else if (io->zone_openned) {
> > > +			up(&sbi->available_open_zones);
> > > +			io->zone_openned = false;
> > > +		}
> > > +	}
> > > +	f2fs_up_write(&io->io_rwsem);
> > > +#endif
> > > +
> > > +}
> > > +
> > >   static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > >   				struct inode *inode, struct page *page,
> > >   				nid_t ino, enum page_type type, bool force)
> > > @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> > >   }
> > >   #ifdef CONFIG_BLK_DEV_ZONED
> > > -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> > > +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> > > +					block_t blkaddr, bool start)
> > >   {
> > > -	int devi = 0;
> > > +	if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> > > +		return false;
> > > +
> > > +	if (start)
> > > +		return (blkaddr % sbi->blocks_per_blkz) == 0;
> > > +	return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > -	if (f2fs_is_multi_device(sbi)) {
> > > -		devi = f2fs_target_device_index(sbi, blkaddr);
> > > -		if (blkaddr < FDEV(devi).start_blk ||
> > > -		    blkaddr > FDEV(devi).end_blk) {
> > > -			f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > -			return false;
> > > -		}
> > > -		blkaddr -= FDEV(devi).start_blk;
> > > -	}
> > > -	return bdev_is_zoned(FDEV(devi).bdev) &&
> > > -		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> > > -		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > >   }
> > >   #endif
> > > @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > >   	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> > >   	struct page *bio_page;
> > >   	enum count_type type;
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +	bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> > > +#endif
> > >   	f2fs_bug_on(sbi, is_read_io(fio->op));
> > >   	f2fs_down_write(&io->io_rwsem);
> > >   next:
> > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > -	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> > > -		wait_for_completion_io(&io->zone_wait);
> > > -		bio_put(io->zone_pending_bio);
> > > -		io->zone_pending_bio = NULL;
> > > -		io->bi_private = NULL;
> > > -	}
> > > -#endif
> > > -
> > >   	if (fio->in_list) {
> > >   		spin_lock(&io->io_lock);
> > >   		if (list_empty(&io->io_list)) {
> > > @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > >   	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> > >   	inc_page_count(sbi, type);
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +	if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> > > +		io->open_zone = true;
> > > +#endif
> > > +
> > >   	if (io->bio &&
> > >   	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> > >   			      fio->new_blkaddr) ||
> > > @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > >   	io->last_block_in_bio = fio->new_blkaddr;
> > >   	trace_f2fs_submit_page_write(fio->page, fio);
> > > +
> > >   #ifdef CONFIG_BLK_DEV_ZONED
> > > -	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> > > -			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> > > -		bio_get(io->bio);
> > > -		reinit_completion(&io->zone_wait);
> > > -		io->bi_private = io->bio->bi_private;
> > > -		io->bio->bi_private = io;
> > > -		io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > -		io->zone_pending_bio = io->bio;
> > > +	if (blkzoned &&
> > > +		is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> > > +		io->close_zone = true;
> > >   		__submit_merged_bio(io);
> > >   	}
> > >   #endif
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index 694f8a52cb84..8a26530cf4fb 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
> > >   	struct bio *bio;		/* bios to merge */
> > >   	sector_t last_block_in_bio;	/* last block number */
> > >   	struct f2fs_io_info fio;	/* store buffered io info. */
> > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > -	struct completion zone_wait;	/* condition value for the previous open zone to close */
> > > -	struct bio *zone_pending_bio;	/* pending bio for the previous zone */
> > > -	void *bi_private;		/* previous bi_private for pending bio */
> > > -#endif
> > >   	struct f2fs_rwsem io_rwsem;	/* blocking op for bio */
> > >   	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
> > >   	struct list_head io_list;	/* track fios */
> > >   	struct list_head bio_list;	/* bio entry list head */
> > >   	struct f2fs_rwsem bio_list_lock;	/* lock to protect bio entry list */
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +	bool open_zone;			/* open a zone */
> > > +	bool zone_openned;		/* zone has been openned */
> > > +	bool close_zone;		/* close a zone */
> > > +#endif
> > >   };
> > >   #define FDEV(i)				(sbi->devs[i])
> > > @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
> > >   #ifdef CONFIG_BLK_DEV_ZONED
> > >   	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
> > >   	unsigned int max_open_zones;		/* max open zone resources of the zoned device */
> > > +	struct semaphore available_open_zones;	/* available open zones */
> > >   #endif
> > >   	/* for node-related operations */
> > > @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
> > >   void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> > >   			  enum page_type type);
> > >   int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
> > >   void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
> > >   void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > >   				struct inode *inode, struct page *page,
> > > @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
> > >   	return test_bit(zno, FDEV(devi).blkz_seq);
> > >   }
> > > +
> > > +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > > +							block_t blkaddr)
> > > +{
> > > +	int devi = 0;
> > > +
> > > +	if (f2fs_is_multi_device(sbi)) {
> > > +		devi = f2fs_target_device_index(sbi, blkaddr);
> > > +		if (blkaddr < FDEV(devi).start_blk ||
> > > +		    blkaddr > FDEV(devi).end_blk) {
> > > +			f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > +			return false;
> > > +		}
> > > +		blkaddr -= FDEV(devi).start_blk;
> > > +	}
> > > +
> > > +	if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > > +		!f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > > +		return false;
> > > +
> > > +	return true;
> > 
> > Applied as below.
> > 
> > @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> >                  blkaddr -= FDEV(devi).start_blk;
> >          }
> > 
> > -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > -               return false;
> > -
> > -       return true;
> > +       return bdev_is_zoned(FDEV(devi).bdev) &&
> > +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
> 
> Looks good, thank you for cleanup.
> 
> Thanks,
> 
> >   }
> >   #endif
> > 
> > > +}
> > >   #endif
> > >   static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> > > diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> > > index f8703038e1d8..a8626e297876 100644
> > > --- a/fs/f2fs/iostat.c
> > > +++ b/fs/f2fs/iostat.c
> > > @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> > >   	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> > >   }
> > > +void *iostat_get_bio_private(struct bio *bio)
> > > +{
> > > +	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > +
> > > +	return iostat_ctx->sbi;
> > > +}
> > > +
> > >   void iostat_update_and_unbind_ctx(struct bio *bio)
> > >   {
> > >   	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> > > index eb99d05cf272..9006c3d41590 100644
> > > --- a/fs/f2fs/iostat.h
> > > +++ b/fs/f2fs/iostat.h
> > > @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> > >   	return iostat_ctx->post_read_ctx;
> > >   }
> > > +extern void *iostat_get_bio_private(struct bio *bio);
> > >   extern void iostat_update_and_unbind_ctx(struct bio *bio);
> > >   extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > >   		struct bio *bio, struct bio_post_read_ctx *ctx);
> > > @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
> > >   #else
> > >   static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> > >   		enum iostat_type type, unsigned long long io_bytes) {}
> > > +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
> > >   static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> > >   static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > >   		struct bio *bio, struct bio_post_read_ctx *ctx) {}
> > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > index 4fd76e867e0a..4a3cf2888faf 100644
> > > --- a/fs/f2fs/segment.c
> > > +++ b/fs/f2fs/segment.c
> > > @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> > >   		return err;
> > >   	stat_inc_seg_type(sbi, curseg);
> > >   	locate_dirty_segment(sbi, old_segno);
> > > +
> > > +	f2fs_blkzoned_submit_merged_write(sbi, type);
> > > +
> > >   	return 0;
> > >   }
> > > @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
> > >   		f2fs_bug_on(fio->sbi, true);
> > >   	}
> > > -	if (IS_HOT(type))
> > > -		fio->temp = HOT;
> > > -	else if (IS_WARM(type))
> > > -		fio->temp = WARM;
> > > -	else
> > > -		fio->temp = COLD;
> > > +	fio->temp = type_to_temp(type);
> > >   	return type;
> > >   }
> > > @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> > >   		return -EINVAL;
> > >   	}
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +	if (f2fs_sb_has_blkzoned(sbi)) {
> > > +		for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> > > +			struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > +			enum page_type ptype;
> > > +			enum temp_type temp;
> > > +
> > > +			if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> > > +				continue;
> > > +
> > > +			if (!f2fs_blkaddr_in_seqzone(sbi,
> > > +					START_BLOCK(sbi, curseg->segno)))
> > > +				continue;
> > > +
> > > +			ptype = PAGE_TYPE(type);
> > > +			temp = type_to_temp(type);
> > > +			down(&sbi->available_open_zones);
> > > +			sbi->write_io[ptype][temp].zone_openned = true;
> > > +		}
> > > +	}
> > > +#endif
> > >   	return 0;
> > >   }
> > > @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
> > >   	for (i = 0; i < NR_CURSEG_TYPE; i++) {
> > >   		kfree(array[i].sum_blk);
> > >   		kfree(array[i].journal);
> > > +
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > +		if (f2fs_sb_has_blkzoned(sbi)) {
> > > +			enum page_type ptype = PAGE_TYPE(i);
> > > +			enum temp_type temp = type_to_temp(i);
> > > +
> > > +			if (sbi->write_io[ptype][temp].zone_openned) {
> > > +				up(&sbi->available_open_zones);
> > > +				sbi->write_io[ptype][temp].zone_openned = false;
> > > +			}
> > > +		}
> > > +#endif
> > >   	}
> > >   	kfree(array);
> > >   }
> > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > index e1c0f418aa11..855978ca869f 100644
> > > --- a/fs/f2fs/segment.h
> > > +++ b/fs/f2fs/segment.h
> > > @@ -24,7 +24,8 @@
> > >   #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
> > >   #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> > > -#define SE_PAGETYPE(se)	((IS_NODESEG((se)->type) ? NODE : DATA))
> > > +#define PAGE_TYPE(t)	(IS_NODESEG(t) ? NODE : DATA)
> > > +#define SE_PAGETYPE(se)	(PAGE_TYPE((se)->type))
> > >   static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
> > >   						unsigned short seg_type)
> > > @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> > >   			return GET_SEGNO(sbi, FDEV(devi).start_blk);
> > >   	return 0;
> > >   }
> > > +
> > > +static inline enum temp_type type_to_temp(int type)
> > > +{
> > > +	if (IS_HOT(type))
> > > +		return HOT;
> > > +	else if (IS_WARM(type))
> > > +		return WARM;
> > > +	return COLD;
> > > +}
> > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > index fdf358c7f808..954baa6c100d 100644
> > > --- a/fs/f2fs/super.c
> > > +++ b/fs/f2fs/super.c
> > > @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> > >   				sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
> > >   			return -EINVAL;
> > >   		}
> > > +
> > > +		sema_init(&sbi->available_open_zones, sbi->max_open_zones);
> > >   	}
> > >   	zone_sectors = bdev_zone_sectors(bdev);
> > > -- 
> > > 2.40.1


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-14 15:19     ` Jaegeuk Kim
@ 2024-04-15  3:26       ` Chao Yu
  2024-04-15 14:01         ` Chao Yu
  0 siblings, 1 reply; 10+ messages in thread
From: Chao Yu @ 2024-04-15  3:26 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2024/4/14 23:19, Jaegeuk Kim wrote:
> It seems this caused kernel hang. Chao, have you tested this patch enough?

Jaegeuk,

Oh, I've checked this patch w/ fsstress before submitting it, but missed
the SPO testcase... do you encounter kernel hang w/ SPO testcase?

Anyway, let me test it more.

Thanks,

> 
> On 04/13, Chao Yu wrote:
>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>> On 04/07, Chao Yu wrote:
>>>> max open zone may be larger than log header number of f2fs, for
>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>> introduce available_open_zone semaphore, and reduce it once we
>>>> submit first write IO in a zone, and increase it after completion
>>>> of last IO in the zone.
>>>>
>>>> Cc: Daeho Jeong <daeho43@gmail.com>
>>>> Signed-off-by: Chao Yu <chao@kernel.org>
>>>> ---
>>>> v3:
>>>> - avoid race condition in between __submit_merged_bio()
>>>> and __allocate_new_segment().
>>>>    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>    fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>    fs/f2fs/iostat.c  |   7 ++++
>>>>    fs/f2fs/iostat.h  |   2 +
>>>>    fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>    fs/f2fs/segment.h |  12 +++++-
>>>>    fs/f2fs/super.c   |   2 +
>>>>    7 files changed, 156 insertions(+), 49 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>> --- a/fs/f2fs/data.c
>>>> +++ b/fs/f2fs/data.c
>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>    static void f2fs_zone_write_end_io(struct bio *bio)
>>>>    {
>>>> -	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>> +	struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>> -	bio->bi_private = io->bi_private;
>>>> -	complete(&io->zone_wait);
>>>>    	f2fs_write_end_io(bio);
>>>> +	up(&sbi->available_open_zones);
>>>>    }
>>>>    #endif
>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>    	if (!io->bio)
>>>>    		return;
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +	if (io->open_zone) {
>>>> +		/*
>>>> +		 * if there is no open zone, it will wait for last IO in
>>>> +		 * previous zone before submitting new IO.
>>>> +		 */
>>>> +		down(&fio->sbi->available_open_zones);
>>>> +		io->open_zone = false;
>>>> +		io->zone_openned = true;
>>>> +	}
>>>> +
>>>> +	if (io->close_zone) {
>>>> +		io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>> +		io->zone_openned = false;
>>>> +		io->close_zone = false;
>>>> +	}
>>>> +#endif
>>>> +
>>>>    	if (is_read_io(fio->op)) {
>>>>    		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>    		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>    			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>    			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>> -			init_completion(&sbi->write_io[i][j].zone_wait);
>>>> -			sbi->write_io[i][j].zone_pending_bio = NULL;
>>>> -			sbi->write_io[i][j].bi_private = NULL;
>>>> +			sbi->write_io[i][j].open_zone = false;
>>>> +			sbi->write_io[i][j].zone_openned = false;
>>>> +			sbi->write_io[i][j].close_zone = false;
>>>>    #endif
>>>>    		}
>>>>    	}
>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>    	f2fs_up_write(&io->io_rwsem);
>>>>    }
>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>> +{
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +	struct f2fs_bio_info *io;
>>>> +
>>>> +	if (!f2fs_sb_has_blkzoned(sbi))
>>>> +		return;
>>>> +
>>>> +	io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>> +
>>>> +	f2fs_down_write(&io->io_rwsem);
>>>> +	if (io->zone_openned) {
>>>> +		if (io->bio) {
>>>> +			io->close_zone = true;
>>>> +			__submit_merged_bio(io);
>>>> +		} else if (io->zone_openned) {
>>>> +			up(&sbi->available_open_zones);
>>>> +			io->zone_openned = false;
>>>> +		}
>>>> +	}
>>>> +	f2fs_up_write(&io->io_rwsem);
>>>> +#endif
>>>> +
>>>> +}
>>>> +
>>>>    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>    				struct inode *inode, struct page *page,
>>>>    				nid_t ino, enum page_type type, bool force)
>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>    }
>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>> +					block_t blkaddr, bool start)
>>>>    {
>>>> -	int devi = 0;
>>>> +	if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>> +		return false;
>>>> +
>>>> +	if (start)
>>>> +		return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>> +	return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>> -	if (f2fs_is_multi_device(sbi)) {
>>>> -		devi = f2fs_target_device_index(sbi, blkaddr);
>>>> -		if (blkaddr < FDEV(devi).start_blk ||
>>>> -		    blkaddr > FDEV(devi).end_blk) {
>>>> -			f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>> -			return false;
>>>> -		}
>>>> -		blkaddr -= FDEV(devi).start_blk;
>>>> -	}
>>>> -	return bdev_is_zoned(FDEV(devi).bdev) &&
>>>> -		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>> -		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>    }
>>>>    #endif
>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>    	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>    	struct page *bio_page;
>>>>    	enum count_type type;
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +	bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>> +#endif
>>>>    	f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>    	f2fs_down_write(&io->io_rwsem);
>>>>    next:
>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>> -	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>> -		wait_for_completion_io(&io->zone_wait);
>>>> -		bio_put(io->zone_pending_bio);
>>>> -		io->zone_pending_bio = NULL;
>>>> -		io->bi_private = NULL;
>>>> -	}
>>>> -#endif
>>>> -
>>>>    	if (fio->in_list) {
>>>>    		spin_lock(&io->io_lock);
>>>>    		if (list_empty(&io->io_list)) {
>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>    	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>    	inc_page_count(sbi, type);
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +	if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>> +		io->open_zone = true;
>>>> +#endif
>>>> +
>>>>    	if (io->bio &&
>>>>    	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>    			      fio->new_blkaddr) ||
>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>    	io->last_block_in_bio = fio->new_blkaddr;
>>>>    	trace_f2fs_submit_page_write(fio->page, fio);
>>>> +
>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>> -	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>> -			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>> -		bio_get(io->bio);
>>>> -		reinit_completion(&io->zone_wait);
>>>> -		io->bi_private = io->bio->bi_private;
>>>> -		io->bio->bi_private = io;
>>>> -		io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>> -		io->zone_pending_bio = io->bio;
>>>> +	if (blkzoned &&
>>>> +		is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>> +		io->close_zone = true;
>>>>    		__submit_merged_bio(io);
>>>>    	}
>>>>    #endif
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>    	struct bio *bio;		/* bios to merge */
>>>>    	sector_t last_block_in_bio;	/* last block number */
>>>>    	struct f2fs_io_info fio;	/* store buffered io info. */
>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>> -	struct completion zone_wait;	/* condition value for the previous open zone to close */
>>>> -	struct bio *zone_pending_bio;	/* pending bio for the previous zone */
>>>> -	void *bi_private;		/* previous bi_private for pending bio */
>>>> -#endif
>>>>    	struct f2fs_rwsem io_rwsem;	/* blocking op for bio */
>>>>    	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
>>>>    	struct list_head io_list;	/* track fios */
>>>>    	struct list_head bio_list;	/* bio entry list head */
>>>>    	struct f2fs_rwsem bio_list_lock;	/* lock to protect bio entry list */
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +	bool open_zone;			/* open a zone */
>>>> +	bool zone_openned;		/* zone has been openned */
>>>> +	bool close_zone;		/* close a zone */
>>>> +#endif
>>>>    };
>>>>    #define FDEV(i)				(sbi->devs[i])
>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>    	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
>>>>    	unsigned int max_open_zones;		/* max open zone resources of the zoned device */
>>>> +	struct semaphore available_open_zones;	/* available open zones */
>>>>    #endif
>>>>    	/* for node-related operations */
>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>    			  enum page_type type);
>>>>    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>    				struct inode *inode, struct page *page,
>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>    	return test_bit(zno, FDEV(devi).blkz_seq);
>>>>    }
>>>> +
>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>> +							block_t blkaddr)
>>>> +{
>>>> +	int devi = 0;
>>>> +
>>>> +	if (f2fs_is_multi_device(sbi)) {
>>>> +		devi = f2fs_target_device_index(sbi, blkaddr);
>>>> +		if (blkaddr < FDEV(devi).start_blk ||
>>>> +		    blkaddr > FDEV(devi).end_blk) {
>>>> +			f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>> +			return false;
>>>> +		}
>>>> +		blkaddr -= FDEV(devi).start_blk;
>>>> +	}
>>>> +
>>>> +	if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>> +		!f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>> +		return false;
>>>> +
>>>> +	return true;
>>>
>>> Applied as below.
>>>
>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>                   blkaddr -= FDEV(devi).start_blk;
>>>           }
>>>
>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>> -               return false;
>>> -
>>> -       return true;
>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>
>> Looks good, thank you for cleanup.
>>
>> Thanks,
>>
>>>    }
>>>    #endif
>>>
>>>> +}
>>>>    #endif
>>>>    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>> index f8703038e1d8..a8626e297876 100644
>>>> --- a/fs/f2fs/iostat.c
>>>> +++ b/fs/f2fs/iostat.c
>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>    	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>    }
>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>> +{
>>>> +	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>> +
>>>> +	return iostat_ctx->sbi;
>>>> +}
>>>> +
>>>>    void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>    {
>>>>    	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>> index eb99d05cf272..9006c3d41590 100644
>>>> --- a/fs/f2fs/iostat.h
>>>> +++ b/fs/f2fs/iostat.h
>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>    	return iostat_ctx->post_read_ctx;
>>>>    }
>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>    extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>    		struct bio *bio, struct bio_post_read_ctx *ctx);
>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>    #else
>>>>    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>    		enum iostat_type type, unsigned long long io_bytes) {}
>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>    		struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>    		return err;
>>>>    	stat_inc_seg_type(sbi, curseg);
>>>>    	locate_dirty_segment(sbi, old_segno);
>>>> +
>>>> +	f2fs_blkzoned_submit_merged_write(sbi, type);
>>>> +
>>>>    	return 0;
>>>>    }
>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>    		f2fs_bug_on(fio->sbi, true);
>>>>    	}
>>>> -	if (IS_HOT(type))
>>>> -		fio->temp = HOT;
>>>> -	else if (IS_WARM(type))
>>>> -		fio->temp = WARM;
>>>> -	else
>>>> -		fio->temp = COLD;
>>>> +	fio->temp = type_to_temp(type);
>>>>    	return type;
>>>>    }
>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>    		return -EINVAL;
>>>>    	}
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +	if (f2fs_sb_has_blkzoned(sbi)) {
>>>> +		for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>> +			struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> +			enum page_type ptype;
>>>> +			enum temp_type temp;
>>>> +
>>>> +			if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>> +				continue;
>>>> +
>>>> +			if (!f2fs_blkaddr_in_seqzone(sbi,
>>>> +					START_BLOCK(sbi, curseg->segno)))
>>>> +				continue;
>>>> +
>>>> +			ptype = PAGE_TYPE(type);
>>>> +			temp = type_to_temp(type);
>>>> +			down(&sbi->available_open_zones);
>>>> +			sbi->write_io[ptype][temp].zone_openned = true;
>>>> +		}
>>>> +	}
>>>> +#endif
>>>>    	return 0;
>>>>    }
>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>    	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>    		kfree(array[i].sum_blk);
>>>>    		kfree(array[i].journal);
>>>> +
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +		if (f2fs_sb_has_blkzoned(sbi)) {
>>>> +			enum page_type ptype = PAGE_TYPE(i);
>>>> +			enum temp_type temp = type_to_temp(i);
>>>> +
>>>> +			if (sbi->write_io[ptype][temp].zone_openned) {
>>>> +				up(&sbi->available_open_zones);
>>>> +				sbi->write_io[ptype][temp].zone_openned = false;
>>>> +			}
>>>> +		}
>>>> +#endif
>>>>    	}
>>>>    	kfree(array);
>>>>    }
>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>> index e1c0f418aa11..855978ca869f 100644
>>>> --- a/fs/f2fs/segment.h
>>>> +++ b/fs/f2fs/segment.h
>>>> @@ -24,7 +24,8 @@
>>>>    #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>>>    #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>> -#define SE_PAGETYPE(se)	((IS_NODESEG((se)->type) ? NODE : DATA))
>>>> +#define PAGE_TYPE(t)	(IS_NODESEG(t) ? NODE : DATA)
>>>> +#define SE_PAGETYPE(se)	(PAGE_TYPE((se)->type))
>>>>    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>    						unsigned short seg_type)
>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>    			return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>    	return 0;
>>>>    }
>>>> +
>>>> +static inline enum temp_type type_to_temp(int type)
>>>> +{
>>>> +	if (IS_HOT(type))
>>>> +		return HOT;
>>>> +	else if (IS_WARM(type))
>>>> +		return WARM;
>>>> +	return COLD;
>>>> +}
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index fdf358c7f808..954baa6c100d 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>    				sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>    			return -EINVAL;
>>>>    		}
>>>> +
>>>> +		sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>    	}
>>>>    	zone_sectors = bdev_zone_sectors(bdev);
>>>> -- 
>>>> 2.40.1


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-15  3:26       ` Chao Yu
@ 2024-04-15 14:01         ` Chao Yu
  2024-04-16  0:59           ` Chao Yu
  0 siblings, 1 reply; 10+ messages in thread
From: Chao Yu @ 2024-04-15 14:01 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2024/4/15 11:26, Chao Yu wrote:
> On 2024/4/14 23:19, Jaegeuk Kim wrote:
>> It seems this caused kernel hang. Chao, have you tested this patch enough?
> 
> Jaegeuk,
> 
> Oh, I've checked this patch w/ fsstress before submitting it, but missed
> the SPO testcase... do you encounter kernel hang w/ SPO testcase?

I did see any hang issue w/ por_fsstress testcase, which testcase do you use?

Thanks,

> 
> Anyway, let me test it more.
> 
> Thanks,
> 
>>
>> On 04/13, Chao Yu wrote:
>>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>>> On 04/07, Chao Yu wrote:
>>>>> max open zone may be larger than log header number of f2fs, for
>>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>>> introduce available_open_zone semaphore, and reduce it once we
>>>>> submit first write IO in a zone, and increase it after completion
>>>>> of last IO in the zone.
>>>>>
>>>>> Cc: Daeho Jeong <daeho43@gmail.com>
>>>>> Signed-off-by: Chao Yu <chao@kernel.org>
>>>>> ---
>>>>> v3:
>>>>> - avoid race condition in between __submit_merged_bio()
>>>>> and __allocate_new_segment().
>>>>>    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>>    fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>>    fs/f2fs/iostat.c  |   7 ++++
>>>>>    fs/f2fs/iostat.h  |   2 +
>>>>>    fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>>    fs/f2fs/segment.h |  12 +++++-
>>>>>    fs/f2fs/super.c   |   2 +
>>>>>    7 files changed, 156 insertions(+), 49 deletions(-)
>>>>>
>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>    static void f2fs_zone_write_end_io(struct bio *bio)
>>>>>    {
>>>>> -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>>> +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>>> -    bio->bi_private = io->bi_private;
>>>>> -    complete(&io->zone_wait);
>>>>>        f2fs_write_end_io(bio);
>>>>> +    up(&sbi->available_open_zones);
>>>>>    }
>>>>>    #endif
>>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>>        if (!io->bio)
>>>>>            return;
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    if (io->open_zone) {
>>>>> +        /*
>>>>> +         * if there is no open zone, it will wait for last IO in
>>>>> +         * previous zone before submitting new IO.
>>>>> +         */
>>>>> +        down(&fio->sbi->available_open_zones);
>>>>> +        io->open_zone = false;
>>>>> +        io->zone_openned = true;
>>>>> +    }
>>>>> +
>>>>> +    if (io->close_zone) {
>>>>> +        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>> +        io->zone_openned = false;
>>>>> +        io->close_zone = false;
>>>>> +    }
>>>>> +#endif
>>>>> +
>>>>>        if (is_read_io(fio->op)) {
>>>>>            trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>>            f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>>                INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>>                init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>> -            init_completion(&sbi->write_io[i][j].zone_wait);
>>>>> -            sbi->write_io[i][j].zone_pending_bio = NULL;
>>>>> -            sbi->write_io[i][j].bi_private = NULL;
>>>>> +            sbi->write_io[i][j].open_zone = false;
>>>>> +            sbi->write_io[i][j].zone_openned = false;
>>>>> +            sbi->write_io[i][j].close_zone = false;
>>>>>    #endif
>>>>>            }
>>>>>        }
>>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>>        f2fs_up_write(&io->io_rwsem);
>>>>>    }
>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>>> +{
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    struct f2fs_bio_info *io;
>>>>> +
>>>>> +    if (!f2fs_sb_has_blkzoned(sbi))
>>>>> +        return;
>>>>> +
>>>>> +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>>> +
>>>>> +    f2fs_down_write(&io->io_rwsem);
>>>>> +    if (io->zone_openned) {
>>>>> +        if (io->bio) {
>>>>> +            io->close_zone = true;
>>>>> +            __submit_merged_bio(io);
>>>>> +        } else if (io->zone_openned) {
>>>>> +            up(&sbi->available_open_zones);
>>>>> +            io->zone_openned = false;
>>>>> +        }
>>>>> +    }
>>>>> +    f2fs_up_write(&io->io_rwsem);
>>>>> +#endif
>>>>> +
>>>>> +}
>>>>> +
>>>>>    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>                    struct inode *inode, struct page *page,
>>>>>                    nid_t ino, enum page_type type, bool force)
>>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>>    }
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>>> +                    block_t blkaddr, bool start)
>>>>>    {
>>>>> -    int devi = 0;
>>>>> +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>>> +        return false;
>>>>> +
>>>>> +    if (start)
>>>>> +        return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>>> +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>> -    if (f2fs_is_multi_device(sbi)) {
>>>>> -        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>> -        if (blkaddr < FDEV(devi).start_blk ||
>>>>> -            blkaddr > FDEV(devi).end_blk) {
>>>>> -            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>> -            return false;
>>>>> -        }
>>>>> -        blkaddr -= FDEV(devi).start_blk;
>>>>> -    }
>>>>> -    return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>> -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>>> -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>    }
>>>>>    #endif
>>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>>        struct page *bio_page;
>>>>>        enum count_type type;
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>>> +#endif
>>>>>        f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>>        f2fs_down_write(&io->io_rwsem);
>>>>>    next:
>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>>> -        wait_for_completion_io(&io->zone_wait);
>>>>> -        bio_put(io->zone_pending_bio);
>>>>> -        io->zone_pending_bio = NULL;
>>>>> -        io->bi_private = NULL;
>>>>> -    }
>>>>> -#endif
>>>>> -
>>>>>        if (fio->in_list) {
>>>>>            spin_lock(&io->io_lock);
>>>>>            if (list_empty(&io->io_list)) {
>>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>        type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>>        inc_page_count(sbi, type);
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>>> +        io->open_zone = true;
>>>>> +#endif
>>>>> +
>>>>>        if (io->bio &&
>>>>>            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>>                      fio->new_blkaddr) ||
>>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>        io->last_block_in_bio = fio->new_blkaddr;
>>>>>        trace_f2fs_submit_page_write(fio->page, fio);
>>>>> +
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>>> -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>>> -        bio_get(io->bio);
>>>>> -        reinit_completion(&io->zone_wait);
>>>>> -        io->bi_private = io->bio->bi_private;
>>>>> -        io->bio->bi_private = io;
>>>>> -        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>> -        io->zone_pending_bio = io->bio;
>>>>> +    if (blkzoned &&
>>>>> +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>>> +        io->close_zone = true;
>>>>>            __submit_merged_bio(io);
>>>>>        }
>>>>>    #endif
>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>>> --- a/fs/f2fs/f2fs.h
>>>>> +++ b/fs/f2fs/f2fs.h
>>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>>        struct bio *bio;        /* bios to merge */
>>>>>        sector_t last_block_in_bio;    /* last block number */
>>>>>        struct f2fs_io_info fio;    /* store buffered io info. */
>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>> -    struct completion zone_wait;    /* condition value for the previous open zone to close */
>>>>> -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
>>>>> -    void *bi_private;        /* previous bi_private for pending bio */
>>>>> -#endif
>>>>>        struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
>>>>>        spinlock_t io_lock;        /* serialize DATA/NODE IOs */
>>>>>        struct list_head io_list;    /* track fios */
>>>>>        struct list_head bio_list;    /* bio entry list head */
>>>>>        struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    bool open_zone;            /* open a zone */
>>>>> +    bool zone_openned;        /* zone has been openned */
>>>>> +    bool close_zone;        /* close a zone */
>>>>> +#endif
>>>>>    };
>>>>>    #define FDEV(i)                (sbi->devs[i])
>>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>        unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
>>>>>        unsigned int max_open_zones;        /* max open zone resources of the zoned device */
>>>>> +    struct semaphore available_open_zones;    /* available open zones */
>>>>>    #endif
>>>>>        /* for node-related operations */
>>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>>    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>>                  enum page_type type);
>>>>>    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>>    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>>    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>                    struct inode *inode, struct page *page,
>>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>>        return test_bit(zno, FDEV(devi).blkz_seq);
>>>>>    }
>>>>> +
>>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>> +                            block_t blkaddr)
>>>>> +{
>>>>> +    int devi = 0;
>>>>> +
>>>>> +    if (f2fs_is_multi_device(sbi)) {
>>>>> +        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>> +        if (blkaddr < FDEV(devi).start_blk ||
>>>>> +            blkaddr > FDEV(devi).end_blk) {
>>>>> +            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>> +            return false;
>>>>> +        }
>>>>> +        blkaddr -= FDEV(devi).start_blk;
>>>>> +    }
>>>>> +
>>>>> +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>> +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>> +        return false;
>>>>> +
>>>>> +    return true;
>>>>
>>>> Applied as below.
>>>>
>>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>                   blkaddr -= FDEV(devi).start_blk;
>>>>           }
>>>>
>>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>> -               return false;
>>>> -
>>>> -       return true;
>>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>>
>>> Looks good, thank you for cleanup.
>>>
>>> Thanks,
>>>
>>>>    }
>>>>    #endif
>>>>
>>>>> +}
>>>>>    #endif
>>>>>    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>>> index f8703038e1d8..a8626e297876 100644
>>>>> --- a/fs/f2fs/iostat.c
>>>>> +++ b/fs/f2fs/iostat.c
>>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>>        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>>    }
>>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>>> +{
>>>>> +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>> +
>>>>> +    return iostat_ctx->sbi;
>>>>> +}
>>>>> +
>>>>>    void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>>    {
>>>>>        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>>> index eb99d05cf272..9006c3d41590 100644
>>>>> --- a/fs/f2fs/iostat.h
>>>>> +++ b/fs/f2fs/iostat.h
>>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>>        return iostat_ctx->post_read_ctx;
>>>>>    }
>>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>>    extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>>    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx);
>>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>>    #else
>>>>>    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>>            enum iostat_type type, unsigned long long io_bytes) {}
>>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>>    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>>    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>>            return err;
>>>>>        stat_inc_seg_type(sbi, curseg);
>>>>>        locate_dirty_segment(sbi, old_segno);
>>>>> +
>>>>> +    f2fs_blkzoned_submit_merged_write(sbi, type);
>>>>> +
>>>>>        return 0;
>>>>>    }
>>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>>            f2fs_bug_on(fio->sbi, true);
>>>>>        }
>>>>> -    if (IS_HOT(type))
>>>>> -        fio->temp = HOT;
>>>>> -    else if (IS_WARM(type))
>>>>> -        fio->temp = WARM;
>>>>> -    else
>>>>> -        fio->temp = COLD;
>>>>> +    fio->temp = type_to_temp(type);
>>>>>        return type;
>>>>>    }
>>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>            return -EINVAL;
>>>>>        }
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    if (f2fs_sb_has_blkzoned(sbi)) {
>>>>> +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>>> +            struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>> +            enum page_type ptype;
>>>>> +            enum temp_type temp;
>>>>> +
>>>>> +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>>> +                continue;
>>>>> +
>>>>> +            if (!f2fs_blkaddr_in_seqzone(sbi,
>>>>> +                    START_BLOCK(sbi, curseg->segno)))
>>>>> +                continue;
>>>>> +
>>>>> +            ptype = PAGE_TYPE(type);
>>>>> +            temp = type_to_temp(type);
>>>>> +            down(&sbi->available_open_zones);
>>>>> +            sbi->write_io[ptype][temp].zone_openned = true;
>>>>> +        }
>>>>> +    }
>>>>> +#endif
>>>>>        return 0;
>>>>>    }
>>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>>        for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>            kfree(array[i].sum_blk);
>>>>>            kfree(array[i].journal);
>>>>> +
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +        if (f2fs_sb_has_blkzoned(sbi)) {
>>>>> +            enum page_type ptype = PAGE_TYPE(i);
>>>>> +            enum temp_type temp = type_to_temp(i);
>>>>> +
>>>>> +            if (sbi->write_io[ptype][temp].zone_openned) {
>>>>> +                up(&sbi->available_open_zones);
>>>>> +                sbi->write_io[ptype][temp].zone_openned = false;
>>>>> +            }
>>>>> +        }
>>>>> +#endif
>>>>>        }
>>>>>        kfree(array);
>>>>>    }
>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>> index e1c0f418aa11..855978ca869f 100644
>>>>> --- a/fs/f2fs/segment.h
>>>>> +++ b/fs/f2fs/segment.h
>>>>> @@ -24,7 +24,8 @@
>>>>>    #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
>>>>>    #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>> -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>>> +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
>>>>> +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
>>>>>    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>>                            unsigned short seg_type)
>>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>>                return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>>        return 0;
>>>>>    }
>>>>> +
>>>>> +static inline enum temp_type type_to_temp(int type)
>>>>> +{
>>>>> +    if (IS_HOT(type))
>>>>> +        return HOT;
>>>>> +    else if (IS_WARM(type))
>>>>> +        return WARM;
>>>>> +    return COLD;
>>>>> +}
>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>> index fdf358c7f808..954baa6c100d 100644
>>>>> --- a/fs/f2fs/super.c
>>>>> +++ b/fs/f2fs/super.c
>>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>>                    sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>>                return -EINVAL;
>>>>>            }
>>>>> +
>>>>> +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>>        }
>>>>>        zone_sectors = bdev_zone_sectors(bdev);
>>>>> -- 
>>>>> 2.40.1
> 
> 
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-15 14:01         ` Chao Yu
@ 2024-04-16  0:59           ` Chao Yu
  2024-04-16 16:51             ` Jaegeuk Kim
  0 siblings, 1 reply; 10+ messages in thread
From: Chao Yu @ 2024-04-16  0:59 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2024/4/15 22:01, Chao Yu wrote:
> On 2024/4/15 11:26, Chao Yu wrote:
>> On 2024/4/14 23:19, Jaegeuk Kim wrote:
>>> It seems this caused kernel hang. Chao, have you tested this patch enough?
>>
>> Jaegeuk,
>>
>> Oh, I've checked this patch w/ fsstress before submitting it, but missed
>> the SPO testcase... do you encounter kernel hang w/ SPO testcase?
> 
> I did see any hang issue w/ por_fsstress testcase, which testcase do you use?

Sorry, I mean I haven't reproduced it yet...

Thanks,

> 
> Thanks,
> 
>>
>> Anyway, let me test it more.
>>
>> Thanks,
>>
>>>
>>> On 04/13, Chao Yu wrote:
>>>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>>>> On 04/07, Chao Yu wrote:
>>>>>> max open zone may be larger than log header number of f2fs, for
>>>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>>>> introduce available_open_zone semaphore, and reduce it once we
>>>>>> submit first write IO in a zone, and increase it after completion
>>>>>> of last IO in the zone.
>>>>>>
>>>>>> Cc: Daeho Jeong <daeho43@gmail.com>
>>>>>> Signed-off-by: Chao Yu <chao@kernel.org>
>>>>>> ---
>>>>>> v3:
>>>>>> - avoid race condition in between __submit_merged_bio()
>>>>>> and __allocate_new_segment().
>>>>>>    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>>>    fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>>>    fs/f2fs/iostat.c  |   7 ++++
>>>>>>    fs/f2fs/iostat.h  |   2 +
>>>>>>    fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>>>    fs/f2fs/segment.h |  12 +++++-
>>>>>>    fs/f2fs/super.c   |   2 +
>>>>>>    7 files changed, 156 insertions(+), 49 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>>>> --- a/fs/f2fs/data.c
>>>>>> +++ b/fs/f2fs/data.c
>>>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>    static void f2fs_zone_write_end_io(struct bio *bio)
>>>>>>    {
>>>>>> -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>>>> +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>>>> -    bio->bi_private = io->bi_private;
>>>>>> -    complete(&io->zone_wait);
>>>>>>        f2fs_write_end_io(bio);
>>>>>> +    up(&sbi->available_open_zones);
>>>>>>    }
>>>>>>    #endif
>>>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>>>        if (!io->bio)
>>>>>>            return;
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    if (io->open_zone) {
>>>>>> +        /*
>>>>>> +         * if there is no open zone, it will wait for last IO in
>>>>>> +         * previous zone before submitting new IO.
>>>>>> +         */
>>>>>> +        down(&fio->sbi->available_open_zones);
>>>>>> +        io->open_zone = false;
>>>>>> +        io->zone_openned = true;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (io->close_zone) {
>>>>>> +        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>> +        io->zone_openned = false;
>>>>>> +        io->close_zone = false;
>>>>>> +    }
>>>>>> +#endif
>>>>>> +
>>>>>>        if (is_read_io(fio->op)) {
>>>>>>            trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>>>            f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>>>                INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>>>                init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -            init_completion(&sbi->write_io[i][j].zone_wait);
>>>>>> -            sbi->write_io[i][j].zone_pending_bio = NULL;
>>>>>> -            sbi->write_io[i][j].bi_private = NULL;
>>>>>> +            sbi->write_io[i][j].open_zone = false;
>>>>>> +            sbi->write_io[i][j].zone_openned = false;
>>>>>> +            sbi->write_io[i][j].close_zone = false;
>>>>>>    #endif
>>>>>>            }
>>>>>>        }
>>>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>>>        f2fs_up_write(&io->io_rwsem);
>>>>>>    }
>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>>>> +{
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    struct f2fs_bio_info *io;
>>>>>> +
>>>>>> +    if (!f2fs_sb_has_blkzoned(sbi))
>>>>>> +        return;
>>>>>> +
>>>>>> +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>>>> +
>>>>>> +    f2fs_down_write(&io->io_rwsem);
>>>>>> +    if (io->zone_openned) {
>>>>>> +        if (io->bio) {
>>>>>> +            io->close_zone = true;
>>>>>> +            __submit_merged_bio(io);
>>>>>> +        } else if (io->zone_openned) {
>>>>>> +            up(&sbi->available_open_zones);
>>>>>> +            io->zone_openned = false;
>>>>>> +        }
>>>>>> +    }
>>>>>> +    f2fs_up_write(&io->io_rwsem);
>>>>>> +#endif
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>>    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>                    struct inode *inode, struct page *page,
>>>>>>                    nid_t ino, enum page_type type, bool force)
>>>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>>>    }
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>>>> +                    block_t blkaddr, bool start)
>>>>>>    {
>>>>>> -    int devi = 0;
>>>>>> +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    if (start)
>>>>>> +        return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>>>> +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>> -    if (f2fs_is_multi_device(sbi)) {
>>>>>> -        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>> -        if (blkaddr < FDEV(devi).start_blk ||
>>>>>> -            blkaddr > FDEV(devi).end_blk) {
>>>>>> -            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>> -            return false;
>>>>>> -        }
>>>>>> -        blkaddr -= FDEV(devi).start_blk;
>>>>>> -    }
>>>>>> -    return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>>> -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>>>> -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>>    }
>>>>>>    #endif
>>>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>>>        struct page *bio_page;
>>>>>>        enum count_type type;
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>>>> +#endif
>>>>>>        f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>>>        f2fs_down_write(&io->io_rwsem);
>>>>>>    next:
>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>>>> -        wait_for_completion_io(&io->zone_wait);
>>>>>> -        bio_put(io->zone_pending_bio);
>>>>>> -        io->zone_pending_bio = NULL;
>>>>>> -        io->bi_private = NULL;
>>>>>> -    }
>>>>>> -#endif
>>>>>> -
>>>>>>        if (fio->in_list) {
>>>>>>            spin_lock(&io->io_lock);
>>>>>>            if (list_empty(&io->io_list)) {
>>>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>        type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>>>        inc_page_count(sbi, type);
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>>>> +        io->open_zone = true;
>>>>>> +#endif
>>>>>> +
>>>>>>        if (io->bio &&
>>>>>>            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>>>                      fio->new_blkaddr) ||
>>>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>        io->last_block_in_bio = fio->new_blkaddr;
>>>>>>        trace_f2fs_submit_page_write(fio->page, fio);
>>>>>> +
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>>>> -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>>>> -        bio_get(io->bio);
>>>>>> -        reinit_completion(&io->zone_wait);
>>>>>> -        io->bi_private = io->bio->bi_private;
>>>>>> -        io->bio->bi_private = io;
>>>>>> -        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>> -        io->zone_pending_bio = io->bio;
>>>>>> +    if (blkzoned &&
>>>>>> +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>>>> +        io->close_zone = true;
>>>>>>            __submit_merged_bio(io);
>>>>>>        }
>>>>>>    #endif
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>>>        struct bio *bio;        /* bios to merge */
>>>>>>        sector_t last_block_in_bio;    /* last block number */
>>>>>>        struct f2fs_io_info fio;    /* store buffered io info. */
>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -    struct completion zone_wait;    /* condition value for the previous open zone to close */
>>>>>> -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
>>>>>> -    void *bi_private;        /* previous bi_private for pending bio */
>>>>>> -#endif
>>>>>>        struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
>>>>>>        spinlock_t io_lock;        /* serialize DATA/NODE IOs */
>>>>>>        struct list_head io_list;    /* track fios */
>>>>>>        struct list_head bio_list;    /* bio entry list head */
>>>>>>        struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    bool open_zone;            /* open a zone */
>>>>>> +    bool zone_openned;        /* zone has been openned */
>>>>>> +    bool close_zone;        /* close a zone */
>>>>>> +#endif
>>>>>>    };
>>>>>>    #define FDEV(i)                (sbi->devs[i])
>>>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>        unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
>>>>>>        unsigned int max_open_zones;        /* max open zone resources of the zoned device */
>>>>>> +    struct semaphore available_open_zones;    /* available open zones */
>>>>>>    #endif
>>>>>>        /* for node-related operations */
>>>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>>>    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>>>                  enum page_type type);
>>>>>>    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>>>    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>>>    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>                    struct inode *inode, struct page *page,
>>>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>>>        return test_bit(zno, FDEV(devi).blkz_seq);
>>>>>>    }
>>>>>> +
>>>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>> +                            block_t blkaddr)
>>>>>> +{
>>>>>> +    int devi = 0;
>>>>>> +
>>>>>> +    if (f2fs_is_multi_device(sbi)) {
>>>>>> +        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>> +        if (blkaddr < FDEV(devi).start_blk ||
>>>>>> +            blkaddr > FDEV(devi).end_blk) {
>>>>>> +            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>> +            return false;
>>>>>> +        }
>>>>>> +        blkaddr -= FDEV(devi).start_blk;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>>> +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    return true;
>>>>>
>>>>> Applied as below.
>>>>>
>>>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>                   blkaddr -= FDEV(devi).start_blk;
>>>>>           }
>>>>>
>>>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>> -               return false;
>>>>> -
>>>>> -       return true;
>>>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>>>
>>>> Looks good, thank you for cleanup.
>>>>
>>>> Thanks,
>>>>
>>>>>    }
>>>>>    #endif
>>>>>
>>>>>> +}
>>>>>>    #endif
>>>>>>    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>>>> index f8703038e1d8..a8626e297876 100644
>>>>>> --- a/fs/f2fs/iostat.c
>>>>>> +++ b/fs/f2fs/iostat.c
>>>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>>>        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>>>    }
>>>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>>>> +{
>>>>>> +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>> +
>>>>>> +    return iostat_ctx->sbi;
>>>>>> +}
>>>>>> +
>>>>>>    void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>>>    {
>>>>>>        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>>>> index eb99d05cf272..9006c3d41590 100644
>>>>>> --- a/fs/f2fs/iostat.h
>>>>>> +++ b/fs/f2fs/iostat.h
>>>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>>>        return iostat_ctx->post_read_ctx;
>>>>>>    }
>>>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>>>    extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>>>    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx);
>>>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>>>    #else
>>>>>>    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>>>            enum iostat_type type, unsigned long long io_bytes) {}
>>>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>>>    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>>>    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>>>> --- a/fs/f2fs/segment.c
>>>>>> +++ b/fs/f2fs/segment.c
>>>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>>>            return err;
>>>>>>        stat_inc_seg_type(sbi, curseg);
>>>>>>        locate_dirty_segment(sbi, old_segno);
>>>>>> +
>>>>>> +    f2fs_blkzoned_submit_merged_write(sbi, type);
>>>>>> +
>>>>>>        return 0;
>>>>>>    }
>>>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>>>            f2fs_bug_on(fio->sbi, true);
>>>>>>        }
>>>>>> -    if (IS_HOT(type))
>>>>>> -        fio->temp = HOT;
>>>>>> -    else if (IS_WARM(type))
>>>>>> -        fio->temp = WARM;
>>>>>> -    else
>>>>>> -        fio->temp = COLD;
>>>>>> +    fio->temp = type_to_temp(type);
>>>>>>        return type;
>>>>>>    }
>>>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>            return -EINVAL;
>>>>>>        }
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>> +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>>>> +            struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> +            enum page_type ptype;
>>>>>> +            enum temp_type temp;
>>>>>> +
>>>>>> +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>>>> +                continue;
>>>>>> +
>>>>>> +            if (!f2fs_blkaddr_in_seqzone(sbi,
>>>>>> +                    START_BLOCK(sbi, curseg->segno)))
>>>>>> +                continue;
>>>>>> +
>>>>>> +            ptype = PAGE_TYPE(type);
>>>>>> +            temp = type_to_temp(type);
>>>>>> +            down(&sbi->available_open_zones);
>>>>>> +            sbi->write_io[ptype][temp].zone_openned = true;
>>>>>> +        }
>>>>>> +    }
>>>>>> +#endif
>>>>>>        return 0;
>>>>>>    }
>>>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>>>        for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>            kfree(array[i].sum_blk);
>>>>>>            kfree(array[i].journal);
>>>>>> +
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +        if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>> +            enum page_type ptype = PAGE_TYPE(i);
>>>>>> +            enum temp_type temp = type_to_temp(i);
>>>>>> +
>>>>>> +            if (sbi->write_io[ptype][temp].zone_openned) {
>>>>>> +                up(&sbi->available_open_zones);
>>>>>> +                sbi->write_io[ptype][temp].zone_openned = false;
>>>>>> +            }
>>>>>> +        }
>>>>>> +#endif
>>>>>>        }
>>>>>>        kfree(array);
>>>>>>    }
>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>> index e1c0f418aa11..855978ca869f 100644
>>>>>> --- a/fs/f2fs/segment.h
>>>>>> +++ b/fs/f2fs/segment.h
>>>>>> @@ -24,7 +24,8 @@
>>>>>>    #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
>>>>>>    #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>> -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>>>> +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
>>>>>> +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
>>>>>>    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>>>                            unsigned short seg_type)
>>>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>>>                return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>>>        return 0;
>>>>>>    }
>>>>>> +
>>>>>> +static inline enum temp_type type_to_temp(int type)
>>>>>> +{
>>>>>> +    if (IS_HOT(type))
>>>>>> +        return HOT;
>>>>>> +    else if (IS_WARM(type))
>>>>>> +        return WARM;
>>>>>> +    return COLD;
>>>>>> +}
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index fdf358c7f808..954baa6c100d 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>>>                    sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>>>                return -EINVAL;
>>>>>>            }
>>>>>> +
>>>>>> +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>>>        }
>>>>>>        zone_sectors = bdev_zone_sectors(bdev);
>>>>>> -- 
>>>>>> 2.40.1
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> 
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-16  0:59           ` Chao Yu
@ 2024-04-16 16:51             ` Jaegeuk Kim
  2024-04-17  1:05               ` Chao Yu
  0 siblings, 1 reply; 10+ messages in thread
From: Jaegeuk Kim @ 2024-04-16 16:51 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 04/16, Chao Yu wrote:
> On 2024/4/15 22:01, Chao Yu wrote:
> > On 2024/4/15 11:26, Chao Yu wrote:
> > > On 2024/4/14 23:19, Jaegeuk Kim wrote:
> > > > It seems this caused kernel hang. Chao, have you tested this patch enough?
> > > 
> > > Jaegeuk,
> > > 
> > > Oh, I've checked this patch w/ fsstress before submitting it, but missed
> > > the SPO testcase... do you encounter kernel hang w/ SPO testcase?
> > 
> > I did see any hang issue w/ por_fsstress testcase, which testcase do you use?
> 
> Sorry, I mean I haven't reproduced it yet...

I'd prefer to check this patch later. Have you tested on Zoned device with
nullblk?

> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> > > 
> > > Anyway, let me test it more.
> > > 
> > > Thanks,
> > > 
> > > > 
> > > > On 04/13, Chao Yu wrote:
> > > > > On 2024/4/13 5:11, Jaegeuk Kim wrote:
> > > > > > On 04/07, Chao Yu wrote:
> > > > > > > max open zone may be larger than log header number of f2fs, for
> > > > > > > such case, it doesn't need to wait last IO in previous zone, let's
> > > > > > > introduce available_open_zone semaphore, and reduce it once we
> > > > > > > submit first write IO in a zone, and increase it after completion
> > > > > > > of last IO in the zone.
> > > > > > > 
> > > > > > > Cc: Daeho Jeong <daeho43@gmail.com>
> > > > > > > Signed-off-by: Chao Yu <chao@kernel.org>
> > > > > > > ---
> > > > > > > v3:
> > > > > > > - avoid race condition in between __submit_merged_bio()
> > > > > > > and __allocate_new_segment().
> > > > > > >    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
> > > > > > >    fs/f2fs/f2fs.h    |  34 ++++++++++++---
> > > > > > >    fs/f2fs/iostat.c  |   7 ++++
> > > > > > >    fs/f2fs/iostat.h  |   2 +
> > > > > > >    fs/f2fs/segment.c |  43 ++++++++++++++++---
> > > > > > >    fs/f2fs/segment.h |  12 +++++-
> > > > > > >    fs/f2fs/super.c   |   2 +
> > > > > > >    7 files changed, 156 insertions(+), 49 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > > > > index 0d88649c60a5..18a4ac0a06bc 100644
> > > > > > > --- a/fs/f2fs/data.c
> > > > > > > +++ b/fs/f2fs/data.c
> > > > > > > @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
> > > > > > >    #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > >    static void f2fs_zone_write_end_io(struct bio *bio)
> > > > > > >    {
> > > > > > > -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> > > > > > > +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
> > > > > > > -    bio->bi_private = io->bi_private;
> > > > > > > -    complete(&io->zone_wait);
> > > > > > >        f2fs_write_end_io(bio);
> > > > > > > +    up(&sbi->available_open_zones);
> > > > > > >    }
> > > > > > >    #endif
> > > > > > > @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
> > > > > > >        if (!io->bio)
> > > > > > >            return;
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +    if (io->open_zone) {
> > > > > > > +        /*
> > > > > > > +         * if there is no open zone, it will wait for last IO in
> > > > > > > +         * previous zone before submitting new IO.
> > > > > > > +         */
> > > > > > > +        down(&fio->sbi->available_open_zones);
> > > > > > > +        io->open_zone = false;
> > > > > > > +        io->zone_openned = true;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (io->close_zone) {
> > > > > > > +        io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > > > > > +        io->zone_openned = false;
> > > > > > > +        io->close_zone = false;
> > > > > > > +    }
> > > > > > > +#endif
> > > > > > > +
> > > > > > >        if (is_read_io(fio->op)) {
> > > > > > >            trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
> > > > > > >            f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> > > > > > > @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
> > > > > > >                INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
> > > > > > >                init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
> > > > > > >    #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -            init_completion(&sbi->write_io[i][j].zone_wait);
> > > > > > > -            sbi->write_io[i][j].zone_pending_bio = NULL;
> > > > > > > -            sbi->write_io[i][j].bi_private = NULL;
> > > > > > > +            sbi->write_io[i][j].open_zone = false;
> > > > > > > +            sbi->write_io[i][j].zone_openned = false;
> > > > > > > +            sbi->write_io[i][j].close_zone = false;
> > > > > > >    #endif
> > > > > > >            }
> > > > > > >        }
> > > > > > > @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
> > > > > > >        f2fs_up_write(&io->io_rwsem);
> > > > > > >    }
> > > > > > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> > > > > > > +{
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +    struct f2fs_bio_info *io;
> > > > > > > +
> > > > > > > +    if (!f2fs_sb_has_blkzoned(sbi))
> > > > > > > +        return;
> > > > > > > +
> > > > > > > +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> > > > > > > +
> > > > > > > +    f2fs_down_write(&io->io_rwsem);
> > > > > > > +    if (io->zone_openned) {
> > > > > > > +        if (io->bio) {
> > > > > > > +            io->close_zone = true;
> > > > > > > +            __submit_merged_bio(io);
> > > > > > > +        } else if (io->zone_openned) {
> > > > > > > +            up(&sbi->available_open_zones);
> > > > > > > +            io->zone_openned = false;
> > > > > > > +        }
> > > > > > > +    }
> > > > > > > +    f2fs_up_write(&io->io_rwsem);
> > > > > > > +#endif
> > > > > > > +
> > > > > > > +}
> > > > > > > +
> > > > > > >    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > > > > > >                    struct inode *inode, struct page *page,
> > > > > > >                    nid_t ino, enum page_type type, bool force)
> > > > > > > @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> > > > > > >    }
> > > > > > >    #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> > > > > > > +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> > > > > > > +                    block_t blkaddr, bool start)
> > > > > > >    {
> > > > > > > -    int devi = 0;
> > > > > > > +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> > > > > > > +        return false;
> > > > > > > +
> > > > > > > +    if (start)
> > > > > > > +        return (blkaddr % sbi->blocks_per_blkz) == 0;
> > > > > > > +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > > > > > -    if (f2fs_is_multi_device(sbi)) {
> > > > > > > -        devi = f2fs_target_device_index(sbi, blkaddr);
> > > > > > > -        if (blkaddr < FDEV(devi).start_blk ||
> > > > > > > -            blkaddr > FDEV(devi).end_blk) {
> > > > > > > -            f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > > > > > -            return false;
> > > > > > > -        }
> > > > > > > -        blkaddr -= FDEV(devi).start_blk;
> > > > > > > -    }
> > > > > > > -    return bdev_is_zoned(FDEV(devi).bdev) &&
> > > > > > > -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> > > > > > > -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > > > > >    }
> > > > > > >    #endif
> > > > > > > @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > > > > >        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> > > > > > >        struct page *bio_page;
> > > > > > >        enum count_type type;
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> > > > > > > +#endif
> > > > > > >        f2fs_bug_on(sbi, is_read_io(fio->op));
> > > > > > >        f2fs_down_write(&io->io_rwsem);
> > > > > > >    next:
> > > > > > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> > > > > > > -        wait_for_completion_io(&io->zone_wait);
> > > > > > > -        bio_put(io->zone_pending_bio);
> > > > > > > -        io->zone_pending_bio = NULL;
> > > > > > > -        io->bi_private = NULL;
> > > > > > > -    }
> > > > > > > -#endif
> > > > > > > -
> > > > > > >        if (fio->in_list) {
> > > > > > >            spin_lock(&io->io_lock);
> > > > > > >            if (list_empty(&io->io_list)) {
> > > > > > > @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > > > > >        type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> > > > > > >        inc_page_count(sbi, type);
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> > > > > > > +        io->open_zone = true;
> > > > > > > +#endif
> > > > > > > +
> > > > > > >        if (io->bio &&
> > > > > > >            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> > > > > > >                      fio->new_blkaddr) ||
> > > > > > > @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > > > > >        io->last_block_in_bio = fio->new_blkaddr;
> > > > > > >        trace_f2fs_submit_page_write(fio->page, fio);
> > > > > > > +
> > > > > > >    #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> > > > > > > -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> > > > > > > -        bio_get(io->bio);
> > > > > > > -        reinit_completion(&io->zone_wait);
> > > > > > > -        io->bi_private = io->bio->bi_private;
> > > > > > > -        io->bio->bi_private = io;
> > > > > > > -        io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > > > > > -        io->zone_pending_bio = io->bio;
> > > > > > > +    if (blkzoned &&
> > > > > > > +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> > > > > > > +        io->close_zone = true;
> > > > > > >            __submit_merged_bio(io);
> > > > > > >        }
> > > > > > >    #endif
> > > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > > index 694f8a52cb84..8a26530cf4fb 100644
> > > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > > @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
> > > > > > >        struct bio *bio;        /* bios to merge */
> > > > > > >        sector_t last_block_in_bio;    /* last block number */
> > > > > > >        struct f2fs_io_info fio;    /* store buffered io info. */
> > > > > > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -    struct completion zone_wait;    /* condition value for the previous open zone to close */
> > > > > > > -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
> > > > > > > -    void *bi_private;        /* previous bi_private for pending bio */
> > > > > > > -#endif
> > > > > > >        struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
> > > > > > >        spinlock_t io_lock;        /* serialize DATA/NODE IOs */
> > > > > > >        struct list_head io_list;    /* track fios */
> > > > > > >        struct list_head bio_list;    /* bio entry list head */
> > > > > > >        struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +    bool open_zone;            /* open a zone */
> > > > > > > +    bool zone_openned;        /* zone has been openned */
> > > > > > > +    bool close_zone;        /* close a zone */
> > > > > > > +#endif
> > > > > > >    };
> > > > > > >    #define FDEV(i)                (sbi->devs[i])
> > > > > > > @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
> > > > > > >    #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > >        unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
> > > > > > >        unsigned int max_open_zones;        /* max open zone resources of the zoned device */
> > > > > > > +    struct semaphore available_open_zones;    /* available open zones */
> > > > > > >    #endif
> > > > > > >        /* for node-related operations */
> > > > > > > @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
> > > > > > >    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> > > > > > >                  enum page_type type);
> > > > > > >    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> > > > > > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
> > > > > > >    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
> > > > > > >    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > > > > > >                    struct inode *inode, struct page *page,
> > > > > > > @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
> > > > > > >        return test_bit(zno, FDEV(devi).blkz_seq);
> > > > > > >    }
> > > > > > > +
> > > > > > > +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > > > > > > +                            block_t blkaddr)
> > > > > > > +{
> > > > > > > +    int devi = 0;
> > > > > > > +
> > > > > > > +    if (f2fs_is_multi_device(sbi)) {
> > > > > > > +        devi = f2fs_target_device_index(sbi, blkaddr);
> > > > > > > +        if (blkaddr < FDEV(devi).start_blk ||
> > > > > > > +            blkaddr > FDEV(devi).end_blk) {
> > > > > > > +            f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > > > > > +            return false;
> > > > > > > +        }
> > > > > > > +        blkaddr -= FDEV(devi).start_blk;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > > > > > > +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > > > > > > +        return false;
> > > > > > > +
> > > > > > > +    return true;
> > > > > > 
> > > > > > Applied as below.
> > > > > > 
> > > > > > @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > > > > >                   blkaddr -= FDEV(devi).start_blk;
> > > > > >           }
> > > > > > 
> > > > > > -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > > > > > -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > > > > > -               return false;
> > > > > > -
> > > > > > -       return true;
> > > > > > +       return bdev_is_zoned(FDEV(devi).bdev) &&
> > > > > > +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
> > > > > 
> > > > > Looks good, thank you for cleanup.
> > > > > 
> > > > > Thanks,
> > > > > 
> > > > > >    }
> > > > > >    #endif
> > > > > > 
> > > > > > > +}
> > > > > > >    #endif
> > > > > > >    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> > > > > > > diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> > > > > > > index f8703038e1d8..a8626e297876 100644
> > > > > > > --- a/fs/f2fs/iostat.c
> > > > > > > +++ b/fs/f2fs/iostat.c
> > > > > > > @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> > > > > > >        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> > > > > > >    }
> > > > > > > +void *iostat_get_bio_private(struct bio *bio)
> > > > > > > +{
> > > > > > > +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > > > > > +
> > > > > > > +    return iostat_ctx->sbi;
> > > > > > > +}
> > > > > > > +
> > > > > > >    void iostat_update_and_unbind_ctx(struct bio *bio)
> > > > > > >    {
> > > > > > >        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > > > > > diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> > > > > > > index eb99d05cf272..9006c3d41590 100644
> > > > > > > --- a/fs/f2fs/iostat.h
> > > > > > > +++ b/fs/f2fs/iostat.h
> > > > > > > @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> > > > > > >        return iostat_ctx->post_read_ctx;
> > > > > > >    }
> > > > > > > +extern void *iostat_get_bio_private(struct bio *bio);
> > > > > > >    extern void iostat_update_and_unbind_ctx(struct bio *bio);
> > > > > > >    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > > > > > >            struct bio *bio, struct bio_post_read_ctx *ctx);
> > > > > > > @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
> > > > > > >    #else
> > > > > > >    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> > > > > > >            enum iostat_type type, unsigned long long io_bytes) {}
> > > > > > > +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
> > > > > > >    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> > > > > > >    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > > > > > >            struct bio *bio, struct bio_post_read_ctx *ctx) {}
> > > > > > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > > > > > index 4fd76e867e0a..4a3cf2888faf 100644
> > > > > > > --- a/fs/f2fs/segment.c
> > > > > > > +++ b/fs/f2fs/segment.c
> > > > > > > @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> > > > > > >            return err;
> > > > > > >        stat_inc_seg_type(sbi, curseg);
> > > > > > >        locate_dirty_segment(sbi, old_segno);
> > > > > > > +
> > > > > > > +    f2fs_blkzoned_submit_merged_write(sbi, type);
> > > > > > > +
> > > > > > >        return 0;
> > > > > > >    }
> > > > > > > @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
> > > > > > >            f2fs_bug_on(fio->sbi, true);
> > > > > > >        }
> > > > > > > -    if (IS_HOT(type))
> > > > > > > -        fio->temp = HOT;
> > > > > > > -    else if (IS_WARM(type))
> > > > > > > -        fio->temp = WARM;
> > > > > > > -    else
> > > > > > > -        fio->temp = COLD;
> > > > > > > +    fio->temp = type_to_temp(type);
> > > > > > >        return type;
> > > > > > >    }
> > > > > > > @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> > > > > > >            return -EINVAL;
> > > > > > >        }
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +    if (f2fs_sb_has_blkzoned(sbi)) {
> > > > > > > +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> > > > > > > +            struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > +            enum page_type ptype;
> > > > > > > +            enum temp_type temp;
> > > > > > > +
> > > > > > > +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> > > > > > > +                continue;
> > > > > > > +
> > > > > > > +            if (!f2fs_blkaddr_in_seqzone(sbi,
> > > > > > > +                    START_BLOCK(sbi, curseg->segno)))
> > > > > > > +                continue;
> > > > > > > +
> > > > > > > +            ptype = PAGE_TYPE(type);
> > > > > > > +            temp = type_to_temp(type);
> > > > > > > +            down(&sbi->available_open_zones);
> > > > > > > +            sbi->write_io[ptype][temp].zone_openned = true;
> > > > > > > +        }
> > > > > > > +    }
> > > > > > > +#endif
> > > > > > >        return 0;
> > > > > > >    }
> > > > > > > @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
> > > > > > >        for (i = 0; i < NR_CURSEG_TYPE; i++) {
> > > > > > >            kfree(array[i].sum_blk);
> > > > > > >            kfree(array[i].journal);
> > > > > > > +
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +        if (f2fs_sb_has_blkzoned(sbi)) {
> > > > > > > +            enum page_type ptype = PAGE_TYPE(i);
> > > > > > > +            enum temp_type temp = type_to_temp(i);
> > > > > > > +
> > > > > > > +            if (sbi->write_io[ptype][temp].zone_openned) {
> > > > > > > +                up(&sbi->available_open_zones);
> > > > > > > +                sbi->write_io[ptype][temp].zone_openned = false;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +#endif
> > > > > > >        }
> > > > > > >        kfree(array);
> > > > > > >    }
> > > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > > index e1c0f418aa11..855978ca869f 100644
> > > > > > > --- a/fs/f2fs/segment.h
> > > > > > > +++ b/fs/f2fs/segment.h
> > > > > > > @@ -24,7 +24,8 @@
> > > > > > >    #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
> > > > > > >    #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> > > > > > > -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
> > > > > > > +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
> > > > > > > +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
> > > > > > >    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
> > > > > > >                            unsigned short seg_type)
> > > > > > > @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> > > > > > >                return GET_SEGNO(sbi, FDEV(devi).start_blk);
> > > > > > >        return 0;
> > > > > > >    }
> > > > > > > +
> > > > > > > +static inline enum temp_type type_to_temp(int type)
> > > > > > > +{
> > > > > > > +    if (IS_HOT(type))
> > > > > > > +        return HOT;
> > > > > > > +    else if (IS_WARM(type))
> > > > > > > +        return WARM;
> > > > > > > +    return COLD;
> > > > > > > +}
> > > > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > > > index fdf358c7f808..954baa6c100d 100644
> > > > > > > --- a/fs/f2fs/super.c
> > > > > > > +++ b/fs/f2fs/super.c
> > > > > > > @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> > > > > > >                    sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
> > > > > > >                return -EINVAL;
> > > > > > >            }
> > > > > > > +
> > > > > > > +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
> > > > > > >        }
> > > > > > >        zone_sectors = bdev_zone_sectors(bdev);
> > > > > > > -- 
> > > > > > > 2.40.1
> > > 
> > > 
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > Linux-f2fs-devel@lists.sourceforge.net
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > 
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone
  2024-04-16 16:51             ` Jaegeuk Kim
@ 2024-04-17  1:05               ` Chao Yu
  0 siblings, 0 replies; 10+ messages in thread
From: Chao Yu @ 2024-04-17  1:05 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2024/4/17 0:51, Jaegeuk Kim wrote:
> On 04/16, Chao Yu wrote:
>> On 2024/4/15 22:01, Chao Yu wrote:
>>> On 2024/4/15 11:26, Chao Yu wrote:
>>>> On 2024/4/14 23:19, Jaegeuk Kim wrote:
>>>>> It seems this caused kernel hang. Chao, have you tested this patch enough?
>>>>
>>>> Jaegeuk,
>>>>
>>>> Oh, I've checked this patch w/ fsstress before submitting it, but missed
>>>> the SPO testcase... do you encounter kernel hang w/ SPO testcase?
>>>
>>> I did see any hang issue w/ por_fsstress testcase, which testcase do you use?
>>
>> Sorry, I mean I haven't reproduced it yet...
> 
> I'd prefer to check this patch later. Have you tested on Zoned device with
> nullblk?

Yes, I enabled blkzoned feature w/ nullblk device, and set
/sys/kernel/config/nullb/nullb0/zone_max_open to six, so that it can
emulate ZUFS' configuration.

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Anyway, let me test it more.
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> On 04/13, Chao Yu wrote:
>>>>>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>>>>>> On 04/07, Chao Yu wrote:
>>>>>>>> max open zone may be larger than log header number of f2fs, for
>>>>>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>>>>>> introduce available_open_zone semaphore, and reduce it once we
>>>>>>>> submit first write IO in a zone, and increase it after completion
>>>>>>>> of last IO in the zone.
>>>>>>>>
>>>>>>>> Cc: Daeho Jeong <daeho43@gmail.com>
>>>>>>>> Signed-off-by: Chao Yu <chao@kernel.org>
>>>>>>>> ---
>>>>>>>> v3:
>>>>>>>> - avoid race condition in between __submit_merged_bio()
>>>>>>>> and __allocate_new_segment().
>>>>>>>>     fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>>>>>     fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>>>>>     fs/f2fs/iostat.c  |   7 ++++
>>>>>>>>     fs/f2fs/iostat.h  |   2 +
>>>>>>>>     fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>>>>>     fs/f2fs/segment.h |  12 +++++-
>>>>>>>>     fs/f2fs/super.c   |   2 +
>>>>>>>>     7 files changed, 156 insertions(+), 49 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>>>>>     #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>>     static void f2fs_zone_write_end_io(struct bio *bio)
>>>>>>>>     {
>>>>>>>> -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>>>>>> +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>>>>>> -    bio->bi_private = io->bi_private;
>>>>>>>> -    complete(&io->zone_wait);
>>>>>>>>         f2fs_write_end_io(bio);
>>>>>>>> +    up(&sbi->available_open_zones);
>>>>>>>>     }
>>>>>>>>     #endif
>>>>>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>>>>>         if (!io->bio)
>>>>>>>>             return;
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    if (io->open_zone) {
>>>>>>>> +        /*
>>>>>>>> +         * if there is no open zone, it will wait for last IO in
>>>>>>>> +         * previous zone before submitting new IO.
>>>>>>>> +         */
>>>>>>>> +        down(&fio->sbi->available_open_zones);
>>>>>>>> +        io->open_zone = false;
>>>>>>>> +        io->zone_openned = true;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (io->close_zone) {
>>>>>>>> +        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>>>> +        io->zone_openned = false;
>>>>>>>> +        io->close_zone = false;
>>>>>>>> +    }
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>>         if (is_read_io(fio->op)) {
>>>>>>>>             trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>>>>>             f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>>>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>>>>>                 INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>>>>>                 init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>>>>>     #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -            init_completion(&sbi->write_io[i][j].zone_wait);
>>>>>>>> -            sbi->write_io[i][j].zone_pending_bio = NULL;
>>>>>>>> -            sbi->write_io[i][j].bi_private = NULL;
>>>>>>>> +            sbi->write_io[i][j].open_zone = false;
>>>>>>>> +            sbi->write_io[i][j].zone_openned = false;
>>>>>>>> +            sbi->write_io[i][j].close_zone = false;
>>>>>>>>     #endif
>>>>>>>>             }
>>>>>>>>         }
>>>>>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>>>>>         f2fs_up_write(&io->io_rwsem);
>>>>>>>>     }
>>>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    struct f2fs_bio_info *io;
>>>>>>>> +
>>>>>>>> +    if (!f2fs_sb_has_blkzoned(sbi))
>>>>>>>> +        return;
>>>>>>>> +
>>>>>>>> +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>>>>>> +
>>>>>>>> +    f2fs_down_write(&io->io_rwsem);
>>>>>>>> +    if (io->zone_openned) {
>>>>>>>> +        if (io->bio) {
>>>>>>>> +            io->close_zone = true;
>>>>>>>> +            __submit_merged_bio(io);
>>>>>>>> +        } else if (io->zone_openned) {
>>>>>>>> +            up(&sbi->available_open_zones);
>>>>>>>> +            io->zone_openned = false;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +    f2fs_up_write(&io->io_rwsem);
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>     static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>>>                     struct inode *inode, struct page *page,
>>>>>>>>                     nid_t ino, enum page_type type, bool force)
>>>>>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>>>>>     }
>>>>>>>>     #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>>>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>>>>>> +                    block_t blkaddr, bool start)
>>>>>>>>     {
>>>>>>>> -    int devi = 0;
>>>>>>>> +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>>>>>> +        return false;
>>>>>>>> +
>>>>>>>> +    if (start)
>>>>>>>> +        return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>>>>>> +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>>>> -    if (f2fs_is_multi_device(sbi)) {
>>>>>>>> -        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>>>> -        if (blkaddr < FDEV(devi).start_blk ||
>>>>>>>> -            blkaddr > FDEV(devi).end_blk) {
>>>>>>>> -            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>>>> -            return false;
>>>>>>>> -        }
>>>>>>>> -        blkaddr -= FDEV(devi).start_blk;
>>>>>>>> -    }
>>>>>>>> -    return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>>>>> -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>>>>>> -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>>>>     }
>>>>>>>>     #endif
>>>>>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>>>         struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>>>>>         struct page *bio_page;
>>>>>>>>         enum count_type type;
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>>>>>> +#endif
>>>>>>>>         f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>>>>>         f2fs_down_write(&io->io_rwsem);
>>>>>>>>     next:
>>>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>>>>>> -        wait_for_completion_io(&io->zone_wait);
>>>>>>>> -        bio_put(io->zone_pending_bio);
>>>>>>>> -        io->zone_pending_bio = NULL;
>>>>>>>> -        io->bi_private = NULL;
>>>>>>>> -    }
>>>>>>>> -#endif
>>>>>>>> -
>>>>>>>>         if (fio->in_list) {
>>>>>>>>             spin_lock(&io->io_lock);
>>>>>>>>             if (list_empty(&io->io_list)) {
>>>>>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>>>         type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>>>>>         inc_page_count(sbi, type);
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>>>>>> +        io->open_zone = true;
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>>         if (io->bio &&
>>>>>>>>             (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>>>>>                       fio->new_blkaddr) ||
>>>>>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>>>         io->last_block_in_bio = fio->new_blkaddr;
>>>>>>>>         trace_f2fs_submit_page_write(fio->page, fio);
>>>>>>>> +
>>>>>>>>     #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>>>>>> -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>>>>>> -        bio_get(io->bio);
>>>>>>>> -        reinit_completion(&io->zone_wait);
>>>>>>>> -        io->bi_private = io->bio->bi_private;
>>>>>>>> -        io->bio->bi_private = io;
>>>>>>>> -        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>>>> -        io->zone_pending_bio = io->bio;
>>>>>>>> +    if (blkzoned &&
>>>>>>>> +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>>>>>> +        io->close_zone = true;
>>>>>>>>             __submit_merged_bio(io);
>>>>>>>>         }
>>>>>>>>     #endif
>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>>>>>         struct bio *bio;        /* bios to merge */
>>>>>>>>         sector_t last_block_in_bio;    /* last block number */
>>>>>>>>         struct f2fs_io_info fio;    /* store buffered io info. */
>>>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -    struct completion zone_wait;    /* condition value for the previous open zone to close */
>>>>>>>> -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
>>>>>>>> -    void *bi_private;        /* previous bi_private for pending bio */
>>>>>>>> -#endif
>>>>>>>>         struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
>>>>>>>>         spinlock_t io_lock;        /* serialize DATA/NODE IOs */
>>>>>>>>         struct list_head io_list;    /* track fios */
>>>>>>>>         struct list_head bio_list;    /* bio entry list head */
>>>>>>>>         struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    bool open_zone;            /* open a zone */
>>>>>>>> +    bool zone_openned;        /* zone has been openned */
>>>>>>>> +    bool close_zone;        /* close a zone */
>>>>>>>> +#endif
>>>>>>>>     };
>>>>>>>>     #define FDEV(i)                (sbi->devs[i])
>>>>>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>>>>>     #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>>         unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
>>>>>>>>         unsigned int max_open_zones;        /* max open zone resources of the zoned device */
>>>>>>>> +    struct semaphore available_open_zones;    /* available open zones */
>>>>>>>>     #endif
>>>>>>>>         /* for node-related operations */
>>>>>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>>>>>     void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>>>>>                   enum page_type type);
>>>>>>>>     int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>>>>>     void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>>>>>     void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>>>                     struct inode *inode, struct page *page,
>>>>>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>>>>>         return test_bit(zno, FDEV(devi).blkz_seq);
>>>>>>>>     }
>>>>>>>> +
>>>>>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>>>> +                            block_t blkaddr)
>>>>>>>> +{
>>>>>>>> +    int devi = 0;
>>>>>>>> +
>>>>>>>> +    if (f2fs_is_multi_device(sbi)) {
>>>>>>>> +        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>>>> +        if (blkaddr < FDEV(devi).start_blk ||
>>>>>>>> +            blkaddr > FDEV(devi).end_blk) {
>>>>>>>> +            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>>>> +            return false;
>>>>>>>> +        }
>>>>>>>> +        blkaddr -= FDEV(devi).start_blk;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>>>>> +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>>>>> +        return false;
>>>>>>>> +
>>>>>>>> +    return true;
>>>>>>>
>>>>>>> Applied as below.
>>>>>>>
>>>>>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>>>                    blkaddr -= FDEV(devi).start_blk;
>>>>>>>            }
>>>>>>>
>>>>>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>>>> -               return false;
>>>>>>> -
>>>>>>> -       return true;
>>>>>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>>>>>
>>>>>> Looks good, thank you for cleanup.
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>     }
>>>>>>>     #endif
>>>>>>>
>>>>>>>> +}
>>>>>>>>     #endif
>>>>>>>>     static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>>>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>>>>>> index f8703038e1d8..a8626e297876 100644
>>>>>>>> --- a/fs/f2fs/iostat.c
>>>>>>>> +++ b/fs/f2fs/iostat.c
>>>>>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>>>>>         spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>>>>>     }
>>>>>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>>>>>> +{
>>>>>>>> +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>>>> +
>>>>>>>> +    return iostat_ctx->sbi;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>     void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>>>>>     {
>>>>>>>>         struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>>>>>> index eb99d05cf272..9006c3d41590 100644
>>>>>>>> --- a/fs/f2fs/iostat.h
>>>>>>>> +++ b/fs/f2fs/iostat.h
>>>>>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>>>>>         return iostat_ctx->post_read_ctx;
>>>>>>>>     }
>>>>>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>>>>>     extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>>>>>     extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>>>             struct bio *bio, struct bio_post_read_ctx *ctx);
>>>>>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>>>>>     #else
>>>>>>>>     static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>>>>>             enum iostat_type type, unsigned long long io_bytes) {}
>>>>>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>>>>>     static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>>>>>     static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>>>             struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>>>>>             return err;
>>>>>>>>         stat_inc_seg_type(sbi, curseg);
>>>>>>>>         locate_dirty_segment(sbi, old_segno);
>>>>>>>> +
>>>>>>>> +    f2fs_blkzoned_submit_merged_write(sbi, type);
>>>>>>>> +
>>>>>>>>         return 0;
>>>>>>>>     }
>>>>>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>>>>>             f2fs_bug_on(fio->sbi, true);
>>>>>>>>         }
>>>>>>>> -    if (IS_HOT(type))
>>>>>>>> -        fio->temp = HOT;
>>>>>>>> -    else if (IS_WARM(type))
>>>>>>>> -        fio->temp = WARM;
>>>>>>>> -    else
>>>>>>>> -        fio->temp = COLD;
>>>>>>>> +    fio->temp = type_to_temp(type);
>>>>>>>>         return type;
>>>>>>>>     }
>>>>>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>>>             return -EINVAL;
>>>>>>>>         }
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>>>> +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>>>>>> +            struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> +            enum page_type ptype;
>>>>>>>> +            enum temp_type temp;
>>>>>>>> +
>>>>>>>> +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>>>>>> +                continue;
>>>>>>>> +
>>>>>>>> +            if (!f2fs_blkaddr_in_seqzone(sbi,
>>>>>>>> +                    START_BLOCK(sbi, curseg->segno)))
>>>>>>>> +                continue;
>>>>>>>> +
>>>>>>>> +            ptype = PAGE_TYPE(type);
>>>>>>>> +            temp = type_to_temp(type);
>>>>>>>> +            down(&sbi->available_open_zones);
>>>>>>>> +            sbi->write_io[ptype][temp].zone_openned = true;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +#endif
>>>>>>>>         return 0;
>>>>>>>>     }
>>>>>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>         for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>>>             kfree(array[i].sum_blk);
>>>>>>>>             kfree(array[i].journal);
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +        if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>>>> +            enum page_type ptype = PAGE_TYPE(i);
>>>>>>>> +            enum temp_type temp = type_to_temp(i);
>>>>>>>> +
>>>>>>>> +            if (sbi->write_io[ptype][temp].zone_openned) {
>>>>>>>> +                up(&sbi->available_open_zones);
>>>>>>>> +                sbi->write_io[ptype][temp].zone_openned = false;
>>>>>>>> +            }
>>>>>>>> +        }
>>>>>>>> +#endif
>>>>>>>>         }
>>>>>>>>         kfree(array);
>>>>>>>>     }
>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>> index e1c0f418aa11..855978ca869f 100644
>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>> @@ -24,7 +24,8 @@
>>>>>>>>     #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
>>>>>>>>     #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>>>> -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>>>>>> +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
>>>>>>>> +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
>>>>>>>>     static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>>>>>                             unsigned short seg_type)
>>>>>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>>>>>                 return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>>>>>         return 0;
>>>>>>>>     }
>>>>>>>> +
>>>>>>>> +static inline enum temp_type type_to_temp(int type)
>>>>>>>> +{
>>>>>>>> +    if (IS_HOT(type))
>>>>>>>> +        return HOT;
>>>>>>>> +    else if (IS_WARM(type))
>>>>>>>> +        return WARM;
>>>>>>>> +    return COLD;
>>>>>>>> +}
>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>> index fdf358c7f808..954baa6c100d 100644
>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>>>>>                     sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>>>>>                 return -EINVAL;
>>>>>>>>             }
>>>>>>>> +
>>>>>>>> +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>>>>>         }
>>>>>>>>         zone_sectors = bdev_zone_sectors(bdev);
>>>>>>>> -- 
>>>>>>>> 2.40.1
>>>>
>>>>
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>>
>>> _______________________________________________
>>> Linux-f2fs-devel mailing list
>>> Linux-f2fs-devel@lists.sourceforge.net
>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-04-17  1:05 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-07 13:58 [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone Chao Yu
2024-04-10 18:26 ` Daeho Jeong
2024-04-12 21:11 ` Jaegeuk Kim
2024-04-13  1:33   ` Chao Yu
2024-04-14 15:19     ` Jaegeuk Kim
2024-04-15  3:26       ` Chao Yu
2024-04-15 14:01         ` Chao Yu
2024-04-16  0:59           ` Chao Yu
2024-04-16 16:51             ` Jaegeuk Kim
2024-04-17  1:05               ` Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).