All of lore.kernel.org
 help / color / mirror / Atom feed
From: Krishna Kanth Reddy <krish.reddy@samsung.com>
To: axboe@kernel.dk
Cc: fio@vger.kernel.org, Ankit Kumar <ankit.kumar@samsung.com>,
	Krishna Kanth Reddy <krish.reddy@samsung.com>
Subject: [PATCH 1/4] Add Zone Append command support for NVMe Zoned Namespaces (ZNS)
Date: Thu, 25 Jun 2020 23:08:49 +0530	[thread overview]
Message-ID: <1593106733-19596-2-git-send-email-krish.reddy@samsung.com> (raw)
In-Reply-To: <1593106733-19596-1-git-send-email-krish.reddy@samsung.com>

From: Ankit Kumar <ankit.kumar@samsung.com>

defined in NVM Express TP4053. Added a new FIO option zone_append.
When zone_append option is enabled, the existing write path will
send Zone Append command with offset as start of the Zone.

Signed-off-by: Krishna Kanth Reddy <krish.reddy@samsung.com>
---
 HOWTO            |  7 +++++
 fio.1            |  7 +++++
 io_u.c           |  4 +--
 io_u.h           | 10 +++++--
 ioengines.c      |  4 +--
 options.c        | 10 +++++++
 thread_options.h |  2 ++
 zbd.c            | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 zbd.h            | 13 +++++---
 9 files changed, 131 insertions(+), 16 deletions(-)

diff --git a/HOWTO b/HOWTO
index 8cf8d65..62b5ac8 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1010,6 +1010,13 @@ Target file/device
 	:option:`zonesize` bytes of data have been transferred. This parameter
 	must be zero for :option:`zonemode` =zbd.
 
+.. option:: zone_append=bool
+
+	For :option:`zonemode` =zbd and for :option:`rw` =write or :option:
+	`rw` =randwrite, if zone_append is enabled, the the io_u points to the
+	starting offset of a zone. On successful completion the multiple of
+	sectors relative to the zone's starting offset is returned.
+
 .. option:: read_beyond_wp=bool
 
 	This parameter applies to :option:`zonemode` =zbd only.
diff --git a/fio.1 b/fio.1
index f134e0b..09add8f 100644
--- a/fio.1
+++ b/fio.1
@@ -782,6 +782,13 @@ sequential workloads and ignored for random workloads. For read workloads,
 see also \fBread_beyond_wp\fR.
 
 .TP
+.BI zone_append
+For \fBzonemode\fR =zbd and for \fBrw\fR=write or \fBrw\fR=randwrite, if
+zone_append is enabled, the io_u points to the starting offset of a zone. On
+successful completion the multiple of sectors relative to the zone's starting
+offset is returned.
+
+.TP
 .BI read_beyond_wp \fR=\fPbool
 This parameter applies to \fBzonemode=zbd\fR only.
 
diff --git a/io_u.c b/io_u.c
index 7f50906..b891a9b 100644
--- a/io_u.c
+++ b/io_u.c
@@ -778,7 +778,7 @@ void put_io_u(struct thread_data *td, struct io_u *io_u)
 {
 	const bool needs_lock = td_async_processing(td);
 
-	zbd_put_io_u(io_u);
+	zbd_put_io_u(td, io_u);
 
 	if (td->parent)
 		td = td->parent;
@@ -1342,7 +1342,7 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
 		if (!fill_io_u(td, io_u))
 			break;
 
-		zbd_put_io_u(io_u);
+		zbd_put_io_u(td, io_u);
 
 		put_file_log(td, f);
 		td_io_close_file(td, f);
diff --git a/io_u.h b/io_u.h
index 87c2920..f5b24fd 100644
--- a/io_u.h
+++ b/io_u.h
@@ -94,19 +94,25 @@ struct io_u {
 	};
 
 	/*
+	 * for zone append this is the start offset of the zone.
+	 */
+	unsigned long long zone_start_offset;
+
+	/*
 	 * ZBD mode zbd_queue_io callback: called after engine->queue operation
 	 * to advance a zone write pointer and eventually unlock the I/O zone.
 	 * @q indicates the I/O queue status (busy, queued or completed).
 	 * @success == true means that the I/O operation has been queued or
 	 * completed successfully.
 	 */
-	void (*zbd_queue_io)(struct io_u *, int q, bool success);
+	void (*zbd_queue_io)(struct thread_data *, struct io_u *, int q,
+			     bool success);
 
 	/*
 	 * ZBD mode zbd_put_io callback: called in after completion of an I/O
 	 * or commit of an async I/O to unlock the I/O target zone.
 	 */
-	void (*zbd_put_io)(const struct io_u *);
+	void (*zbd_put_io)(struct thread_data *, const struct io_u *);
 
 	/*
 	 * Callback for io completion
diff --git a/ioengines.c b/ioengines.c
index 2c7a0df..81ac846 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -328,7 +328,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
 	}
 
 	ret = td->io_ops->queue(td, io_u);
-	zbd_queue_io_u(io_u, ret);
+	zbd_queue_io_u(td, io_u, ret);
 
 	unlock_file(td, io_u->file);
 
@@ -370,7 +370,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
 	if (!td->io_ops->commit) {
 		io_u_mark_submit(td, 1);
 		io_u_mark_complete(td, 1);
-		zbd_put_io_u(io_u);
+		zbd_put_io_u(td, io_u);
 	}
 
 	if (ret == FIO_Q_COMPLETED) {
diff --git a/options.c b/options.c
index 85a0f49..d54da81 100644
--- a/options.c
+++ b/options.c
@@ -3317,6 +3317,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		},
 	},
 	{
+		.name	= "zone_append",
+		.lname	= "zone_append",
+		.type	= FIO_OPT_BOOL,
+		.off1	= offsetof(struct thread_options, zone_append),
+		.help	= "Use Zone Append for Zone block device",
+		.def	= "0",
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_ZONE,
+	},
+	{
 		.name	= "zonesize",
 		.lname	= "Zone size",
 		.type	= FIO_OPT_STR_VAL,
diff --git a/thread_options.h b/thread_options.h
index 968ea0a..45c5ef8 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -195,6 +195,7 @@ struct thread_options {
 	unsigned long long zone_size;
 	unsigned long long zone_skip;
 	enum fio_zone_mode zone_mode;
+	unsigned int zone_append;
 	unsigned long long lockmem;
 	enum fio_memtype mem_type;
 	unsigned int mem_align;
@@ -631,6 +632,7 @@ struct thread_options_pack {
 	uint32_t allow_mounted_write;
 
 	uint32_t zone_mode;
+	uint32_t zone_append;
 } __attribute__((packed));
 
 extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);
diff --git a/zbd.c b/zbd.c
index 8cf8f81..ffdb766 100644
--- a/zbd.c
+++ b/zbd.c
@@ -455,6 +455,7 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
 		for (i = 0; i < nrz; i++, j++, z++, p++) {
 			mutex_init_pshared_with_type(&p->mutex,
 						     PTHREAD_MUTEX_RECURSIVE);
+			cond_init_pshared(&p->reset_cond);
 			p->start = z->start;
 			switch (z->cond) {
 			case ZBD_ZONE_COND_NOT_WP:
@@ -469,6 +470,7 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
 			}
 			p->type = z->type;
 			p->cond = z->cond;
+			p->pending_ios = 0;
 			if (j > 0 && p->start != p[-1].start + zone_size) {
 				log_info("%s: invalid zone data\n",
 					 f->file_name);
@@ -1196,20 +1198,24 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u,
 
 /**
  * zbd_queue_io - update the write pointer of a sequential zone
+ * @td: fio thread data.
  * @io_u: I/O unit
  * @success: Whether or not the I/O unit has been queued successfully
  * @q: queueing status (busy, completed or queued).
  *
  * For write and trim operations, update the write pointer of the I/O unit
  * target zone.
+ * For zone append operation, release the zone mutex
  */
-static void zbd_queue_io(struct io_u *io_u, int q, bool success)
+static void zbd_queue_io(struct thread_data *td, struct io_u *io_u, int q,
+			 bool success)
 {
 	const struct fio_file *f = io_u->file;
 	struct zoned_block_device_info *zbd_info = f->zbd_info;
 	struct fio_zone_info *z;
 	uint32_t zone_idx;
 	uint64_t zone_end;
+	int ret;
 
 	if (!zbd_info)
 		return;
@@ -1241,6 +1247,8 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success)
 			zbd_info->sectors_with_data += zone_end - z->wp;
 		pthread_mutex_unlock(&zbd_info->mutex);
 		z->wp = zone_end;
+		if (td->o.zone_append)
+			z->pending_ios++;
 		break;
 	case DDIR_TRIM:
 		assert(z->wp == z->start);
@@ -1250,18 +1258,22 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success)
 	}
 
 unlock:
-	if (!success || q != FIO_Q_QUEUED) {
+	if (!success || q != FIO_Q_QUEUED || td->o.zone_append) {
 		/* BUSY or COMPLETED: unlock the zone */
-		pthread_mutex_unlock(&z->mutex);
-		io_u->zbd_put_io = NULL;
+		ret = pthread_mutex_unlock(&z->mutex);
+		assert(ret == 0);
+		if (!success || q != FIO_Q_QUEUED)
+			io_u->zbd_put_io = NULL;
 	}
 }
 
 /**
  * zbd_put_io - Unlock an I/O unit target zone lock
+ * For zone append operation we don't hold zone lock
+ * @td: fio thread data.
  * @io_u: I/O unit
  */
-static void zbd_put_io(const struct io_u *io_u)
+static void zbd_put_io(struct thread_data *td, const struct io_u *io_u)
 {
 	const struct fio_file *f = io_u->file;
 	struct zoned_block_device_info *zbd_info = f->zbd_info;
@@ -1283,6 +1295,19 @@ static void zbd_put_io(const struct io_u *io_u)
 	       "%s: terminate I/O (%lld, %llu) for zone %u\n",
 	       f->file_name, io_u->offset, io_u->buflen, zone_idx);
 
+	if (td->o.zone_append) {
+		pthread_mutex_lock(&z->mutex);
+		if (z->pending_ios > 0) {
+			z->pending_ios--;
+			/*
+			 * Other threads may be waiting for pending I/O's to
+			 * complete for this zone. Notify them.
+			 */
+			if (!z->pending_ios)
+				pthread_cond_broadcast(&z->reset_cond);
+		}
+	}
+
 	ret = pthread_mutex_unlock(&z->mutex);
 	assert(ret == 0);
 	zbd_check_swd(f);
@@ -1524,16 +1549,69 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			 * asynchronously and since we will submit the zone
 			 * reset synchronously, wait until previously submitted
 			 * write requests have completed before issuing a
-			 * zone reset.
+			 * zone reset. For append request release the zone lock
+			 * as other threads will acquire it at the time of
+			 * zbd_put_io.
 			 */
+reset:
+			if (td->o.zone_append)
+				pthread_mutex_unlock(&zb->mutex);
 			io_u_quiesce(td);
+			if (td->o.zone_append)
+				pthread_mutex_lock(&zb->mutex);
+
 			zb->reset_zone = 0;
+			if (td->o.zone_append) {
+				/*
+				 * While processing the current thread queued
+				 * requests the other thread may have already
+				 * done zone reset so need to check zone full
+				 * condition again.
+				 */
+				if (!zbd_zone_full(f, zb, min_bs))
+					goto proceed;
+				/*
+				 * Wait for the pending requests to be completed
+				 * else we are ok to reset this zone.
+				 */
+				if (zb->pending_ios) {
+					pthread_cond_wait(&zb->reset_cond, &zb->mutex);
+					goto proceed;
+				}
+			}
+
 			if (zbd_reset_zone(td, f, zb) < 0)
 				goto eof;
+
+			/* Notify other threads waiting for zone mutex */
+			if (td->o.zone_append)
+				pthread_cond_broadcast(&zb->reset_cond);
+		}
+proceed:
+		/*
+		 * Check for zone full condition again. For zone append request
+		 * the zone may already be reset, written and full while we
+		 * were waiting for our turn.
+		 */
+		if (zbd_zone_full(f, zb, min_bs)) {
+			goto reset;
 		}
+
 		/* Make writes occur at the write pointer */
 		assert(!zbd_zone_full(f, zb, min_bs));
 		io_u->offset = zb->wp;
+
+		/*
+		 * Support zone append for both regular and zoned block
+		 * device.
+		 */
+		if (td->o.zone_append) {
+			if (f->zbd_info->model == ZBD_NONE)
+				io_u->zone_start_offset = zb->wp;
+			else
+				io_u->zone_start_offset = zb->start;
+		}
+
 		if (!is_valid_offset(f, io_u->offset)) {
 			dprint(FD_ZBD, "Dropped request with offset %llu\n",
 			       io_u->offset);
diff --git a/zbd.h b/zbd.h
index e942a7f..eac42f7 100644
--- a/zbd.h
+++ b/zbd.h
@@ -23,8 +23,10 @@ enum io_u_action {
  * struct fio_zone_info - information about a single ZBD zone
  * @start: zone start location (bytes)
  * @wp: zone write pointer location (bytes)
+ * @pending_ios: Number of IO's pending in this zone
  * @verify_block: number of blocks that have been verified for this zone
  * @mutex: protects the modifiable members in this structure
+ * @reset_cond: zone reset check condition. only relevant for zone_append.
  * @type: zone type (BLK_ZONE_TYPE_*)
  * @cond: zone state (BLK_ZONE_COND_*)
  * @open: whether or not this zone is currently open. Only relevant if
@@ -33,8 +35,10 @@ enum io_u_action {
  */
 struct fio_zone_info {
 	pthread_mutex_t		mutex;
+	pthread_cond_t		reset_cond;
 	uint64_t		start;
 	uint64_t		wp;
+	uint32_t		pending_ios;
 	uint32_t		verify_block;
 	enum zbd_zone_type	type:2;
 	enum zbd_zone_cond	cond:4;
@@ -96,18 +100,19 @@ static inline void zbd_close_file(struct fio_file *f)
 		zbd_free_zone_info(f);
 }
 
-static inline void zbd_queue_io_u(struct io_u *io_u, enum fio_q_status status)
+static inline void zbd_queue_io_u(struct thread_data *td, struct io_u *io_u,
+				  enum fio_q_status status)
 {
 	if (io_u->zbd_queue_io) {
-		io_u->zbd_queue_io(io_u, status, io_u->error == 0);
+		io_u->zbd_queue_io(td, io_u, status, io_u->error == 0);
 		io_u->zbd_queue_io = NULL;
 	}
 }
 
-static inline void zbd_put_io_u(struct io_u *io_u)
+static inline void zbd_put_io_u(struct thread_data *td, struct io_u *io_u)
 {
 	if (io_u->zbd_put_io) {
-		io_u->zbd_put_io(io_u);
+		io_u->zbd_put_io(td, io_u);
 		io_u->zbd_queue_io = NULL;
 		io_u->zbd_put_io = NULL;
 	}
-- 
2.7.4



  parent reply	other threads:[~2020-06-25 17:38 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20200625174119epcas5p122a2197102fe336aa35fdea1273fd1b0@epcas5p1.samsung.com>
2020-06-25 17:38 ` [PATCH 0/4] v2 Patchset : Zone Append command support for NVMe Zoned Namespaces (ZNS) Krishna Kanth Reddy
     [not found]   ` <CGME20200625174124epcas5p18e4fbc502c9cf1fef7e84ba5cefba945@epcas5p1.samsung.com>
2020-06-25 17:38     ` Krishna Kanth Reddy [this message]
2020-06-26  5:33       ` [PATCH 1/4] Add " Damien Le Moal
2020-07-03 17:17         ` Krishna Kanth Reddy
2020-06-26  5:50       ` Damien Le Moal
2020-07-03 16:50         ` Krishna Kanth Reddy
     [not found]   ` <CGME20200625174129epcas5p304bf58bb381b4b0c39e0ff91b50a23a9@epcas5p3.samsung.com>
2020-06-25 17:38     ` [PATCH 2/4] libaio: support for Zone Append command Krishna Kanth Reddy
2020-06-26  5:38       ` Damien Le Moal
2020-07-03 10:47         ` Krishna Kanth Reddy
     [not found]   ` <CGME20200625174131epcas5p36cf7cd413dcb698f117474df71e5648b@epcas5p3.samsung.com>
2020-06-25 17:38     ` [PATCH 3/4] iouring: " Krishna Kanth Reddy
2020-06-26  5:43       ` Damien Le Moal
2020-07-03 10:37         ` Krishna Kanth Reddy
     [not found]   ` <CGME20200625174133epcas5p1eace8f03319bee805b93c50fe6c690c7@epcas5p1.samsung.com>
2020-06-25 17:38     ` [PATCH 4/4] t/zbd: Add support to verify Zone Append command with libaio, io_uring IO engine tests Krishna Kanth Reddy
2020-06-25 18:44       ` Dmitry Fomichev
2020-07-03  8:46         ` Krishna Kanth Reddy
2020-06-26  5:45       ` Damien Le Moal
2020-07-03  9:09         ` Krishna Kanth Reddy
2020-07-23 14:32 [PATCH 1/4] Add Zone Append command support for NVMe Zoned Namespaces (ZNS) Alexey Dobriyan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1593106733-19596-2-git-send-email-krish.reddy@samsung.com \
    --to=krish.reddy@samsung.com \
    --cc=ankit.kumar@samsung.com \
    --cc=axboe@kernel.dk \
    --cc=fio@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.