From: Ankit Kumar <ankit.kumar@samsung.com>
To: axboe@kernel.dk
Cc: fio@vger.kernel.org, krish.reddy@samsung.com,
	joshi.k@samsung.com, anuj20.g@samsung.com,
	Ankit Kumar <ankit.kumar@samsung.com>
Subject: [PATCH v3 8/9] engines/io_uring: Enable zone device support for io_uring_cmd I/O engine
Date: Tue, 31 May 2022 19:01:54 +0530
Message-ID: <20220531133155.17493-9-ankit.kumar@samsung.com>
In-Reply-To: <20220531133155.17493-1-ankit.kumar@samsung.com>

Add zoned device specific ioengine_ops for io_uring_cmd:
* get_zoned_model
* report_zones
* reset_wp
* get_max_open_zones

Add the necessary NVMe ZNS specification opcodes and structures, along
with helper functions to submit admin and I/O passthrough commands for
these new NVMe ZNS specific commands.

For write workloads, iodepth must be set to 1: there is no I/O scheduler
for the character device, so queued writes could be reordered and violate
the sequential write requirement of a zone.
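
A minimal job sketch showing how these hooks might be exercised (the
device path /dev/ng0n1 and the parameter values are illustrative
assumptions, not part of this patch; patch 9/9 of this series adds the
actual example job files):

  [zns-seq-write]
  ioengine=io_uring_cmd
  cmd_type=nvme
  filename=/dev/ng0n1
  zonemode=zbd
  rw=write
  bs=128k
  iodepth=1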

Tested-by: Vincent Fu <vincent.fu@samsung.com>
Signed-off-by: Ankit Kumar <ankit.kumar@samsung.com>
---
 engines/io_uring.c |  32 ++++++
 engines/nvme.c     | 242 +++++++++++++++++++++++++++++++++++++++++++++
 engines/nvme.h     |  80 ++++++++++++++-
 3 files changed, 353 insertions(+), 1 deletion(-)

diff --git a/engines/io_uring.c b/engines/io_uring.c
index a7b7b166..5a5406d4 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -1164,6 +1164,34 @@ static int fio_ioring_cmd_get_file_size(struct thread_data *td,
 	return generic_get_file_size(td, f);
 }
 
+static int fio_ioring_cmd_get_zoned_model(struct thread_data *td,
+					  struct fio_file *f,
+					  enum zbd_zoned_model *model)
+{
+	return fio_nvme_get_zoned_model(td, f, model);
+}
+
+static int fio_ioring_cmd_report_zones(struct thread_data *td,
+				       struct fio_file *f, uint64_t offset,
+				       struct zbd_zone *zbdz,
+				       unsigned int nr_zones)
+{
+	return fio_nvme_report_zones(td, f, offset, zbdz, nr_zones);
+}
+
+static int fio_ioring_cmd_reset_wp(struct thread_data *td, struct fio_file *f,
+				   uint64_t offset, uint64_t length)
+{
+	return fio_nvme_reset_wp(td, f, offset, length);
+}
+
+static int fio_ioring_cmd_get_max_open_zones(struct thread_data *td,
+					     struct fio_file *f,
+					     unsigned int *max_open_zones)
+{
+	return fio_nvme_get_max_open_zones(td, f, max_open_zones);
+}
+
 static struct ioengine_ops ioengine_uring = {
 	.name			= "io_uring",
 	.version		= FIO_IOOPS_VERSION,
@@ -1200,6 +1228,10 @@ static struct ioengine_ops ioengine_uring_cmd = {
 	.open_file		= fio_ioring_cmd_open_file,
 	.close_file		= fio_ioring_cmd_close_file,
 	.get_file_size		= fio_ioring_cmd_get_file_size,
+	.get_zoned_model	= fio_ioring_cmd_get_zoned_model,
+	.report_zones		= fio_ioring_cmd_report_zones,
+	.reset_wp		= fio_ioring_cmd_reset_wp,
+	.get_max_open_zones	= fio_ioring_cmd_get_max_open_zones,
 	.options		= options,
 	.option_struct_size	= sizeof(struct ioring_options),
 };
diff --git a/engines/nvme.c b/engines/nvme.c
index 6fecf0ba..59550def 100644
--- a/engines/nvme.c
+++ b/engines/nvme.c
@@ -101,3 +101,245 @@ int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
 	close(fd);
 	return 0;
 }
+
+int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
+			     enum zbd_zoned_model *model)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	struct nvme_id_ns ns;
+	struct nvme_passthru_cmd cmd;
+	int fd, ret = 0;
+
+	if (f->filetype != FIO_TYPE_CHAR)
+		return -EINVAL;
+
+	/* File is not yet opened */
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return -errno;
+
+	/* Using nvme_id_ns for data as sizes are same */
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
+				NVME_CSI_ZNS, &ns);
+	if (ret) {
+		*model = ZBD_NONE;
+		goto out;
+	}
+
+	memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
+
+	/* Using nvme_id_ns for data as sizes are same */
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
+				NVME_CSI_ZNS, &ns);
+	if (ret) {
+		*model = ZBD_NONE;
+		goto out;
+	}
+
+	*model = ZBD_HOST_MANAGED;
+out:
+	close(fd);
+	return 0;
+}
+
+static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
+			     __u32 data_len, void *data)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode         = nvme_zns_cmd_mgmt_recv,
+		.nsid           = nsid,
+		.addr           = (__u64)(uintptr_t)data,
+		.data_len       = data_len,
+		.cdw10          = slba & 0xffffffff,
+		.cdw11          = slba >> 32,
+		.cdw12		= (data_len >> 2) - 1,
+		.cdw13		= NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
+		.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+	};
+
+	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+}
+
+int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
+			  uint64_t offset, struct zbd_zone *zbdz,
+			  unsigned int nr_zones)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	struct nvme_zone_report *zr;
+	struct nvme_zns_id_ns zns_ns;
+	struct nvme_id_ns ns;
+	unsigned int i = 0, j, zones_fetched = 0;
+	unsigned int max_zones, zones_chunks = 1024;
+	int fd, ret = 0;
+	__u32 zr_len;
+	__u64 zlen;
+
+	/* File is not yet opened */
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return -errno;
+
+	zones_fetched = 0;
+	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
+	zr = calloc(1, zr_len);
+	if (!zr)
+		return -ENOMEM;
+
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
+				NVME_CSI_NVM, &ns);
+	if (ret) {
+		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
+			ret);
+		goto out;
+	}
+
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
+				NVME_CSI_ZNS, &zns_ns);
+	if (ret) {
+		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
+			f->file_name, ret);
+		goto out;
+	}
+	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
+
+	max_zones = (f->real_file_size - offset) / zlen;
+	if (max_zones < nr_zones)
+		nr_zones = max_zones;
+
+	if (nr_zones < zones_chunks)
+		zones_chunks = nr_zones;
+
+	while (zones_fetched < nr_zones) {
+		if (zones_fetched + zones_chunks >= nr_zones) {
+			zones_chunks = nr_zones - zones_fetched;
+			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
+		}
+		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
+					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
+		if (ret) {
+			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
+				f->file_name, ret);
+			goto out;
+		}
+
+		/* Transform the zone-report */
+		for (j = 0; j < zr->nr_zones; j++, i++) {
+			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
+
+			zbdz[i].start = desc->zslba << data->lba_shift;
+			zbdz[i].len = zlen;
+			zbdz[i].wp = desc->wp << data->lba_shift;
+			zbdz[i].capacity = desc->zcap << data->lba_shift;
+
+			/* Zone Type is stored in first 4 bits. */
+			switch (desc->zt & 0x0f) {
+			case NVME_ZONE_TYPE_SEQWRITE_REQ:
+				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
+				break;
+			default:
+				log_err("%s: invalid type for zone at offset %llu.\n",
+					f->file_name, desc->zslba);
+				ret = -EIO;
+				goto out;
+			}
+
+			/* Zone State is stored in last 4 bits. */
+			switch (desc->zs >> 4) {
+			case NVME_ZNS_ZS_EMPTY:
+				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
+				break;
+			case NVME_ZNS_ZS_IMPL_OPEN:
+				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
+				break;
+			case NVME_ZNS_ZS_EXPL_OPEN:
+				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
+				break;
+			case NVME_ZNS_ZS_CLOSED:
+				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
+				break;
+			case NVME_ZNS_ZS_FULL:
+				zbdz[i].cond = ZBD_ZONE_COND_FULL;
+				break;
+			case NVME_ZNS_ZS_READ_ONLY:
+			case NVME_ZNS_ZS_OFFLINE:
+			default:
+				/* Treat all these conditions as offline (don't use!) */
+				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
+				zbdz[i].wp = zbdz[i].start;
+			}
+		}
+		zones_fetched += zr->nr_zones;
+		offset += zr->nr_zones * zlen;
+	}
+
+	ret = zones_fetched;
+out:
+	free(zr);
+	close(fd);
+
+	return ret;
+}
+
+int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
+		      uint64_t offset, uint64_t length)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	unsigned int nr_zones;
+	unsigned long long zslba;
+	int i, fd, ret = 0;
+
+	/* If the file is not yet opened, open it for this function. */
+	fd = f->fd;
+	if (fd < 0) {
+		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
+		if (fd < 0)
+			return -errno;
+	}
+
+	zslba = offset >> data->lba_shift;
+	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
+
+	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
+		struct nvme_passthru_cmd cmd = {
+			.opcode         = nvme_zns_cmd_mgmt_send,
+			.nsid           = data->nsid,
+			.cdw10          = zslba & 0xffffffff,
+			.cdw11          = zslba >> 32,
+			.cdw13          = NVME_ZNS_ZSA_RESET,
+			.addr           = (__u64)(uintptr_t)NULL,
+			.data_len       = 0,
+			.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+		};
+
+		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	}
+
+	if (f->fd < 0)
+		close(fd);
+	return -ret;
+}
+
+int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+				unsigned int *max_open_zones)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	struct nvme_zns_id_ns zns_ns;
+	int fd, ret = 0;
+
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return -errno;
+
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
+				NVME_CSI_ZNS, &zns_ns);
+	if (ret) {
+		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
+			f->file_name, ret);
+		goto out;
+	}
+
+	*max_open_zones = zns_ns.mor + 1;
+out:
+	close(fd);
+	return ret;
+}
diff --git a/engines/nvme.h b/engines/nvme.h
index 8e626bb2..70a89b74 100644
--- a/engines/nvme.h
+++ b/engines/nvme.h
@@ -43,8 +43,15 @@ struct nvme_uring_cmd {
 #define NVME_IDENTIFY_DATA_SIZE 4096
 #define NVME_IDENTIFY_CSI_SHIFT 24
 
+#define NVME_ZNS_ZRA_REPORT_ZONES 0
+#define NVME_ZNS_ZRAS_FEAT_ERZ (1 << 16)
+#define NVME_ZNS_ZSA_RESET 0x4
+#define NVME_ZONE_TYPE_SEQWRITE_REQ 0x2
+
 enum nvme_identify_cns {
-	NVME_IDENTIFY_CNS_NS = 0x00,
+	NVME_IDENTIFY_CNS_NS		= 0x00,
+	NVME_IDENTIFY_CNS_CSI_NS	= 0x05,
+	NVME_IDENTIFY_CNS_CSI_CTRL	= 0x06,
 };
 
 enum nvme_csi {
@@ -60,6 +67,18 @@ enum nvme_admin_opcode {
 enum nvme_io_opcode {
 	nvme_cmd_write			= 0x01,
 	nvme_cmd_read			= 0x02,
+	nvme_zns_cmd_mgmt_send		= 0x79,
+	nvme_zns_cmd_mgmt_recv		= 0x7a,
+};
+
+enum nvme_zns_zs {
+	NVME_ZNS_ZS_EMPTY		= 0x1,
+	NVME_ZNS_ZS_IMPL_OPEN		= 0x2,
+	NVME_ZNS_ZS_EXPL_OPEN		= 0x3,
+	NVME_ZNS_ZS_CLOSED		= 0x4,
+	NVME_ZNS_ZS_READ_ONLY		= 0xd,
+	NVME_ZNS_ZS_FULL		= 0xe,
+	NVME_ZNS_ZS_OFFLINE		= 0xf,
 };
 
 struct nvme_data {
@@ -127,10 +146,69 @@ static inline int ilog2(uint32_t i)
 	return log;
 }
 
+struct nvme_zns_lbafe {
+	__le64	zsze;
+	__u8	zdes;
+	__u8	rsvd9[7];
+};
+
+struct nvme_zns_id_ns {
+	__le16			zoc;
+	__le16			ozcs;
+	__le32			mar;
+	__le32			mor;
+	__le32			rrl;
+	__le32			frl;
+	__le32			rrl1;
+	__le32			rrl2;
+	__le32			rrl3;
+	__le32			frl1;
+	__le32			frl2;
+	__le32			frl3;
+	__le32			numzrwa;
+	__le16			zrwafg;
+	__le16			zrwasz;
+	__u8			zrwacap;
+	__u8			rsvd53[2763];
+	struct nvme_zns_lbafe	lbafe[64];
+	__u8			vs[256];
+};
+
+struct nvme_zns_desc {
+	__u8	zt;
+	__u8	zs;
+	__u8	za;
+	__u8	zai;
+	__u8	rsvd4[4];
+	__le64	zcap;
+	__le64	zslba;
+	__le64	wp;
+	__u8	rsvd32[32];
+};
+
+struct nvme_zone_report {
+	__le64			nr_zones;
+	__u8			rsvd8[56];
+	struct nvme_zns_desc	entries[];
+};
+
 int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
 		      __u64 *nlba);
 
 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
 			    struct iovec *iov);
 
+int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
+			     enum zbd_zoned_model *model);
+
+int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
+			  uint64_t offset, struct zbd_zone *zbdz,
+			  unsigned int nr_zones);
+
+int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
+		      uint64_t offset, uint64_t length);
+
+int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+				unsigned int *max_open_zones);
+
 #endif
-- 
2.17.1

