All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ankit Kumar <ankit.kumar@samsung.com>
To: axboe@kernel.dk, vincentfu@gmail.com
Cc: fio@vger.kernel.org, kbusch@kernel.org, joshi.k@samsung.com,
	martin.petersen@oracle.com, Ankit Kumar <ankit.kumar@samsung.com>
Subject: [PATCH v3 03/10] engines:io_uring: enable support for separate metadata buffer
Date: Mon, 14 Aug 2023 20:27:40 +0530	[thread overview]
Message-ID: <20230814145747.114725-4-ankit.kumar@samsung.com> (raw)
In-Reply-To: <20230814145747.114725-1-ankit.kumar@samsung.com>

This patch enables support for a separate metadata buffer with the
io_uring_cmd ioengine. As the metadata size is not known at buffer
allocation time, we provide an option, md_per_io_size. This option must
be used to specify the metadata buffer size for a single IO if the
namespace is formatted with a separate metadata buffer.

For the sake of consistency, this is the same option as used by SPDK's
external ioengine.

Signed-off-by: Ankit Kumar <ankit.kumar@samsung.com>
---
 HOWTO.rst          |  4 ++++
 engines/io_uring.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 engines/nvme.c     | 16 ++++++--------
 fio.1              |  3 +++
 4 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/HOWTO.rst b/HOWTO.rst
index ac8314f3..6e0677f2 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2487,6 +2487,10 @@ with the caveat that when used on the command line, they must come after the
         want fio to use placement identifier only at indices 0, 2 and 5 specify
         ``fdp_pli=0,2,5``.
 
+.. option:: md_per_io_size=int : [io_uring_cmd]
+
+	Size in bytes for separate metadata buffer per IO. Default: 0.
+
 .. option:: cpuload=int : [cpuio]
 
 	Attempt to use the specified percentage of CPU cycles. This is a mandatory
diff --git a/engines/io_uring.c b/engines/io_uring.c
index 30d9ccd7..4916e3b0 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -59,6 +59,7 @@ struct ioring_data {
 	int ring_fd;
 
 	struct io_u **io_u_index;
+	char *md_buf;
 
 	int *fds;
 
@@ -95,6 +96,7 @@ struct ioring_options {
 	unsigned int uncached;
 	unsigned int nowait;
 	unsigned int force_async;
+	unsigned int md_per_io_size;
 	enum uring_cmd_type cmd_type;
 };
 
@@ -217,6 +219,16 @@ static struct fio_option options[] = {
 		.group	= FIO_OPT_G_IOURING,
 	},
 	CMDPRIO_OPTIONS(struct ioring_options, FIO_OPT_G_IOURING),
+	{
+		.name	= "md_per_io_size",
+		.lname	= "Separate Metadata Buffer Size per I/O",
+		.type	= FIO_OPT_INT,
+		.off1	= offsetof(struct ioring_options, md_per_io_size),
+		.def	= "0",
+		.help	= "Size of separate metadata buffer per I/O (Default: 0)",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_IOURING,
+	},
 	{
 		.name	= NULL,
 	},
@@ -631,6 +643,7 @@ static void fio_ioring_cleanup(struct thread_data *td)
 
 		fio_cmdprio_cleanup(&ld->cmdprio);
 		free(ld->io_u_index);
+		free(ld->md_buf);
 		free(ld->iovecs);
 		free(ld->fds);
 		free(ld->dsm);
@@ -1016,6 +1029,7 @@ static int fio_ioring_init(struct thread_data *td)
 {
 	struct ioring_options *o = td->eo;
 	struct ioring_data *ld;
+	unsigned long long md_size;
 	int ret;
 
 	/* sqthread submission requires registered files */
@@ -1036,6 +1050,28 @@ static int fio_ioring_init(struct thread_data *td)
 
 	/* io_u index */
 	ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *));
+
+	/*
+	 * metadata buffer for nvme command.
+	 * We are only supporting iomem=malloc / mem=malloc as of now.
+	 */
+	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
+	    (o->cmd_type == FIO_URING_CMD_NVME) && o->md_per_io_size) {
+		md_size = (unsigned long long) o->md_per_io_size
+				* (unsigned long long) td->o.iodepth;
+		md_size += page_mask + td->o.mem_align;
+		if (td->o.mem_align && td->o.mem_align > page_size)
+			md_size += td->o.mem_align - page_size;
+		if (td->o.mem_type == MEM_MALLOC) {
+			ld->md_buf = malloc(md_size);
+			if (!ld->md_buf)
+				return 1;
+		} else {
+			log_err("fio: Only iomem=malloc or mem=malloc is supported\n");
+			return 1;
+		}
+	}
+
 	ld->iovecs = calloc(td->o.iodepth, sizeof(struct iovec));
 
 	td->io_ops_data = ld;
@@ -1062,8 +1098,17 @@ static int fio_ioring_init(struct thread_data *td)
 static int fio_ioring_io_u_init(struct thread_data *td, struct io_u *io_u)
 {
 	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+	char *p;
 
 	ld->io_u_index[io_u->index] = io_u;
+
+	if (!strcmp(td->io_ops->name, "io_uring_cmd")) {
+		p = PTR_ALIGN(ld->md_buf, page_mask) + td->o.mem_align;
+		p += o->md_per_io_size * io_u->index;
+		io_u->mmap_data = p;
+	}
+
 	return 0;
 }
 
@@ -1117,6 +1162,15 @@ static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f)
 				td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
 				return 1;
 			}
+			if (data->ms && !data->lba_ext && ddir != DDIR_TRIM &&
+			    (o->md_per_io_size < ((td->o.max_bs[ddir] / data->lba_size) *
+						  data->ms))) {
+				log_err("%s: md_per_io_size should be at least %llu bytes\n",
+					f->file_name,
+					((td->o.max_bs[ddir] / data->lba_size) * data->ms));
+				td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
+				return 1;
+			}
                 }
 	}
 	if (!ld || !o->registerfiles)
diff --git a/engines/nvme.c b/engines/nvme.c
index 7e891eed..65725e3c 100644
--- a/engines/nvme.c
+++ b/engines/nvme.c
@@ -79,6 +79,10 @@ int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
 		cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
 		cmd->data_len = io_u->xfer_buflen;
 	}
+	if (data->lba_shift && data->ms) {
+		cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data;
+		cmd->metadata_len = (nlb + 1) * data->ms;
+	}
 	cmd->nsid = data->nsid;
 	return 0;
 }
@@ -149,21 +153,13 @@ int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, struct nvme_data *data)
 		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
 
 	data->lba_size = 1 << ns.lbaf[format_idx].ds;
+	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);
 
 	/*
-	 * Only extended LBA can be supported.
 	 * Bit 4 for flbas indicates if metadata is transferred at the end of
 	 * logical block creating an extended LBA.
 	 */
-	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);
-	if (data->ms && !((ns.flbas >> 4) & 0x1)) {
-		log_err("%s: only extended logical block can be supported\n",
-			f->file_name);
-		err = -ENOTSUP;
-		goto out;
-	}
-
-	if (data->ms)
+	if (data->ms && ((ns.flbas >> 4) & 0x1))
 		data->lba_ext = data->lba_size + data->ms;
 	else
 		data->lba_shift = ilog2(data->lba_size);
diff --git a/fio.1 b/fio.1
index f62617e7..6b49a747 100644
--- a/fio.1
+++ b/fio.1
@@ -2247,6 +2247,9 @@ By default, the job will cycle through all available Placement IDs, so use this
 to isolate these identifiers to specific jobs. If you want fio to use placement
 identifier only at indices 0, 2 and 5 specify, you would set `fdp_pli=0,2,5`.
 .TP
+.BI (io_uring_cmd)md_per_io_size \fR=\fPint
+Size in bytes for separate metadata buffer per IO. Default: 0.
+.TP
 .BI (cpuio)cpuload \fR=\fPint
 Attempt to use the specified percentage of CPU cycles. This is a mandatory
 option when using cpuio I/O engine.
-- 
2.25.1


  parent reply	other threads:[~2023-08-14  9:41 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20230814093836epcas5p44bc762fa6e4368d532126494115aeba4@epcas5p4.samsung.com>
2023-08-14 14:57 ` [PATCH v3 00/10] Protection information support for io_uring passthrough engine Ankit Kumar
2023-08-14 14:40   ` Vincent Fu
     [not found]   ` <CGME20230814093838epcas5p31b8e832e14afecd49ac828505f325b74@epcas5p3.samsung.com>
2023-08-14 14:57     ` [PATCH v3 01/10] engines:io_uring: add missing error during open file Ankit Kumar
     [not found]   ` <CGME20230814093840epcas5p4177e2c9b9b01f99ea5fd1edd6bc8648b@epcas5p4.samsung.com>
2023-08-14 14:57     ` [PATCH v3 02/10] engines:io_uring: update arguments to fetch nvme data Ankit Kumar
     [not found]   ` <CGME20230814093842epcas5p22d5ee07c754ebd5067df214fe4f26dec@epcas5p2.samsung.com>
2023-08-14 14:57     ` Ankit Kumar [this message]
     [not found]   ` <CGME20230814093844epcas5p2a8eddaeddfbfd01e219491c2c4d79cd7@epcas5p2.samsung.com>
2023-08-14 14:57     ` [PATCH v3 04/10] engines:io_uring: uring_cmd add support for protection info Ankit Kumar
     [not found]   ` <CGME20230814093845epcas5p1e05696c12c9a9720252b80a522d7199c@epcas5p1.samsung.com>
2023-08-14 14:57     ` [PATCH v3 05/10] io_u: move engine data out of union Ankit Kumar
     [not found]   ` <CGME20230814093848epcas5p412a0c5dbf28155c30fe89297135c021d@epcas5p4.samsung.com>
2023-08-14 14:57     ` [PATCH v3 06/10] crc: pull required crc16-t10 files from linux kernel Ankit Kumar
     [not found]   ` <CGME20230814093849epcas5p431ac99960acca2847c4c3226c5781c7e@epcas5p4.samsung.com>
2023-08-14 14:57     ` [PATCH v3 07/10] engines:io_uring: generate and verify pi for 16b guard Ankit Kumar
     [not found]   ` <CGME20230814093851epcas5p28d3ff209e6f92d407d021a566946cacd@epcas5p2.samsung.com>
2023-08-14 14:57     ` [PATCH v3 08/10] crc: pull required crc64 nvme apis from linux kernel Ankit Kumar
     [not found]   ` <CGME20230814093853epcas5p4973ef04622e9bafbc73a9a71c84b94ca@epcas5p4.samsung.com>
2023-08-14 14:57     ` [PATCH v3 09/10] engines:nvme: pull required 48 bit accessors " Ankit Kumar
     [not found]   ` <CGME20230814093854epcas5p3875babea8f98e2414d06ca075d0cd18e@epcas5p3.samsung.com>
2023-08-14 14:57     ` [PATCH v3 10/10] engines:io_uring: generate and verify pi for 64b guard Ankit Kumar
2023-08-14 21:53   ` [PATCH v3 00/10] Protection information support for io_uring passthrough engine Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230814145747.114725-4-ankit.kumar@samsung.com \
    --to=ankit.kumar@samsung.com \
    --cc=axboe@kernel.dk \
    --cc=fio@vger.kernel.org \
    --cc=joshi.k@samsung.com \
    --cc=kbusch@kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=vincentfu@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.