From: Anuj Gupta <anuj20.g@samsung.com>
To: Kanchan Joshi <joshi.k@samsung.com>
Cc: axboe@kernel.dk, vincentfu@gmail.com, ankit.kumar@samsung.com,
fio@vger.kernel.org
Subject: Re: [PATCH 2/2] t/io_uring: add support for async-passthru
Date: Thu, 25 Aug 2022 17:09:52 +0530 [thread overview]
Message-ID: <20220825113952.GB16653@test-zns> (raw)
In-Reply-To: <20220825082444.GB22496@test-zns>
[-- Attachment #1: Type: text/plain, Size: 7596 bytes --]
On Thu, Aug 25, 2022 at 01:54:44PM +0530, Kanchan Joshi wrote:
> On Wed, Aug 24, 2022 at 03:01:09PM +0530, Anuj Gupta wrote:
> > This patch adds support for async-passthru in t/io_uring. User needs to
> > specify -u1 option in the command
> >
> > Example commandline:
> > t/io_uring -b512 -d128 -c32 -s32 -p0 -F1 -B0 -O0 -n1 -u1 /dev/ng0n1
> >
> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > ---
> > t/io_uring.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++---
> > 1 file changed, 138 insertions(+), 8 deletions(-)
> >
> > diff --git a/t/io_uring.c b/t/io_uring.c
> > index a42abd46..78fedaf1 100644
> > --- a/t/io_uring.c
> > +++ b/t/io_uring.c
> > @@ -35,6 +35,7 @@
> > #include "../lib/rand.h"
> > #include "../minmax.h"
> > #include "../os/linux/io_uring.h"
> > +#include "../engines/nvme.h"
> >
> > struct io_sq_ring {
> > unsigned *head;
> > @@ -139,6 +140,9 @@ static int random_io = 1; /* random or sequential IO */
> > static int register_ring = 1; /* register ring */
> > static int use_sync = 0; /* use preadv2 */
> > static int numa_placement = 0; /* set to node of device */
> > +static int pt = 0; /* passthrough I/O or not */
> > +static unsigned int nsid = 0; /* nsid field required for nvme-passthrough */
> > +static unsigned int lbs = 0; /* logical_block_size field required for nvme-passthrough */
> >
> > static unsigned long tsc_rate;
> >
> > @@ -161,6 +165,54 @@ struct io_uring_map_buffers {
> > };
> > #endif
> >
> > +static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
> > + enum nvme_csi csi, void *data)
> > +{
> > + struct nvme_passthru_cmd cmd = {
> > + .opcode = nvme_admin_identify,
> > + .nsid = nsid,
> > + .addr = (__u64)(uintptr_t)data,
> > + .data_len = NVME_IDENTIFY_DATA_SIZE,
> > + .cdw10 = cns,
> > + .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
> > + .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
> > + };
> > +
> > + return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
> > +}
> > +
> > +static int nvme_get_info(int fd, __u32 *nsid, __u32 *lba_sz, __u64 *nlba)
> > +{
> > + struct nvme_id_ns ns;
> > + int namespace_id;
> > + int err;
> > +
> > + namespace_id = ioctl(fd, NVME_IOCTL_ID);
> > + if (namespace_id < 0) {
> > + fprintf(stderr, "error failed to fetch namespace-id\n");
> > + close(fd);
> > + return -errno;
> > + }
> > +
> > + /*
> > + * Identify namespace to get namespace-id, namespace size in LBA's
> > + * and LBA data size.
> > + */
> > + err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
> > + NVME_CSI_NVM, &ns);
> > + if (err) {
> > + fprintf(stderr, "error failed to fetch identify namespace\n");
> > + close(fd);
> > + return err;
> > + }
> > +
> > + *nsid = namespace_id;
> > + *lba_sz = 1 << ns.lbaf[(ns.flbas & 0x0f)].ds;
> > + *nlba = ns.nsze;
> > +
> > + return 0;
> > +}
> > +
> > static unsigned long cycles_to_nsec(unsigned long cycles)
> > {
> > uint64_t val;
> > @@ -457,12 +509,13 @@ static unsigned file_depth(struct submitter *s)
> >
> > static void init_io(struct submitter *s, unsigned index)
> > {
> > - struct io_uring_sqe *sqe = &s->sqes[index];
> > + struct io_uring_sqe *sqe = NULL;
> > unsigned long offset;
> > struct file *f;
> > long r;
> >
> > if (do_nop) {
> > + sqe = &s->sqes[index];
>
> Nit: this and previous change is not must as <see below>
>
got it, will change this
> > sqe->opcode = IORING_OP_NOP;
> > return;
> > }
> > @@ -490,6 +543,41 @@ static void init_io(struct submitter *s, unsigned index)
> > f->cur_off = 0;
> > }
> >
> > + if (pt) {
> > + struct nvme_uring_cmd *cmd;
> > + unsigned long long slba;
> > + unsigned long long nlb;
> > +
> > + sqe = &s->sqes[index<<1];
>
> this is enough.
>
> > + if (register_files) {
> > + sqe->fd = f->fixed_fd;
> > + sqe->flags = IOSQE_FIXED_FILE;
> > + } else {
> > + sqe->fd = f->real_fd;
> > + sqe->flags = 0;
> > + }
> > + sqe->opcode = IORING_OP_URING_CMD;
> > + sqe->user_data = (unsigned long) f->fileno;
> > + if (stats)
> > + sqe->user_data |= ((unsigned long)s->clock_index << 32);
> > + sqe->cmd_op = NVME_URING_CMD_IO;
> > + slba = offset / lbs;
> > + nlb = bs/lbs - 1;
> > + cmd = (struct nvme_uring_cmd *)&sqe->cmd;
> > + memset(cmd, 0, sizeof(struct nvme_uring_cmd));
> > + /* cdw10 and cdw11 represent starting slba*/
> > + cmd->cdw10 = slba & 0xffffffff;
> > + cmd->cdw11 = slba >> 32;
> > + /* cdw12 represent number of lba to be read*/
> > + cmd->cdw12 = nlb;
> > + cmd->addr = (unsigned long) s->iovecs[index].iov_base;
> > + cmd->data_len = bs;
> > + cmd->nsid = nsid;
> > + cmd->opcode = 2;
> > + return;
> > + }
> > +
> > + sqe = &s->sqes[index];
> > if (register_files) {
> > sqe->flags = IOSQE_FIXED_FILE;
> > sqe->fd = f->fixed_fd;
> > @@ -549,7 +637,22 @@ static int get_file_size(struct file *f)
> >
> > if (fstat(f->real_fd, &st) < 0)
> > return -1;
> > - if (S_ISBLK(st.st_mode)) {
> > + if (pt) {
> > + __u64 nlba;
> > + int ret;
> > +
> > + if (!S_ISCHR(st.st_mode)) {
> > + fprintf(stderr, "passthrough works with only nvme-ns "
> > + "generic devices (/dev/ngXnY)\n");
> > + return -1;
> > + }
> > + ret = nvme_get_info(f->real_fd, &nsid, &lbs, &nlba);
> > + if (ret)
> > + return -1;
> > + f->max_blocks = nlba / bs;
> > + f->max_size = nlba;
> > + return 0;
> > + } else if (S_ISBLK(st.st_mode)) {
> > unsigned long long bytes;
> >
> > if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0)
> > @@ -587,11 +690,14 @@ static int reap_events_uring(struct submitter *s)
> >
> > f = &s->files[fileno];
> > f->pending_ios--;
> > - if (cqe->res != bs) {
> > + if (!pt && cqe->res != bs) {
> > printf("io: unexpected ret=%d\n", cqe->res);
> > if (polled && cqe->res == -EOPNOTSUPP)
> > printf("Your filesystem/driver/kernel doesn't support polled IO\n");
> > return -1;
> > + } else if (pt && cqe->res != 0) {
> > + printf("io: unexpected ret=%d\n", cqe->res);
> > + return -1;
>
> Missing inner check (polled one) for pt case?
right, adding a check for polling makes sense.
> How about something like this fragement -
>
> if (!do_nop) {
> int fileno = cqe->user_data & 0xffffffff;
> + bool error;
>
> f = &s->files[fileno];
> f->pending_ios--;
> - if (!pt && cqe->res != bs) {
> + if (!pt)
> + error = (cqe->res != bs);
> + else
> + error = (cqe->res != 0);
> + if (error) {
> printf("io: unexpected ret=%d\n", cqe->res);
> if (polled && cqe->res == -EOPNOTSUPP)
> printf("Your filesystem/driver/kernel doesn't support polled IO\n");
> return -1;
>
> [...]
>
I think, the result is -EINVAL rather than -EOPNOTSUPP, if the user enables polling for passthru i/o.
With that we can check for polled-io for passthru
> > @@ -1512,6 +1638,10 @@ int main(int argc, char *argv[])
> > printf("failed getting size of device/file\n");
> > return 1;
> > }
> > + if (pt && bs < lbs) {
> > + printf("error: bs:%d is less than logical_block_size:%d\n", bs, lbs);
> > + return 1;
> > + }
>
> would it be better to replace the check/print with "bs % lbs" instead?
> since io size needs to be multiple of lbs to go through.
>
right, will modify the check accordingly
--
Anuj Gupta
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
prev parent reply other threads:[~2022-08-25 12:02 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20220824094118epcas5p461b651ef5c3b1b04fc731275cf2fe0c2@epcas5p4.samsung.com>
2022-08-24 9:31 ` [PATCH 0/2] Add support for uring-passthrough in t/io_uring Anuj Gupta
[not found] ` <CGME20220824094125epcas5p48b88f023cce31317adc68abc225df95c@epcas5p4.samsung.com>
2022-08-24 9:31 ` [PATCH 1/2] t/io_uring: prep for including engines/nvme.h " Anuj Gupta
[not found] ` <CGME20220824094134epcas5p4b7cacb00484146b5d510ded15a80c712@epcas5p4.samsung.com>
2022-08-24 9:31 ` [PATCH 2/2] t/io_uring: add support for async-passthru Anuj Gupta
2022-08-24 20:09 ` Jens Axboe
2022-08-25 11:20 ` Anuj Gupta
2022-08-25 8:24 ` Kanchan Joshi
2022-08-25 11:39 ` Anuj Gupta [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220825113952.GB16653@test-zns \
--to=anuj20.g@samsung.com \
--cc=ankit.kumar@samsung.com \
--cc=axboe@kernel.dk \
--cc=fio@vger.kernel.org \
--cc=joshi.k@samsung.com \
--cc=vincentfu@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).