All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kanchan Joshi <joshiiitr@gmail.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Pankaj Raghav <p.raghav@samsung.com>,
	Jens Axboe <axboe@kernel.dk>,
	io-uring@vger.kernel.org, linux-nvme@lists.infradead.org,
	Pavel Begunkov <asml.silence@gmail.com>,
	Ming Lei <ming.lei@redhat.com>,
	Luis Chamberlain <mcgrof@kernel.org>, Stefan Roesch <shr@fb.com>,
	gost.dev@samsung.com, Kanchan Joshi <joshi.k@samsung.com>
Subject: Re: [PATCH v3 1/5] fs,io_uring: add infrastructure for uring-cmd
Date: Wed, 4 May 2022 08:12:44 -0700	[thread overview]
Message-ID: <CA+1E3rKe6G8UC9Pzkm4Wbu50X=TT5tise8g6umduhj1eTbN0+w@mail.gmail.com> (raw)
In-Reply-To: <20220503205202.GA9567@lst.de>

On Tue, May 3, 2022 at 1:52 PM Christoph Hellwig <hch@lst.de> wrote:
>
> This mostly looks good, but a few comments an an (untested) patch
> to fix these complaints below:
>
> > +struct io_uring_cmd {
> > +     struct file     *file;
> > +     void            *cmd;
> > +     /* for irq-completion - if driver requires doing stuff in task-context*/
> > +     void (*driver_cb)(struct io_uring_cmd *cmd);
>
> I'd rename this task_Work_cb or similar, as driver is not very
> meaningful.
>
> > +     u32             flags;
> > +     u32             cmd_op;
> > +     u32             unused;
> > +     u8              pdu[28]; /* available inline for free use */
>
> The unused field here is not only unused, but also messed up the
> alignment so that any pointer in pdu will not be properly aligned
> on 64-bit platforms and tjus might cause crashes on some platforms.

Indeed, thanks. Since we are going to remove that, we can give that
space to pdu for now i.e. pdu[32].This will help in removing "packed"
attribute in nvme pdu (which is 32 bytes without packing).

> For now we can just remove it, but I really want to get rid of the pdu
> thing that causes these problems, but I will not hold the series off for
> that and plan to look into that later.
> Also a lot of the fields here were using spaces instead of tabs for
> indentation.
>
> >       union {
> >               __u64   addr;   /* pointer to buffer or iovecs */
> >               __u64   splice_off_in;
> > +             __u16   cmd_len;
> >       };
>
> This field isn't actually used and can be removed.
>
> >       __u32   len;            /* buffer size or number of iovecs */
> >       union {
> > @@ -61,7 +63,10 @@ struct io_uring_sqe {
> >               __s32   splice_fd_in;
> >               __u32   file_index;
> >       };
> > -     __u64   addr3;
> > +     union {
> > +             __u64   addr3;
> > +             __u64   cmd;
> > +     };
> >       __u64   __pad2[1];
>
> The way how the tail of the structure is handled here is a mess.  I
> think something like a union of two structs for the small and large
> case would be much better, so that cmd can be the actual array for
> the data that can be used directly and without the nasty cast (that
> also casted away the constness):
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index b774e6eac5384..fe24d606ca306 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4544,7 +4544,7 @@ static int io_getxattr_prep(struct io_kiocb *req,
>         if (ret)
>                 return ret;
>
> -       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
> +       path = u64_to_user_ptr(READ_ONCE(sqe->small.addr3));
>
>         ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
>         if (IS_ERR(ix->filename)) {
> @@ -4645,7 +4645,7 @@ static int io_setxattr_prep(struct io_kiocb *req,
>         if (ret)
>                 return ret;
>
> -       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
> +       path = u64_to_user_ptr(READ_ONCE(sqe->small.addr3));
>
>         ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
>         if (IS_ERR(ix->filename)) {
> @@ -4909,15 +4909,15 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
>
>  static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
>  {
> -       req->uring_cmd.driver_cb(&req->uring_cmd);
> +       req->uring_cmd.task_work_cb(&req->uring_cmd);
>  }
>
>  void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
> -                       void (*driver_cb)(struct io_uring_cmd *))
> +                       void (*task_work_cb)(struct io_uring_cmd *))
>  {
>         struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
>
> -       req->uring_cmd.driver_cb = driver_cb;
> +       req->uring_cmd.task_work_cb = task_work_cb;
>         req->io_task_work.func = io_uring_cmd_work;
>         io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
>  }
> @@ -4949,7 +4949,7 @@ static int io_uring_cmd_prep(struct io_kiocb *req,
>                 return -EOPNOTSUPP;
>         if (!(req->ctx->flags & IORING_SETUP_CQE32))
>                 return -EOPNOTSUPP;
> -       ioucmd->cmd = (void *) &sqe->cmd;
> +       ioucmd->cmd = sqe->big.cmd;
>         ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
>         ioucmd->flags = 0;
>         return 0;
> @@ -12735,7 +12735,7 @@ static int __init io_uring_init(void)
>         BUILD_BUG_SQE_ELEM(42, __u16,  personality);
>         BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
>         BUILD_BUG_SQE_ELEM(44, __u32,  file_index);
> -       BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
> +       BUILD_BUG_SQE_ELEM(48, __u64,  small.addr3);
>
>         BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
>                      sizeof(struct io_uring_rsrc_update));
> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> index a4ff4696cbead..7274884859910 100644
> --- a/include/linux/io_uring.h
> +++ b/include/linux/io_uring.h
> @@ -13,20 +13,19 @@ enum io_uring_cmd_flags {
>  };
>
>  struct io_uring_cmd {
> -       struct file     *file;
> -       void            *cmd;
> -       /* for irq-completion - if driver requires doing stuff in task-context*/
> -       void (*driver_cb)(struct io_uring_cmd *cmd);
> -       u32             flags;
> -       u32             cmd_op;
> -       u32             unused;
> +       struct file     *file;
> +       const u8        *cmd;
> +       /* callback to defer completions to task context */
> +       void (*task_work_cb)(struct io_uring_cmd *cmd);
> +       u32             flags;
> +       u32             cmd_op;
>         u8              pdu[28]; /* available inline for free use */
>  };
>
>  #if defined(CONFIG_IO_URING)
>  void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
>  void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
> -                       void (*driver_cb)(struct io_uring_cmd *));
> +                       void (*task_work_cb)(struct io_uring_cmd *));
>  struct sock *io_uring_get_socket(struct file *file);
>  void __io_uring_cancel(bool cancel_all);
>  void __io_uring_free(struct task_struct *tsk);
> @@ -56,7 +55,7 @@ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
>  {
>  }
>  static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
> -                       void (*driver_cb)(struct io_uring_cmd *))
> +                       void (*task_work_cb)(struct io_uring_cmd *))
>  {
>  }
>  static inline struct sock *io_uring_get_socket(struct file *file)
> diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
> index 630982b3c34c5..b1f64c2412935 100644
> --- a/include/trace/events/io_uring.h
> +++ b/include/trace/events/io_uring.h
> @@ -539,8 +539,8 @@ TRACE_EVENT(io_uring_req_failed,
>                 __entry->buf_index      = sqe->buf_index;
>                 __entry->personality    = sqe->personality;
>                 __entry->file_index     = sqe->file_index;
> -               __entry->pad1           = sqe->__pad2[0];
> -               __entry->addr3          = sqe->addr3;
> +               __entry->addr3          = sqe->small.addr3;
> +               __entry->pad1           = sqe->small.__pad2[0];
>                 __entry->error          = error;
>         ),
>
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index c081511119bfa..bd4221184f594 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -27,7 +27,6 @@ struct io_uring_sqe {
>         union {
>                 __u64   addr;   /* pointer to buffer or iovecs */
>                 __u64   splice_off_in;
> -               __u16   cmd_len;
>         };
>         __u32   len;            /* buffer size or number of iovecs */
>         union {
> @@ -64,16 +63,19 @@ struct io_uring_sqe {
>                 __u32   file_index;
>         };
>         union {
> -               __u64   addr3;
> -               __u64   cmd;
> +               struct {
> +                       __u64   addr3;
> +                       __u64   __pad2[1];
> +               } small;

Thinking if this can cause any issue for existing users of addr3, i.e.
in the userspace side? Since it needs to access this field with
small.addr3.
Jens - is xattr infra already frozen?

> +               /*
> +                * If the ring is initialized with IORING_SETUP_SQE128, then
> +                * this field is used for 80 bytes of arbitrary command data.
> +                */
> +               struct {
> +                       __u8    cmd[0];
> +               } big;
>         };
> -       __u64   __pad2[1];
> -
> -       /*
> -        * If the ring is initialized with IORING_SETUP_SQE128, then this field
> -        * contains 64-bytes of padding, doubling the size of the SQE.
> -        */
> -       __u64   __big_sqe_pad[0];
>  };
>
>  enum {



-- 
Joshi

  reply	other threads:[~2022-05-04  8:13 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20220503184911eucas1p1beb172219537d78fcaf2a1417f532cf7@eucas1p1.samsung.com>
2022-05-03 18:48 ` [PATCH v3 0/5] io_uring passthough for nvme Pankaj Raghav
     [not found]   ` <CGME20220503184912eucas1p1bb0e3d36c06cfde8436df3a45e67bd32@eucas1p1.samsung.com>
2022-05-03 18:48     ` [PATCH v3 1/5] fs,io_uring: add infrastructure for uring-cmd Pankaj Raghav
2022-05-03 20:52       ` Christoph Hellwig
2022-05-04 15:12         ` Kanchan Joshi [this message]
2022-05-04 12:09           ` Jens Axboe
2022-05-04 15:48           ` Kanchan Joshi
2022-05-03 21:03       ` Jens Axboe
     [not found]   ` <CGME20220503184913eucas1p156abb6e2273c8dabc22e87ec8b218a5c@eucas1p1.samsung.com>
2022-05-03 18:48     ` [PATCH v3 2/5] block: wire-up support for passthrough plugging Pankaj Raghav
2022-05-03 20:53       ` Christoph Hellwig
     [not found]   ` <CGME20220503184914eucas1p1d9df18afe3234c0698a66cdb9c664ddc@eucas1p1.samsung.com>
2022-05-03 18:48     ` [PATCH v3 3/5] nvme: refactor nvme_submit_user_cmd() Pankaj Raghav
     [not found]   ` <CGME20220503184915eucas1p2ae04772900c24ef0b23fd8bedead20ae@eucas1p2.samsung.com>
2022-05-03 18:48     ` [PATCH v3 4/5] nvme: wire-up uring-cmd support for io-passthru on char-device Pankaj Raghav
2022-05-03 20:55       ` Christoph Hellwig
     [not found]   ` <CGME20220503184916eucas1p266cbb3ffc1622b292bf59b5eccec9933@eucas1p2.samsung.com>
2022-05-03 18:48     ` [PATCH v3 5/5] nvme: add vectored-io support for uring-cmd Pankaj Raghav
2022-05-03 20:56       ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+1E3rKe6G8UC9Pzkm4Wbu50X=TT5tise8g6umduhj1eTbN0+w@mail.gmail.com' \
    --to=joshiiitr@gmail.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=gost.dev@samsung.com \
    --cc=hch@lst.de \
    --cc=io-uring@vger.kernel.org \
    --cc=joshi.k@samsung.com \
    --cc=linux-nvme@lists.infradead.org \
    --cc=mcgrof@kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=p.raghav@samsung.com \
    --cc=shr@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.