Re: [BUG] About "io_uring: add more uring info to fdinfo for debug"

From: Eric Dumazet <edumazet@google.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>,
	Hao Xu <haoxu@linux.alibaba.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [BUG] About "io_uring: add more uring info to fdinfo for debug"
Date: Thu, 28 Oct 2021 17:43:20 -0700	[thread overview]
Message-ID: <CANn89iJv6cRbK95Gwj=rrz2+X+Q4KQ7EtEcpz0Md0Vs1_iABPw@mail.gmail.com> (raw)
In-Reply-To: <3b7dbb90-7fd5-d30f-cfe0-dd83f2cd4482@kernel.dk>

On Thu, Oct 28, 2021 at 5:13 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 10/28/21 3:40 PM, Jens Axboe wrote:
> > On 10/28/21 3:24 PM, Eric Dumazet wrote:
> >> Hi
> >>
> >> I was looking at commit 83f84356bc8f2d
> >> ("io_uring: add more uring info to fdinfo for debug") after receiving
> >> syzbot reports.
> >>
> >> I suspect that the following :
> >>
> >> +       for (i = cached_sq_head; i < sq_tail; i++) {
> >> +               unsigned int sq_idx = READ_ONCE(ctx->sq_array[i & sq_mask]);
> >> +
> >> +               if (likely(sq_idx <= sq_mask)) {
> >> +                       struct io_uring_sqe *sqe = &ctx->sq_sqes[sq_idx];
> >> +
> >> +                       seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n",
> >> +                                  sq_idx, sqe->opcode, sqe->fd, sqe->flags, sqe->user_data);
> >> +               }
> >> +       }
> >>
> >>
> >> Can loop around ~2^32 times if sq_tail is close to ~0U
> >>
> >> I see various READ_ONCE(), which are probably not good enough.
> >>
> >> At very minimum I would handling wrapping...
> >
> > Thanks for reporting this. I think on top of wrapping, the loop should
> > just be capped at sq_entries as well. There's no point dumping more than
> > that, ever.
> >
> > I'll take a stab at this.
>
> I'd probably do something like this - make sure wrap is sane and that we
> always cap at the max number of entries we expect. This doesn't quite
> hold true for CQEs, but honestly for debugging purposes, we only really
> care about the sq ring side in terms of stalls. Or if we have unreaped
> CQEs, which we'll still show.
>
> This also removes the masking, as it's better to expose the ring indexes
> directly. And just dump the raw ring head/tail for sq/cq. We still
> include the cached info, but I think dumping the raw contents is saner
> and more useful.
>
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 17cb0e1b88f0..babd9950ae9f 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -10065,12 +10065,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
>         struct io_overflow_cqe *ocqe;
>         struct io_rings *r = ctx->rings;
>         unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
> -       unsigned int cached_sq_head = ctx->cached_sq_head;
> -       unsigned int cached_cq_tail = ctx->cached_cq_tail;
>         unsigned int sq_head = READ_ONCE(r->sq.head);
>         unsigned int sq_tail = READ_ONCE(r->sq.tail);
>         unsigned int cq_head = READ_ONCE(r->cq.head);
>         unsigned int cq_tail = READ_ONCE(r->cq.tail);
> +       unsigned int sq_entries, cq_entries;
>         bool has_lock;
>         unsigned int i;
>
> @@ -10080,15 +10079,19 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
>          * and sq_tail and cq_head are changed by userspace. But it's ok since
>          * we usually use these info when it is stuck.
>          */
> -       seq_printf(m, "SqHead:\t%u\n", sq_head & sq_mask);
> -       seq_printf(m, "SqTail:\t%u\n", sq_tail & sq_mask);
> -       seq_printf(m, "CachedSqHead:\t%u\n", cached_sq_head & sq_mask);
> -       seq_printf(m, "CqHead:\t%u\n", cq_head & cq_mask);
> -       seq_printf(m, "CqTail:\t%u\n", cq_tail & cq_mask);
> -       seq_printf(m, "CachedCqTail:\t%u\n", cached_cq_tail & cq_mask);
> -       seq_printf(m, "SQEs:\t%u\n", sq_tail - cached_sq_head);
> -       for (i = cached_sq_head; i < sq_tail; i++) {
> -               unsigned int sq_idx = READ_ONCE(ctx->sq_array[i & sq_mask]);
> +       seq_printf(m, "SqMask:\t\t0x%x\n", sq_mask);
> +       seq_printf(m, "SqHead:\t%u\n", sq_head);
> +       seq_printf(m, "SqTail:\t%u\n", sq_tail);
> +       seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head);
> +       seq_printf(m, "CqMask:\t0x%x\n", cq_mask);
> +       seq_printf(m, "CqHead:\t%u\n", cq_head);
> +       seq_printf(m, "CqTail:\t%u\n", cq_tail);
> +       seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail);
> +       seq_printf(m, "SQEs:\t%u\n", sq_tail - ctx->cached_sq_head);
> +       sq_entries = min(sq_tail - sq_head, ctx->sq_entries);
> +       for (i = 0; i < sq_entries; i++) {
> +               unsigned int entry = i + sq_head;
> +               unsigned int sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
>
>                 if (likely(sq_idx <= sq_mask)) {
>                         struct io_uring_sqe *sqe = &ctx->sq_sqes[sq_idx];
> @@ -10097,9 +10100,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
>                                    sq_idx, sqe->opcode, sqe->fd, sqe->flags, sqe->user_data);
>                 }
>         }
> -       seq_printf(m, "CQEs:\t%u\n", cached_cq_tail - cq_head);
> -       for (i = cq_head; i < cached_cq_tail; i++) {
> -               struct io_uring_cqe *cqe = &r->cqes[i & cq_mask];
> +       seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head);
> +       cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
> +       for (i = 0; i < cq_entries; i++) {
> +               unsigned int entry = i + cq_head;
> +               struct io_uring_cqe *cqe = &r->cqes[entry & cq_mask];
>
>                 seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
>                            i & cq_mask, cqe->user_data, cqe->res, cqe->flags);

Note : you probably want to replace  (i & cq_mask) to (entry & cq_mask) here

Otherwise, patch looks good to me.

>
> --
> Jens Axboe
>