Developers may need some io_uring info to help them debug and resolve issues. This info includes the sqring/cqring head/tail and detailed sqe/cqe info, which is very useful when the ring gets stuck. Signed-off-by: Hao Xu <haoxu@linux.alibaba.com> --- fs/io_uring.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f795ad281038..ac048592a3e8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9950,8 +9950,48 @@ static int io_uring_show_cred(struct seq_file *m, unsigned int id, static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { struct io_sq_data *sq = NULL; + struct io_overflow_cqe *ocqe; + struct io_rings *r = ctx->rings; + unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; + unsigned int cached_sq_head = ctx->cached_sq_head; + unsigned int cached_cq_tail = ctx->cached_cq_tail; + unsigned int sq_head = READ_ONCE(r->sq.head); + unsigned int sq_tail = READ_ONCE(r->sq.tail); + unsigned int cq_head = READ_ONCE(r->cq.head); + unsigned int cq_tail = READ_ONCE(r->cq.tail); bool has_lock; - int i; + unsigned int i; + + /* + * we may get imprecise sqe and cqe info if uring is actively running + * since we get cached_sq_head and cached_cq_tail without uring_lock + * and sq_tail and cq_head are changed by userspace. But it's ok since + * we usually use these info when it is stuck. 
+ */ + seq_printf(m, "SqHead:\t%u\n", sq_head & sq_mask); + seq_printf(m, "SqTail:\t%u\n", sq_tail & sq_mask); + seq_printf(m, "CachedSqHead:\t%u\n", cached_sq_head & sq_mask); + seq_printf(m, "CqHead:\t%u\n", cq_head & cq_mask); + seq_printf(m, "CqTail:\t%u\n", cq_tail & cq_mask); + seq_printf(m, "CachedCqTail:\t%u\n", cached_cq_tail & cq_mask); + seq_printf(m, "SQEs:\t%u\n", sq_tail - cached_sq_head); + for (i = cached_sq_head; i < sq_tail; i++) { + unsigned int sq_idx = READ_ONCE(ctx->sq_array[i & sq_mask]); + + if (likely(sq_idx <= sq_mask)) { + struct io_uring_sqe *sqe = &ctx->sq_sqes[sq_idx]; + + seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n", + sq_idx, sqe->opcode, sqe->fd, sqe->flags, sqe->user_data); + } + } + seq_printf(m, "CQEs:\t%u\n", cached_cq_tail - cq_head); + for (i = cq_head; i < cached_cq_tail; i++) { + struct io_uring_cqe *cqe = &r->cqes[i & cq_mask]; + + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n", + i & cq_mask, cqe->user_data, cqe->res, cqe->flags); + } /* * Avoid ABBA deadlock between the seq lock and the io_uring mutex, @@ -9993,7 +10033,10 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) xa_for_each(&ctx->personalities, index, cred) io_uring_show_cred(m, index, cred); } - seq_printf(m, "PollList:\n"); + if (has_lock) + mutex_unlock(&ctx->uring_lock); + + seq_puts(m, "PollList:\n"); spin_lock(&ctx->completion_lock); for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { struct hlist_head *list = &ctx->cancel_hash[i]; @@ -10003,9 +10046,17 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) seq_printf(m, " op=%d, task_works=%d\n", req->opcode, req->task->task_works != NULL); } + + seq_puts(m, "CqOverflowList:\n"); + list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { + struct io_uring_cqe *cqe = &ocqe->cqe; + + seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", + cqe->user_data, cqe->res, cqe->flags); + + } + 
spin_unlock(&ctx->completion_lock); - if (has_lock) - mutex_unlock(&ctx->uring_lock); } static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) -- 2.24.4
ping...
On 2021/9/13 9:08 PM, Hao Xu wrote:
> Developers may need some uring info to help themselves debug and address
> issues, these info includes sqring/cqring head/tail and the detail
> sqe/cqe info, which is very useful when it stucks.
>
> Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
> ---
> fs/io_uring.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 55 insertions(+), 4 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index f795ad281038..ac048592a3e8 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -9950,8 +9950,48 @@ static int io_uring_show_cred(struct seq_file *m, unsigned int id,
> static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
> {
> struct io_sq_data *sq = NULL;
> + struct io_overflow_cqe *ocqe;
> + struct io_rings *r = ctx->rings;
> + unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
> + unsigned int cached_sq_head = ctx->cached_sq_head;
> + unsigned int cached_cq_tail = ctx->cached_cq_tail;
> + unsigned int sq_head = READ_ONCE(r->sq.head);
> + unsigned int sq_tail = READ_ONCE(r->sq.tail);
> + unsigned int cq_head = READ_ONCE(r->cq.head);
> + unsigned int cq_tail = READ_ONCE(r->cq.tail);
> bool has_lock;
> - int i;
> + unsigned int i;
> +
> + /*
> + * we may get imprecise sqe and cqe info if uring is actively running
> + * since we get cached_sq_head and cached_cq_tail without uring_lock
> + * and sq_tail and cq_head are changed by userspace. But it's ok since
> + * we usually use these info when it is stuck.
> + */
> + seq_printf(m, "SqHead:\t%u\n", sq_head & sq_mask);
> + seq_printf(m, "SqTail:\t%u\n", sq_tail & sq_mask);
> + seq_printf(m, "CachedSqHead:\t%u\n", cached_sq_head & sq_mask);
> + seq_printf(m, "CqHead:\t%u\n", cq_head & cq_mask);
> + seq_printf(m, "CqTail:\t%u\n", cq_tail & cq_mask);
> + seq_printf(m, "CachedCqTail:\t%u\n", cached_cq_tail & cq_mask);
> + seq_printf(m, "SQEs:\t%u\n", sq_tail - cached_sq_head);
> + for (i = cached_sq_head; i < sq_tail; i++) {
> + unsigned int sq_idx = READ_ONCE(ctx->sq_array[i & sq_mask]);
> +
> + if (likely(sq_idx <= sq_mask)) {
> + struct io_uring_sqe *sqe = &ctx->sq_sqes[sq_idx];
> +
> + seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n",
> + sq_idx, sqe->opcode, sqe->fd, sqe->flags, sqe->user_data);
> + }
> + }
> + seq_printf(m, "CQEs:\t%u\n", cached_cq_tail - cq_head);
> + for (i = cq_head; i < cached_cq_tail; i++) {
> + struct io_uring_cqe *cqe = &r->cqes[i & cq_mask];
> +
> + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
> + i & cq_mask, cqe->user_data, cqe->res, cqe->flags);
> + }
>
> /*
> * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
> @@ -9993,7 +10033,10 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
> xa_for_each(&ctx->personalities, index, cred)
> io_uring_show_cred(m, index, cred);
> }
> - seq_printf(m, "PollList:\n");
> + if (has_lock)
> + mutex_unlock(&ctx->uring_lock);
> +
> + seq_puts(m, "PollList:\n");
> spin_lock(&ctx->completion_lock);
> for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
> struct hlist_head *list = &ctx->cancel_hash[i];
> @@ -10003,9 +10046,17 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
> seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
> req->task->task_works != NULL);
> }
> +
> + seq_puts(m, "CqOverflowList:\n");
> + list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
> + struct io_uring_cqe *cqe = &ocqe->cqe;
> +
> + seq_printf(m, " user_data=%llu, res=%d, flags=%x\n",
> + cqe->user_data, cqe->res, cqe->flags);
> +
> + }
> +
> spin_unlock(&ctx->completion_lock);
> - if (has_lock)
> - mutex_unlock(&ctx->uring_lock);
> }
>
> static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>
On 9/13/21 7:08 AM, Hao Xu wrote:
> Developers may need some uring info to help themselves debug and address
> issues, these info includes sqring/cqring head/tail and the detail
> sqe/cqe info, which is very useful when it stucks.
I think this is a good addition, more info to help you debug a stuck case
is always good. I'll queue this up for 5.16.
--
Jens Axboe
On 9/15/21 4:26 PM, Jens Axboe wrote:
> On 9/13/21 7:08 AM, Hao Xu wrote:
>> Developers may need some uring info to help themselves debug and address
>> issues, these info includes sqring/cqring head/tail and the detail
>> sqe/cqe info, which is very useful when it stucks.
>
> I think this is a good addition, more info to help you debug a stuck case
> is always good. I'll queue this up for 5.16.
Are there limits on how much we can print? I remember people couldn't
even show a list of CPUs (was it proc?). The overflow list may be huge.
--
Pavel Begunkov
On 9/15/21 9:31 AM, Pavel Begunkov wrote:
> On 9/15/21 4:26 PM, Jens Axboe wrote:
>> On 9/13/21 7:08 AM, Hao Xu wrote:
>>> Developers may need some uring info to help themselves debug and address
>>> issues, these info includes sqring/cqring head/tail and the detail
>>> sqe/cqe info, which is very useful when it stucks.
>>
>> I think this is a good addition, more info to help you debug a stuck case
>> is always good. I'll queue this up for 5.16.
>
> Are there limits how much we can print? I remember people were couldn't
> even show a list of CPUs (was it proc?). The overflow list may be huge.
It's using seq_file, so I _think_ we should be fine here. Not sure when/if
it truncates.
--
Jens Axboe
On 2021/9/15 11:32 PM, Jens Axboe wrote:
> On 9/15/21 9:31 AM, Pavel Begunkov wrote:
>> On 9/15/21 4:26 PM, Jens Axboe wrote:
>>> On 9/13/21 7:08 AM, Hao Xu wrote:
>>>> Developers may need some uring info to help themselves debug and address
>>>> issues, these info includes sqring/cqring head/tail and the detail
>>>> sqe/cqe info, which is very useful when it stucks.
>>>
>>> I think this is a good addition, more info to help you debug a stuck case
>>> is always good. I'll queue this up for 5.16.
>>
>> Are there limits how much we can print? I remember people were couldn't
>> even show a list of CPUs (was it proc?). The overflow list may be huge.
>
> It's using seq_file, so I _think_ we should be fine here. Not sure when/if
> it truncates.
>
It seems the default buf size of seq_file is PAGE_SIZE.