All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@infradead.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: "linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	keescook@chromium.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] block: switch to atomic_t for request references
Date: Sun, 5 Dec 2021 22:53:49 -0800	[thread overview]
Message-ID: <Ya2zfVAwh4aQ7KVd@infradead.org> (raw)
In-Reply-To: <9f2ad6f1-c1bb-dfac-95c8-7d9eaa7110cc@kernel.dk>

On Fri, Dec 03, 2021 at 08:35:40AM -0700, Jens Axboe wrote:
> refcount_t is not as expensive as it used to be, but it's still more
> expensive than the io_uring method of using atomic_t and just checking
> for potential over/underflow.
> 
> This borrows that same implementation, which in turn is based on the
> mm implementation from Linus.

If refcount_t isn't good enough for a normal kernel fast path we have
a problem.  Can we discuss that with the maintainers instead of coming
up with our home grown schemes again?

> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> ---
> 
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index f78bb39e589e..e4df894189ce 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -229,7 +229,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
>  	/* release the tag's ownership to the req cloned from */
>  	spin_lock_irqsave(&fq->mq_flush_lock, flags);
>  
> -	if (!refcount_dec_and_test(&flush_rq->ref)) {
> +	if (!req_ref_put_and_test(flush_rq)) {
>  		fq->rq_status = error;
>  		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
>  		return;
> @@ -349,7 +349,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
>  	 * and READ flush_rq->end_io
>  	 */
>  	smp_wmb();
> -	refcount_set(&flush_rq->ref, 1);
> +	req_ref_set(flush_rq, 1);
>  
>  	blk_flush_queue_rq(flush_rq, false);
>  }
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 995336abee33..380e2dd31bfc 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -228,7 +228,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
>  
>  	spin_lock_irqsave(&tags->lock, flags);
>  	rq = tags->rqs[bitnr];
> -	if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
> +	if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
>  		rq = NULL;
>  	spin_unlock_irqrestore(&tags->lock, flags);
>  	return rq;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index fa464a3e2f9a..22ec21aa0c22 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -386,7 +386,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>  	INIT_LIST_HEAD(&rq->queuelist);
>  	/* tag was already set */
>  	WRITE_ONCE(rq->deadline, 0);
> -	refcount_set(&rq->ref, 1);
> +	req_ref_set(rq, 1);
>  
>  	if (rq->rq_flags & RQF_ELV) {
>  		struct elevator_queue *e = data->q->elevator;
> @@ -634,7 +634,7 @@ void blk_mq_free_request(struct request *rq)
>  	rq_qos_done(q, rq);
>  
>  	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
> -	if (refcount_dec_and_test(&rq->ref))
> +	if (req_ref_put_and_test(rq))
>  		__blk_mq_free_request(rq);
>  }
>  EXPORT_SYMBOL_GPL(blk_mq_free_request);
> @@ -930,7 +930,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
>  		rq_qos_done(rq->q, rq);
>  
>  		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
> -		if (!refcount_dec_and_test(&rq->ref))
> +		if (!req_ref_put_and_test(rq))
>  			continue;
>  
>  		blk_crypto_free_request(rq);
> @@ -1373,7 +1373,7 @@ void blk_mq_put_rq_ref(struct request *rq)
>  {
>  	if (is_flush_rq(rq))
>  		rq->end_io(rq, 0);
> -	else if (refcount_dec_and_test(&rq->ref))
> +	else if (req_ref_put_and_test(rq))
>  		__blk_mq_free_request(rq);
>  }
>  
> @@ -3003,7 +3003,7 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
>  			unsigned long rq_addr = (unsigned long)rq;
>  
>  			if (rq_addr >= start && rq_addr < end) {
> -				WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
> +				WARN_ON_ONCE(req_ref_read(rq) != 0);
>  				cmpxchg(&drv_tags->rqs[i], rq, NULL);
>  			}
>  		}
> @@ -3337,7 +3337,7 @@ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
>  	if (!tags)
>  		return;
>  
> -	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
> +	WARN_ON_ONCE(req_ref_read(flush_rq) != 0);
>  
>  	for (i = 0; i < queue_depth; i++)
>  		cmpxchg(&tags->rqs[i], flush_rq, NULL);
> diff --git a/block/blk.h b/block/blk.h
> index 296411900c55..f869f4b2dec9 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -473,4 +473,35 @@ static inline bool should_fail_request(struct block_device *part,
>  }
>  #endif /* CONFIG_FAIL_MAKE_REQUEST */
>  
> +/*
> + * Optimized request reference counting. Ideally we'd make timeouts be more
> + * clever, as that's the only reason we need references at all... But until
> + * this happens, this is faster than using refcount_t. Also see:
> + *
> + * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
> + */
> +#define req_ref_zero_or_close_to_overflow(req)	\
> +	((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u)
> +
> +static inline bool req_ref_inc_not_zero(struct request *req)
> +{
> +	return atomic_inc_not_zero(&req->ref);
> +}
> +
> +static inline bool req_ref_put_and_test(struct request *req)
> +{
> +	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
> +	return atomic_dec_and_test(&req->ref);
> +}
> +
> +static inline void req_ref_set(struct request *req, int value)
> +{
> +	atomic_set(&req->ref, value);
> +}
> +
> +static inline int req_ref_read(struct request *req)
> +{
> +	return atomic_read(&req->ref);
> +}
> +
>  #endif /* BLK_INTERNAL_H */
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index bfc3cc61f653..ecdc049b52fa 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -138,7 +138,7 @@ struct request {
>  	unsigned short ioprio;
>  
>  	enum mq_rq_state state;
> -	refcount_t ref;
> +	atomic_t ref;
>  
>  	unsigned long deadline;
>  
> -- 
> Jens Axboe
> 
---end quoted text---

  parent reply	other threads:[~2021-12-06  6:53 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-03 15:35 [PATCH] block: switch to atomic_t for request references Jens Axboe
2021-12-03 15:56 ` Keith Busch
2021-12-06  6:53 ` Christoph Hellwig [this message]
2021-12-06  8:31   ` Peter Zijlstra
2021-12-06 16:32     ` Jens Axboe
2021-12-06 17:19       ` Peter Zijlstra
2021-12-06 17:35     ` Linus Torvalds
2021-12-06 18:13       ` Jens Axboe
2021-12-06 20:51         ` Kees Cook
2021-12-06 21:17           ` Linus Torvalds
2021-12-06 23:28             ` Kees Cook
2021-12-07  0:13               ` Linus Torvalds
2021-12-07  4:56                 ` Kees Cook
2021-12-07  9:34                 ` Peter Zijlstra
2021-12-07 16:03                   ` Linus Torvalds
2021-12-07 10:30                 ` Peter Zijlstra
2021-12-07 16:10                   ` Linus Torvalds
2021-12-07 16:23                     ` Peter Zijlstra
2021-12-06 16:31   ` Jens Axboe
2021-12-07 11:26   ` Peter Zijlstra
2021-12-07 13:28     ` Peter Zijlstra
2021-12-07 15:51       ` Peter Zijlstra
2021-12-07 16:13       ` Linus Torvalds
2021-12-07 16:52         ` Peter Zijlstra
2021-12-07 17:41           ` Peter Zijlstra
2021-12-07 17:43           ` Linus Torvalds
2021-12-07 17:45             ` Linus Torvalds
2021-12-07 20:28       ` Peter Zijlstra
2021-12-07 23:23         ` Linus Torvalds
2021-12-08 17:07           ` Peter Zijlstra
2021-12-08 18:00             ` Linus Torvalds
2021-12-08 18:44               ` Peter Zijlstra
2021-12-08 18:50                 ` Linus Torvalds
2021-12-08 20:32                   ` Peter Zijlstra
2021-12-10 10:57                   ` Peter Zijlstra
2021-12-10 12:38               ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Ya2zfVAwh4aQ7KVd@infradead.org \
    --to=hch@infradead.org \
    --cc=axboe@kernel.dk \
    --cc=keescook@chromium.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.