* [PATCH] block: switch to atomic_t for request references
@ 2021-12-03 15:35 Jens Axboe
From: Jens Axboe @ 2021-12-03 15:35 UTC
  To: linux-block

refcount_t is not as expensive as it used to be, but it's still more
expensive than the io_uring method of using atomic_t and just checking
for potential over/underflow.

This borrows that same implementation, which in turn is based on the
mm implementation from Linus.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---
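
For context, the sanity check added to blk.h below is the same trick io_uring
uses: treating the signed counter as unsigned, "ref + 127u <= 127u" is true
exactly when the count is in [-127, 0], i.e. the reference was already gone
(a put on a freed request, underflow) or the counter has wrapped to within
127 of unsigned overflow. That is enough to catch the interesting bugs
without paying for refcount_t's saturation checks on every operation.

A minimal standalone sketch of the pattern follows; the "obj" type and helper
names here are made up purely for illustration, the real helpers added by this
patch operate on struct request in block/blk.h:

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/types.h>

struct obj {
	atomic_t ref;
};

/* true when the count is 0 or has wrapped into [-127, -1] */
#define obj_ref_zero_or_close_to_overflow(o) \
	((unsigned int) atomic_read(&(o)->ref) + 127u <= 127u)

static inline bool obj_ref_get_unless_zero(struct obj *o)
{
	/* only take a reference while the object is still live */
	return atomic_inc_not_zero(&o->ref);
}

static inline bool obj_ref_put_and_test(struct obj *o)
{
	/* cheap underflow/use-after-free detection before the decrement */
	WARN_ON_ONCE(obj_ref_zero_or_close_to_overflow(o));
	return atomic_dec_and_test(&o->ref);
}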

diff --git a/block/blk-flush.c b/block/blk-flush.c
index f78bb39e589e..e4df894189ce 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -229,7 +229,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 	/* release the tag's ownership to the req cloned from */
 	spin_lock_irqsave(&fq->mq_flush_lock, flags);
 
-	if (!refcount_dec_and_test(&flush_rq->ref)) {
+	if (!req_ref_put_and_test(flush_rq)) {
 		fq->rq_status = error;
 		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 		return;
@@ -349,7 +349,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 	 * and READ flush_rq->end_io
 	 */
 	smp_wmb();
-	refcount_set(&flush_rq->ref, 1);
+	req_ref_set(flush_rq, 1);
 
 	blk_flush_queue_rq(flush_rq, false);
 }
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 995336abee33..380e2dd31bfc 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -228,7 +228,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
 
 	spin_lock_irqsave(&tags->lock, flags);
 	rq = tags->rqs[bitnr];
-	if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
+	if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
 		rq = NULL;
 	spin_unlock_irqrestore(&tags->lock, flags);
 	return rq;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index fa464a3e2f9a..22ec21aa0c22 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -386,7 +386,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* tag was already set */
 	WRITE_ONCE(rq->deadline, 0);
-	refcount_set(&rq->ref, 1);
+	req_ref_set(rq, 1);
 
 	if (rq->rq_flags & RQF_ELV) {
 		struct elevator_queue *e = data->q->elevator;
@@ -634,7 +634,7 @@ void blk_mq_free_request(struct request *rq)
 	rq_qos_done(q, rq);
 
 	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
-	if (refcount_dec_and_test(&rq->ref))
+	if (req_ref_put_and_test(rq))
 		__blk_mq_free_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
@@ -930,7 +930,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
 		rq_qos_done(rq->q, rq);
 
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
-		if (!refcount_dec_and_test(&rq->ref))
+		if (!req_ref_put_and_test(rq))
 			continue;
 
 		blk_crypto_free_request(rq);
@@ -1373,7 +1373,7 @@ void blk_mq_put_rq_ref(struct request *rq)
 {
 	if (is_flush_rq(rq))
 		rq->end_io(rq, 0);
-	else if (refcount_dec_and_test(&rq->ref))
+	else if (req_ref_put_and_test(rq))
 		__blk_mq_free_request(rq);
 }
 
@@ -3003,7 +3003,7 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
 			unsigned long rq_addr = (unsigned long)rq;
 
 			if (rq_addr >= start && rq_addr < end) {
-				WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
+				WARN_ON_ONCE(req_ref_read(rq) != 0);
 				cmpxchg(&drv_tags->rqs[i], rq, NULL);
 			}
 		}
@@ -3337,7 +3337,7 @@ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
 	if (!tags)
 		return;
 
-	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+	WARN_ON_ONCE(req_ref_read(flush_rq) != 0);
 
 	for (i = 0; i < queue_depth; i++)
 		cmpxchg(&tags->rqs[i], flush_rq, NULL);
diff --git a/block/blk.h b/block/blk.h
index 296411900c55..f869f4b2dec9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -473,4 +473,35 @@ static inline bool should_fail_request(struct block_device *part,
 }
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+/*
+ * Optimized request reference counting. Ideally we'd make timeouts be more
+ * clever, as that's the only reason we need references at all... But until
+ * this happens, this is faster than using refcount_t. Also see:
+ *
+ * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
+ */
+#define req_ref_zero_or_close_to_overflow(req)	\
+	((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u)
+
+static inline bool req_ref_inc_not_zero(struct request *req)
+{
+	return atomic_inc_not_zero(&req->ref);
+}
+
+static inline bool req_ref_put_and_test(struct request *req)
+{
+	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+	return atomic_dec_and_test(&req->ref);
+}
+
+static inline void req_ref_set(struct request *req, int value)
+{
+	atomic_set(&req->ref, value);
+}
+
+static inline int req_ref_read(struct request *req)
+{
+	return atomic_read(&req->ref);
+}
+
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index bfc3cc61f653..ecdc049b52fa 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -138,7 +138,7 @@ struct request {
 	unsigned short ioprio;
 
 	enum mq_rq_state state;
-	refcount_t ref;
+	atomic_t ref;
 
 	unsigned long deadline;
 
-- 
Jens Axboe


Thread overview: 36+ messages
2021-12-03 15:35 [PATCH] block: switch to atomic_t for request references Jens Axboe
2021-12-03 15:56 ` Keith Busch
2021-12-06  6:53 ` Christoph Hellwig
2021-12-06  8:31   ` Peter Zijlstra
2021-12-06 16:32     ` Jens Axboe
2021-12-06 17:19       ` Peter Zijlstra
2021-12-06 17:35     ` Linus Torvalds
2021-12-06 18:13       ` Jens Axboe
2021-12-06 20:51         ` Kees Cook
2021-12-06 21:17           ` Linus Torvalds
2021-12-06 23:28             ` Kees Cook
2021-12-07  0:13               ` Linus Torvalds
2021-12-07  4:56                 ` Kees Cook
2021-12-07  9:34                 ` Peter Zijlstra
2021-12-07 16:03                   ` Linus Torvalds
2021-12-07 10:30                 ` Peter Zijlstra
2021-12-07 16:10                   ` Linus Torvalds
2021-12-07 16:23                     ` Peter Zijlstra
2021-12-06 16:31   ` Jens Axboe
2021-12-07 11:26   ` Peter Zijlstra
2021-12-07 13:28     ` Peter Zijlstra
2021-12-07 15:51       ` Peter Zijlstra
2021-12-07 16:13       ` Linus Torvalds
2021-12-07 16:52         ` Peter Zijlstra
2021-12-07 17:41           ` Peter Zijlstra
2021-12-07 17:43           ` Linus Torvalds
2021-12-07 17:45             ` Linus Torvalds
2021-12-07 20:28       ` Peter Zijlstra
2021-12-07 23:23         ` Linus Torvalds
2021-12-08 17:07           ` Peter Zijlstra
2021-12-08 18:00             ` Linus Torvalds
2021-12-08 18:44               ` Peter Zijlstra
2021-12-08 18:50                 ` Linus Torvalds
2021-12-08 20:32                   ` Peter Zijlstra
2021-12-10 10:57                   ` Peter Zijlstra
2021-12-10 12:38               ` Peter Zijlstra
