* blk_mq_complete_request overhaul
@ 2020-06-11  6:44 Christoph Hellwig
  2020-06-11  6:44 ` [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c Christoph Hellwig
                   ` (12 more replies)
  0 siblings, 13 replies; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Hi Jens,

Peter's recent inquiry made me dust off various bits of unfinished work
around blk_mq_complete_request and massage them into a coherent series.

This series does three different things, all touching the same area:

 - merge the softirq based single queue completion into the main
   blk-mq completion mechanism, as there is a lot of duplicate logic
   between the two
 - move the error injection that just fails to complete requests out into
   the drivers.  The fact that blk_mq_complete_request just wouldn't
   complete when called from the error handler has been causing all kinds
   of pain
 - optimize the fast path by allowing drivers to avoid the indirect call
   with a little more work for polled or per-CPU IRQ completions.  With
   this a polled block device I/O only has two indirect calls left: one for
   the file operation that ends up in the block device code, and another
   one for dispatching to the nvme driver (see the sketch below)
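
To illustrate the last point, a driver's completion handler can end up
looking roughly like the sketch below after this series (the mydrv_*
names are made up; see patches 10-12 for the real nvme conversion):

	static void mydrv_handle_completion(struct request *rq)
	{
		/* record the per-driver status in the request pdu first ... */
		if (!blk_mq_complete_request_remote(rq))
			mydrv_complete_rq(rq);	/* direct call, no ->complete() */
	}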


* [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 14:35   ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq Christoph Hellwig
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

__blk_complete_request is only called from the blk-mq code, and
duplicates a lot of code from blk-mq.c.  Move it there to prepare
for better code sharing and simplifications.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/Makefile         |   2 +-
 block/blk-mq.c         | 135 +++++++++++++++++++++++++++++++++++
 block/blk-softirq.c    | 156 -----------------------------------------
 include/linux/blkdev.h |   1 -
 4 files changed, 136 insertions(+), 158 deletions(-)
 delete mode 100644 block/blk-softirq.c

diff --git a/block/Makefile b/block/Makefile
index 78719169fb2af8..8d841f5f986fe4 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+			blk-exec.o blk-merge.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9a36ac1c1fa1d8..bbdc6c97bd42db 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -41,6 +41,8 @@
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
 
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
@@ -574,6 +576,130 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
+{
+	struct list_head *cpu_list, local_list;
+
+	local_irq_disable();
+	cpu_list = this_cpu_ptr(&blk_cpu_done);
+	list_replace_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq;
+
+		rq = list_entry(local_list.next, struct request, ipi_list);
+		list_del_init(&rq->ipi_list);
+		rq->q->mq_ops->complete(rq);
+	}
+}
+
+#ifdef CONFIG_SMP
+static void trigger_softirq(void *data)
+{
+	struct request *rq = data;
+	struct list_head *list;
+
+	list = this_cpu_ptr(&blk_cpu_done);
+	list_add_tail(&rq->ipi_list, list);
+
+	if (list->next == &rq->ipi_list)
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+}
+
+/*
+ * Setup and invoke a run of 'trigger_softirq' on the given cpu.
+ */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	if (cpu_online(cpu)) {
+		call_single_data_t *data = &rq->csd;
+
+		data->func = trigger_softirq;
+		data->info = rq;
+		data->flags = 0;
+
+		smp_call_function_single_async(cpu, data);
+		return 0;
+	}
+
+	return 1;
+}
+#else /* CONFIG_SMP */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	return 1;
+}
+#endif
+
+static int blk_softirq_cpu_dead(unsigned int cpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	local_irq_disable();
+	list_splice_init(&per_cpu(blk_cpu_done, cpu),
+			 this_cpu_ptr(&blk_cpu_done));
+	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	local_irq_enable();
+
+	return 0;
+}
+
+static void __blk_complete_request(struct request *req)
+{
+	struct request_queue *q = req->q;
+	int cpu, ccpu = req->mq_ctx->cpu;
+	unsigned long flags;
+	bool shared = false;
+
+	BUG_ON(!q->mq_ops->complete);
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	/*
+	 * Select completion CPU
+	 */
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
+		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
+			shared = cpus_share_cache(cpu, ccpu);
+	} else
+		ccpu = cpu;
+
+	/*
+	 * If current CPU and requested CPU share a cache, run the softirq on
+	 * the current CPU. One might concern this is just like
+	 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+	 * running in interrupt handler, and currently I/O controller doesn't
+	 * support multiple interrupts, so current CPU is unique actually. This
+	 * avoids IPI sending from current CPU to the first CPU of a group.
+	 */
+	if (ccpu == cpu || shared) {
+		struct list_head *list;
+do_local:
+		list = this_cpu_ptr(&blk_cpu_done);
+		list_add_tail(&req->ipi_list, list);
+
+		/*
+		 * if the list only contains our just added request,
+		 * signal a raise of the softirq. If there are already
+		 * entries there, someone already raised the irq but it
+		 * hasn't run yet.
+		 */
+		if (list->next == &req->ipi_list)
+			raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	} else if (raise_blk_irq(ccpu, req))
+		goto do_local;
+
+	local_irq_restore(flags);
+}
+
 static void __blk_mq_complete_request_remote(void *data)
 {
 	struct request *rq = data;
@@ -3787,6 +3913,15 @@ EXPORT_SYMBOL(blk_mq_rq_cpu);
 
 static int __init blk_mq_init(void)
 {
+	int i;
+
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+
+	cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
+				  "block/softirq:dead", NULL,
+				  blk_softirq_cpu_dead);
 	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
 				blk_mq_hctx_notify_dead);
 	cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
deleted file mode 100644
index 6e7ec87d49faa1..00000000000000
--- a/block/blk-softirq.c
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Functions related to softirq rq completions
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/sched/topology.h>
-
-#include "blk.h"
-
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
-/*
- * Softirq action handler - move entries to local list and loop over them
- * while passing them to the queue registered handler.
- */
-static __latent_entropy void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = this_cpu_ptr(&blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, ipi_list);
-		list_del_init(&rq->ipi_list);
-		rq->q->mq_ops->complete(rq);
-	}
-}
-
-#ifdef CONFIG_SMP
-static void trigger_softirq(void *data)
-{
-	struct request *rq = data;
-	struct list_head *list;
-
-	list = this_cpu_ptr(&blk_cpu_done);
-	list_add_tail(&rq->ipi_list, list);
-
-	if (list->next == &rq->ipi_list)
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-}
-
-/*
- * Setup and invoke a run of 'trigger_softirq' on the given cpu.
- */
-static int raise_blk_irq(int cpu, struct request *rq)
-{
-	if (cpu_online(cpu)) {
-		call_single_data_t *data = &rq->csd;
-
-		data->func = trigger_softirq;
-		data->info = rq;
-		data->flags = 0;
-
-		smp_call_function_single_async(cpu, data);
-		return 0;
-	}
-
-	return 1;
-}
-#else /* CONFIG_SMP */
-static int raise_blk_irq(int cpu, struct request *rq)
-{
-	return 1;
-}
-#endif
-
-static int blk_softirq_cpu_dead(unsigned int cpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	local_irq_disable();
-	list_splice_init(&per_cpu(blk_cpu_done, cpu),
-			 this_cpu_ptr(&blk_cpu_done));
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-	local_irq_enable();
-
-	return 0;
-}
-
-void __blk_complete_request(struct request *req)
-{
-	struct request_queue *q = req->q;
-	int cpu, ccpu = req->mq_ctx->cpu;
-	unsigned long flags;
-	bool shared = false;
-
-	BUG_ON(!q->mq_ops->complete);
-
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-
-	/*
-	 * Select completion CPU
-	 */
-	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
-		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-			shared = cpus_share_cache(cpu, ccpu);
-	} else
-		ccpu = cpu;
-
-	/*
-	 * If current CPU and requested CPU share a cache, run the softirq on
-	 * the current CPU. One might concern this is just like
-	 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
-	 * running in interrupt handler, and currently I/O controller doesn't
-	 * support multiple interrupts, so current CPU is unique actually. This
-	 * avoids IPI sending from current CPU to the first CPU of a group.
-	 */
-	if (ccpu == cpu || shared) {
-		struct list_head *list;
-do_local:
-		list = this_cpu_ptr(&blk_cpu_done);
-		list_add_tail(&req->ipi_list, list);
-
-		/*
-		 * if the list only contains our just added request,
-		 * signal a raise of the softirq. If there are already
-		 * entries there, someone already raised the irq but it
-		 * hasn't run yet.
-		 */
-		if (list->next == &req->ipi_list)
-			raise_softirq_irqoff(BLOCK_SOFTIRQ);
-	} else if (raise_blk_irq(ccpu, req))
-		goto do_local;
-
-	local_irq_restore(flags);
-}
-
-static __init int blk_softirq_init(void)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
-
-	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-	cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
-				  "block/softirq:dead", NULL,
-				  blk_softirq_cpu_dead);
-	return 0;
-}
-subsys_initcall(blk_softirq_init);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fd900998b4e2e..98712cfc7a3489 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1078,7 +1078,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq);
 extern bool blk_update_request(struct request *rq, blk_status_t error,
 			       unsigned int nr_bytes);
 
-extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 
 /*
-- 
2.26.2



* [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
  2020-06-11  6:44 ` [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 14:34   ` Daniel Wagner
  2020-06-18 14:50     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 03/12] blk-mq: remove raise_blk_irq Christoph Hellwig
                   ` (10 subsequent siblings)
  12 siblings, 2 replies; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Add a helper to deduplicate the logic that raises the block softirq.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bbdc6c97bd42db..aa1917949f0ded 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -598,19 +598,27 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 	}
 }
 
-#ifdef CONFIG_SMP
-static void trigger_softirq(void *data)
+static void blk_mq_trigger_softirq(struct request *rq)
 {
-	struct request *rq = data;
-	struct list_head *list;
+	struct list_head *list = this_cpu_ptr(&blk_cpu_done);
 
-	list = this_cpu_ptr(&blk_cpu_done);
 	list_add_tail(&rq->ipi_list, list);
 
+	/*
+	 * If the list only contains our just added request, signal a raise of
+	 * the softirq.  If there are already entries there, someone already
+	 * raised the irq but it hasn't run yet.
+	 */
 	if (list->next == &rq->ipi_list)
 		raise_softirq_irqoff(BLOCK_SOFTIRQ);
 }
 
+#ifdef CONFIG_SMP
+static void trigger_softirq(void *data)
+{
+	blk_mq_trigger_softirq(data);
+}
+
 /*
  * Setup and invoke a run of 'trigger_softirq' on the given cpu.
  */
@@ -681,19 +689,8 @@ static void __blk_complete_request(struct request *req)
 	 * avoids IPI sending from current CPU to the first CPU of a group.
 	 */
 	if (ccpu == cpu || shared) {
-		struct list_head *list;
 do_local:
-		list = this_cpu_ptr(&blk_cpu_done);
-		list_add_tail(&req->ipi_list, list);
-
-		/*
-		 * if the list only contains our just added request,
-		 * signal a raise of the softirq. If there are already
-		 * entries there, someone already raised the irq but it
-		 * hasn't run yet.
-		 */
-		if (list->next == &req->ipi_list)
-			raise_softirq_irqoff(BLOCK_SOFTIRQ);
+		blk_mq_trigger_softirq(req);
 	} else if (raise_blk_irq(ccpu, req))
 		goto do_local;
 
-- 
2.26.2



* [PATCH 03/12] blk-mq: remove raise_blk_irq
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
  2020-06-11  6:44 ` [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c Christoph Hellwig
  2020-06-11  6:44 ` [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 15:02     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 04/12] blk-mq: complete polled requests directly Christoph Hellwig
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

By open coding raise_blk_irq in the only caller, and replacing the
#ifdef CONFIG_SMP with an IS_ENABLED check, the flow in the caller
can be significantly simplified.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
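Note: IS_ENABLED(CONFIG_SMP) from <linux/kconfig.h> evaluates to a
compile-time constant, so unlike the removed #ifdef the IPI branch is
still parsed and type-checked on UP builds, but the compiler discards it
entirely.  Roughly (sketch; send_completion_ipi is a made-up stand-in
for the smp_call_function_single_async() call):

	if (IS_ENABLED(CONFIG_SMP) && ccpu != cpu && !shared && cpu_online(ccpu))
		send_completion_ipi(ccpu, req);	/* compiled out on !SMP */
	else
		blk_mq_trigger_softirq(req);
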
 block/blk-mq.c | 40 ++++++++++------------------------------
 1 file changed, 10 insertions(+), 30 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa1917949f0ded..9d3928456af1c8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -613,37 +613,11 @@ static void blk_mq_trigger_softirq(struct request *rq)
 		raise_softirq_irqoff(BLOCK_SOFTIRQ);
 }
 
-#ifdef CONFIG_SMP
 static void trigger_softirq(void *data)
 {
 	blk_mq_trigger_softirq(data);
 }
 
-/*
- * Setup and invoke a run of 'trigger_softirq' on the given cpu.
- */
-static int raise_blk_irq(int cpu, struct request *rq)
-{
-	if (cpu_online(cpu)) {
-		call_single_data_t *data = &rq->csd;
-
-		data->func = trigger_softirq;
-		data->info = rq;
-		data->flags = 0;
-
-		smp_call_function_single_async(cpu, data);
-		return 0;
-	}
-
-	return 1;
-}
-#else /* CONFIG_SMP */
-static int raise_blk_irq(int cpu, struct request *rq)
-{
-	return 1;
-}
-#endif
-
 static int blk_softirq_cpu_dead(unsigned int cpu)
 {
 	/*
@@ -688,11 +662,17 @@ static void __blk_complete_request(struct request *req)
 	 * support multiple interrupts, so current CPU is unique actually. This
 	 * avoids IPI sending from current CPU to the first CPU of a group.
 	 */
-	if (ccpu == cpu || shared) {
-do_local:
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    ccpu != cpu && !shared && cpu_online(ccpu)) {
+		call_single_data_t *data = &req->csd;
+
+		data->func = trigger_softirq;
+		data->info = req;
+		data->flags = 0;
+		smp_call_function_single_async(cpu, data);
+	} else {
 		blk_mq_trigger_softirq(req);
-	} else if (raise_blk_irq(ccpu, req))
-		goto do_local;
+	}
 
 	local_irq_restore(flags);
 }
-- 
2.26.2



* [PATCH 04/12] blk-mq: complete polled requests directly
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (2 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 03/12] blk-mq: remove raise_blk_irq Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 15:50     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 05/12] blk-mq: short cut the IPI path in blk_mq_force_complete_rq for !SMP Christoph Hellwig
                   ` (8 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Even for single queue devices there is no point in offloading a polled
completion to the softirq, given that blk_mq_force_complete_rq is called
from the polling thread in that case and thus there are no starvation
issues.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9d3928456af1c8..25ce40b367a635 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -703,6 +703,16 @@ void blk_mq_force_complete_rq(struct request *rq)
 	int cpu;
 
 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+
+	/*
+	 * For a polled request, always complete locallly, it's pointless
+	 * to redirect the completion.
+	 */
+	if (rq->cmd_flags & REQ_HIPRI) {
+		q->mq_ops->complete(rq);
+		return;
+	}
+
 	/*
 	 * Most of single queue controllers, there is only one irq vector
 	 * for handling IO completion, and the only irq's affinity is set
@@ -717,12 +727,7 @@ void blk_mq_force_complete_rq(struct request *rq)
 		return;
 	}
 
-	/*
-	 * For a polled request, always complete locallly, it's pointless
-	 * to redirect the completion.
-	 */
-	if ((rq->cmd_flags & REQ_HIPRI) ||
-	    !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
+	if (!test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
 		q->mq_ops->complete(rq);
 		return;
 	}
-- 
2.26.2



* [PATCH 05/12] blk-mq: short cut the IPI path in blk_mq_force_complete_rq for !SMP
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (3 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 04/12] blk-mq: complete polled requests directly Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 15:52     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 06/12] blk-mq: merge the softirq vs non-softirq IPI logic Christoph Hellwig
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Let the compiler optimize out the entire IPI path, given that we are
obviously not going to use it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 25ce40b367a635..059d30c522f2ad 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -727,7 +727,8 @@ void blk_mq_force_complete_rq(struct request *rq)
 		return;
 	}
 
-	if (!test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
+	if (!IS_ENABLED(CONFIG_SMP) ||
+	    !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
 		q->mq_ops->complete(rq);
 		return;
 	}
-- 
2.26.2



* [PATCH 06/12] blk-mq: merge the softirq vs non-softirq IPI logic
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (4 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 05/12] blk-mq: short cut the IPI path in blk_mq_force_complete_rq for !SMP Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:12     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 07/12] blk-mq: move failure injection out of blk_mq_complete_request Christoph Hellwig
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Both the softirq path for single queue devices and the multi-queue
completion handler share the same logic to figure out if we need an
IPI for the completion and eventually issue it.  Merge the two
versions into a single unified code path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
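With the two paths merged, the single vs multi queue decision boils down
to this small dispatch (quoted from the hunk below):

	if (rq->q->nr_hw_queues == 1)
		blk_mq_trigger_softirq(rq);
	else
		rq->q->mq_ops->complete(rq);
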
 block/blk-mq.c | 85 ++++++++++++--------------------------------------
 1 file changed, 20 insertions(+), 65 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 059d30c522f2ad..11c706a9942043 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -600,8 +600,11 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 
 static void blk_mq_trigger_softirq(struct request *rq)
 {
-	struct list_head *list = this_cpu_ptr(&blk_cpu_done);
+	struct list_head *list;
+	unsigned long flags;
 
+	local_irq_save(flags);
+	list = this_cpu_ptr(&blk_cpu_done);
 	list_add_tail(&rq->ipi_list, list);
 
 	/*
@@ -611,11 +614,7 @@ static void blk_mq_trigger_softirq(struct request *rq)
 	 */
 	if (list->next == &rq->ipi_list)
 		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-}
-
-static void trigger_softirq(void *data)
-{
-	blk_mq_trigger_softirq(data);
+	local_irq_restore(flags);
 }
 
 static int blk_softirq_cpu_dead(unsigned int cpu)
@@ -633,56 +632,26 @@ static int blk_softirq_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
-static void __blk_complete_request(struct request *req)
+static void __blk_mq_complete_request(struct request *rq)
 {
-	struct request_queue *q = req->q;
-	int cpu, ccpu = req->mq_ctx->cpu;
-	unsigned long flags;
-	bool shared = false;
-
-	BUG_ON(!q->mq_ops->complete);
-
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-
-	/*
-	 * Select completion CPU
-	 */
-	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
-		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-			shared = cpus_share_cache(cpu, ccpu);
-	} else
-		ccpu = cpu;
-
 	/*
-	 * If current CPU and requested CPU share a cache, run the softirq on
-	 * the current CPU. One might concern this is just like
-	 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
-	 * running in interrupt handler, and currently I/O controller doesn't
-	 * support multiple interrupts, so current CPU is unique actually. This
-	 * avoids IPI sending from current CPU to the first CPU of a group.
+	 * For most of single queue controllers, there is only one irq vector
+	 * for handling I/O completion, and the only irq's affinity is set
+	 * to all possible CPUs.  On most of ARCHs, this affinity means the irq
+	 * is handled on one specific CPU.
+	 *
+	 * So complete I/O requests in softirq context in case of single queue
+	 * devices to avoid degrading I/O performance due to irqsoff latency.
 	 */
-	if (IS_ENABLED(CONFIG_SMP) &&
-	    ccpu != cpu && !shared && cpu_online(ccpu)) {
-		call_single_data_t *data = &req->csd;
-
-		data->func = trigger_softirq;
-		data->info = req;
-		data->flags = 0;
-		smp_call_function_single_async(cpu, data);
-	} else {
-		blk_mq_trigger_softirq(req);
-	}
-
-	local_irq_restore(flags);
+	if (rq->q->nr_hw_queues == 1)
+		blk_mq_trigger_softirq(rq);
+	else
+		rq->q->mq_ops->complete(rq);
 }
 
 static void __blk_mq_complete_request_remote(void *data)
 {
-	struct request *rq = data;
-	struct request_queue *q = rq->q;
-
-	q->mq_ops->complete(rq);
+	__blk_mq_complete_request(data);
 }
 
 /**
@@ -713,23 +682,9 @@ void blk_mq_force_complete_rq(struct request *rq)
 		return;
 	}
 
-	/*
-	 * Most of single queue controllers, there is only one irq vector
-	 * for handling IO completion, and the only irq's affinity is set
-	 * as all possible CPUs. On most of ARCHs, this affinity means the
-	 * irq is handled on one specific CPU.
-	 *
-	 * So complete IO reqeust in softirq context in case of single queue
-	 * for not degrading IO performance by irqsoff latency.
-	 */
-	if (q->nr_hw_queues == 1) {
-		__blk_complete_request(rq);
-		return;
-	}
-
 	if (!IS_ENABLED(CONFIG_SMP) ||
 	    !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
-		q->mq_ops->complete(rq);
+		__blk_mq_complete_request(rq);
 		return;
 	}
 
@@ -743,7 +698,7 @@ void blk_mq_force_complete_rq(struct request *rq)
 		rq->csd.flags = 0;
 		smp_call_function_single_async(ctx->cpu, &rq->csd);
 	} else {
-		q->mq_ops->complete(rq);
+		__blk_mq_complete_request(rq);
 	}
 	put_cpu();
 }
-- 
2.26.2



* [PATCH 07/12] blk-mq: move failure injection out of blk_mq_complete_request
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (5 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 06/12] blk-mq: merge the softirq vs non-softirq IPI logic Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:18   ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 08/12] blk-mq: remove the get_cpu/put_cpu pair in blk_mq_complete_request Christoph Hellwig
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Move the call to blk_should_fake_timeout out of blk_mq_complete_request
and into the drivers, skipping call sites that are obvious error
handlers, and remove the now superfluous blk_mq_force_complete_rq helper.
This ensures we don't keep injecting errors into completions that just
terminate the Linux request after the hardware has been reset or the
command has been aborted.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
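For the normal I/O path the per-driver pattern is simply the two lines
below; error handling and abort paths keep calling
blk_mq_complete_request() unconditionally, which is the point of the
change:

	if (likely(!blk_should_fake_timeout(rq->q)))
		blk_mq_complete_request(rq);
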
 block/blk-mq.c                    | 34 +++++++------------------------
 block/blk-timeout.c               |  6 ++----
 block/blk.h                       |  9 --------
 block/bsg-lib.c                   |  5 ++++-
 drivers/block/loop.c              |  6 ++++--
 drivers/block/mtip32xx/mtip32xx.c |  3 ++-
 drivers/block/nbd.c               |  5 ++++-
 drivers/block/null_blk_main.c     |  5 +++--
 drivers/block/skd_main.c          |  9 +++++---
 drivers/block/virtio_blk.c        |  3 ++-
 drivers/block/xen-blkfront.c      |  3 ++-
 drivers/md/dm-rq.c                |  3 ++-
 drivers/mmc/core/block.c          |  8 ++++----
 drivers/nvme/host/core.c          |  2 +-
 drivers/nvme/host/nvme.h          |  3 ++-
 drivers/s390/block/dasd.c         |  2 +-
 drivers/s390/block/scm_blk.c      |  3 ++-
 drivers/scsi/scsi_lib.c           | 12 +++--------
 include/linux/blk-mq.h            | 12 +++++++++--
 19 files changed, 61 insertions(+), 72 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 11c706a9942043..b73b193809097b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -655,16 +655,13 @@ static void __blk_mq_complete_request_remote(void *data)
 }
 
 /**
- * blk_mq_force_complete_rq() - Force complete the request, bypassing any error
- * 				injection that could drop the completion.
- * @rq: Request to be force completed
+ * blk_mq_complete_request - end I/O on a request
+ * @rq:		the request being processed
  *
- * Drivers should use blk_mq_complete_request() to complete requests in their
- * normal IO path. For timeout error recovery, drivers may call this forced
- * completion routine after they've reclaimed timed out requests to bypass
- * potentially subsequent fake timeouts.
- */
-void blk_mq_force_complete_rq(struct request *rq)
+ * Description:
+ *	Complete a request by scheduling the ->complete_rq operation.
+ **/
+void blk_mq_complete_request(struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct request_queue *q = rq->q;
@@ -702,7 +699,7 @@ void blk_mq_force_complete_rq(struct request *rq)
 	}
 	put_cpu();
 }
-EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
+EXPORT_SYMBOL(blk_mq_complete_request);
 
 static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
 	__releases(hctx->srcu)
@@ -724,23 +721,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
 		*srcu_idx = srcu_read_lock(hctx->srcu);
 }
 
-/**
- * blk_mq_complete_request - end I/O on a request
- * @rq:		the request being processed
- *
- * Description:
- *	Ends all I/O on a request. It does not handle partial completions.
- *	The actual completion happens out-of-order, through a IPI handler.
- **/
-bool blk_mq_complete_request(struct request *rq)
-{
-	if (unlikely(blk_should_fake_timeout(rq->q)))
-		return false;
-	blk_mq_force_complete_rq(rq);
-	return true;
-}
-EXPORT_SYMBOL(blk_mq_complete_request);
-
 /**
  * blk_mq_start_request - Start processing a request
  * @rq: Pointer to request to be started
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 8aa68fae96ad8d..3a1ac64347588d 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -20,13 +20,11 @@ static int __init setup_fail_io_timeout(char *str)
 }
 __setup("fail_io_timeout=", setup_fail_io_timeout);
 
-int blk_should_fake_timeout(struct request_queue *q)
+bool __blk_should_fake_timeout(struct request_queue *q)
 {
-	if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
-		return 0;
-
 	return should_fail(&fail_io_timeout, 1);
 }
+EXPORT_SYMBOL_GPL(__blk_should_fake_timeout);
 
 static int __init fail_io_timeout_debugfs(void)
 {
diff --git a/block/blk.h b/block/blk.h
index aa16e524dc35e0..220220d868eac6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -223,18 +223,9 @@ ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
 		char *buf);
 ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count);
-
-#ifdef CONFIG_FAIL_IO_TIMEOUT
-int blk_should_fake_timeout(struct request_queue *);
 ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
 				const char *, size_t);
-#else
-static inline int blk_should_fake_timeout(struct request_queue *q)
-{
-	return 0;
-}
-#endif
 
 void __blk_queue_split(struct request_queue *q, struct bio **bio,
 		unsigned int *nr_segs);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 6cbb7926534cd7..fb7b347f80105b 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -181,9 +181,12 @@ EXPORT_SYMBOL_GPL(bsg_job_get);
 void bsg_job_done(struct bsg_job *job, int result,
 		  unsigned int reply_payload_rcv_len)
 {
+	struct request *rq = blk_mq_rq_from_pdu(job);
+
 	job->result = result;
 	job->reply_payload_rcv_len = reply_payload_rcv_len;
-	blk_mq_complete_request(blk_mq_rq_from_pdu(job));
+	if (likely(!blk_should_fake_timeout(rq->q)))
+		blk_mq_complete_request(rq);
 }
 EXPORT_SYMBOL_GPL(bsg_job_done);
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 2e96d8b8758b63..c5b659a27cc354 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -509,7 +509,8 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
 		return;
 	kfree(cmd->bvec);
 	cmd->bvec = NULL;
-	blk_mq_complete_request(rq);
+	if (likely(!blk_should_fake_timeout(rq->q)))
+		blk_mq_complete_request(rq);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -2048,7 +2049,8 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 			cmd->ret = ret;
 		else
 			cmd->ret = ret ? -EIO : 0;
-		blk_mq_complete_request(rq);
+		if (likely(!blk_should_fake_timeout(rq->q)))
+			blk_mq_complete_request(rq);
 	}
 }
 
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f6bafa9a68b9df..153e2cdecb4d40 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -492,7 +492,8 @@ static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
 	struct request *req = blk_mq_rq_from_pdu(cmd);
 
 	cmd->status = status;
-	blk_mq_complete_request(req);
+	if (likely(!blk_should_fake_timeout(req->q)))
+		blk_mq_complete_request(req);
 }
 
 /*
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 43cff01a5a675d..01794cd2b6cae4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -784,6 +784,7 @@ static void recv_work(struct work_struct *work)
 	struct nbd_device *nbd = args->nbd;
 	struct nbd_config *config = nbd->config;
 	struct nbd_cmd *cmd;
+	struct request *rq;
 
 	while (1) {
 		cmd = nbd_read_stat(nbd, args->index);
@@ -796,7 +797,9 @@ static void recv_work(struct work_struct *work)
 			break;
 		}
 
-		blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
+		rq = blk_mq_rq_from_pdu(cmd);
+		if (likely(!blk_should_fake_timeout(rq->q)))
+			blk_mq_complete_request(rq);
 	}
 	atomic_dec(&config->recv_threads);
 	wake_up(&config->recv_wq);
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 87b31f9ca362ee..82259242b9b5c9 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1283,7 +1283,8 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
 	case NULL_IRQ_SOFTIRQ:
 		switch (cmd->nq->dev->queue_mode) {
 		case NULL_Q_MQ:
-			blk_mq_complete_request(cmd->rq);
+			if (likely(!blk_should_fake_timeout(cmd->rq->q)))
+				blk_mq_complete_request(cmd->rq);
 			break;
 		case NULL_Q_BIO:
 			/*
@@ -1423,7 +1424,7 @@ static bool should_requeue_request(struct request *rq)
 static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
 {
 	pr_info("rq %p timed out\n", rq);
-	blk_mq_force_complete_rq(rq);
+	blk_mq_complete_request(rq);
 	return BLK_EH_DONE;
 }
 
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 51569c199a6cc9..3a476dc1d14f57 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -1417,7 +1417,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 	case SKD_CHECK_STATUS_REPORT_GOOD:
 	case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
 		skreq->status = BLK_STS_OK;
-		blk_mq_complete_request(req);
+		if (likely(!blk_should_fake_timeout(req->q)))
+			blk_mq_complete_request(req);
 		break;
 
 	case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -1440,7 +1441,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 	case SKD_CHECK_STATUS_REPORT_ERROR:
 	default:
 		skreq->status = BLK_STS_IOERR;
-		blk_mq_complete_request(req);
+		if (likely(!blk_should_fake_timeout(req->q)))
+			blk_mq_complete_request(req);
 		break;
 	}
 }
@@ -1560,7 +1562,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 		 */
 		if (likely(cmp_status == SAM_STAT_GOOD)) {
 			skreq->status = BLK_STS_OK;
-			blk_mq_complete_request(rq);
+			if (likely(!blk_should_fake_timeout(rq->q)))
+				blk_mq_complete_request(rq);
 		} else {
 			skd_resolve_req_exception(skdev, skreq, rq);
 		}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 9d21bf0f155eed..741804bd8a1403 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -171,7 +171,8 @@ static void virtblk_done(struct virtqueue *vq)
 		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
 			struct request *req = blk_mq_rq_from_pdu(vbr);
 
-			blk_mq_complete_request(req);
+			if (likely(!blk_should_fake_timeout(req->q)))
+				blk_mq_complete_request(req);
 			req_done = true;
 		}
 		if (unlikely(virtqueue_is_broken(vq)))
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3b889ea950c21c..3bb3dd8da9b0c1 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1655,7 +1655,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			BUG();
 		}
 
-		blk_mq_complete_request(req);
+		if (likely(!blk_should_fake_timeout(req->q)))
+			blk_mq_complete_request(req);
 	}
 
 	rinfo->ring.rsp_cons = i;
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index f60c025121215b..5aec1cd093481d 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -288,7 +288,8 @@ static void dm_complete_request(struct request *rq, blk_status_t error)
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
-	blk_mq_complete_request(rq);
+	if (likely(!blk_should_fake_timeout(rq->q)))
+		blk_mq_complete_request(rq);
 }
 
 /*
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 7896952de1ac75..4791c82f8f7c78 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1446,7 +1446,7 @@ static void mmc_blk_cqe_req_done(struct mmc_request *mrq)
 	 */
 	if (mq->in_recovery)
 		mmc_blk_cqe_complete_rq(mq, req);
-	else
+	else if (likely(!blk_should_fake_timeout(req->q)))
 		blk_mq_complete_request(req);
 }
 
@@ -1926,7 +1926,7 @@ static void mmc_blk_hsq_req_done(struct mmc_request *mrq)
 	 */
 	if (mq->in_recovery)
 		mmc_blk_cqe_complete_rq(mq, req);
-	else
+	else if (likely(!blk_should_fake_timeout(req->q)))
 		blk_mq_complete_request(req);
 }
 
@@ -1936,7 +1936,7 @@ void mmc_blk_mq_complete(struct request *req)
 
 	if (mq->use_cqe)
 		mmc_blk_cqe_complete_rq(mq, req);
-	else
+	else if (likely(!blk_should_fake_timeout(req->q)))
 		mmc_blk_mq_complete_rq(mq, req);
 }
 
@@ -1988,7 +1988,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
 	 */
 	if (mq->in_recovery)
 		mmc_blk_mq_complete_rq(mq, req);
-	else
+	else if (likely(!blk_should_fake_timeout(req->q)))
 		blk_mq_complete_request(req);
 
 	mmc_blk_mq_dec_in_flight(mq, req);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0585efa47d8fd8..569671e264b509 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -304,7 +304,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
 		return true;
 
 	nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
-	blk_mq_force_complete_rq(req);
+	blk_mq_complete_request(req);
 	return true;
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fa5c75501049de..cb500d51c3a968 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -481,7 +481,8 @@ static inline void nvme_end_request(struct request *req, __le16 status,
 	rq->result = result;
 	/* inject error when permitted by fault injection framework */
 	nvme_should_fail(req);
-	blk_mq_complete_request(req);
+	if (likely(!blk_should_fake_timeout(req->q)))
+		blk_mq_complete_request(req);
 }
 
 static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index cf87eb27879f08..eb17fea8075c6f 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2802,7 +2802,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 			blk_update_request(req, BLK_STS_OK,
 					   blk_rq_bytes(req) - proc_bytes);
 			blk_mq_requeue_request(req, true);
-		} else {
+		} else if (likely(!blk_should_fake_timeout(req->q))) {
 			blk_mq_complete_request(req);
 		}
 	}
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index e01889394c8412..a4f6f2e62b1dcf 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -256,7 +256,8 @@ static void scm_request_finish(struct scm_request *scmrq)
 	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
 		error = blk_mq_rq_to_pdu(scmrq->request[i]);
 		*error = scmrq->error;
-		blk_mq_complete_request(scmrq->request[i]);
+		if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
+			blk_mq_complete_request(scmrq->request[i]);
 	}
 
 	atomic_dec(&bdev->queued_reqs);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0ba7a65e7c8d96..6ca91d09eca1ec 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1589,18 +1589,12 @@ static blk_status_t scsi_mq_prep_fn(struct request *req)
 
 static void scsi_mq_done(struct scsi_cmnd *cmd)
 {
+	if (unlikely(blk_should_fake_timeout(cmd->request->q)))
+		return;
 	if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
 		return;
 	trace_scsi_dispatch_cmd_done(cmd);
-
-	/*
-	 * If the block layer didn't complete the request due to a timeout
-	 * injection, scsi must clear its internal completed state so that the
-	 * timeout handler will see it needs to escalate its own error
-	 * recovery.
-	 */
-	if (unlikely(!blk_mq_complete_request(cmd->request)))
-		clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
+	blk_mq_complete_request(cmd->request);
 }
 
 static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d6fcae17da5a2a..8e6ab766aef76e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -503,8 +503,7 @@ void __blk_mq_end_request(struct request *rq, blk_status_t error);
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-bool blk_mq_complete_request(struct request *rq);
-void blk_mq_force_complete_rq(struct request *rq);
+void blk_mq_complete_request(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 			   struct bio *bio, unsigned int nr_segs);
 bool blk_mq_queue_stopped(struct request_queue *q);
@@ -537,6 +536,15 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
 
 unsigned int blk_mq_rq_cpu(struct request *rq);
 
+bool __blk_should_fake_timeout(struct request_queue *q);
+static inline bool blk_should_fake_timeout(struct request_queue *q)
+{
+	if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
+	    test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
+		return __blk_should_fake_timeout(q);
+	return false;
+}
+
 /**
  * blk_mq_rq_from_pdu - cast a PDU to a request
  * @pdu: the PDU (Protocol Data Unit) to be casted
-- 
2.26.2



* [PATCH 08/12] blk-mq: remove the get_cpu/put_cpu pair in blk_mq_complete_request
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (6 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 07/12] blk-mq: move failure injection out of blk_mq_complete_request Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:24   ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 09/12] blk-mq: factor out a blk_mq_complete_need_ipi helper Christoph Hellwig
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

We don't really care if we get migrated during the I/O completion.
In the worst case we either perform an IPI that wasn't required, or
complete the request on a CPU which we just migrated off.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
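For reference, the only difference is whether preemption stays disabled
across the completion (sketch):

	cpu = get_cpu();		/* old: disables preemption until put_cpu() */
	cpu = raw_smp_processor_id();	/* new: no pinning, we may migrate later */
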
 block/blk-mq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b73b193809097b..45294cd5d875cc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -685,7 +685,7 @@ void blk_mq_complete_request(struct request *rq)
 		return;
 	}
 
-	cpu = get_cpu();
+	cpu = raw_smp_processor_id();
 	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
 		shared = cpus_share_cache(cpu, ctx->cpu);
 
@@ -697,7 +697,6 @@ void blk_mq_complete_request(struct request *rq)
 	} else {
 		__blk_mq_complete_request(rq);
 	}
-	put_cpu();
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-- 
2.26.2



* [PATCH 09/12] blk-mq: factor out a blk_mq_complete_need_ipi helper
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (7 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 08/12] blk-mq: remove the get_cpu/put_cpu pair in blk_mq_complete_request Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:33     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 10/12] blk-mq: add a new blk_mq_complete_request_remote API Christoph Hellwig
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Add a helper to decide if we can complete locally or need an IPI.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 45294cd5d875cc..f83c81566de904 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -654,6 +654,24 @@ static void __blk_mq_complete_request_remote(void *data)
 	__blk_mq_complete_request(data);
 }
 
+static inline bool blk_mq_complete_need_ipi(struct request *rq)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (!IS_ENABLED(CONFIG_SMP) ||
+	    !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
+		return false;
+
+	/* same CPU or cache domain?  Complete locally */
+	if (cpu == rq->mq_ctx->cpu ||
+	    (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
+	     cpus_share_cache(cpu, rq->mq_ctx->cpu)))
+		return false;
+
+	/* don't try to IPI to an offline CPU */
+	return cpu_online(rq->mq_ctx->cpu);
+}
+
 /**
  * blk_mq_complete_request - end I/O on a request
  * @rq:		the request being processed
@@ -663,11 +681,6 @@ static void __blk_mq_complete_request_remote(void *data)
  **/
 void blk_mq_complete_request(struct request *rq)
 {
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-	struct request_queue *q = rq->q;
-	bool shared = false;
-	int cpu;
-
 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 
 	/*
@@ -675,25 +688,15 @@ void blk_mq_complete_request(struct request *rq)
 	 * to redirect the completion.
 	 */
 	if (rq->cmd_flags & REQ_HIPRI) {
-		q->mq_ops->complete(rq);
-		return;
-	}
-
-	if (!IS_ENABLED(CONFIG_SMP) ||
-	    !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
-		__blk_mq_complete_request(rq);
+		rq->q->mq_ops->complete(rq);
 		return;
 	}
 
-	cpu = raw_smp_processor_id();
-	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-		shared = cpus_share_cache(cpu, ctx->cpu);
-
-	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+	if (blk_mq_complete_need_ipi(rq)) {
 		rq->csd.func = __blk_mq_complete_request_remote;
 		rq->csd.info = rq;
 		rq->csd.flags = 0;
-		smp_call_function_single_async(ctx->cpu, &rq->csd);
+		smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
 	} else {
 		__blk_mq_complete_request(rq);
 	}
-- 
2.26.2



* [PATCH 10/12] blk-mq: add a new blk_mq_complete_request_remote API
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (8 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 09/12] blk-mq: factor out a blk_mq_complete_need_ipi helper Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:39     ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 11/12] nvme-rdma: factor out a nvme_rdma_end_request helper Christoph Hellwig
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

blk_mq_complete_request_remote is a variant of blk_mq_complete_request
that only completes the request if it needs to be bounced to another CPU
or a softirq.  If the request can be completed locally the function
returns false and lets the driver complete it without requiring an
indirect function call.

Suggestions for a better name welcome.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
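The intended calling convention for drivers that want to avoid the
indirect call is sketched below; mydrv_complete_rq stands in for the
driver's own ->complete handler (see the nvme patch at the end of the
series for a real user):

	if (!blk_mq_complete_request_remote(rq))
		mydrv_complete_rq(rq);	/* completed locally, skip ->complete() */
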
 block/blk-mq.c         | 45 ++++++++++++++++++++++++------------------
 include/linux/blk-mq.h |  1 +
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f83c81566de904..31d4bdd5bdb7bd 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -632,8 +632,11 @@ static int blk_softirq_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
-static void __blk_mq_complete_request(struct request *rq)
+
+static void __blk_mq_complete_request_remote(void *data)
 {
+	struct request *rq = data;
+
 	/*
 	 * For most of single queue controllers, there is only one irq vector
 	 * for handling I/O completion, and the only irq's affinity is set
@@ -649,11 +652,6 @@ static void __blk_mq_complete_request(struct request *rq)
 		rq->q->mq_ops->complete(rq);
 }
 
-static void __blk_mq_complete_request_remote(void *data)
-{
-	__blk_mq_complete_request(data);
-}
-
 static inline bool blk_mq_complete_need_ipi(struct request *rq)
 {
 	int cpu = raw_smp_processor_id();
@@ -672,14 +670,7 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
 	return cpu_online(rq->mq_ctx->cpu);
 }
 
-/**
- * blk_mq_complete_request - end I/O on a request
- * @rq:		the request being processed
- *
- * Description:
- *	Complete a request by scheduling the ->complete_rq operation.
- **/
-void blk_mq_complete_request(struct request *rq)
+bool blk_mq_complete_request_remote(struct request *rq)
 {
 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 
@@ -687,10 +678,8 @@ void blk_mq_complete_request(struct request *rq)
 	 * For a polled request, always complete locallly, it's pointless
 	 * to redirect the completion.
 	 */
-	if (rq->cmd_flags & REQ_HIPRI) {
-		rq->q->mq_ops->complete(rq);
-		return;
-	}
+	if (rq->cmd_flags & REQ_HIPRI)
+		return false;
 
 	if (blk_mq_complete_need_ipi(rq)) {
 		rq->csd.func = __blk_mq_complete_request_remote;
@@ -698,8 +687,26 @@ void blk_mq_complete_request(struct request *rq)
 		rq->csd.flags = 0;
 		smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
 	} else {
-		__blk_mq_complete_request(rq);
+		if (rq->q->nr_hw_queues > 1)
+			return false;
+		blk_mq_trigger_softirq(rq);
 	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
+
+/**
+ * blk_mq_complete_request - end I/O on a request
+ * @rq:		the request being processed
+ *
+ * Description:
+ *	Complete a request by scheduling the ->complete_rq operation.
+ **/
+void blk_mq_complete_request(struct request *rq)
+{
+	if (!blk_mq_complete_request_remote(rq))
+		rq->q->mq_ops->complete(rq);
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8e6ab766aef76e..1641ec6cd7e551 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -504,6 +504,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 void blk_mq_complete_request(struct request *rq);
+bool blk_mq_complete_request_remote(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 			   struct bio *bio, unsigned int nr_segs);
 bool blk_mq_queue_stopped(struct request_queue *q);
-- 
2.26.2



* [PATCH 11/12] nvme-rdma: factor out a nvme_rdma_end_request helper
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (9 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 10/12] blk-mq: add a new blk_mq_complete_request_remote API Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:42   ` Daniel Wagner
  2020-06-11  6:44 ` [PATCH 12/12] nvme: use blk_mq_complete_request_remote to avoid an indirect function call Christoph Hellwig
  2020-06-18 14:11   ` Christoph Hellwig
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Factor a small snippet of duplicated code into a new helper in
preparation for making this snippet a little bit less trivial.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f8f856dc0c67bd..9c02cf494a8257 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1149,6 +1149,15 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 	queue_work(nvme_reset_wq, &ctrl->err_work);
 }
 
+static void nvme_rdma_end_request(struct nvme_rdma_request *req)
+{
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (!refcount_dec_and_test(&req->ref))
+		return;
+	nvme_end_request(rq, req->status, req->result);
+}
+
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 		const char *op)
 {
@@ -1173,16 +1182,11 @@ static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvme_rdma_request *req =
 		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
-	struct request *rq = blk_mq_rq_from_pdu(req);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (unlikely(wc->status != IB_WC_SUCCESS))
 		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
-		return;
-	}
-
-	if (refcount_dec_and_test(&req->ref))
-		nvme_end_request(rq, req->status, req->result);
-
+	else
+		nvme_rdma_end_request(req);
 }
 
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@ -1547,15 +1551,11 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
 	struct nvme_rdma_request *req =
 		container_of(qe, struct nvme_rdma_request, sqe);
-	struct request *rq = blk_mq_rq_from_pdu(req);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (unlikely(wc->status != IB_WC_SUCCESS))
 		nvme_rdma_wr_error(cq, wc, "SEND");
-		return;
-	}
-
-	if (refcount_dec_and_test(&req->ref))
-		nvme_end_request(rq, req->status, req->result);
+	else
+		nvme_rdma_end_request(req);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
@@ -1694,11 +1694,9 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 			nvme_rdma_error_recovery(queue->ctrl);
 		}
 		/* the local invalidation completion will end the request */
-		return;
+	} else {
+		nvme_rdma_end_request(req);
 	}
-
-	if (refcount_dec_and_test(&req->ref))
-		nvme_end_request(rq, req->status, req->result);
 }
 
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
-- 
2.26.2



* [PATCH 12/12] nvme: use blk_mq_complete_request_remote to avoid an indirect function call
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
                   ` (10 preceding siblings ...)
  2020-06-11  6:44 ` [PATCH 11/12] nvme-rdma: factor out a nvme_rdma_end_request helper Christoph Hellwig
@ 2020-06-11  6:44 ` Christoph Hellwig
  2020-06-18 16:47     ` Daniel Wagner
  2020-06-18 14:11   ` Christoph Hellwig
  12 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-11  6:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Peter Zijlstra, linux-block, linux-nvme

Use the new blk_mq_complete_request_remote helper to avoid an indirect
function call in the completion fast path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c     | 4 +++-
 drivers/nvme/host/nvme.h   | 7 ++++---
 drivers/nvme/host/pci.c    | 3 ++-
 drivers/nvme/host/rdma.c   | 4 +++-
 drivers/nvme/host/tcp.c    | 6 ++++--
 drivers/nvme/target/loop.c | 3 ++-
 6 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index cb0007592c12ad..c04e97b858e54f 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -227,6 +227,7 @@ static DECLARE_COMPLETION(nvme_fc_unload_proceed);
  */
 static struct device *fc_udev_device;
 
+static void nvme_fc_complete_rq(struct request *rq);
 
 /* *********************** FC-NVME Port Management ************************ */
 
@@ -2033,7 +2034,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	}
 
 	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
-	nvme_end_request(rq, status, result);
+	if (!nvme_end_request(rq, status, result))
+		nvme_fc_complete_rq(rq);
 
 check_error:
 	if (terminate_assoc)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cb500d51c3a968..6f3a9e49f6b6b3 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -472,7 +472,7 @@ static inline u32 nvme_bytes_to_numd(size_t len)
 	return (len >> 2) - 1;
 }
 
-static inline void nvme_end_request(struct request *req, __le16 status,
+static inline bool nvme_end_request(struct request *req, __le16 status,
 		union nvme_result result)
 {
 	struct nvme_request *rq = nvme_req(req);
@@ -481,8 +481,9 @@ static inline void nvme_end_request(struct request *req, __le16 status,
 	rq->result = result;
 	/* inject error when permitted by fault injection framework */
 	nvme_should_fail(req);
-	if (likely(!blk_should_fake_timeout(req->q)))
-		blk_mq_complete_request(req);
+	if (unlikely(blk_should_fake_timeout(req->q)))
+		return true;
+	return blk_mq_complete_request_remote(req);
 }
 
 static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d690d5593a8095..6a44d5874c7945 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -963,7 +963,8 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
 	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
 	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
-	nvme_end_request(req, cqe->status, cqe->result);
+	if (!nvme_end_request(req, cqe->status, cqe->result))
+		nvme_pci_complete_rq(req);
 }
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 9c02cf494a8257..97b1054efb8409 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -149,6 +149,7 @@ MODULE_PARM_DESC(register_always,
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event);
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void nvme_rdma_complete_rq(struct request *rq);
 
 static const struct blk_mq_ops nvme_rdma_mq_ops;
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops;
@@ -1155,7 +1156,8 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
 
 	if (!refcount_dec_and_test(&req->ref))
 		return;
-	nvme_end_request(rq, req->status, req->result);
+	if (!nvme_end_request(rq, req->status, req->result))
+		nvme_rdma_complete_rq(rq);
 }
 
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 1843110ec34f8a..1b5c24624df167 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -464,7 +464,8 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		return -EINVAL;
 	}
 
-	nvme_end_request(rq, cqe->status, cqe->result);
+	if (!nvme_end_request(rq, cqe->status, cqe->result))
+		nvme_complete_rq(rq);
 	queue->nr_cqe++;
 
 	return 0;
@@ -654,7 +655,8 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 {
 	union nvme_result res = {};
 
-	nvme_end_request(rq, cpu_to_le16(status << 1), res);
+	if (!nvme_end_request(rq, cpu_to_le16(status << 1), res))
+		nvme_complete_rq(rq);
 }
 
 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 0d54e730cbf2a7..1b9c246a849bc3 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -116,7 +116,8 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
 			return;
 		}
 
-		nvme_end_request(rq, cqe->status, cqe->result);
+		if (!nvme_end_request(rq, cqe->status, cqe->result))
+			nvme_loop_complete_rq(rq);
 	}
 }
 
-- 
2.26.2
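
The calling convention is the same in every hunk above: the helper returns
true when the completion was handed off (to another CPU, a softirq, or a
faked timeout), and false when the driver should finish the request itself
with a direct call.  A hedged userspace sketch of that control flow, all
names hypothetical:

/* Sketch only: illustrates the "hand off remotely or fall back to a direct
 * local call" convention; none of these names are real kernel symbols. */
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the remote-completion helper: true means the completion was
 * bounced elsewhere, false means "caller, complete it yourself". */
static bool sketch_complete_remote(int rq, bool needs_remote)
{
	if (needs_remote)
		printf("request %d queued for remote completion\n", rq);
	return needs_remote;
}

/* Driver-specific completion, invoked directly in the local fast path. */
static void sketch_driver_complete(int rq)
{
	printf("request %d completed locally, no indirect call\n", rq);
}

static void sketch_end_request(int rq, bool needs_remote)
{
	if (!sketch_complete_remote(rq, needs_remote))
		sketch_driver_complete(rq);
}

int main(void)
{
	sketch_end_request(1, false);	/* same-CPU / polled case */
	sketch_end_request(2, true);	/* cross-CPU case */
	return 0;
}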


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: blk_mq_complete_request overhaul
  2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
@ 2020-06-18 14:11   ` Christoph Hellwig
  2020-06-11  6:44 ` [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq Christoph Hellwig
                     ` (11 subsequent siblings)
  12 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2020-06-18 14:11 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

Any comments?

(and yes, I misspelled the nvme list address, but the interested
parties should be on linux-block anyway)

On Thu, Jun 11, 2020 at 08:44:40AM +0200, Christoph Hellwig wrote:
> Hi Jens,
> 
> Peters recent inquiry made me dust off various bits of unfinished work
> around blk_mq_complete_request and massage it into a coherent series.
> 
> This does three different things all touching the same area:
> 
>  - merge the softirq based single queue completion into the main
>    blk-mq completion mechanism, as there is a lot of duplicate logic
>    between the two
>  - move the error injection that just fails to complete requests out into
>    the drivers.  The fact that blk_mq_complete_request just wouldn't
>    complete when called from the error handle has been causing all kinds
>    of pain
>  - optimize the fast path by allowing drivers to avoid the indirect call
>    with a little more work for polled or per-CPU IRQ completions.  With
>    this a polled block device I/O only has two indirect calls let, one for
>    the file operation that ends up in the block device code, and another
>    one for disptching to the nvme driver
---end quoted text---

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq
  2020-06-11  6:44 ` [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq Christoph Hellwig
@ 2020-06-18 14:34   ` Daniel Wagner
  2020-06-18 14:37       ` Daniel Wagner
  2020-06-18 14:50     ` Daniel Wagner
  1 sibling, 1 reply; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 14:34 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:42AM +0200, Christoph Hellwig wrote:
>  /*
>   * Setup and invoke a run of 'trigger_softirq' on the given cpu.
>   */
> @@ -681,19 +689,8 @@ static void __blk_complete_request(struct request *req)
>  	 * avoids IPI sending from current CPU to the first CPU of a group.
>  	 */
>  	if (ccpu == cpu || shared) {
> -		struct list_head *list;
>  do_local:
> -		list = this_cpu_ptr(&blk_cpu_done);
> -		list_add_tail(&req->ipi_list, list);
> -
> -		/*
> -		 * if the list only contains our just added request,
> -		 * signal a raise of the softirq. If there are already
> -		 * entries there, someone already raised the irq but it
> -		 * hasn't run yet.
> -		 */
> -		if (list->next == &req->ipi_list)
> -			raise_softirq_irqoff(BLOCK_SOFTIRQ);
> +		blk_mq_trigger_softirq(req);
>  	} else if (raise_blk_irq(ccpu, req))
>  		goto do_local;

Couldn't this be folded into the previous condition, e.g

	if (ccpu == cpu || shared || raise_blk_irq(ccpu, req))

?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c
  2020-06-11  6:44 ` [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c Christoph Hellwig
@ 2020-06-18 14:35   ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 14:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:41AM +0200, Christoph Hellwig wrote:
> __blk_complete_request is only called from the blk-mq code, and
> duplicates a lot of code from blk-mq.c.  Move it there to prepare
> for better code sharing and simplifications.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq
  2020-06-18 14:34   ` Daniel Wagner
@ 2020-06-18 14:37       ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 14:37 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 18, 2020 at 04:34:04PM +0200, Daniel Wagner wrote:
> Couldn't this be folded into the previous condition, e.g
> 
> 	if (ccpu == cpu || shared || raise_blk_irq(ccpu, req))

to answer my own question, patch #3 does address this :)

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq
  2020-06-11  6:44 ` [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq Christoph Hellwig
@ 2020-06-18 14:50     ` Daniel Wagner
  2020-06-18 14:50     ` Daniel Wagner
  1 sibling, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 14:50 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:42AM +0200, Christoph Hellwig wrote:
> Add a helper to deduplicate the logic that raises the block softirq.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: blk_mq_complete_request overhaul
  2020-06-18 14:11   ` Christoph Hellwig
@ 2020-06-18 14:54     ` Jens Axboe
  -1 siblings, 0 replies; 40+ messages in thread
From: Jens Axboe @ 2020-06-18 14:54 UTC (permalink / raw)
  To: Christoph Hellwig, Christoph Hellwig
  Cc: Peter Zijlstra, linux-block, linux-nvme

On 6/18/20 8:11 AM, Christoph Hellwig wrote:
> Any comments?
> 
> (and yes, I misspelled the nvme list address, but the interested
> parties should be on linux-block anyway)

I've looked over it, and it looks fine to me. Haven't done a full
test pass yet, but assuming that's good, I'll queue this up for
5.9.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 03/12] blk-mq: remove raise_blk_irq
  2020-06-11  6:44 ` [PATCH 03/12] blk-mq: remove raise_blk_irq Christoph Hellwig
@ 2020-06-18 15:02     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 15:02 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:43AM +0200, Christoph Hellwig wrote:
> By open coding raise_blk_irq in the only caller, and replacing the
> ifdef CONFIG_SMP with an IS_ENABLED check the flow in the caller
> can be significantly simplified.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>
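
For context, the IS_ENABLED() idiom referred to in the quoted commit message
keeps both branches visible to the compiler and relies on dead-code
elimination rather than the preprocessor.  A simplified, self-contained
sketch; the IS_ENABLED macro below is an emulation and the surrounding
function is hypothetical, not the blk-mq code:

/* Sketch only: emulates the kernel's IS_ENABLED() idiom in plain C. */
#include <stdio.h>

#define CONFIG_SMP 1			/* pretend the config option is set */
#define IS_ENABLED(option) (option)	/* simplified stand-in for the kernel macro */

static void complete_via_ipi(int cpu)	{ printf("IPI completion on CPU %d\n", cpu); }
static void complete_locally(void)	{ printf("local completion\n"); }

static void complete_request(int target_cpu, int this_cpu)
{
	/*
	 * The condition is a compile-time constant on !SMP builds, so the
	 * compiler can drop the IPI branch entirely without any #ifdef.
	 */
	if (IS_ENABLED(CONFIG_SMP) && target_cpu != this_cpu)
		complete_via_ipi(target_cpu);
	else
		complete_locally();
}

int main(void)
{
	complete_request(1, 0);
	complete_request(0, 0);
	return 0;
}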

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 04/12] blk-mq: complete polled requests directly
  2020-06-11  6:44 ` [PATCH 04/12] blk-mq: complete polled requests directly Christoph Hellwig
@ 2020-06-18 15:50     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 15:50 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:44AM +0200, Christoph Hellwig wrote:
> Even for single queue devices there is no point in offloading a polled
> completion to the softirq, given that blk_mq_force_complete_rq is called
> from the polling thread in that case and thus there are no starvation
> issues.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 05/12] blk-mq: short cut the IPI path in blk_mq_force_complete_rq for !SMP
  2020-06-11  6:44 ` [PATCH 05/12] blk-mq: short cut the IPI path in blk_mq_force_complete_rq for !SMP Christoph Hellwig
@ 2020-06-18 15:52     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 15:52 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:45AM +0200, Christoph Hellwig wrote:
> Let the compiler optimize out the entire IPI path, given that we are
> obviously not going to use it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/12] blk-mq: merge the softirq vs non-softirq IPI logic
  2020-06-11  6:44 ` [PATCH 06/12] blk-mq: merge the softirq vs non-softirq IPI logic Christoph Hellwig
@ 2020-06-18 16:12     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:46AM +0200, Christoph Hellwig wrote:
> Both the softirq path for single queue devices and the multi-queue
> completion handler share the same logic to figure out if we need an
> IPI for the completion and eventually issue it.  Merge the two
> versions into a single unified code path.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 07/12] blk-mq: move failure injection out of blk_mq_complete_request
  2020-06-11  6:44 ` [PATCH 07/12] blk-mq: move failure injection out of blk_mq_complete_request Christoph Hellwig
@ 2020-06-18 16:18   ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:18 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:47AM +0200, Christoph Hellwig wrote:
> Move the call to blk_should_fake_timeout out of blk_mq_complete_request
> and into the drivers, skipping call sites that are obvious error
> handlers, and remove the now superfluous blk_mq_force_complete_rq helper.
> This ensures we don't keep injecting errors into completions that just
> terminate the Linux request after the hardware has been reset or the
> command has been aborted.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>
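
The behavioural point of the quoted change is that the fake-timeout check now
sits in the driver's normal completion path and is simply omitted on
reset/abort paths, so error-handler completions can no longer be swallowed by
the injector.  A hedged sketch of the resulting call sites, hypothetical
names throughout:

/* Sketch only: fault injection honoured on the normal completion path and
 * deliberately skipped when the error handler terminates a request. */
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a fake-timeout injector: true means "drop this completion
 * and let the timeout machinery fire instead". */
static bool sketch_should_fake_timeout(void)
{
	return false;	/* injection disabled in this example */
}

static void sketch_complete(int rq)
{
	printf("request %d completed\n", rq);
}

/* Normal interrupt completion: subject to injection. */
static void sketch_irq_done(int rq)
{
	if (sketch_should_fake_timeout())
		return;		/* pretend the completion was lost */
	sketch_complete(rq);
}

/* Reset/abort path: always complete, never inject. */
static void sketch_error_recovery_done(int rq)
{
	sketch_complete(rq);
}

int main(void)
{
	sketch_irq_done(1);
	sketch_error_recovery_done(2);
	return 0;
}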

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 08/12] blk-mq: remove the get_cpu/put_cpu pair in blk_mq_complete_request
  2020-06-11  6:44 ` [PATCH 08/12] blk-mq: remove the get_cpu/put_cpu pair in blk_mq_complete_request Christoph Hellwig
@ 2020-06-18 16:24   ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:24 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:48AM +0200, Christoph Hellwig wrote:
> We don't really care if we get migrated during the I/O completion.
> In the worst case we either perform an IPI that wasn't required, or
> complete the request on a CPU which we just migrated off.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 09/12] blk-mq: factor out a blk_mq_complete_need_ipi helper
  2020-06-11  6:44 ` [PATCH 09/12] blk-mq: factor out a blk_mq_complete_need_ipi helper Christoph Hellwig
@ 2020-06-18 16:33     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:49AM +0200, Christoph Hellwig wrote:
> Add a helper to decide if we can complete locally or need an IPI.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>
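
As a rough illustration of what such a predicate separates out: it compares
the CPU the request was submitted from with the CPU running the completion,
optionally treating CPUs in the same cache domain as local.  A hedged sketch
with hypothetical names and a deliberately simplified policy:

/* Sketch only: a simplified "do we need an IPI?" predicate; the real blk-mq
 * policy has more inputs (queue flags, shared-cache topology, etc.). */
#include <stdbool.h>
#include <stdio.h>

struct sketch_rq {
	int submit_cpu;		/* CPU the request was issued from */
};

static bool sketch_same_cache_domain(int a, int b)
{
	return (a / 2) == (b / 2);	/* pretend CPUs pair up per cache */
}

static bool sketch_need_ipi(const struct sketch_rq *rq, int this_cpu,
			    bool force_same_cpu)
{
	if (rq->submit_cpu == this_cpu)
		return false;			/* already on the right CPU */
	if (!force_same_cpu &&
	    sketch_same_cache_domain(rq->submit_cpu, this_cpu))
		return false;			/* close enough, complete here */
	return true;				/* bounce the completion via IPI */
}

int main(void)
{
	struct sketch_rq rq = { .submit_cpu = 0 };

	printf("need IPI on CPU 1: %d\n", sketch_need_ipi(&rq, 1, false));
	printf("need IPI on CPU 3: %d\n", sketch_need_ipi(&rq, 3, false));
	return 0;
}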

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/12] blk-mq: add a new blk_mq_complete_request_remote API
  2020-06-11  6:44 ` [PATCH 10/12] blk-mq: add a new blk_mq_complete_request_remote API Christoph Hellwig
@ 2020-06-18 16:39     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:50AM +0200, Christoph Hellwig wrote:
> This is a variant of blk_mq_complete_request_remote that only completes
> the request if it needs to be bounced to another CPU or a softirq.  If
> the request can be completed locally the function returns false and lets
> the driver complete it without requring and indirect function call.

s/requring/requiring/

> Suggestions for a better name welcome.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/12] nvme-rdma: factor out a nvme_rdma_end_request helper
  2020-06-11  6:44 ` [PATCH 11/12] nvme-rdma: factor out a nvme_rdma_end_request helper Christoph Hellwig
@ 2020-06-18 16:42   ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:51AM +0200, Christoph Hellwig wrote:
> Factor a small snippet of duplicated code into a new helper in
> preparation for making this snippet a little bit less trivial.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 12/12] nvme: use blk_mq_complete_request_remote to avoid an indirect function call
  2020-06-11  6:44 ` [PATCH 12/12] nvme: use blk_mq_complete_request_remote to avoid an indirect function call Christoph Hellwig
@ 2020-06-18 16:47     ` Daniel Wagner
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Wagner @ 2020-06-18 16:47 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Peter Zijlstra, linux-block, linux-nvme

On Thu, Jun 11, 2020 at 08:44:52AM +0200, Christoph Hellwig wrote:
> Use the new blk_mq_complete_request_remote helper to avoid an indirect
> function call in the completion fast path.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Daniel Wagner <dwagner@suse.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2020-06-18 16:47 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-11  6:44 blk_mq_complete_request overhaul Christoph Hellwig
2020-06-11  6:44 ` [PATCH 01/12] blk-mq: merge blk-softirq.c into blk-mq.c Christoph Hellwig
2020-06-18 14:35   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 02/12] blk-mq: factor out a helper to raise the block softirq Christoph Hellwig
2020-06-18 14:34   ` Daniel Wagner
2020-06-18 14:37     ` Daniel Wagner
2020-06-18 14:50   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 03/12] blk-mq: remove raise_blk_irq Christoph Hellwig
2020-06-18 15:02   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 04/12] blk-mq: complete polled requests directly Christoph Hellwig
2020-06-18 15:50   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 05/12] blk-mq: short cut the IPI path in blk_mq_force_complete_rq for !SMP Christoph Hellwig
2020-06-18 15:52   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 06/12] blk-mq: merge the softirq vs non-softirq IPI logic Christoph Hellwig
2020-06-18 16:12   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 07/12] blk-mq: move failure injection out of blk_mq_complete_request Christoph Hellwig
2020-06-18 16:18   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 08/12] blk-mq: remove the get_cpu/put_cpu pair in blk_mq_complete_request Christoph Hellwig
2020-06-18 16:24   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 09/12] blk-mq: factor out a blk_mq_complete_need_ipi helper Christoph Hellwig
2020-06-18 16:33   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 10/12] blk-mq: add a new blk_mq_complete_request_remote API Christoph Hellwig
2020-06-18 16:39   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 11/12] nvme-rdma: factor out a nvme_rdma_end_request helper Christoph Hellwig
2020-06-18 16:42   ` Daniel Wagner
2020-06-11  6:44 ` [PATCH 12/12] nvme: use blk_mq_complete_request_remote to avoid an indirect function call Christoph Hellwig
2020-06-18 16:47   ` Daniel Wagner
2020-06-18 14:11 ` blk_mq_complete_request overhaul Christoph Hellwig
2020-06-18 14:54   ` Jens Axboe
