From: Ming Lei <ming.lei@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>, Jens Axboe <axboe@kernel.dk>
Cc: linux-kernel@vger.kernel.org, linux-block@vger.kernel.org,
Ming Lei <ming.lei@redhat.com>, Long Li <longli@microsoft.com>,
Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Keith Busch <keith.busch@intel.com>,
Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>,
John Garry <john.garry@huawei.com>,
Hannes Reinecke <hare@suse.com>
Subject: [RFC PATCH 3/3] blk-mq: complete request in rescuer process context in case of irq flood
Date: Wed, 18 Dec 2019 15:19:42 +0800 [thread overview]
Message-ID: <20191218071942.22336-4-ming.lei@redhat.com> (raw)
In-Reply-To: <20191218071942.22336-1-ming.lei@redhat.com>
When an irq flood is detected, complete requests in the per-CPU rescuer
context to avoid locking up the CPU.
An IO interrupt flood might be triggered in the following situations:
1) the storage device handles IO quicker than a single CPU core can process it
2) N:1 queue mapping, where a single CPU core is saturated by handling IO
interrupts from multiple storage disks or multiple HBAs
Cc: Long Li <longli@microsoft.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: John Garry <john.garry@huawei.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 323c9cb28066..a7fe00f1a313 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -40,6 +40,14 @@
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
+struct blk_mq_comp_rescuer {
+ struct list_head head;
+ bool running;
+ struct work_struct work;
+};
+
+static DEFINE_PER_CPU(struct blk_mq_comp_rescuer, blk_mq_comp_rescuer);
+
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
@@ -624,6 +632,50 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
*srcu_idx = srcu_read_lock(hctx->srcu);
}
+static void blk_mq_complete_rq_in_rescuer(struct request *rq)
+{
+ struct blk_mq_comp_rescuer *rescuer;
+ unsigned long flags;
+
+ WARN_ON(!in_interrupt());
+
+ local_irq_save(flags);
+ rescuer = this_cpu_ptr(&blk_mq_comp_rescuer);
+ list_add_tail(&rq->queuelist, &rescuer->head);
+ if (!rescuer->running) {
+ rescuer->running = true;
+ kblockd_schedule_work(&rescuer->work);
+ }
+ local_irq_restore(flags);
+
+}
+
+static void blk_mq_complete_rescue_work(struct work_struct *work)
+{
+ struct blk_mq_comp_rescuer *rescuer =
+ container_of(work, struct blk_mq_comp_rescuer, work);
+ struct list_head local_list;
+
+ local_irq_disable();
+ run_again:
+ list_replace_init(&rescuer->head, &local_list);
+ local_irq_enable();
+
+ while (!list_empty(&local_list)) {
+ struct request *rq = list_entry(local_list.next,
+ struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ __blk_mq_complete_request(rq);
+ cond_resched();
+ }
+
+ local_irq_disable();
+ if (!list_empty(&rescuer->head))
+ goto run_again;
+ rescuer->running = false;
+ local_irq_enable();
+}
+
/**
* blk_mq_complete_request - end I/O on a request
* @rq: the request being processed
@@ -636,7 +688,11 @@ bool blk_mq_complete_request(struct request *rq)
{
if (unlikely(blk_should_fake_timeout(rq->q)))
return false;
- __blk_mq_complete_request(rq);
+
+ if (likely(!irq_is_flood() || !in_interrupt()))
+ __blk_mq_complete_request(rq);
+ else
+ blk_mq_complete_rq_in_rescuer(rq);
return true;
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -3525,6 +3581,16 @@ EXPORT_SYMBOL(blk_mq_rq_cpu);
static int __init blk_mq_init(void)
{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct blk_mq_comp_rescuer *rescuer =
+ &per_cpu(blk_mq_comp_rescuer, i);
+
+ INIT_LIST_HEAD(&rescuer->head);
+ INIT_WORK(&rescuer->work, blk_mq_complete_rescue_work);
+ }
+
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
blk_mq_hctx_notify_dead);
return 0;
--
2.20.1
prev parent reply other threads:[~2019-12-18 7:20 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-18 7:19 [RFC PATCH 0/3] softirq/blk-mq: implement interrupt flood detection for avoiding cpu lockup Ming Lei
2019-12-18 7:19 ` [RFC PATCH 1/3] sched/core: add API for exporting runqueue clock Ming Lei
2019-12-18 9:51 ` Peter Zijlstra
2019-12-19 1:29 ` Ming Lei
2019-12-19 9:20 ` Peter Zijlstra
2019-12-18 7:19 ` [RFC PATCH 2/3] softirq: implement interrupt flood detection Ming Lei
2019-12-18 10:49 ` Peter Zijlstra
2019-12-18 12:29 ` Peter Zijlstra
2019-12-19 1:59 ` Ming Lei
2019-12-19 9:23 ` Peter Zijlstra
2019-12-19 9:52 ` Ming Lei
2019-12-19 10:43 ` Daniel Wagner
2019-12-31 3:48 ` Ming Lei
2020-01-02 10:28 ` Daniel Wagner
2019-12-18 7:19 ` Ming Lei [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191218071942.22336-4-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=hare@suse.com \
--cc=hch@lst.de \
--cc=john.garry@huawei.com \
--cc=keith.busch@intel.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longli@microsoft.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=sagi@grimberg.me \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).