From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org,
Jeffle Xu <jefflexu@linux.alibaba.com>,
Mike Snitzer <snitzer@redhat.com>,
dm-devel@redhat.com, Hannes Reinecke <hare@suse.de>,
Ming Lei <ming.lei@redhat.com>, Christoph Hellwig <hch@lst.de>
Subject: [PATCH V6 04/12] block: move block polling code into one dedicated source file
Date: Thu, 22 Apr 2021 20:20:30 +0800 [thread overview]
Message-ID: <20210422122038.2192933-5-ming.lei@redhat.com> (raw)
In-Reply-To: <20210422122038.2192933-1-ming.lei@redhat.com>
Prepare for supporting bio based io polling, and move blk polling
code into one dedicated source file. And three shared functions are
put into private header of blk-mq.h
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/Makefile | 3 +-
block/blk-mq.c | 230 -----------------------------------------------
block/blk-mq.h | 40 +++++++++
block/blk-poll.c | 196 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 238 insertions(+), 231 deletions(-)
create mode 100644 block/blk-poll.c
diff --git a/block/Makefile b/block/Makefile
index 8d841f5f986f..d7abe2333407 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
blk-exec.o blk-merge.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
- genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
+ genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
+ blk-poll.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 47e650bb836b..f9162295f4f2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -43,26 +43,6 @@
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
-static void blk_mq_poll_stats_start(struct request_queue *q);
-static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-
-static int blk_mq_poll_stats_bkt(const struct request *rq)
-{
- int ddir, sectors, bucket;
-
- ddir = rq_data_dir(rq);
- sectors = blk_rq_stats_sectors(rq);
-
- bucket = ddir + 2 * ilog2(sectors);
-
- if (bucket < 0)
- return -1;
- else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
- return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
-
- return bucket;
-}
-
/*
* Check if any of the ctx, dispatch list or elevator
* have pending work in this hardware queue.
@@ -3726,216 +3706,6 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
-/* Enable polling stats and return whether they were already enabled. */
-static bool blk_poll_stats_enable(struct request_queue *q)
-{
- if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
- blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
- return true;
- blk_stat_add_callback(q, q->poll_cb);
- return false;
-}
-
-static void blk_mq_poll_stats_start(struct request_queue *q)
-{
- /*
- * We don't arm the callback if polling stats are not enabled or the
- * callback is already active.
- */
- if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
- blk_stat_is_active(q->poll_cb))
- return;
-
- blk_stat_activate_msecs(q->poll_cb, 100);
-}
-
-static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
-{
- struct request_queue *q = cb->data;
- int bucket;
-
- for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
- if (cb->stat[bucket].nr_samples)
- q->poll_stat[bucket] = cb->stat[bucket];
- }
-}
-
-static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
- struct request *rq)
-{
- unsigned long ret = 0;
- int bucket;
-
- /*
- * If stats collection isn't on, don't sleep but turn it on for
- * future users
- */
- if (!blk_poll_stats_enable(q))
- return 0;
-
- /*
- * As an optimistic guess, use half of the mean service time
- * for this type of request. We can (and should) make this smarter.
- * For instance, if the completion latencies are tight, we can
- * get closer than just half the mean. This is especially
- * important on devices where the completion latencies are longer
- * than ~10 usec. We do use the stats for the relevant IO size
- * if available which does lead to better estimates.
- */
- bucket = blk_mq_poll_stats_bkt(rq);
- if (bucket < 0)
- return ret;
-
- if (q->poll_stat[bucket].nr_samples)
- ret = (q->poll_stat[bucket].mean + 1) / 2;
-
- return ret;
-}
-
-static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
- struct request *rq)
-{
- struct hrtimer_sleeper hs;
- enum hrtimer_mode mode;
- unsigned int nsecs;
- ktime_t kt;
-
- if (rq->rq_flags & RQF_MQ_POLL_SLEPT)
- return false;
-
- /*
- * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
- *
- * 0: use half of prev avg
- * >0: use this specific value
- */
- if (q->poll_nsec > 0)
- nsecs = q->poll_nsec;
- else
- nsecs = blk_mq_poll_nsecs(q, rq);
-
- if (!nsecs)
- return false;
-
- rq->rq_flags |= RQF_MQ_POLL_SLEPT;
-
- /*
- * This will be replaced with the stats tracking code, using
- * 'avg_completion_time / 2' as the pre-sleep target.
- */
- kt = nsecs;
-
- mode = HRTIMER_MODE_REL;
- hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
- hrtimer_set_expires(&hs.timer, kt);
-
- do {
- if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
- break;
- set_current_state(TASK_UNINTERRUPTIBLE);
- hrtimer_sleeper_start_expires(&hs, mode);
- if (hs.task)
- io_schedule();
- hrtimer_cancel(&hs.timer);
- mode = HRTIMER_MODE_ABS;
- } while (hs.task && !signal_pending(current));
-
- __set_current_state(TASK_RUNNING);
- destroy_hrtimer_on_stack(&hs.timer);
- return true;
-}
-
-static bool blk_mq_poll_hybrid(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
-{
- struct request *rq;
-
- if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
- return false;
-
- if (!blk_qc_t_is_internal(cookie))
- rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
- else {
- rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
- /*
- * With scheduling, if the request has completed, we'll
- * get a NULL return here, as we clear the sched tag when
- * that happens. The request still remains valid, like always,
- * so we should be safe with just the NULL check.
- */
- if (!rq)
- return false;
- }
-
- return blk_mq_poll_hybrid_sleep(q, rq);
-}
-
-/**
- * blk_poll - poll for IO completions
- * @q: the queue
- * @cookie: cookie passed back at IO submission time
- * @spin: whether to spin for completions
- *
- * Description:
- * Poll for completions on the passed in queue. Returns number of
- * completed entries found. If @spin is true, then blk_poll will continue
- * looping until at least one completion is found, unless the task is
- * otherwise marked running (or we need to reschedule).
- */
-int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
-{
- struct blk_mq_hw_ctx *hctx;
- long state;
-
- if (!blk_qc_t_valid(cookie) || !blk_queue_poll(q))
- return 0;
-
- if (current->plug)
- blk_flush_plug_list(current->plug, false);
-
- hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
-
- /*
- * If we sleep, have the caller restart the poll loop to reset
- * the state. Like for the other success return cases, the
- * caller is responsible for checking if the IO completed. If
- * the IO isn't complete, we'll get called again and will go
- * straight to the busy poll loop. If specified not to spin,
- * we also should not sleep.
- */
- if (spin && blk_mq_poll_hybrid(q, hctx, cookie))
- return 1;
-
- hctx->poll_considered++;
-
- state = current->state;
- do {
- int ret;
-
- hctx->poll_invoked++;
-
- ret = q->mq_ops->poll(hctx);
- if (ret > 0) {
- hctx->poll_success++;
- __set_current_state(TASK_RUNNING);
- return ret;
- }
-
- if (signal_pending_state(state, current))
- __set_current_state(TASK_RUNNING);
-
- if (current->state == TASK_RUNNING)
- return 1;
- if (ret < 0 || !spin)
- break;
- cpu_relax();
- } while (!need_resched());
-
- __set_current_state(TASK_RUNNING);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blk_poll);
-
unsigned int blk_mq_rq_cpu(struct request *rq)
{
return rq->mq_ctx->cpu;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9ccb1818303b..2eea38cd8048 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -324,5 +324,45 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return __blk_mq_active_requests(hctx) < depth;
}
+static inline int blk_mq_poll_stats_bkt(const struct request *rq)
+{
+ int ddir, sectors, bucket;
+
+ ddir = rq_data_dir(rq);
+ sectors = blk_rq_stats_sectors(rq);
+
+ bucket = ddir + 2 * ilog2(sectors);
+
+ if (bucket < 0)
+ return -1;
+ else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
+ return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
+
+ return bucket;
+}
+
+static inline void blk_mq_poll_stats_start(struct request_queue *q)
+{
+ /*
+ * We don't arm the callback if polling stats are not enabled or the
+ * callback is already active.
+ */
+ if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+ blk_stat_is_active(q->poll_cb))
+ return;
+
+ blk_stat_activate_msecs(q->poll_cb, 100);
+}
+
+static inline void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
+{
+ struct request_queue *q = cb->data;
+ int bucket;
+
+ for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
+ if (cb->stat[bucket].nr_samples)
+ q->poll_stat[bucket] = cb->stat[bucket];
+ }
+}
#endif
diff --git a/block/blk-poll.c b/block/blk-poll.c
new file mode 100644
index 000000000000..daa307f84792
--- /dev/null
+++ b/block/blk-poll.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/sched.h>
+#include <linux/hrtimer.h>
+
+#include <linux/blk-mq.h>
+#include "blk.h"
+#include "blk-mq.h"
+
+/* Enable polling stats and return whether they were already enabled. */
+static bool blk_poll_stats_enable(struct request_queue *q)
+{
+ if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+ blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
+ return true;
+ blk_stat_add_callback(q, q->poll_cb);
+ return false;
+}
+
+static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
+ struct request *rq)
+{
+ unsigned long ret = 0;
+ int bucket;
+
+ /*
+ * If stats collection isn't on, don't sleep but turn it on for
+ * future users
+ */
+ if (!blk_poll_stats_enable(q))
+ return 0;
+
+ /*
+ * As an optimistic guess, use half of the mean service time
+ * for this type of request. We can (and should) make this smarter.
+ * For instance, if the completion latencies are tight, we can
+ * get closer than just half the mean. This is especially
+ * important on devices where the completion latencies are longer
+ * than ~10 usec. We do use the stats for the relevant IO size
+ * if available which does lead to better estimates.
+ */
+ bucket = blk_mq_poll_stats_bkt(rq);
+ if (bucket < 0)
+ return ret;
+
+ if (q->poll_stat[bucket].nr_samples)
+ ret = (q->poll_stat[bucket].mean + 1) / 2;
+
+ return ret;
+}
+
+static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+ struct request *rq)
+{
+ struct hrtimer_sleeper hs;
+ enum hrtimer_mode mode;
+ unsigned int nsecs;
+ ktime_t kt;
+
+ if (rq->rq_flags & RQF_MQ_POLL_SLEPT)
+ return false;
+
+ /*
+ * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
+ *
+ * 0: use half of prev avg
+ * >0: use this specific value
+ */
+ if (q->poll_nsec > 0)
+ nsecs = q->poll_nsec;
+ else
+ nsecs = blk_mq_poll_nsecs(q, rq);
+
+ if (!nsecs)
+ return false;
+
+ rq->rq_flags |= RQF_MQ_POLL_SLEPT;
+
+ /*
+ * This will be replaced with the stats tracking code, using
+ * 'avg_completion_time / 2' as the pre-sleep target.
+ */
+ kt = nsecs;
+
+ mode = HRTIMER_MODE_REL;
+ hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
+ hrtimer_set_expires(&hs.timer, kt);
+
+ do {
+ if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
+ break;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ hrtimer_sleeper_start_expires(&hs, mode);
+ if (hs.task)
+ io_schedule();
+ hrtimer_cancel(&hs.timer);
+ mode = HRTIMER_MODE_ABS;
+ } while (hs.task && !signal_pending(current));
+
+ __set_current_state(TASK_RUNNING);
+ destroy_hrtimer_on_stack(&hs.timer);
+ return true;
+}
+
+static bool blk_mq_poll_hybrid(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
+{
+ struct request *rq;
+
+ if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
+ return false;
+
+ if (!blk_qc_t_is_internal(cookie))
+ rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+ else {
+ rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+ /*
+ * With scheduling, if the request has completed, we'll
+ * get a NULL return here, as we clear the sched tag when
+ * that happens. The request still remains valid, like always,
+ * so we should be safe with just the NULL check.
+ */
+ if (!rq)
+ return false;
+ }
+
+ return blk_mq_poll_hybrid_sleep(q, rq);
+}
+
+/**
+ * blk_poll - poll for IO completions
+ * @q: the queue
+ * @cookie: cookie passed back at IO submission time
+ * @spin: whether to spin for completions
+ *
+ * Description:
+ * Poll for completions on the passed in queue. Returns number of
+ * completed entries found. If @spin is true, then blk_poll will continue
+ * looping until at least one completion is found, unless the task is
+ * otherwise marked running (or we need to reschedule).
+ */
+int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
+{
+ struct blk_mq_hw_ctx *hctx;
+ long state;
+
+ if (!blk_qc_t_valid(cookie) || !blk_queue_poll(q))
+ return 0;
+
+ if (current->plug)
+ blk_flush_plug_list(current->plug, false);
+
+ hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+
+ /*
+ * If we sleep, have the caller restart the poll loop to reset
+ * the state. Like for the other success return cases, the
+ * caller is responsible for checking if the IO completed. If
+ * the IO isn't complete, we'll get called again and will go
+ * straight to the busy poll loop. If specified not to spin,
+ * we also should not sleep.
+ */
+ if (spin && blk_mq_poll_hybrid(q, hctx, cookie))
+ return 1;
+
+ hctx->poll_considered++;
+
+ state = current->state;
+ do {
+ int ret;
+
+ hctx->poll_invoked++;
+
+ ret = q->mq_ops->poll(hctx);
+ if (ret > 0) {
+ hctx->poll_success++;
+ __set_current_state(TASK_RUNNING);
+ return ret;
+ }
+
+ if (signal_pending_state(state, current))
+ __set_current_state(TASK_RUNNING);
+
+ if (current->state == TASK_RUNNING)
+ return 1;
+ if (ret < 0 || !spin)
+ break;
+ cpu_relax();
+ } while (!need_resched());
+
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blk_poll);
--
2.29.2
next prev parent reply other threads:[~2021-04-22 12:22 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-22 12:20 [PATCH V6 00/12] block: support bio based io polling Ming Lei
2021-04-22 12:20 ` [PATCH V6 01/12] block: add helper of blk_queue_poll Ming Lei
2021-04-22 12:20 ` [PATCH V6 02/12] block: define 'struct bvec_iter' as packed Ming Lei
2021-04-22 13:18 ` Hannes Reinecke
2021-04-22 12:20 ` [PATCH V6 03/12] block: add one helper to free io_context Ming Lei
2021-04-22 12:20 ` Ming Lei [this message]
2021-04-22 13:19 ` [PATCH V6 04/12] block: move block polling code into one dedicated source file Hannes Reinecke
2021-04-26 7:12 ` Hannes Reinecke
2021-04-22 12:20 ` [PATCH V6 05/12] block: extract one helper function polling hw queue Ming Lei
2021-04-22 12:20 ` [PATCH V6 06/12] block: prepare for supporting bio_list via other link Ming Lei
2021-04-22 12:20 ` [PATCH V6 07/12] block: create io poll context for submission and poll task Ming Lei
2021-04-22 12:20 ` [PATCH V6 08/12] block: add req flag of REQ_POLL_CTX Ming Lei
2021-04-22 12:20 ` [PATCH V6 09/12] block: use per-task poll context to implement bio based io polling Ming Lei
2021-04-26 7:17 ` Hannes Reinecke
2021-04-22 12:20 ` [PATCH V6 10/12] block: limit hw queues to be polled in each blk_poll() Ming Lei
2021-04-26 7:19 ` Hannes Reinecke
2021-04-26 8:00 ` Ming Lei
2021-04-26 9:05 ` Hannes Reinecke
2021-04-22 12:20 ` [PATCH V6 11/12] block: allow to control FLAG_POLL via sysfs for bio poll capable queue Ming Lei
2021-04-26 7:20 ` Hannes Reinecke
2021-04-22 12:20 ` [PATCH V6 12/12] dm: support IO polling for bio-based dm device Ming Lei
2021-04-23 1:32 ` JeffleXu
2021-04-23 2:39 ` Ming Lei
2021-04-23 2:38 ` [PATCH V7 " Ming Lei
2021-05-17 6:16 ` [PATCH V6 00/12] block: support bio based io polling JeffleXu
2021-05-17 7:13 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210422122038.2192933-5-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=dm-devel@redhat.com \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=jefflexu@linux.alibaba.com \
--cc=linux-block@vger.kernel.org \
--cc=snitzer@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).