* [PATCH 1/3] blk-throttle: support io delay stats
@ 2019-07-25 3:03 Joseph Qi
2019-07-25 3:03 ` [PATCH 2/3] blk-throttle: add counter for completed io Joseph Qi
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Joseph Qi @ 2019-07-25 3:03 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Tejun Heo
Add blkio.throttle.io_service_time and blkio.throttle.io_wait_time to
get per-cgroup io delay statistics in blk-throttle layer.
io_service_time represents the time spent from the end of throttling
until io completion, while io_wait_time represents the time spent on
the throttle queue.
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
block/bio.c | 4 ++
block/blk-throttle.c | 130 +++++++++++++++++++++++++++++++++++++++++++++-
include/linux/blk_types.h | 34 ++++++++++++
3 files changed, 167 insertions(+), 1 deletion(-)
diff --git a/block/bio.c b/block/bio.c
index 299a0e7..3206462 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1826,6 +1826,10 @@ void bio_endio(struct bio *bio)
blk_throtl_bio_endio(bio);
/* release cgroup info */
bio_uninit(bio);
+#ifdef CONFIG_BLK_DEV_THROTTLING
+ if (bio->bi_tg_end_io)
+ bio->bi_tg_end_io(bio);
+#endif
if (bio->bi_end_io)
bio->bi_end_io(bio);
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 8ab6c81..a5880f0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -176,6 +176,11 @@ struct throtl_grp {
unsigned int bio_cnt; /* total bios */
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
+
+ /* total time spent on lower layer: scheduler, device and others */
+ struct blkg_rwstat service_time;
+ /* total time spent on block throttle */
+ struct blkg_rwstat wait_time;
};
/* We measure latency for request size from <= 4k to >= 1M */
@@ -487,6 +492,10 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
if (!tg)
return NULL;
+ if (blkg_rwstat_init(&tg->service_time, gfp) ||
+ blkg_rwstat_init(&tg->wait_time, gfp))
+ goto err;
+
throtl_service_queue_init(&tg->service_queue);
for (rw = READ; rw <= WRITE; rw++) {
@@ -511,6 +520,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
return &tg->pd;
+
+err:
+ blkg_rwstat_exit(&tg->service_time);
+ blkg_rwstat_exit(&tg->wait_time);
+ kfree(tg);
+ return NULL;
}
static void throtl_pd_init(struct blkg_policy_data *pd)
@@ -592,6 +607,8 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
static void throtl_pd_offline(struct blkg_policy_data *pd)
{
struct throtl_grp *tg = pd_to_tg(pd);
+ struct blkcg_gq *blkg = pd_to_blkg(pd);
+ struct blkcg_gq *parent = blkg->parent;
tg->bps[READ][LIMIT_LOW] = 0;
tg->bps[WRITE][LIMIT_LOW] = 0;
@@ -602,6 +619,12 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
if (!tg->td->limit_valid[tg->td->limit_index])
throtl_upgrade_state(tg->td);
+ if (parent) {
+ blkg_rwstat_add_aux(&blkg_to_tg(parent)->service_time,
+ &tg->service_time);
+ blkg_rwstat_add_aux(&blkg_to_tg(parent)->wait_time,
+ &tg->wait_time);
+ }
}
static void throtl_pd_free(struct blkg_policy_data *pd)
@@ -609,9 +632,19 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
struct throtl_grp *tg = pd_to_tg(pd);
del_timer_sync(&tg->service_queue.pending_timer);
+ blkg_rwstat_exit(&tg->service_time);
+ blkg_rwstat_exit(&tg->wait_time);
kfree(tg);
}
+static void throtl_pd_reset(struct blkg_policy_data *pd)
+{
+ struct throtl_grp *tg = pd_to_tg(pd);
+
+ blkg_rwstat_reset(&tg->service_time);
+ blkg_rwstat_reset(&tg->wait_time);
+}
+
static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
@@ -1019,6 +1052,64 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
return false;
}
+static void throtl_stats_update_completion(struct throtl_grp *tg,
+ uint64_t start_time,
+ uint64_t io_start_time,
+ int op)
+{
+ unsigned long flags;
+ uint64_t now = sched_clock();
+
+ local_irq_save(flags);
+ if (time_after64(now, io_start_time))
+ blkg_rwstat_add(&tg->service_time, op, now - io_start_time);
+ if (time_after64(io_start_time, start_time))
+ blkg_rwstat_add(&tg->wait_time, op, io_start_time - start_time);
+ local_irq_restore(flags);
+}
+
+static void throtl_bio_end_io(struct bio *bio)
+{
+ struct throtl_grp *tg;
+
+ rcu_read_lock();
+ /* see comments in throtl_bio_stats_start() */
+ if (bio_flagged(bio, BIO_THROTL_STATED))
+ goto out;
+
+ tg = (struct throtl_grp *)bio->bi_tg_private;
+ if (!tg)
+ goto out;
+
+ throtl_stats_update_completion(tg, bio_start_time_ns(bio),
+ bio_io_start_time_ns(bio),
+ bio_op(bio));
+ blkg_put(tg_to_blkg(tg));
+ bio_clear_flag(bio, BIO_THROTL_STATED);
+out:
+ rcu_read_unlock();
+}
+
+static inline void throtl_bio_stats_start(struct bio *bio, struct throtl_grp *tg)
+{
+ int op = bio_op(bio);
+
+ /*
+ * It may happen that end_io will be called twice like dm-thin,
+ * which will save origin end_io first, and call its overwrite
+ * end_io and then the saved end_io. We use bio flag
+ * BIO_THROTL_STATED to do only once statistics.
+ */
+ if ((op == REQ_OP_READ || op == REQ_OP_WRITE) &&
+ !bio_flagged(bio, BIO_THROTL_STATED)) {
+ blkg_get(tg_to_blkg(tg));
+ bio_set_flag(bio, BIO_THROTL_STATED);
+ bio->bi_tg_end_io = throtl_bio_end_io;
+ bio->bi_tg_private = tg;
+ bio_set_start_time_ns(bio);
+ }
+}
+
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
@@ -1462,6 +1553,25 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
return tg_set_conf(of, buf, nbytes, off, false);
}
+static u64 tg_prfill_rwstat_field(struct seq_file *sf,
+ struct blkg_policy_data *pd,
+ int off)
+{
+ struct throtl_grp *tg = pd_to_tg(pd);
+ struct blkg_rwstat_sample rwstat = { };
+
+ blkg_rwstat_read((void *)tg + off, &rwstat);
+ return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+
+static int tg_print_rwstat(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ tg_prfill_rwstat_field, &blkcg_policy_throtl,
+ seq_cft(sf)->private, true);
+ return 0;
+}
+
static struct cftype throtl_legacy_files[] = {
{
.name = "throttle.read_bps_device",
@@ -1507,6 +1617,16 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
.private = (unsigned long)&blkcg_policy_throtl,
.seq_show = blkg_print_stat_ios_recursive,
},
+ {
+ .name = "throttle.io_service_time",
+ .private = offsetof(struct throtl_grp, service_time),
+ .seq_show = tg_print_rwstat,
+ },
+ {
+ .name = "throttle.io_wait_time",
+ .private = offsetof(struct throtl_grp, wait_time),
+ .seq_show = tg_print_rwstat,
+ },
{ } /* terminate */
};
@@ -1732,6 +1852,7 @@ static void throtl_shutdown_wq(struct request_queue *q)
.pd_online_fn = throtl_pd_online,
.pd_offline_fn = throtl_pd_offline,
.pd_free_fn = throtl_pd_free,
+ .pd_reset_stats_fn = throtl_pd_reset,
};
static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
@@ -2125,7 +2246,12 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
WARN_ON_ONCE(!rcu_read_lock_held());
/* see throtl_charge_bio() */
- if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
+ if (bio_flagged(bio, BIO_THROTTLED))
+ goto out;
+
+ throtl_bio_stats_start(bio, tg);
+
+ if (!tg->has_rules[rw])
goto out;
spin_lock_irq(&q->queue_lock);
@@ -2212,6 +2338,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
out_unlock:
spin_unlock_irq(&q->queue_lock);
out:
+ if (!throttled)
+ bio_set_io_start_time_ns(bio);
bio_set_flag(bio, BIO_THROTTLED);
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index feff3fe..6906bc6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/ktime.h>
+#include <linux/sched/clock.h>
struct bio_set;
struct bio;
@@ -169,6 +170,12 @@ struct bio {
*/
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
+#ifdef CONFIG_BLK_DEV_THROTTLING
+ unsigned long long start_time_ns; /* when passed to block throttle */
+ unsigned long long io_start_time_ns; /* when no more throttle */
+ bio_end_io_t *bi_tg_end_io;
+ void *bi_tg_private;
+#endif
#endif
union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -218,6 +225,7 @@ enum {
* of this bio. */
BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */
BIO_TRACKED, /* set if bio goes through the rq_qos path */
+ BIO_THROTL_STATED, /* bio already stated */
BIO_FLAG_LAST
};
@@ -248,6 +256,32 @@ enum {
*/
#define BIO_RESET_BITS BVEC_POOL_OFFSET
+#ifdef CONFIG_BLK_DEV_THROTTLING
+static inline void bio_set_start_time_ns(struct bio *bio)
+{
+ preempt_disable();
+ bio->start_time_ns = sched_clock();
+ preempt_enable();
+}
+
+static inline void bio_set_io_start_time_ns(struct bio *bio)
+{
+ preempt_disable();
+ bio->io_start_time_ns = sched_clock();
+ preempt_enable();
+}
+
+static inline uint64_t bio_start_time_ns(struct bio *bio)
+{
+ return bio->start_time_ns;
+}
+
+static inline uint64_t bio_io_start_time_ns(struct bio *bio)
+{
+ return bio->io_start_time_ns;
+}
+#endif
+
typedef __u32 __bitwise blk_mq_req_flags_t;
/*
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] blk-throttle: add counter for completed io
2019-07-25 3:03 [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
@ 2019-07-25 3:03 ` Joseph Qi
2019-07-25 3:03 ` [PATCH 3/3] blk-throttle: add throttled io/bytes counter Joseph Qi
2019-07-28 6:55 ` [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
2 siblings, 0 replies; 4+ messages in thread
From: Joseph Qi @ 2019-07-25 3:03 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Tejun Heo
Now we have counters for wait_time and service_time, so add another
counter for completed ios, so that the average latency can be measured.
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
block/blk-throttle.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a5880f0..1db461c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -181,6 +181,8 @@ struct throtl_grp {
struct blkg_rwstat service_time;
/* total time spent on block throttle */
struct blkg_rwstat wait_time;
+ /* total IOs completed */
+ struct blkg_rwstat completed;
};
/* We measure latency for request size from <= 4k to >= 1M */
@@ -493,7 +495,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
return NULL;
if (blkg_rwstat_init(&tg->service_time, gfp) ||
- blkg_rwstat_init(&tg->wait_time, gfp))
+ blkg_rwstat_init(&tg->wait_time, gfp) ||
+ blkg_rwstat_init(&tg->completed, gfp))
goto err;
throtl_service_queue_init(&tg->service_queue);
@@ -524,6 +527,7 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
err:
blkg_rwstat_exit(&tg->service_time);
blkg_rwstat_exit(&tg->wait_time);
+ blkg_rwstat_exit(&tg->completed);
kfree(tg);
return NULL;
}
@@ -624,6 +628,8 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
&tg->service_time);
blkg_rwstat_add_aux(&blkg_to_tg(parent)->wait_time,
&tg->wait_time);
+ blkg_rwstat_add_aux(&blkg_to_tg(parent)->completed,
+ &tg->completed);
}
}
@@ -634,6 +640,7 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
del_timer_sync(&tg->service_queue.pending_timer);
blkg_rwstat_exit(&tg->service_time);
blkg_rwstat_exit(&tg->wait_time);
+ blkg_rwstat_exit(&tg->completed);
kfree(tg);
}
@@ -643,6 +650,7 @@ static void throtl_pd_reset(struct blkg_policy_data *pd)
blkg_rwstat_reset(&tg->service_time);
blkg_rwstat_reset(&tg->wait_time);
+ blkg_rwstat_reset(&tg->completed);
}
static struct throtl_grp *
@@ -1065,6 +1073,7 @@ static void throtl_stats_update_completion(struct throtl_grp *tg,
blkg_rwstat_add(&tg->service_time, op, now - io_start_time);
if (time_after64(io_start_time, start_time))
blkg_rwstat_add(&tg->wait_time, op, io_start_time - start_time);
+ blkg_rwstat_add(&tg->completed, op, 1);
local_irq_restore(flags);
}
@@ -1627,6 +1636,11 @@ static int tg_print_rwstat(struct seq_file *sf, void *v)
.private = offsetof(struct throtl_grp, wait_time),
.seq_show = tg_print_rwstat,
},
+ {
+ .name = "throttle.io_completed",
+ .private = offsetof(struct throtl_grp, completed),
+ .seq_show = tg_print_rwstat,
+ },
{ } /* terminate */
};
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] blk-throttle: add throttled io/bytes counter
2019-07-25 3:03 [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
2019-07-25 3:03 ` [PATCH 2/3] blk-throttle: add counter for completed io Joseph Qi
@ 2019-07-25 3:03 ` Joseph Qi
2019-07-28 6:55 ` [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
2 siblings, 0 replies; 4+ messages in thread
From: Joseph Qi @ 2019-07-25 3:03 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Tejun Heo
Add another 2 interfaces to stat io throttle information:
blkio.throttle.total_io_queued
blkio.throttle.total_bytes_queued
These interfaces are used for monitoring throttled io/bytes and for
analyzing whether observed delays are related to io throttling.
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
---
block/blk-throttle.c | 31 ++++++++++++++++++++++++++++++-
1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 1db461c..acc9feb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -183,6 +183,10 @@ struct throtl_grp {
struct blkg_rwstat wait_time;
/* total IOs completed */
struct blkg_rwstat completed;
+ /* total bytes throttled */
+ struct blkg_rwstat total_bytes_queued;
+ /* total IOs throttled */
+ struct blkg_rwstat total_io_queued;
};
/* We measure latency for request size from <= 4k to >= 1M */
@@ -496,7 +500,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
if (blkg_rwstat_init(&tg->service_time, gfp) ||
blkg_rwstat_init(&tg->wait_time, gfp) ||
- blkg_rwstat_init(&tg->completed, gfp))
+ blkg_rwstat_init(&tg->completed, gfp) ||
+ blkg_rwstat_init(&tg->total_bytes_queued, gfp) ||
+ blkg_rwstat_init(&tg->total_io_queued, gfp))
goto err;
throtl_service_queue_init(&tg->service_queue);
@@ -528,6 +534,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
blkg_rwstat_exit(&tg->service_time);
blkg_rwstat_exit(&tg->wait_time);
blkg_rwstat_exit(&tg->completed);
+ blkg_rwstat_exit(&tg->total_bytes_queued);
+ blkg_rwstat_exit(&tg->total_io_queued);
kfree(tg);
return NULL;
}
@@ -630,6 +638,10 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
&tg->wait_time);
blkg_rwstat_add_aux(&blkg_to_tg(parent)->completed,
&tg->completed);
+ blkg_rwstat_add_aux(&blkg_to_tg(parent)->total_bytes_queued,
+ &tg->total_bytes_queued);
+ blkg_rwstat_add_aux(&blkg_to_tg(parent)->total_io_queued,
+ &tg->total_io_queued);
}
}
@@ -641,6 +653,8 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
blkg_rwstat_exit(&tg->service_time);
blkg_rwstat_exit(&tg->wait_time);
blkg_rwstat_exit(&tg->completed);
+ blkg_rwstat_reset(&tg->total_bytes_queued);
+ blkg_rwstat_reset(&tg->total_io_queued);
kfree(tg);
}
@@ -651,6 +665,8 @@ static void throtl_pd_reset(struct blkg_policy_data *pd)
blkg_rwstat_reset(&tg->service_time);
blkg_rwstat_reset(&tg->wait_time);
blkg_rwstat_reset(&tg->completed);
+ blkg_rwstat_reset(&tg->total_bytes_queued);
+ blkg_rwstat_reset(&tg->total_io_queued);
}
static struct throtl_grp *
@@ -1170,6 +1186,9 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
sq->nr_queued[rw]++;
+ blkg_rwstat_add(&tg->total_bytes_queued, bio_op(bio),
+ throtl_bio_data_size(bio));
+ blkg_rwstat_add(&tg->total_io_queued, bio_op(bio), 1);
throtl_enqueue_tg(tg);
}
@@ -1641,6 +1660,16 @@ static int tg_print_rwstat(struct seq_file *sf, void *v)
.private = offsetof(struct throtl_grp, completed),
.seq_show = tg_print_rwstat,
},
+ {
+ .name = "throttle.total_bytes_queued",
+ .private = offsetof(struct throtl_grp, total_bytes_queued),
+ .seq_show = tg_print_rwstat,
+ },
+ {
+ .name = "throttle.total_io_queued",
+ .private = offsetof(struct throtl_grp, total_io_queued),
+ .seq_show = tg_print_rwstat,
+ },
{ } /* terminate */
};
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/3] blk-throttle: support io delay stats
2019-07-25 3:03 [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
2019-07-25 3:03 ` [PATCH 2/3] blk-throttle: add counter for completed io Joseph Qi
2019-07-25 3:03 ` [PATCH 3/3] blk-throttle: add throttled io/bytes counter Joseph Qi
@ 2019-07-28 6:55 ` Joseph Qi
2 siblings, 0 replies; 4+ messages in thread
From: Joseph Qi @ 2019-07-28 6:55 UTC (permalink / raw)
To: linux-block, Jens Axboe, Tejun Heo
Ping...
Thanks,
Joseph
On 19/7/25 11:03, Joseph Qi wrote:
> Add blkio.throttle.io_service_time and blkio.throttle.io_wait_time to
> get per-cgroup io delay statistics in blk-throttle layer.
> io_service_time represents the time spent after io throttle to io
> completion, while io_wait_time represents the time spent on throttle
> queue.
>
> Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
> ---
> block/bio.c | 4 ++
> block/blk-throttle.c | 130 +++++++++++++++++++++++++++++++++++++++++++++-
> include/linux/blk_types.h | 34 ++++++++++++
> 3 files changed, 167 insertions(+), 1 deletion(-)
>
> diff --git a/block/bio.c b/block/bio.c
> index 299a0e7..3206462 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1826,6 +1826,10 @@ void bio_endio(struct bio *bio)
> blk_throtl_bio_endio(bio);
> /* release cgroup info */
> bio_uninit(bio);
> +#ifdef CONFIG_BLK_DEV_THROTTLING
> + if (bio->bi_tg_end_io)
> + bio->bi_tg_end_io(bio);
> +#endif
> if (bio->bi_end_io)
> bio->bi_end_io(bio);
> }
> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> index 8ab6c81..a5880f0 100644
> --- a/block/blk-throttle.c
> +++ b/block/blk-throttle.c
> @@ -176,6 +176,11 @@ struct throtl_grp {
> unsigned int bio_cnt; /* total bios */
> unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
> unsigned long bio_cnt_reset_time;
> +
> + /* total time spent on lower layer: scheduler, device and others */
> + struct blkg_rwstat service_time;
> + /* total time spent on block throttle */
> + struct blkg_rwstat wait_time;
> };
>
> /* We measure latency for request size from <= 4k to >= 1M */
> @@ -487,6 +492,10 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
> if (!tg)
> return NULL;
>
> + if (blkg_rwstat_init(&tg->service_time, gfp) ||
> + blkg_rwstat_init(&tg->wait_time, gfp))
> + goto err;
> +
> throtl_service_queue_init(&tg->service_queue);
>
> for (rw = READ; rw <= WRITE; rw++) {
> @@ -511,6 +520,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
> tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
>
> return &tg->pd;
> +
> +err:
> + blkg_rwstat_exit(&tg->service_time);
> + blkg_rwstat_exit(&tg->wait_time);
> + kfree(tg);
> + return NULL;
> }
>
> static void throtl_pd_init(struct blkg_policy_data *pd)
> @@ -592,6 +607,8 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
> static void throtl_pd_offline(struct blkg_policy_data *pd)
> {
> struct throtl_grp *tg = pd_to_tg(pd);
> + struct blkcg_gq *blkg = pd_to_blkg(pd);
> + struct blkcg_gq *parent = blkg->parent;
>
> tg->bps[READ][LIMIT_LOW] = 0;
> tg->bps[WRITE][LIMIT_LOW] = 0;
> @@ -602,6 +619,12 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
>
> if (!tg->td->limit_valid[tg->td->limit_index])
> throtl_upgrade_state(tg->td);
> + if (parent) {
> + blkg_rwstat_add_aux(&blkg_to_tg(parent)->service_time,
> + &tg->service_time);
> + blkg_rwstat_add_aux(&blkg_to_tg(parent)->wait_time,
> + &tg->wait_time);
> + }
> }
>
> static void throtl_pd_free(struct blkg_policy_data *pd)
> @@ -609,9 +632,19 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
> struct throtl_grp *tg = pd_to_tg(pd);
>
> del_timer_sync(&tg->service_queue.pending_timer);
> + blkg_rwstat_exit(&tg->service_time);
> + blkg_rwstat_exit(&tg->wait_time);
> kfree(tg);
> }
>
> +static void throtl_pd_reset(struct blkg_policy_data *pd)
> +{
> + struct throtl_grp *tg = pd_to_tg(pd);
> +
> + blkg_rwstat_reset(&tg->service_time);
> + blkg_rwstat_reset(&tg->wait_time);
> +}
> +
> static struct throtl_grp *
> throtl_rb_first(struct throtl_service_queue *parent_sq)
> {
> @@ -1019,6 +1052,64 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
> return false;
> }
>
> +static void throtl_stats_update_completion(struct throtl_grp *tg,
> + uint64_t start_time,
> + uint64_t io_start_time,
> + int op)
> +{
> + unsigned long flags;
> + uint64_t now = sched_clock();
> +
> + local_irq_save(flags);
> + if (time_after64(now, io_start_time))
> + blkg_rwstat_add(&tg->service_time, op, now - io_start_time);
> + if (time_after64(io_start_time, start_time))
> + blkg_rwstat_add(&tg->wait_time, op, io_start_time - start_time);
> + local_irq_restore(flags);
> +}
> +
> +static void throtl_bio_end_io(struct bio *bio)
> +{
> + struct throtl_grp *tg;
> +
> + rcu_read_lock();
> + /* see comments in throtl_bio_stats_start() */
> + if (bio_flagged(bio, BIO_THROTL_STATED))
> + goto out;
> +
> + tg = (struct throtl_grp *)bio->bi_tg_private;
> + if (!tg)
> + goto out;
> +
> + throtl_stats_update_completion(tg, bio_start_time_ns(bio),
> + bio_io_start_time_ns(bio),
> + bio_op(bio));
> + blkg_put(tg_to_blkg(tg));
> + bio_clear_flag(bio, BIO_THROTL_STATED);
> +out:
> + rcu_read_unlock();
> +}
> +
> +static inline void throtl_bio_stats_start(struct bio *bio, struct throtl_grp *tg)
> +{
> + int op = bio_op(bio);
> +
> + /*
> + * It may happen that end_io will be called twice like dm-thin,
> + * which will save origin end_io first, and call its overwrite
> + * end_io and then the saved end_io. We use bio flag
> + * BIO_THROTL_STATED to do only once statistics.
> + */
> + if ((op == REQ_OP_READ || op == REQ_OP_WRITE) &&
> + !bio_flagged(bio, BIO_THROTL_STATED)) {
> + blkg_get(tg_to_blkg(tg));
> + bio_set_flag(bio, BIO_THROTL_STATED);
> + bio->bi_tg_end_io = throtl_bio_end_io;
> + bio->bi_tg_private = tg;
> + bio_set_start_time_ns(bio);
> + }
> +}
> +
> static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
> {
> bool rw = bio_data_dir(bio);
> @@ -1462,6 +1553,25 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
> return tg_set_conf(of, buf, nbytes, off, false);
> }
>
> +static u64 tg_prfill_rwstat_field(struct seq_file *sf,
> + struct blkg_policy_data *pd,
> + int off)
> +{
> + struct throtl_grp *tg = pd_to_tg(pd);
> + struct blkg_rwstat_sample rwstat = { };
> +
> + blkg_rwstat_read((void *)tg + off, &rwstat);
> + return __blkg_prfill_rwstat(sf, pd, &rwstat);
> +}
> +
> +static int tg_print_rwstat(struct seq_file *sf, void *v)
> +{
> + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
> + tg_prfill_rwstat_field, &blkcg_policy_throtl,
> + seq_cft(sf)->private, true);
> + return 0;
> +}
> +
> static struct cftype throtl_legacy_files[] = {
> {
> .name = "throttle.read_bps_device",
> @@ -1507,6 +1617,16 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
> .private = (unsigned long)&blkcg_policy_throtl,
> .seq_show = blkg_print_stat_ios_recursive,
> },
> + {
> + .name = "throttle.io_service_time",
> + .private = offsetof(struct throtl_grp, service_time),
> + .seq_show = tg_print_rwstat,
> + },
> + {
> + .name = "throttle.io_wait_time",
> + .private = offsetof(struct throtl_grp, wait_time),
> + .seq_show = tg_print_rwstat,
> + },
> { } /* terminate */
> };
>
> @@ -1732,6 +1852,7 @@ static void throtl_shutdown_wq(struct request_queue *q)
> .pd_online_fn = throtl_pd_online,
> .pd_offline_fn = throtl_pd_offline,
> .pd_free_fn = throtl_pd_free,
> + .pd_reset_stats_fn = throtl_pd_reset,
> };
>
> static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
> @@ -2125,7 +2246,12 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> WARN_ON_ONCE(!rcu_read_lock_held());
>
> /* see throtl_charge_bio() */
> - if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
> + if (bio_flagged(bio, BIO_THROTTLED))
> + goto out;
> +
> + throtl_bio_stats_start(bio, tg);
> +
> + if (!tg->has_rules[rw])
> goto out;
>
> spin_lock_irq(&q->queue_lock);
> @@ -2212,6 +2338,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> out_unlock:
> spin_unlock_irq(&q->queue_lock);
> out:
> + if (!throttled)
> + bio_set_io_start_time_ns(bio);
> bio_set_flag(bio, BIO_THROTTLED);
>
> #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index feff3fe..6906bc6 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -9,6 +9,7 @@
> #include <linux/types.h>
> #include <linux/bvec.h>
> #include <linux/ktime.h>
> +#include <linux/sched/clock.h>
>
> struct bio_set;
> struct bio;
> @@ -169,6 +170,12 @@ struct bio {
> */
> struct blkcg_gq *bi_blkg;
> struct bio_issue bi_issue;
> +#ifdef CONFIG_BLK_DEV_THROTTLING
> + unsigned long long start_time_ns; /* when passed to block throttle */
> + unsigned long long io_start_time_ns; /* when no more throttle */
> + bio_end_io_t *bi_tg_end_io;
> + void *bi_tg_private;
> +#endif
> #endif
> union {
> #if defined(CONFIG_BLK_DEV_INTEGRITY)
> @@ -218,6 +225,7 @@ enum {
> * of this bio. */
> BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */
> BIO_TRACKED, /* set if bio goes through the rq_qos path */
> + BIO_THROTL_STATED, /* bio already stated */
> BIO_FLAG_LAST
> };
>
> @@ -248,6 +256,32 @@ enum {
> */
> #define BIO_RESET_BITS BVEC_POOL_OFFSET
>
> +#ifdef CONFIG_BLK_DEV_THROTTLING
> +static inline void bio_set_start_time_ns(struct bio *bio)
> +{
> + preempt_disable();
> + bio->start_time_ns = sched_clock();
> + preempt_enable();
> +}
> +
> +static inline void bio_set_io_start_time_ns(struct bio *bio)
> +{
> + preempt_disable();
> + bio->io_start_time_ns = sched_clock();
> + preempt_enable();
> +}
> +
> +static inline uint64_t bio_start_time_ns(struct bio *bio)
> +{
> + return bio->start_time_ns;
> +}
> +
> +static inline uint64_t bio_io_start_time_ns(struct bio *bio)
> +{
> + return bio->io_start_time_ns;
> +}
> +#endif
> +
> typedef __u32 __bitwise blk_mq_req_flags_t;
>
> /*
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2019-07-28 6:55 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-25 3:03 [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
2019-07-25 3:03 ` [PATCH 2/3] blk-throttle: add counter for completed io Joseph Qi
2019-07-25 3:03 ` [PATCH 3/3] blk-throttle: add throttled io/bytes counter Joseph Qi
2019-07-28 6:55 ` [PATCH 1/3] blk-throttle: support io delay stats Joseph Qi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).