* [PATCH 1/7] block: move some wbt helpers to blk-wbt.c
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
A few helpers are only used from blk-wbt.c, so move them there, and put
wbt_track() behind the CONFIG_BLK_WBT #ifdef. This is in preparation for
changing how the wbt flags are tracked.
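For reference, these helpers all operate on the packed layout of the
issue_stat u64 (3 reserved bits for wbt flags, 12 bits of request size,
49 bits of issue time) defined by the BLK_STAT_* macros in blk-stat.h.
Below is a standalone userspace sketch of that packing; the flag values are
abridged from enum wbt_flags and the timestamp/size values are made up for
illustration only.

/*
 * Userspace sketch of the blk_issue_stat packing these helpers poke at,
 * mirroring the BLK_STAT_* macros from block/blk-stat.h. Illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define BLK_STAT_RES_BITS   3
#define BLK_STAT_SIZE_BITS  12
#define BLK_STAT_RES_SHIFT  (64 - BLK_STAT_RES_BITS)
#define BLK_STAT_SIZE_SHIFT (BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
#define BLK_STAT_TIME_MASK  ((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
#define BLK_STAT_RES_MASK   (~((1ULL << BLK_STAT_RES_SHIFT) - 1))

enum wbt_flags { WBT_TRACKED = 1, WBT_READ = 2 };	/* abridged */

int main(void)
{
        uint64_t stat = 0;
        uint64_t time_ns = 123456789ULL;  /* pretend issue timestamp */
        uint64_t sectors = 8;             /* pretend request size */

        /* blk_stat_set_issue(): time in the low 49 bits, size above it */
        stat = (time_ns & BLK_STAT_TIME_MASK) | (sectors << BLK_STAT_SIZE_SHIFT);
        /* wbt_track(): wbt flags live in the top 3 reserved bits */
        stat |= ((uint64_t)(WBT_TRACKED | WBT_READ)) << BLK_STAT_RES_SHIFT;

        /* wbt_stat_to_mask() and blk_stat_time() pull the pieces back out */
        printf("flags %llu time %llu\n",
               (unsigned long long)((stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT),
               (unsigned long long)(stat & BLK_STAT_TIME_MASK));
        return 0;
}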
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/blk-wbt.c | 20 ++++++++++++++++++++
block/blk-wbt.h | 33 ++++++++-------------------------
2 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index f92fc84b5e2c..030d6bdcaa4c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -29,6 +29,26 @@
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
+static inline void wbt_clear_state(struct blk_issue_stat *stat)
+{
+ stat->stat &= ~BLK_STAT_RES_MASK;
+}
+
+static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
+{
+ return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
+}
+
+static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
+{
+ return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
+}
+
+static inline bool wbt_is_read(struct blk_issue_stat *stat)
+{
+ return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
+}
+
enum {
/*
* Default setting, we'll scale up (to 75% of QD max) or down (min 1)
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index a232c98fbf4d..d3f47a721aec 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -31,31 +31,6 @@ enum {
WBT_STATE_ON_MANUAL = 2,
};
-static inline void wbt_clear_state(struct blk_issue_stat *stat)
-{
- stat->stat &= ~BLK_STAT_RES_MASK;
-}
-
-static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
-{
- return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
-}
-
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
-{
- stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
-}
-
-static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
-{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
-}
-
-static inline bool wbt_is_read(struct blk_issue_stat *stat)
-{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
-}
-
struct rq_wait {
wait_queue_head_t wait;
atomic_t inflight;
@@ -109,6 +84,11 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
#ifdef CONFIG_BLK_WBT
+static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags flags)
+{
+ stat->stat |= ((u64)flags) << BLK_STAT_RES_SHIFT;
+}
+
void __wbt_done(struct rq_wb *, enum wbt_flags);
void wbt_done(struct rq_wb *, struct blk_issue_stat *);
enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
@@ -127,6 +107,9 @@ u64 wbt_default_latency_nsec(struct request_queue *);
#else
+static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags flags)
+{
+}
static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
{
}
--
2.17.0
* [PATCH 2/7] block: pass struct request instead of struct blk_issue_stat to wbt
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
issue_stat is going to go away, so first make writeback throttling take
the containing request, update the internal wbt helpers accordingly, and
change rwb->sync_cookie to be the request pointer instead of the
issue_stat pointer. No functional change.
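The one subtle piece is rwb->sync_cookie: the request pointer is stored
purely as an opaque address and only ever compared, never dereferenced,
since the request may already have completed and been recycled by the time
the cookie is checked. A minimal userspace sketch of that pattern (names
are illustrative, not the real kernel structures):

/*
 * Sketch of the opaque-cookie pattern wbt uses for sync issue tracking.
 * The cookie is only compared by address; the timestamp is copied out at
 * issue time so nothing ever needs to be dereferenced later.
 */
#include <stddef.h>
#include <stdint.h>

struct rwb_sketch {
        void *sync_cookie;   /* address of the in-flight sync request */
        uint64_t sync_issue; /* its issue time, stored locally */
};

static void issue(struct rwb_sketch *rwb, void *rq, uint64_t now_ns)
{
        if (!rwb->sync_issue) {          /* track first outstanding sync read */
                rwb->sync_cookie = rq;
                rwb->sync_issue = now_ns;
        }
}

static void done(struct rwb_sketch *rwb, void *rq)
{
        if (rwb->sync_cookie == rq) {    /* compare only, never dereference */
                rwb->sync_issue = 0;
                rwb->sync_cookie = NULL;
        }
}

int main(void)
{
        struct rwb_sketch rwb = { 0 };
        int dummy_rq;                    /* stand-in for a struct request */

        issue(&rwb, &dummy_rq, 1000);
        done(&rwb, &dummy_rq);
        return rwb.sync_cookie != NULL;
}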
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/blk-core.c | 10 ++++-----
block/blk-mq.c | 10 ++++-----
block/blk-wbt.c | 53 ++++++++++++++++++++++++------------------------
block/blk-wbt.h | 18 ++++++++--------
4 files changed, 45 insertions(+), 46 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 85909b431eb0..851ea3c0c0b4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1660,7 +1660,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
- wbt_requeue(q->rq_wb, &rq->issue_stat);
+ wbt_requeue(q->rq_wb, rq);
if (rq->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1767,7 +1767,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
- wbt_done(q->rq_wb, &req->issue_stat);
+ wbt_done(q->rq_wb, req);
/*
* Request may not have originated from ll_rw_blk. if not,
@@ -2078,7 +2078,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
goto out_unlock;
}
- wbt_track(&req->issue_stat, wb_acct);
+ wbt_track(req, wb_acct);
/*
* After dropping the lock and possibly sleeping here, our request
@@ -2994,7 +2994,7 @@ void blk_start_request(struct request *req)
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
req->rq_flags |= RQF_STATS;
- wbt_issue(req->q->rq_wb, &req->issue_stat);
+ wbt_issue(req->q->rq_wb, req);
}
BUG_ON(blk_rq_is_complete(req));
@@ -3213,7 +3213,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
blk_account_io_done(req);
if (req->end_io) {
- wbt_done(req->q->rq_wb, &req->issue_stat);
+ wbt_done(req->q->rq_wb, req);
req->end_io(req, error);
} else {
if (blk_bidi_rq(req))
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c3621453ad87..5c9e3ea6cc4e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -472,7 +472,7 @@ void blk_mq_free_request(struct request *rq)
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->backing_dev_info);
- wbt_done(q->rq_wb, &rq->issue_stat);
+ wbt_done(q->rq_wb, rq);
if (blk_rq_rl(rq))
blk_put_rl(blk_rq_rl(rq));
@@ -492,7 +492,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
blk_account_io_done(rq);
if (rq->end_io) {
- wbt_done(rq->q->rq_wb, &rq->issue_stat);
+ wbt_done(rq->q->rq_wb, rq);
rq->end_io(rq, error);
} else {
if (unlikely(blk_bidi_rq(rq)))
@@ -655,7 +655,7 @@ void blk_mq_start_request(struct request *rq)
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
rq->rq_flags |= RQF_STATS;
- wbt_issue(q->rq_wb, &rq->issue_stat);
+ wbt_issue(q->rq_wb, rq);
}
WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
@@ -703,7 +703,7 @@ static void __blk_mq_requeue_request(struct request *rq)
blk_mq_put_driver_tag(rq);
trace_block_rq_requeue(q, rq);
- wbt_requeue(q->rq_wb, &rq->issue_stat);
+ wbt_requeue(q->rq_wb, rq);
if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
@@ -1866,7 +1866,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
- wbt_track(&rq->issue_stat, wb_acct);
+ wbt_track(rq, wb_acct);
cookie = request_to_qc_t(data.hctx, rq);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 030d6bdcaa4c..10aecfadc57d 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -29,24 +29,24 @@
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
-static inline void wbt_clear_state(struct blk_issue_stat *stat)
+static inline void wbt_clear_state(struct request *rq)
{
- stat->stat &= ~BLK_STAT_RES_MASK;
+ rq->issue_stat.stat &= ~BLK_STAT_RES_MASK;
}
-static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
+static inline enum wbt_flags wbt_flags(struct request *rq)
{
- return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
+ return (rq->issue_stat.stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
}
-static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
+static inline bool wbt_is_tracked(struct request *rq)
{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
+ return (rq->issue_stat.stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
}
-static inline bool wbt_is_read(struct blk_issue_stat *stat)
+static inline bool wbt_is_read(struct request *rq)
{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
+ return (rq->issue_stat.stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
}
enum {
@@ -185,24 +185,24 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
* Called on completion of a request. Note that it's also called when
* a request is merged, when the request gets freed.
*/
-void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_done(struct rq_wb *rwb, struct request *rq)
{
if (!rwb)
return;
- if (!wbt_is_tracked(stat)) {
- if (rwb->sync_cookie == stat) {
+ if (!wbt_is_tracked(rq)) {
+ if (rwb->sync_cookie == rq) {
rwb->sync_issue = 0;
rwb->sync_cookie = NULL;
}
- if (wbt_is_read(stat))
+ if (wbt_is_read(rq))
wb_timestamp(rwb, &rwb->last_comp);
} else {
- WARN_ON_ONCE(stat == rwb->sync_cookie);
- __wbt_done(rwb, wbt_stat_to_mask(stat));
+ WARN_ON_ONCE(rq == rwb->sync_cookie);
+ __wbt_done(rwb, wbt_flags(rq));
}
- wbt_clear_state(stat);
+ wbt_clear_state(rq);
}
/*
@@ -629,30 +629,29 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
return ret | WBT_TRACKED;
}
-void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_issue(struct rq_wb *rwb, struct request *rq)
{
if (!rwb_enabled(rwb))
return;
/*
- * Track sync issue, in case it takes a long time to complete. Allows
- * us to react quicker, if a sync IO takes a long time to complete.
- * Note that this is just a hint. 'stat' can go away when the
- * request completes, so it's important we never dereference it. We
- * only use the address to compare with, which is why we store the
- * sync_issue time locally.
+ * Track sync issue, in case it takes a long time to complete. Allows us
+ * to react quicker, if a sync IO takes a long time to complete. Note
+ * that this is just a hint. The request can go away when it completes,
+ * so it's important we never dereference it. We only use the address to
+ * compare with, which is why we store the sync_issue time locally.
*/
- if (wbt_is_read(stat) && !rwb->sync_issue) {
- rwb->sync_cookie = stat;
- rwb->sync_issue = blk_stat_time(stat);
+ if (wbt_is_read(rq) && !rwb->sync_issue) {
+ rwb->sync_cookie = rq;
+ rwb->sync_issue = blk_stat_time(&rq->issue_stat);
}
}
-void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_requeue(struct rq_wb *rwb, struct request *rq)
{
if (!rwb_enabled(rwb))
return;
- if (stat == rwb->sync_cookie) {
+ if (rq == rwb->sync_cookie) {
rwb->sync_issue = 0;
rwb->sync_cookie = NULL;
}
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index d3f47a721aec..dd9211fa853f 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -84,19 +84,19 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
#ifdef CONFIG_BLK_WBT
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags flags)
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
{
- stat->stat |= ((u64)flags) << BLK_STAT_RES_SHIFT;
+ rq->issue_stat.stat |= ((u64)flags) << BLK_STAT_RES_SHIFT;
}
void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct blk_issue_stat *);
+void wbt_done(struct rq_wb *, struct request *);
enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
int wbt_init(struct request_queue *);
void wbt_exit(struct request_queue *);
void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
-void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_requeue(struct rq_wb *, struct request *);
+void wbt_issue(struct rq_wb *, struct request *);
void wbt_disable_default(struct request_queue *);
void wbt_enable_default(struct request_queue *);
@@ -107,13 +107,13 @@ u64 wbt_default_latency_nsec(struct request_queue *);
#else
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags flags)
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
{
}
static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
{
}
-static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
{
}
static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
@@ -131,10 +131,10 @@ static inline void wbt_exit(struct request_queue *q)
static inline void wbt_update_limits(struct rq_wb *rwb)
{
}
-static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
{
}
-static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
{
}
static inline void wbt_disable_default(struct request_queue *q)
--
2.17.0
* [PATCH 3/7] block: replace bio->bi_issue_stat with u64
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
struct blk_issue_stat is going away, and bio->bi_issue_stat doesn't even
use the blk-stats interface, so we can provide a separate implementation
private to blk-throtl.
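The replacement packs the skip flag, the original size, and the issue time
into the single u64 bio->bi_throtl. A userspace sketch of that packing,
mirroring the BI_THROTL_* macros added below (all values are made up for
illustration):

/*
 * Userspace sketch of the bio->bi_throtl packing introduced by this patch;
 * mirrors the BI_THROTL_* macros below. Illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define BI_THROTL_SKIP_LATENCY (1ULL << 63)
#define BI_THROTL_SIZE_BITS    12
#define BI_THROTL_SIZE_MASK    ((1ULL << BI_THROTL_SIZE_BITS) - 1)
#define BI_THROTL_SIZE_SHIFT   (63 - BI_THROTL_SIZE_BITS)
#define BI_THROTL_TIME_MASK    ((1ULL << BI_THROTL_SIZE_SHIFT) - 1)

int main(void)
{
        uint64_t now_ns = 987654321ULL;  /* pretend ktime_get_ns() */
        uint64_t sectors = 8;            /* pretend bio_sectors(bio) */
        uint64_t bi_throtl;

        /* throtl_set_issue() */
        bi_throtl = (now_ns & BI_THROTL_TIME_MASK) |
                    ((sectors & BI_THROTL_SIZE_MASK) << BI_THROTL_SIZE_SHIFT);
        bi_throtl |= BI_THROTL_SKIP_LATENCY;  /* as blk_throtl_bio() may do */

        /* throtl_issue_time() / throtl_issue_size() */
        printf("time %llu size %llu skip %d\n",
               (unsigned long long)(bi_throtl & BI_THROTL_TIME_MASK),
               (unsigned long long)((bi_throtl >> BI_THROTL_SIZE_SHIFT) &
                                    BI_THROTL_SIZE_MASK),
               !!(bi_throtl & BI_THROTL_SKIP_LATENCY));
        return 0;
}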
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/blk-throttle.c | 50 ++++++++++++++++++++++++++++++---------
include/linux/blk_types.h | 2 +-
2 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c5a131673733..e2957a93ed11 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -36,8 +36,6 @@ static int throtl_quantum = 32;
*/
#define LATENCY_FILTERED_HD (1000L) /* 1ms */
-#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
-
static struct blkcg_policy blkcg_policy_throtl;
/* A workqueue to queue throttle related work */
@@ -2048,6 +2046,37 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
}
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+/*
+ * bio->bi_throtl:
+ * high bit is SKIP_LATENCY flag
+ * next 12 bits are original size, in sectors
+ * rest of the bits (51) are issue time, in nanoseconds
+ */
+#define BI_THROTL_SKIP_LATENCY (1ULL << 63)
+#define BI_THROTL_SIZE_BITS 12
+#define BI_THROTL_SIZE_MASK ((1ULL << BI_THROTL_SIZE_BITS) - 1)
+#define BI_THROTL_SIZE_SHIFT (63 - BI_THROTL_SIZE_BITS)
+#define BI_THROTL_TIME_MASK ((1ULL << BI_THROTL_SIZE_SHIFT) - 1)
+
+static void throtl_set_issue(struct bio *bio)
+{
+ u64 time, size;
+
+ time = ktime_get_ns() & BI_THROTL_TIME_MASK;
+ size = bio_sectors(bio) & BI_THROTL_SIZE_MASK;
+ bio->bi_throtl = time | (size << BI_THROTL_SIZE_SHIFT);
+}
+
+static u64 throtl_issue_time(struct bio *bio)
+{
+ return bio->bi_throtl & BI_THROTL_TIME_MASK;
+}
+
+static sector_t throtl_issue_size(struct bio *bio)
+{
+ return (bio->bi_throtl >> BI_THROTL_SIZE_SHIFT) & BI_THROTL_SIZE_MASK;
+}
+
static void throtl_update_latency_buckets(struct throtl_data *td)
{
struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
@@ -2139,7 +2168,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
bio->bi_cg_private = tg;
blkg_get(tg_to_blkg(tg));
}
- blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
+ throtl_set_issue(bio);
#endif
}
@@ -2251,7 +2280,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
- bio->bi_issue_stat.stat |= SKIP_LATENCY;
+ bio->bi_throtl |= BI_THROTL_SKIP_LATENCY;
#endif
return throttled;
}
@@ -2302,8 +2331,8 @@ void blk_throtl_bio_endio(struct bio *bio)
finish_time_ns = ktime_get_ns();
tg->last_finish_time = finish_time_ns >> 10;
- start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
- finish_time = __blk_stat_time(finish_time_ns) >> 10;
+ start_time = throtl_issue_time(bio) >> 10;
+ finish_time = (finish_time_ns & BI_THROTL_TIME_MASK) >> 10;
if (!start_time || finish_time <= start_time) {
blkg_put(tg_to_blkg(tg));
return;
@@ -2311,16 +2340,15 @@ void blk_throtl_bio_endio(struct bio *bio)
lat = finish_time - start_time;
/* this is only for bio based driver */
- if (!(bio->bi_issue_stat.stat & SKIP_LATENCY))
- throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
- bio_op(bio), lat);
+ if (!(bio->bi_throtl & BI_THROTL_SKIP_LATENCY))
+ throtl_track_latency(tg->td, throtl_issue_size(bio),
+ bio_op(bio), lat);
if (tg->latency_target && lat >= tg->td->filtered_latency) {
int bucket;
unsigned int threshold;
- bucket = request_bucket_index(
- blk_stat_size(&bio->bi_issue_stat));
+ bucket = request_bucket_index(throtl_issue_size(bio));
threshold = tg->td->avg_buckets[rw][bucket].latency +
tg->latency_target;
if (lat > threshold)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 17b18b91ebac..3a263538d315 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -138,7 +138,7 @@ struct bio {
struct cgroup_subsys_state *bi_css;
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
void *bi_cg_private;
- struct blk_issue_stat bi_issue_stat;
+ u64 bi_throtl;
#endif
#endif
union {
--
2.17.0
* [PATCH 4/7] block: get rid of struct blk_issue_stat
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
struct blk_issue_stat squashes three things into one u64:
- The time the driver started working on a request
- The original size of the request (for the io.low controller)
- Flags for writeback throttling
It turns out that on x86_64, we have a 4-byte hole in struct request
which we can fill with the non-timestamp fields from blk_issue_stat,
simplifying things quite a bit.
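As a toy illustration of the padding-hole trick (not the real struct
request layout, just the alignment idea): on x86_64, an int followed by a
pointer leaves a 4-byte hole that two unsigned shorts can occupy without
growing the struct at all.

#include <stdio.h>

struct before {
        int a;          /* 4 bytes */
                        /* 4 bytes of padding on x86_64 */
        void *p;        /* 8 bytes, needs 8-byte alignment */
};

struct after {
        int a;
        unsigned short wbt_flags;    /* lives in what used to be padding */
        unsigned short throtl_size;
        void *p;
};

int main(void)
{
        /* both print 16 on x86_64 */
        printf("before %zu after %zu\n", sizeof(struct before),
               sizeof(struct after));
        return 0;
}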
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/blk-core.c | 5 ++++-
block/blk-mq.c | 8 ++++++--
block/blk-stat.c | 7 ++-----
block/blk-stat.h | 43 ---------------------------------------
block/blk-throttle.c | 3 +--
block/blk-wbt.c | 12 +++++------
block/blk-wbt.h | 4 ++--
block/kyber-iosched.c | 6 +++---
include/linux/blk_types.h | 4 ----
include/linux/blkdev.h | 26 +++++++++++++++--------
10 files changed, 41 insertions(+), 77 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 851ea3c0c0b4..02b0cc23fd1b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2992,7 +2992,10 @@ void blk_start_request(struct request *req)
blk_dequeue_request(req);
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
- blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
+ req->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ req->throtl_size = blk_rq_sectors(req);
+#endif
req->rq_flags |= RQF_STATS;
wbt_issue(req->q->rq_wb, req);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5c9e3ea6cc4e..9fa8069af032 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -294,6 +294,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->rq_disk = NULL;
rq->part = NULL;
rq->start_time = jiffies;
+ rq->io_start_time_ns = 0;
rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
rq->nr_integrity_segments = 0;
@@ -313,7 +314,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
#ifdef CONFIG_BLK_CGROUP
rq->rl = NULL;
set_start_time_ns(rq);
- rq->io_start_time_ns = 0;
+ rq->cgroup_io_start_time_ns = 0;
#endif
data->ctx->rq_dispatched[op_is_sync(op)]++;
@@ -653,7 +654,10 @@ void blk_mq_start_request(struct request *rq)
trace_block_rq_issue(q, rq);
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
- blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
+ rq->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ rq->throtl_size = blk_rq_sectors(rq);
+#endif
rq->rq_flags |= RQF_STATS;
wbt_issue(q->rq_wb, rq);
}
diff --git a/block/blk-stat.c b/block/blk-stat.c
index bd365a95fcf8..725a881723b0 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -55,11 +55,8 @@ void blk_stat_add(struct request *rq)
int bucket;
u64 now, value;
- now = __blk_stat_time(ktime_to_ns(ktime_get()));
- if (now < blk_stat_time(&rq->issue_stat))
- return;
-
- value = now - blk_stat_time(&rq->issue_stat);
+ now = ktime_get_ns();
+ value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
blk_throtl_stat_add(rq, value);
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 2dd36347252a..17c812db0aca 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -8,21 +8,6 @@
#include <linux/rcupdate.h>
#include <linux/timer.h>
-/*
- * from upper:
- * 3 bits: reserved for other usage
- * 12 bits: size
- * 49 bits: time
- */
-#define BLK_STAT_RES_BITS 3
-#define BLK_STAT_SIZE_BITS 12
-#define BLK_STAT_RES_SHIFT (64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_SIZE_SHIFT (BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
-#define BLK_STAT_TIME_MASK ((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
-#define BLK_STAT_SIZE_MASK \
- (((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
-#define BLK_STAT_RES_MASK (~((1ULL << BLK_STAT_RES_SHIFT) - 1))
-
/**
* struct blk_stat_callback - Block statistics callback.
*
@@ -82,34 +67,6 @@ void blk_free_queue_stats(struct blk_queue_stats *);
void blk_stat_add(struct request *);
-static inline u64 __blk_stat_time(u64 time)
-{
- return time & BLK_STAT_TIME_MASK;
-}
-
-static inline u64 blk_stat_time(struct blk_issue_stat *stat)
-{
- return __blk_stat_time(stat->stat);
-}
-
-static inline sector_t blk_capped_size(sector_t size)
-{
- return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
-}
-
-static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
-{
- return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
-}
-
-static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
- sector_t size)
-{
- stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
- (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
- (((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
-}
-
/* record time/size info in request but not add a callback */
void blk_stat_enable_accounting(struct request_queue *q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e2957a93ed11..0b75eaa7acb9 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2310,8 +2310,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
struct request_queue *q = rq->q;
struct throtl_data *td = q->td;
- throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
- req_op(rq), time_ns >> 10);
+ throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
}
void blk_throtl_bio_endio(struct bio *bio)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 10aecfadc57d..903283d15ee4 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -31,22 +31,22 @@
static inline void wbt_clear_state(struct request *rq)
{
- rq->issue_stat.stat &= ~BLK_STAT_RES_MASK;
+ rq->wbt_flags = 0;
}
static inline enum wbt_flags wbt_flags(struct request *rq)
{
- return (rq->issue_stat.stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
+ return rq->wbt_flags;
}
static inline bool wbt_is_tracked(struct request *rq)
{
- return (rq->issue_stat.stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
+ return rq->wbt_flags & WBT_TRACKED;
}
static inline bool wbt_is_read(struct request *rq)
{
- return (rq->issue_stat.stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
+ return rq->wbt_flags & WBT_READ;
}
enum {
@@ -643,7 +643,7 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq)
*/
if (wbt_is_read(rq) && !rwb->sync_issue) {
rwb->sync_cookie = rq;
- rwb->sync_issue = blk_stat_time(&rq->issue_stat);
+ rwb->sync_issue = rq->io_start_time_ns;
}
}
@@ -732,8 +732,6 @@ int wbt_init(struct request_queue *q)
struct rq_wb *rwb;
int i;
- BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
-
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
return -ENOMEM;
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index dd9211fa853f..9e09de29a28e 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -59,7 +59,7 @@ struct rq_wb {
struct blk_stat_callback *cb;
- s64 sync_issue;
+ u64 sync_issue;
void *sync_cookie;
unsigned int wc;
@@ -86,7 +86,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
static inline void wbt_track(struct request *rq, enum wbt_flags flags)
{
- rq->issue_stat.stat |= ((u64)flags) << BLK_STAT_RES_SHIFT;
+ rq->wbt_flags |= flags;
}
void __wbt_done(struct rq_wb *, enum wbt_flags);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 0d6d25e32e1f..564967fafe5f 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -485,11 +485,11 @@ static void kyber_completed_request(struct request *rq)
if (blk_stat_is_active(kqd->cb))
return;
- now = __blk_stat_time(ktime_to_ns(ktime_get()));
- if (now < blk_stat_time(&rq->issue_stat))
+ now = ktime_get_ns();
+ if (now < rq->io_start_time_ns)
return;
- latency = now - blk_stat_time(&rq->issue_stat);
+ latency = now - rq->io_start_time_ns;
if (latency > target)
blk_stat_activate_msecs(kqd->cb, 10);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3a263538d315..7b55513407ee 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -90,10 +90,6 @@ static inline bool blk_path_error(blk_status_t error)
return true;
}
-struct blk_issue_stat {
- u64 stat;
-};
-
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c4eee043191..f2c2fc011e6b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -206,8 +206,18 @@ struct request {
struct gendisk *rq_disk;
struct hd_struct *part;
unsigned long start_time;
- struct blk_issue_stat issue_stat;
- /* Number of scatter-gather DMA addr+len pairs after
+ /* Time that I/O was submitted to the device. */
+ u64 io_start_time_ns;
+
+#ifdef CONFIG_BLK_WBT
+ unsigned short wbt_flags;
+#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ unsigned short throtl_size;
+#endif
+
+ /*
+ * Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
@@ -267,8 +277,8 @@ struct request {
#ifdef CONFIG_BLK_CGROUP
struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long start_time_ns;
- unsigned long long io_start_time_ns; /* when passed to hardware */
+ unsigned long long cgroup_start_time_ns;
+ unsigned long long cgroup_io_start_time_ns; /* when passed to hardware */
#endif
};
@@ -1797,25 +1807,25 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
static inline void set_start_time_ns(struct request *req)
{
preempt_disable();
- req->start_time_ns = sched_clock();
+ req->cgroup_start_time_ns = sched_clock();
preempt_enable();
}
static inline void set_io_start_time_ns(struct request *req)
{
preempt_disable();
- req->io_start_time_ns = sched_clock();
+ req->cgroup_io_start_time_ns = sched_clock();
preempt_enable();
}
static inline uint64_t rq_start_time_ns(struct request *req)
{
- return req->start_time_ns;
+ return req->cgroup_start_time_ns;
}
static inline uint64_t rq_io_start_time_ns(struct request *req)
{
- return req->io_start_time_ns;
+ return req->cgroup_io_start_time_ns;
}
#else
static inline void set_start_time_ns(struct request *req) {}
--
2.17.0
* [PATCH 5/7] block: use ktime_get_ns() instead of sched_clock() for cfq and bfq
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
cfq and bfq have some internal fields that use sched_clock() which can
trivially use ktime_get_ns() instead. Their timestamp fields in struct
request can also use ktime_get_ns(), which resolves the 8-year-old
comment added by commit 28f4197e5d47 ("block: disable preemption before
using sched_clock()").
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/bfq-cgroup.c | 40 +++++++++++++++++-----------------
block/bfq-iosched.h | 10 ++++-----
block/cfq-iosched.c | 49 ++++++++++++++++++++++--------------------
include/linux/blkdev.h | 21 ++++++------------
4 files changed, 57 insertions(+), 63 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d819dc77fe65..a9e8633388f4 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -55,13 +55,13 @@ BFQG_FLAG_FNS(empty)
/* This should be called with the scheduler lock held. */
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!bfqg_stats_waiting(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
+ now = ktime_get_ns();
+ if (now > stats->start_group_wait_time)
blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
bfqg_stats_clear_waiting(stats);
@@ -77,20 +77,20 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
return;
if (bfqg == curr_bfqg)
return;
- stats->start_group_wait_time = sched_clock();
+ stats->start_group_wait_time = ktime_get_ns();
bfqg_stats_mark_waiting(stats);
}
/* This should be called with the scheduler lock held. */
static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!bfqg_stats_empty(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
+ now = ktime_get_ns();
+ if (now > stats->start_empty_time)
blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
bfqg_stats_clear_empty(stats);
@@ -116,7 +116,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
if (bfqg_stats_empty(stats))
return;
- stats->start_empty_time = sched_clock();
+ stats->start_empty_time = ktime_get_ns();
bfqg_stats_mark_empty(stats);
}
@@ -125,9 +125,9 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
struct bfqg_stats *stats = &bfqg->stats;
if (bfqg_stats_idling(stats)) {
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, stats->start_idle_time))
+ if (now > stats->start_idle_time)
blkg_stat_add(&stats->idle_time,
now - stats->start_idle_time);
bfqg_stats_clear_idling(stats);
@@ -138,7 +138,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
struct bfqg_stats *stats = &bfqg->stats;
- stats->start_idle_time = sched_clock();
+ stats->start_idle_time = ktime_get_ns();
bfqg_stats_mark_idling(stats);
}
@@ -171,18 +171,18 @@ void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
- uint64_t io_start_time, unsigned int op)
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+ u64 io_start_time_ns, unsigned int op)
{
struct bfqg_stats *stats = &bfqg->stats;
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, io_start_time))
+ if (now > io_start_time_ns)
blkg_rwstat_add(&stats->service_time, op,
- now - io_start_time);
- if (time_after64(io_start_time, start_time))
+ now - io_start_time_ns);
+ if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, op,
- io_start_time - start_time);
+ io_start_time_ns - start_time_ns);
}
#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
@@ -191,8 +191,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
- uint64_t io_start_time, unsigned int op) { }
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+ u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ae2f3dadec44..8ec7ff92cd6f 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -732,9 +732,9 @@ struct bfqg_stats {
/* total time with empty current active q with other requests queued */
struct blkg_stat empty_time;
/* fields after this shouldn't be cleared on stat reset */
- uint64_t start_group_wait_time;
- uint64_t start_idle_time;
- uint64_t start_empty_time;
+ u64 start_group_wait_time;
+ u64 start_idle_time;
+ u64 start_empty_time;
uint16_t flags;
#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
};
@@ -856,8 +856,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
- uint64_t io_start_time, unsigned int op);
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+ u64 io_start_time_ns, unsigned int op);
void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9f342ef1ad42..652ca064de20 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -210,9 +210,9 @@ struct cfqg_stats {
/* total time with empty current active q with other requests queued */
struct blkg_stat empty_time;
/* fields after this shouldn't be cleared on stat reset */
- uint64_t start_group_wait_time;
- uint64_t start_idle_time;
- uint64_t start_empty_time;
+ u64 start_group_wait_time;
+ u64 start_idle_time;
+ u64 start_empty_time;
uint16_t flags;
#endif /* CONFIG_DEBUG_BLK_CGROUP */
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
@@ -491,13 +491,13 @@ CFQG_FLAG_FNS(empty)
/* This should be called with the queue_lock held. */
static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!cfqg_stats_waiting(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
+ now = ktime_get_ns();
+ if (now > stats->start_group_wait_time)
blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
cfqg_stats_clear_waiting(stats);
@@ -513,20 +513,20 @@ static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
return;
if (cfqg == curr_cfqg)
return;
- stats->start_group_wait_time = sched_clock();
+ stats->start_group_wait_time = ktime_get_ns();
cfqg_stats_mark_waiting(stats);
}
/* This should be called with the queue_lock held. */
static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!cfqg_stats_empty(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
+ now = ktime_get_ns();
+ if (now > stats->start_empty_time)
blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
cfqg_stats_clear_empty(stats);
@@ -552,7 +552,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
if (cfqg_stats_empty(stats))
return;
- stats->start_empty_time = sched_clock();
+ stats->start_empty_time = ktime_get_ns();
cfqg_stats_mark_empty(stats);
}
@@ -561,9 +561,9 @@ static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
struct cfqg_stats *stats = &cfqg->stats;
if (cfqg_stats_idling(stats)) {
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, stats->start_idle_time))
+ if (now > stats->start_idle_time)
blkg_stat_add(&stats->idle_time,
now - stats->start_idle_time);
cfqg_stats_clear_idling(stats);
@@ -576,7 +576,7 @@ static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
BUG_ON(cfqg_stats_idling(stats));
- stats->start_idle_time = sched_clock();
+ stats->start_idle_time = ktime_get_ns();
cfqg_stats_mark_idling(stats);
}
@@ -701,17 +701,19 @@ static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
}
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time,
- unsigned int op)
+ u64 start_time_ns,
+ u64 io_start_time_ns,
+ unsigned int op)
{
struct cfqg_stats *stats = &cfqg->stats;
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
- if (time_after64(io_start_time, start_time))
+ if (now > io_start_time_ns)
+ blkg_rwstat_add(&stats->service_time, op,
+ now - io_start_time_ns);
+ if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, op,
- io_start_time - start_time);
+ io_start_time_ns - start_time_ns);
}
/* @stats = 0 */
@@ -797,8 +799,9 @@ static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
unsigned int op) { }
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time,
- unsigned int op) { }
+ u64 start_time_ns,
+ u64 io_start_time_ns,
+ unsigned int op) { }
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f2c2fc011e6b..9ef412666df1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1799,42 +1799,33 @@ int kblockd_schedule_work_on(int cpu, struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
-/*
- * This should not be using sched_clock(). A real patch is in progress
- * to fix this up, until that is in place we need to disable preemption
- * around sched_clock() in this function and set_io_start_time_ns().
- */
static inline void set_start_time_ns(struct request *req)
{
- preempt_disable();
- req->cgroup_start_time_ns = sched_clock();
- preempt_enable();
+ req->cgroup_start_time_ns = ktime_get_ns();
}
static inline void set_io_start_time_ns(struct request *req)
{
- preempt_disable();
- req->cgroup_io_start_time_ns = sched_clock();
- preempt_enable();
+ req->cgroup_io_start_time_ns = ktime_get_ns();
}
-static inline uint64_t rq_start_time_ns(struct request *req)
+static inline u64 rq_start_time_ns(struct request *req)
{
return req->cgroup_start_time_ns;
}
-static inline uint64_t rq_io_start_time_ns(struct request *req)
+static inline u64 rq_io_start_time_ns(struct request *req)
{
return req->cgroup_io_start_time_ns;
}
#else
static inline void set_start_time_ns(struct request *req) {}
static inline void set_io_start_time_ns(struct request *req) {}
-static inline uint64_t rq_start_time_ns(struct request *req)
+static inline u64 rq_start_time_ns(struct request *req)
{
return 0;
}
-static inline uint64_t rq_io_start_time_ns(struct request *req)
+static inline u64 rq_io_start_time_ns(struct request *req)
{
return 0;
}
--
2.17.0
* [PATCH 6/7] block: move blk_stat_add() to __blk_mq_end_request()
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
We want this next to blk_account_io_done() for the next change so that
we can call ktime_get() only once for both.
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/blk-mq.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9fa8069af032..65141edda189 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -490,6 +490,11 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
+ if (rq->rq_flags & RQF_STATS) {
+ blk_mq_poll_stats_start(rq->q);
+ blk_stat_add(rq);
+ }
+
blk_account_io_done(rq);
if (rq->end_io) {
@@ -529,10 +534,6 @@ static void __blk_mq_complete_request(struct request *rq)
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);
- if (rq->rq_flags & RQF_STATS) {
- blk_mq_poll_stats_start(rq->q);
- blk_stat_add(rq);
- }
if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
rq->q->softirq_done_fn(rq);
--
2.17.0
* [PATCH 7/7] block: consolidate struct request timestamp fields
From: Omar Sandoval @ 2018-05-02 17:34 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, kernel-team
From: Omar Sandoval <osandov@fb.com>
Currently, struct request has four timestamp fields:
- A start time, set at get_request time, in jiffies, used for iostats
- An I/O start time, set at start_request time, in ktime nanoseconds,
used for blk-stats (i.e., wbt, kyber, hybrid polling)
- Another start time and another I/O start time, used for cfq and bfq
These can all be consolidated into one start time and one I/O start
time, both in ktime nanoseconds, shaving off up to 16 bytes from struct
request depending on the kernel config.
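A toy sketch of just the fields involved shows where the savings come from;
the cgroup pair only exists under CONFIG_BLK_CGROUP, which is why the saving
is "up to" 16 bytes (the real struct request obviously has many more
members):

#include <stdint.h>
#include <stdio.h>

struct timestamps_before {
        unsigned long start_time;                   /* jiffies, iostats */
        uint64_t io_start_time_ns;                  /* blk-stats */
        unsigned long long cgroup_start_time_ns;    /* cfq/bfq, CONFIG_BLK_CGROUP only */
        unsigned long long cgroup_io_start_time_ns; /* cfq/bfq, CONFIG_BLK_CGROUP only */
};

struct timestamps_after {
        uint64_t start_time_ns;    /* serves iostats, cfq, and bfq */
        uint64_t io_start_time_ns; /* serves blk-stats, cfq, and bfq */
};

int main(void)
{
        /* 32 vs 16 bytes on x86_64 */
        printf("before %zu after %zu\n", sizeof(struct timestamps_before),
               sizeof(struct timestamps_after));
        return 0;
}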
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
block/bfq-iosched.c | 4 ++--
block/blk-core.c | 17 ++++++++---------
block/blk-merge.c | 11 +++++------
block/blk-mq.c | 10 +++++-----
block/blk-stat.c | 5 ++---
block/blk-stat.h | 2 +-
block/blk.h | 2 +-
block/cfq-iosched.c | 15 +++------------
drivers/md/dm-rq.c | 2 +-
include/linux/blkdev.h | 38 ++------------------------------------
10 files changed, 30 insertions(+), 76 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 771ae9730ac6..ebc264c87a09 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4778,8 +4778,8 @@ static void bfq_finish_requeue_request(struct request *rq)
if (rq->rq_flags & RQF_STARTED)
bfqg_stats_update_completion(bfqq_group(bfqq),
- rq_start_time_ns(rq),
- rq_io_start_time_ns(rq),
+ rq->start_time_ns,
+ rq->io_start_time_ns,
rq->cmd_flags);
if (likely(rq->rq_flags & RQF_STARTED)) {
diff --git a/block/blk-core.c b/block/blk-core.c
index 02b0cc23fd1b..571a4fd3c394 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -196,8 +196,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
RB_CLEAR_NODE(&rq->rb_node);
rq->tag = -1;
rq->internal_tag = -1;
- rq->start_time = jiffies;
- set_start_time_ns(rq);
+ rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
seqcount_init(&rq->gstate_seq);
u64_stats_init(&rq->aborted_gstate_sync);
@@ -2727,7 +2726,7 @@ void blk_account_io_completion(struct request *req, unsigned int bytes)
}
}
-void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req, u64 now)
{
/*
* Account IO completion. flush_rq isn't accounted as a
@@ -2735,11 +2734,12 @@ void blk_account_io_done(struct request *req)
* containing request is enough.
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
- unsigned long duration = jiffies - req->start_time;
+ unsigned long duration;
const int rw = rq_data_dir(req);
struct hd_struct *part;
int cpu;
+ duration = nsecs_to_jiffies(now - req->start_time_ns);
cpu = part_stat_lock();
part = req->part;
@@ -2970,10 +2970,8 @@ static void blk_dequeue_request(struct request *rq)
* and to it is freed is accounted as io that is in progress at
* the driver side.
*/
- if (blk_account_rq(rq)) {
+ if (blk_account_rq(rq))
q->in_flight[rq_is_sync(rq)]++;
- set_io_start_time_ns(rq);
- }
}
/**
@@ -3193,12 +3191,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
void blk_finish_request(struct request *req, blk_status_t error)
{
struct request_queue *q = req->q;
+ u64 now = ktime_get_ns();
lockdep_assert_held(req->q->queue_lock);
WARN_ON_ONCE(q->mq_ops);
if (req->rq_flags & RQF_STATS)
- blk_stat_add(req);
+ blk_stat_add(req, now);
if (req->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, req);
@@ -3213,7 +3212,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
if (req->rq_flags & RQF_DONTPREP)
blk_unprep_request(req);
- blk_account_io_done(req);
+ blk_account_io_done(req, now);
if (req->end_io) {
wbt_done(req->q->rq_wb, req);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 782940c65d8a..5573d0fbec53 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -724,13 +724,12 @@ static struct request *attempt_merge(struct request_queue *q,
}
/*
- * At this point we have either done a back merge
- * or front merge. We need the smaller start_time of
- * the merged requests to be the current request
- * for accounting purposes.
+ * At this point we have either done a back merge or front merge. We
+ * need the smaller start_time_ns of the merged requests to be the
+ * current request for accounting purposes.
*/
- if (time_after(req->start_time, next->start_time))
- req->start_time = next->start_time;
+ if (next->start_time_ns < req->start_time_ns)
+ req->start_time_ns = next->start_time_ns;
req->biotail->bi_next = next->bio;
req->biotail = next->biotail;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 65141edda189..31a5a3b00871 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -293,7 +293,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
RB_CLEAR_NODE(&rq->rb_node);
rq->rq_disk = NULL;
rq->part = NULL;
- rq->start_time = jiffies;
+ rq->start_time_ns = ktime_get_ns();
rq->io_start_time_ns = 0;
rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -313,8 +313,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
#ifdef CONFIG_BLK_CGROUP
rq->rl = NULL;
- set_start_time_ns(rq);
- rq->cgroup_io_start_time_ns = 0;
#endif
data->ctx->rq_dispatched[op_is_sync(op)]++;
@@ -490,12 +488,14 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
+ u64 now = ktime_get_ns();
+
if (rq->rq_flags & RQF_STATS) {
blk_mq_poll_stats_start(rq->q);
- blk_stat_add(rq);
+ blk_stat_add(rq, now);
}
- blk_account_io_done(rq);
+ blk_account_io_done(rq, now);
if (rq->end_io) {
wbt_done(rq->q->rq_wb, rq);
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 725a881723b0..175c143ac5b9 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -47,15 +47,14 @@ static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
stat->nr_samples++;
}
-void blk_stat_add(struct request *rq)
+void blk_stat_add(struct request *rq, u64 now)
{
struct request_queue *q = rq->q;
struct blk_stat_callback *cb;
struct blk_rq_stat *stat;
int bucket;
- u64 now, value;
+ u64 value;
- now = ktime_get_ns();
value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
blk_throtl_stat_add(rq, value);
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 17c812db0aca..78399cdde9c9 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -65,7 +65,7 @@ struct blk_stat_callback {
struct blk_queue_stats *blk_alloc_queue_stats(void);
void blk_free_queue_stats(struct blk_queue_stats *);
-void blk_stat_add(struct request *);
+void blk_stat_add(struct request *rq, u64 now);
/* record time/size info in request but not add a callback */
void blk_stat_enable_accounting(struct request_queue *q);
diff --git a/block/blk.h b/block/blk.h
index b034fd2460c4..eaf1a8e87d11 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -186,7 +186,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q);
void blk_account_io_start(struct request *req, bool new_io);
void blk_account_io_completion(struct request *req, unsigned int bytes);
-void blk_account_io_done(struct request *req);
+void blk_account_io_done(struct request *req, u64 now);
/*
* EH timer and IO completion will both attempt to 'grab' the request, make
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 652ca064de20..6b9f6b1cd33b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -4228,8 +4228,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqd->rq_in_driver--;
cfqq->dispatched--;
(RQ_CFQG(rq))->dispatched--;
- cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
- rq_io_start_time_ns(rq), rq->cmd_flags);
+ cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
+ rq->io_start_time_ns, rq->cmd_flags);
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
@@ -4245,16 +4245,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqq_type(cfqq));
st->ttime.last_end_request = now;
- /*
- * We have to do this check in jiffies since start_time is in
- * jiffies and it is not trivial to convert to ns. If
- * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
- * will become problematic but so far we are fine (the default
- * is 128 ms).
- */
- if (!time_after(rq->start_time +
- nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
- jiffies))
+ if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now)
cfqd->last_delayed_sync = now;
}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index bf0b840645cc..1c18f335da04 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -406,7 +406,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
- clone->start_time = jiffies;
+ clone->start_time_ns = ktime_get_ns();
r = blk_insert_cloned_request(clone->q, clone);
if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
/* must complete clone in terms of original request */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9ef412666df1..e42d510daf3c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -205,7 +205,8 @@ struct request {
struct gendisk *rq_disk;
struct hd_struct *part;
- unsigned long start_time;
+ /* Time that I/O was submitted to the kernel. */
+ u64 start_time_ns;
/* Time that I/O was submitted to the device. */
u64 io_start_time_ns;
@@ -277,8 +278,6 @@ struct request {
#ifdef CONFIG_BLK_CGROUP
struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long cgroup_start_time_ns;
- unsigned long long cgroup_io_start_time_ns; /* when passed to hardware */
#endif
};
@@ -1798,39 +1797,6 @@ int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_work_on(int cpu, struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
-#ifdef CONFIG_BLK_CGROUP
-static inline void set_start_time_ns(struct request *req)
-{
- req->cgroup_start_time_ns = ktime_get_ns();
-}
-
-static inline void set_io_start_time_ns(struct request *req)
-{
- req->cgroup_io_start_time_ns = ktime_get_ns();
-}
-
-static inline u64 rq_start_time_ns(struct request *req)
-{
- return req->cgroup_start_time_ns;
-}
-
-static inline u64 rq_io_start_time_ns(struct request *req)
-{
- return req->cgroup_io_start_time_ns;
-}
-#else
-static inline void set_start_time_ns(struct request *req) {}
-static inline void set_io_start_time_ns(struct request *req) {}
-static inline u64 rq_start_time_ns(struct request *req)
-{
- return 0;
-}
-static inline u64 rq_io_start_time_ns(struct request *req)
-{
- return 0;
-}
-#endif
-
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
--
2.17.0