Linux-Block Archive on lore.kernel.org
 help / Atom feed
* [PATCH 0/2] fixes for block stats
@ 2019-05-21  7:59 Hou Tao
  2019-05-21  7:59 ` [PATCH 1/2] block: make rq sector size accessible " Hou Tao
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Hou Tao @ 2019-05-21  7:59 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: osandov, ming.lei

The first patch fixes the problem that there is no sample in
/sys/kernel/debug/block/nvmeXn1/poll_stat and hybrid poll may
don't work as expected. The second patch tries to ensure
the latency accounting for block stats will work normally
even when iostat is disabled.

Comments are wecome.

Regard,
Tao

Hou Tao (2):
  block: make rq sector size accessible for block stats
  block: also check RQF_STATS in blk_mq_need_time_stamp()

 block/blk-mq.c         | 17 ++++++++---------
 block/blk-throttle.c   |  3 ++-
 include/linux/blkdev.h | 15 ++++++++++++---
 3 files changed, 22 insertions(+), 13 deletions(-)

-- 
2.16.2.dirty


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] block: make rq sector size accessible for block stats
  2019-05-21  7:59 [PATCH 0/2] fixes for block stats Hou Tao
@ 2019-05-21  7:59 ` " Hou Tao
  2019-05-25 11:46   ` Pavel Begunkov
  2019-05-21  7:59 ` [PATCH 2/2] block: also check RQF_STATS in blk_mq_need_time_stamp() Hou Tao
  2019-05-25  5:12 ` [PATCH 0/2] fixes for block stats Hou Tao
  2 siblings, 1 reply; 8+ messages in thread
From: Hou Tao @ 2019-05-21  7:59 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: osandov, ming.lei

Currently rq->data_len will be decreased by partial completion or
zeroed by completion, so when blk_stat_add() is invoked, data_len
will be zero and there will never be samples in poll_cb because
blk_mq_poll_stats_bkt() will return -1 if data_len is zero.

We could move blk_stat_add() back to __blk_mq_complete_request(),
but that would make the effort of trying to call ktime_get_ns()
once in vain. Instead we can reuse throtl_size field, and use
it for both block stats and block throttle, and adjust the
logic in blk_mq_poll_stats_bkt() accordingly.

Fixes: 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()")
Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 block/blk-mq.c         | 11 +++++------
 block/blk-throttle.c   |  3 ++-
 include/linux/blkdev.h | 15 ++++++++++++---
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 08a6248d8536..4d1462172f0f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
 static int blk_mq_poll_stats_bkt(const struct request *rq)
 {
-	int ddir, bytes, bucket;
+	int ddir, sectors, bucket;
 
 	ddir = rq_data_dir(rq);
-	bytes = blk_rq_bytes(rq);
+	sectors = blk_rq_stats_sectors(rq);
 
-	bucket = ddir + 2*(ilog2(bytes) - 9);
+	bucket = ddir + 2 * ilog2(sectors);
 
 	if (bucket < 0)
 		return -1;
@@ -329,6 +329,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	else
 		rq->start_time_ns = 0;
 	rq->io_start_time_ns = 0;
+	rq->stats_sectors = 0;
 	rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
@@ -678,9 +679,7 @@ void blk_mq_start_request(struct request *rq)
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
 		rq->io_start_time_ns = ktime_get_ns();
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-		rq->throtl_size = blk_rq_sectors(rq);
-#endif
+		rq->stats_sectors = blk_rq_sectors(rq);
 		rq->rq_flags |= RQF_STATS;
 		rq_qos_issue(q, rq);
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 1b97a73d2fb1..88459a4ac704 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2249,7 +2249,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
 	struct request_queue *q = rq->q;
 	struct throtl_data *td = q->td;
 
-	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
+	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
+			     time_ns >> 10);
 }
 
 void blk_throtl_bio_endio(struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1aafeb923e7b..68a0841d3554 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -202,9 +202,12 @@ struct request {
 #ifdef CONFIG_BLK_WBT
 	unsigned short wbt_flags;
 #endif
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	unsigned short throtl_size;
-#endif
+	/*
+	 * rq sectors used for blk stats. It has the same value
+	 * with blk_rq_sectors(rq), except that it never be zeroed
+	 * by completion.
+	 */
+	unsigned short stats_sectors;
 
 	/*
 	 * Number of scatter-gather DMA addr+len pairs after
@@ -892,6 +895,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
  * blk_rq_err_bytes()		: bytes left till the next error boundary
  * blk_rq_sectors()		: sectors left in the entire request
  * blk_rq_cur_sectors()		: sectors left in the current segment
+ * blk_rq_stats_sectors()	: sectors of the entire request used for stats
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
@@ -920,6 +924,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 }
 
+static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
+{
+	return rq->stats_sectors;
+}
+
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
-- 
2.16.2.dirty


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] block: also check RQF_STATS in blk_mq_need_time_stamp()
  2019-05-21  7:59 [PATCH 0/2] fixes for block stats Hou Tao
  2019-05-21  7:59 ` [PATCH 1/2] block: make rq sector size accessible " Hou Tao
@ 2019-05-21  7:59 ` Hou Tao
  2019-05-25  5:12 ` [PATCH 0/2] fixes for block stats Hou Tao
  2 siblings, 0 replies; 8+ messages in thread
From: Hou Tao @ 2019-05-21  7:59 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: osandov, ming.lei

In __blk_mq_end_request() if block stats needs update, we should
ensure now is valid instead of 0 even when iostat is disabled.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 block/blk-mq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4d1462172f0f..56425e392825 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -281,12 +281,12 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 EXPORT_SYMBOL(blk_mq_can_queue);
 
 /*
- * Only need start/end time stamping if we have stats enabled, or using
- * an IO scheduler.
+ * Only need start/end time stamping if we have iostat or
+ * blk stats enabled, or using an IO scheduler.
  */
 static inline bool blk_mq_need_time_stamp(struct request *rq)
 {
-	return (rq->rq_flags & RQF_IO_STAT) || rq->q->elevator;
+	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
 }
 
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
-- 
2.16.2.dirty


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] fixes for block stats
  2019-05-21  7:59 [PATCH 0/2] fixes for block stats Hou Tao
  2019-05-21  7:59 ` [PATCH 1/2] block: make rq sector size accessible " Hou Tao
  2019-05-21  7:59 ` [PATCH 2/2] block: also check RQF_STATS in blk_mq_need_time_stamp() Hou Tao
@ 2019-05-25  5:12 ` Hou Tao
  2 siblings, 0 replies; 8+ messages in thread
From: Hou Tao @ 2019-05-25  5:12 UTC (permalink / raw)
  To: axboe, linux-block; +Cc: osandov, ming.lei

ping ?

On 2019/5/21 15:59, Hou Tao wrote:
> The first patch fixes the problem that there is no sample in
> /sys/kernel/debug/block/nvmeXn1/poll_stat and hybrid poll may
> don't work as expected. The second patch tries to ensure
> the latency accounting for block stats will work normally
> even when iostat is disabled.
> 
> Comments are wecome.
> 
> Regard,
> Tao
> 
> Hou Tao (2):
>   block: make rq sector size accessible for block stats
>   block: also check RQF_STATS in blk_mq_need_time_stamp()
> 
>  block/blk-mq.c         | 17 ++++++++---------
>  block/blk-throttle.c   |  3 ++-
>  include/linux/blkdev.h | 15 ++++++++++++---
>  3 files changed, 22 insertions(+), 13 deletions(-)
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] block: make rq sector size accessible for block stats
  2019-05-21  7:59 ` [PATCH 1/2] block: make rq sector size accessible " Hou Tao
@ 2019-05-25 11:46   ` Pavel Begunkov
  2019-05-25 13:52     ` Pavel Begunkov
  2019-05-27 11:39     ` Hou Tao
  0 siblings, 2 replies; 8+ messages in thread
From: Pavel Begunkov @ 2019-05-25 11:46 UTC (permalink / raw)
  To: Hou Tao, axboe, linux-block; +Cc: osandov, ming.lei

[-- Attachment #1.1: Type: text/plain, Size: 5203 bytes --]

That's the same bug I posted a patch about 3+ weeks ago. No answer yet,
however.
https://www.spinics.net/lists/linux-block/msg40049.html

I think putting time sampling at __blk_mq_end_request() won't give
sufficient precision for more sophisticated hybrid polling, because path
__blk_mq_complete_request() -> __blk_mq_end_request() adds a lot of
overhead (redirect rq to another cpu, dma unmap, etc). That's the case
for mentioned patchset
https://www.spinics.net/lists/linux-block/msg40044.html

It works OK for 50-80us requests (e.g. SSD read), but is not good enough
for 20us and less (e.g. SSD write). I think it would be better to save
the time in advance, and use it later.

By the way, it seems, that __blk_mq_end_request() is really better place
to record stats, as not all drivers use __blk_mq_complete_request(),
even though they don't implement blk_poll().


On 21/05/2019 10:59, Hou Tao wrote:
> Currently rq->data_len will be decreased by partial completion or
> zeroed by completion, so when blk_stat_add() is invoked, data_len
> will be zero and there will never be samples in poll_cb because
> blk_mq_poll_stats_bkt() will return -1 if data_len is zero.
> 
> We could move blk_stat_add() back to __blk_mq_complete_request(),
> but that would make the effort of trying to call ktime_get_ns()
> once in vain. Instead we can reuse throtl_size field, and use
> it for both block stats and block throttle, and adjust the
> logic in blk_mq_poll_stats_bkt() accordingly.
> 
> Fixes: 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()")
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  block/blk-mq.c         | 11 +++++------
>  block/blk-throttle.c   |  3 ++-
>  include/linux/blkdev.h | 15 ++++++++++++---
>  3 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 08a6248d8536..4d1462172f0f 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
>  
>  static int blk_mq_poll_stats_bkt(const struct request *rq)
>  {
> -	int ddir, bytes, bucket;
> +	int ddir, sectors, bucket;
>  
>  	ddir = rq_data_dir(rq);
> -	bytes = blk_rq_bytes(rq);
> +	sectors = blk_rq_stats_sectors(rq);
>  
> -	bucket = ddir + 2*(ilog2(bytes) - 9);
> +	bucket = ddir + 2 * ilog2(sectors);
>  
>  	if (bucket < 0)
>  		return -1;
> @@ -329,6 +329,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>  	else
>  		rq->start_time_ns = 0;
>  	rq->io_start_time_ns = 0;
> +	rq->stats_sectors = 0;
>  	rq->nr_phys_segments = 0;
>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>  	rq->nr_integrity_segments = 0;
> @@ -678,9 +679,7 @@ void blk_mq_start_request(struct request *rq)
>  
>  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
>  		rq->io_start_time_ns = ktime_get_ns();
> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
> -		rq->throtl_size = blk_rq_sectors(rq);
> -#endif
> +		rq->stats_sectors = blk_rq_sectors(rq);
>  		rq->rq_flags |= RQF_STATS;
>  		rq_qos_issue(q, rq);
>  	}
> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> index 1b97a73d2fb1..88459a4ac704 100644
> --- a/block/blk-throttle.c
> +++ b/block/blk-throttle.c
> @@ -2249,7 +2249,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
>  	struct request_queue *q = rq->q;
>  	struct throtl_data *td = q->td;
>  
> -	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
> +	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
> +			     time_ns >> 10);
>  }
>  
>  void blk_throtl_bio_endio(struct bio *bio)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 1aafeb923e7b..68a0841d3554 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -202,9 +202,12 @@ struct request {
>  #ifdef CONFIG_BLK_WBT
>  	unsigned short wbt_flags;
>  #endif
> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
> -	unsigned short throtl_size;
> -#endif
> +	/*
> +	 * rq sectors used for blk stats. It has the same value
> +	 * with blk_rq_sectors(rq), except that it never be zeroed
> +	 * by completion.
> +	 */
> +	unsigned short stats_sectors;
>  
>  	/*
>  	 * Number of scatter-gather DMA addr+len pairs after
> @@ -892,6 +895,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
>   * blk_rq_err_bytes()		: bytes left till the next error boundary
>   * blk_rq_sectors()		: sectors left in the entire request
>   * blk_rq_cur_sectors()		: sectors left in the current segment
> + * blk_rq_stats_sectors()	: sectors of the entire request used for stats
>   */
>  static inline sector_t blk_rq_pos(const struct request *rq)
>  {
> @@ -920,6 +924,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
>  	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
>  }
>  
> +static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
> +{
> +	return rq->stats_sectors;
> +}
> +
>  #ifdef CONFIG_BLK_DEV_ZONED
>  static inline unsigned int blk_rq_zone_no(struct request *rq)
>  {
> 

-- 
Yours sincerely,
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] block: make rq sector size accessible for block stats
  2019-05-25 11:46   ` Pavel Begunkov
@ 2019-05-25 13:52     ` Pavel Begunkov
  2019-05-27 11:39     ` Hou Tao
  1 sibling, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2019-05-25 13:52 UTC (permalink / raw)
  To: Hou Tao, axboe, linux-block; +Cc: osandov, ming.lei

[-- Attachment #1.1: Type: text/plain, Size: 5544 bytes --]

I mean this way
https://lore.kernel.org/linux-block/dd30f4d94aa19956ad4500b1177741fd071ec37f.1558791181.git.asml.silence@gmail.com/T/#u


On 25/05/2019 14:46, Pavel Begunkov wrote:
> That's the same bug I posted a patch about 3+ weeks ago. No answer yet,
> however.
> https://www.spinics.net/lists/linux-block/msg40049.html
> 
> I think putting time sampling at __blk_mq_end_request() won't give
> sufficient precision for more sophisticated hybrid polling, because path
> __blk_mq_complete_request() -> __blk_mq_end_request() adds a lot of
> overhead (redirect rq to another cpu, dma unmap, etc). That's the case
> for mentioned patchset
> https://www.spinics.net/lists/linux-block/msg40044.html
> 
> It works OK for 50-80us requests (e.g. SSD read), but is not good enough
> for 20us and less (e.g. SSD write). I think it would be better to save
> the time in advance, and use it later.
> 
> By the way, it seems, that __blk_mq_end_request() is really better place
> to record stats, as not all drivers use __blk_mq_complete_request(),
> even though they don't implement blk_poll().
> 
> 
> On 21/05/2019 10:59, Hou Tao wrote:
>> Currently rq->data_len will be decreased by partial completion or
>> zeroed by completion, so when blk_stat_add() is invoked, data_len
>> will be zero and there will never be samples in poll_cb because
>> blk_mq_poll_stats_bkt() will return -1 if data_len is zero.
>>
>> We could move blk_stat_add() back to __blk_mq_complete_request(),
>> but that would make the effort of trying to call ktime_get_ns()
>> once in vain. Instead we can reuse throtl_size field, and use
>> it for both block stats and block throttle, and adjust the
>> logic in blk_mq_poll_stats_bkt() accordingly.
>>
>> Fixes: 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()")
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>  block/blk-mq.c         | 11 +++++------
>>  block/blk-throttle.c   |  3 ++-
>>  include/linux/blkdev.h | 15 ++++++++++++---
>>  3 files changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 08a6248d8536..4d1462172f0f 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
>>  
>>  static int blk_mq_poll_stats_bkt(const struct request *rq)
>>  {
>> -	int ddir, bytes, bucket;
>> +	int ddir, sectors, bucket;
>>  
>>  	ddir = rq_data_dir(rq);
>> -	bytes = blk_rq_bytes(rq);
>> +	sectors = blk_rq_stats_sectors(rq);
>>  
>> -	bucket = ddir + 2*(ilog2(bytes) - 9);
>> +	bucket = ddir + 2 * ilog2(sectors);
>>  
>>  	if (bucket < 0)
>>  		return -1;
>> @@ -329,6 +329,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>>  	else
>>  		rq->start_time_ns = 0;
>>  	rq->io_start_time_ns = 0;
>> +	rq->stats_sectors = 0;
>>  	rq->nr_phys_segments = 0;
>>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>>  	rq->nr_integrity_segments = 0;
>> @@ -678,9 +679,7 @@ void blk_mq_start_request(struct request *rq)
>>  
>>  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
>>  		rq->io_start_time_ns = ktime_get_ns();
>> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>> -		rq->throtl_size = blk_rq_sectors(rq);
>> -#endif
>> +		rq->stats_sectors = blk_rq_sectors(rq);
>>  		rq->rq_flags |= RQF_STATS;
>>  		rq_qos_issue(q, rq);
>>  	}
>> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
>> index 1b97a73d2fb1..88459a4ac704 100644
>> --- a/block/blk-throttle.c
>> +++ b/block/blk-throttle.c
>> @@ -2249,7 +2249,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
>>  	struct request_queue *q = rq->q;
>>  	struct throtl_data *td = q->td;
>>  
>> -	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
>> +	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
>> +			     time_ns >> 10);
>>  }
>>  
>>  void blk_throtl_bio_endio(struct bio *bio)
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 1aafeb923e7b..68a0841d3554 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -202,9 +202,12 @@ struct request {
>>  #ifdef CONFIG_BLK_WBT
>>  	unsigned short wbt_flags;
>>  #endif
>> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>> -	unsigned short throtl_size;
>> -#endif
>> +	/*
>> +	 * rq sectors used for blk stats. It has the same value
>> +	 * with blk_rq_sectors(rq), except that it never be zeroed
>> +	 * by completion.
>> +	 */
>> +	unsigned short stats_sectors;
>>  
>>  	/*
>>  	 * Number of scatter-gather DMA addr+len pairs after
>> @@ -892,6 +895,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
>>   * blk_rq_err_bytes()		: bytes left till the next error boundary
>>   * blk_rq_sectors()		: sectors left in the entire request
>>   * blk_rq_cur_sectors()		: sectors left in the current segment
>> + * blk_rq_stats_sectors()	: sectors of the entire request used for stats
>>   */
>>  static inline sector_t blk_rq_pos(const struct request *rq)
>>  {
>> @@ -920,6 +924,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
>>  	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
>>  }
>>  
>> +static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
>> +{
>> +	return rq->stats_sectors;
>> +}
>> +
>>  #ifdef CONFIG_BLK_DEV_ZONED
>>  static inline unsigned int blk_rq_zone_no(struct request *rq)
>>  {
>>
> 

-- 
Yours sincerely,
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] block: make rq sector size accessible for block stats
  2019-05-25 11:46   ` Pavel Begunkov
  2019-05-25 13:52     ` Pavel Begunkov
@ 2019-05-27 11:39     ` Hou Tao
  2019-05-28 18:25       ` Pavel Begunkov
  1 sibling, 1 reply; 8+ messages in thread
From: Hou Tao @ 2019-05-27 11:39 UTC (permalink / raw)
  To: Pavel Begunkov, axboe, linux-block; +Cc: osandov, ming.lei

Hi,

On 2019/5/25 19:46, Pavel Begunkov wrote:
> That's the same bug I posted a patch about 3+ weeks ago. No answer yet,
> however.
> https://www.spinics.net/lists/linux-block/msg40049.html
> 
Sorry for missing that.

> I think putting time sampling at __blk_mq_end_request() won't give
> sufficient precision for more sophisticated hybrid polling, because path
> __blk_mq_complete_request() -> __blk_mq_end_request() adds a lot of
> overhead (redirect rq to another cpu, dma unmap, etc). That's the case
> for mentioned patchset
> https://www.spinics.net/lists/linux-block/msg40044.html
> 
> It works OK for 50-80us requests (e.g. SSD read), but is not good enough
> for 20us and less (e.g. SSD write). I think it would be better to save
> the time in advance, and use it later.
> 
Yes, getting the end time from blk_mq_complete_request() will be more accurate.

> By the way, it seems, that __blk_mq_end_request() is really better place
> to record stats, as not all drivers use __blk_mq_complete_request(),
> even though they don't implement blk_poll().
> 
And __blk_mq_complete_request() may be invoked multiple times (although no possible
for nvme device), but __blk_mq_end_request() is only invoked once.

Regards,
Tao

> 
> On 21/05/2019 10:59, Hou Tao wrote:
>> Currently rq->data_len will be decreased by partial completion or
>> zeroed by completion, so when blk_stat_add() is invoked, data_len
>> will be zero and there will never be samples in poll_cb because
>> blk_mq_poll_stats_bkt() will return -1 if data_len is zero.
>>
>> We could move blk_stat_add() back to __blk_mq_complete_request(),
>> but that would make the effort of trying to call ktime_get_ns()
>> once in vain. Instead we can reuse throtl_size field, and use
>> it for both block stats and block throttle, and adjust the
>> logic in blk_mq_poll_stats_bkt() accordingly.
>>
>> Fixes: 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()")
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>  block/blk-mq.c         | 11 +++++------
>>  block/blk-throttle.c   |  3 ++-
>>  include/linux/blkdev.h | 15 ++++++++++++---
>>  3 files changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 08a6248d8536..4d1462172f0f 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
>>  
>>  static int blk_mq_poll_stats_bkt(const struct request *rq)
>>  {
>> -	int ddir, bytes, bucket;
>> +	int ddir, sectors, bucket;
>>  
>>  	ddir = rq_data_dir(rq);
>> -	bytes = blk_rq_bytes(rq);
>> +	sectors = blk_rq_stats_sectors(rq);
>>  
>> -	bucket = ddir + 2*(ilog2(bytes) - 9);
>> +	bucket = ddir + 2 * ilog2(sectors);
>>  
>>  	if (bucket < 0)
>>  		return -1;
>> @@ -329,6 +329,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>>  	else
>>  		rq->start_time_ns = 0;
>>  	rq->io_start_time_ns = 0;
>> +	rq->stats_sectors = 0;
>>  	rq->nr_phys_segments = 0;
>>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>>  	rq->nr_integrity_segments = 0;
>> @@ -678,9 +679,7 @@ void blk_mq_start_request(struct request *rq)
>>  
>>  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
>>  		rq->io_start_time_ns = ktime_get_ns();
>> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>> -		rq->throtl_size = blk_rq_sectors(rq);
>> -#endif
>> +		rq->stats_sectors = blk_rq_sectors(rq);
>>  		rq->rq_flags |= RQF_STATS;
>>  		rq_qos_issue(q, rq);
>>  	}
>> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
>> index 1b97a73d2fb1..88459a4ac704 100644
>> --- a/block/blk-throttle.c
>> +++ b/block/blk-throttle.c
>> @@ -2249,7 +2249,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
>>  	struct request_queue *q = rq->q;
>>  	struct throtl_data *td = q->td;
>>  
>> -	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
>> +	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
>> +			     time_ns >> 10);
>>  }
>>  
>>  void blk_throtl_bio_endio(struct bio *bio)
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 1aafeb923e7b..68a0841d3554 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -202,9 +202,12 @@ struct request {
>>  #ifdef CONFIG_BLK_WBT
>>  	unsigned short wbt_flags;
>>  #endif
>> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>> -	unsigned short throtl_size;
>> -#endif
>> +	/*
>> +	 * rq sectors used for blk stats. It has the same value
>> +	 * with blk_rq_sectors(rq), except that it never be zeroed
>> +	 * by completion.
>> +	 */
>> +	unsigned short stats_sectors;
>>  
>>  	/*
>>  	 * Number of scatter-gather DMA addr+len pairs after
>> @@ -892,6 +895,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
>>   * blk_rq_err_bytes()		: bytes left till the next error boundary
>>   * blk_rq_sectors()		: sectors left in the entire request
>>   * blk_rq_cur_sectors()		: sectors left in the current segment
>> + * blk_rq_stats_sectors()	: sectors of the entire request used for stats
>>   */
>>  static inline sector_t blk_rq_pos(const struct request *rq)
>>  {
>> @@ -920,6 +924,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
>>  	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
>>  }
>>  
>> +static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
>> +{
>> +	return rq->stats_sectors;
>> +}
>> +
>>  #ifdef CONFIG_BLK_DEV_ZONED
>>  static inline unsigned int blk_rq_zone_no(struct request *rq)
>>  {
>>
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] block: make rq sector size accessible for block stats
  2019-05-27 11:39     ` Hou Tao
@ 2019-05-28 18:25       ` Pavel Begunkov
  0 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2019-05-28 18:25 UTC (permalink / raw)
  To: Hou Tao, axboe, linux-block; +Cc: osandov, ming.lei

[-- Attachment #1.1: Type: text/plain, Size: 6239 bytes --]

On 27/05/2019 14:39, Hou Tao wrote:
> Hi,
> 
> On 2019/5/25 19:46, Pavel Begunkov wrote:
>> That's the same bug I posted a patch about 3+ weeks ago. No answer yet,
>> however.
>> https://www.spinics.net/lists/linux-block/msg40049.html
>>
> Sorry for missing that.
Anyway I haven't fixed it properly there.

> 
>> I think putting time sampling at __blk_mq_end_request() won't give
>> sufficient precision for more sophisticated hybrid polling, because path
>> __blk_mq_complete_request() -> __blk_mq_end_request() adds a lot of
>> overhead (redirect rq to another cpu, dma unmap, etc). That's the case
>> for mentioned patchset
>> https://www.spinics.net/lists/linux-block/msg40044.html
>>
>> It works OK for 50-80us requests (e.g. SSD read), but is not good enough
>> for 20us and less (e.g. SSD write). I think it would be better to save
>> the time in advance, and use it later.
>>
> Yes, getting the end time from blk_mq_complete_request() will be more accurate.
What bothers me, if we sample time there, could we use it in throttling?
I'm not sure what it exactly expects.

> 
>> By the way, it seems, that __blk_mq_end_request() is really better place
>> to record stats, as not all drivers use __blk_mq_complete_request(),
>> even though they don't implement blk_poll().
>>
> And __blk_mq_complete_request() may be invoked multiple times (although no possible
> for nvme device), but __blk_mq_end_request() is only invoked once.
Doubt it, that would be rather strange to *complete* a request multiple
times. Need to check, but so far I only seen drivers either using it
once or calling blk_mq_end_request() directly.

> 
> Regards,
> Tao
> 
>>
>> On 21/05/2019 10:59, Hou Tao wrote:
>>> Currently rq->data_len will be decreased by partial completion or
>>> zeroed by completion, so when blk_stat_add() is invoked, data_len
>>> will be zero and there will never be samples in poll_cb because
>>> blk_mq_poll_stats_bkt() will return -1 if data_len is zero.
>>>
>>> We could move blk_stat_add() back to __blk_mq_complete_request(),
>>> but that would make the effort of trying to call ktime_get_ns()
>>> once in vain. Instead we can reuse throtl_size field, and use
>>> it for both block stats and block throttle, and adjust the
>>> logic in blk_mq_poll_stats_bkt() accordingly.
>>>
>>> Fixes: 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()")
>>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>>> ---
>>>  block/blk-mq.c         | 11 +++++------
>>>  block/blk-throttle.c   |  3 ++-
>>>  include/linux/blkdev.h | 15 ++++++++++++---
>>>  3 files changed, 19 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>>> index 08a6248d8536..4d1462172f0f 100644
>>> --- a/block/blk-mq.c
>>> +++ b/block/blk-mq.c
>>> @@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
>>>  
>>>  static int blk_mq_poll_stats_bkt(const struct request *rq)
>>>  {
>>> -	int ddir, bytes, bucket;
>>> +	int ddir, sectors, bucket;
>>>  
>>>  	ddir = rq_data_dir(rq);
>>> -	bytes = blk_rq_bytes(rq);
>>> +	sectors = blk_rq_stats_sectors(rq);
>>>  
>>> -	bucket = ddir + 2*(ilog2(bytes) - 9);
>>> +	bucket = ddir + 2 * ilog2(sectors);
>>>  
>>>  	if (bucket < 0)
>>>  		return -1;
>>> @@ -329,6 +329,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>>>  	else
>>>  		rq->start_time_ns = 0;
>>>  	rq->io_start_time_ns = 0;
>>> +	rq->stats_sectors = 0;
>>>  	rq->nr_phys_segments = 0;
>>>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>>>  	rq->nr_integrity_segments = 0;
>>> @@ -678,9 +679,7 @@ void blk_mq_start_request(struct request *rq)
>>>  
>>>  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
>>>  		rq->io_start_time_ns = ktime_get_ns();
>>> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>>> -		rq->throtl_size = blk_rq_sectors(rq);
>>> -#endif
>>> +		rq->stats_sectors = blk_rq_sectors(rq);
>>>  		rq->rq_flags |= RQF_STATS;
>>>  		rq_qos_issue(q, rq);
>>>  	}
>>> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
>>> index 1b97a73d2fb1..88459a4ac704 100644
>>> --- a/block/blk-throttle.c
>>> +++ b/block/blk-throttle.c
>>> @@ -2249,7 +2249,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
>>>  	struct request_queue *q = rq->q;
>>>  	struct throtl_data *td = q->td;
>>>  
>>> -	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
>>> +	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
>>> +			     time_ns >> 10);
>>>  }
>>>  
>>>  void blk_throtl_bio_endio(struct bio *bio)
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 1aafeb923e7b..68a0841d3554 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -202,9 +202,12 @@ struct request {
>>>  #ifdef CONFIG_BLK_WBT
>>>  	unsigned short wbt_flags;
>>>  #endif
>>> -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>>> -	unsigned short throtl_size;
>>> -#endif
>>> +	/*
>>> +	 * rq sectors used for blk stats. It has the same value
>>> +	 * with blk_rq_sectors(rq), except that it never be zeroed
>>> +	 * by completion.
>>> +	 */
>>> +	unsigned short stats_sectors;
>>>  
>>>  	/*
>>>  	 * Number of scatter-gather DMA addr+len pairs after
>>> @@ -892,6 +895,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
>>>   * blk_rq_err_bytes()		: bytes left till the next error boundary
>>>   * blk_rq_sectors()		: sectors left in the entire request
>>>   * blk_rq_cur_sectors()		: sectors left in the current segment
>>> + * blk_rq_stats_sectors()	: sectors of the entire request used for stats
>>>   */
>>>  static inline sector_t blk_rq_pos(const struct request *rq)
>>>  {
>>> @@ -920,6 +924,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
>>>  	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
>>>  }
>>>  
>>> +static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
>>> +{
>>> +	return rq->stats_sectors;
>>> +}
>>> +
>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>  static inline unsigned int blk_rq_zone_no(struct request *rq)
>>>  {
>>>
>>
> 

-- 
Yours sincerely,
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, back to index

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-21  7:59 [PATCH 0/2] fixes for block stats Hou Tao
2019-05-21  7:59 ` [PATCH 1/2] block: make rq sector size accessible " Hou Tao
2019-05-25 11:46   ` Pavel Begunkov
2019-05-25 13:52     ` Pavel Begunkov
2019-05-27 11:39     ` Hou Tao
2019-05-28 18:25       ` Pavel Begunkov
2019-05-21  7:59 ` [PATCH 2/2] block: also check RQF_STATS in blk_mq_need_time_stamp() Hou Tao
2019-05-25  5:12 ` [PATCH 0/2] fixes for block stats Hou Tao

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org linux-block@archiver.kernel.org
	public-inbox-index linux-block


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/ public-inbox