All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org, kernel-team@fb.com, newella@fb.com,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 03/27] blk-iocost: use local[64]_t for percpu stat
Date: Tue,  1 Sep 2020 14:52:33 -0400	[thread overview]
Message-ID: <20200901185257.645114-4-tj@kernel.org> (raw)
In-Reply-To: <20200901185257.645114-1-tj@kernel.org>

blk-iocost has been reading percpu stat counters from remote cpus which on
some archs can lead to torn reads on really rare occasions. Use local[64]_t
for those counters.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/blk-iocost.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index d37b55db2409..e2266e7692b4 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -179,6 +179,8 @@
 #include <linux/parser.h>
 #include <linux/sched/signal.h>
 #include <linux/blk-cgroup.h>
+#include <asm/local.h>
+#include <asm/local64.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 #include "blk-wbt.h"
@@ -373,8 +375,8 @@ struct ioc_params {
 };
 
 struct ioc_missed {
-	u32				nr_met;
-	u32				nr_missed;
+	local_t				nr_met;
+	local_t				nr_missed;
 	u32				last_met;
 	u32				last_missed;
 };
@@ -382,7 +384,7 @@ struct ioc_missed {
 struct ioc_pcpu_stat {
 	struct ioc_missed		missed[2];
 
-	u64				rq_wait_ns;
+	local64_t			rq_wait_ns;
 	u64				last_rq_wait_ns;
 };
 
@@ -1278,8 +1280,8 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
 		u64 this_rq_wait_ns;
 
 		for (rw = READ; rw <= WRITE; rw++) {
-			u32 this_met = READ_ONCE(stat->missed[rw].nr_met);
-			u32 this_missed = READ_ONCE(stat->missed[rw].nr_missed);
+			u32 this_met = local_read(&stat->missed[rw].nr_met);
+			u32 this_missed = local_read(&stat->missed[rw].nr_missed);
 
 			nr_met[rw] += this_met - stat->missed[rw].last_met;
 			nr_missed[rw] += this_missed - stat->missed[rw].last_missed;
@@ -1287,7 +1289,7 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
 			stat->missed[rw].last_missed = this_missed;
 		}
 
-		this_rq_wait_ns = READ_ONCE(stat->rq_wait_ns);
+		this_rq_wait_ns = local64_read(&stat->rq_wait_ns);
 		rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns;
 		stat->last_rq_wait_ns = this_rq_wait_ns;
 	}
@@ -1908,6 +1910,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
 static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
 {
 	struct ioc *ioc = rqos_to_ioc(rqos);
+	struct ioc_pcpu_stat *ccs;
 	u64 on_q_ns, rq_wait_ns, size_nsec;
 	int pidx, rw;
 
@@ -1931,13 +1934,17 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
 	rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
 	size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
 
+	ccs = get_cpu_ptr(ioc->pcpu_stat);
+
 	if (on_q_ns <= size_nsec ||
 	    on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
-		this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
+		local_inc(&ccs->missed[rw].nr_met);
 	else
-		this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
+		local_inc(&ccs->missed[rw].nr_missed);
+
+	local64_add(rq_wait_ns, &ccs->rq_wait_ns);
 
-	this_cpu_add(ioc->pcpu_stat->rq_wait_ns, rq_wait_ns);
+	put_cpu_ptr(ccs);
 }
 
 static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos)
@@ -1977,7 +1984,7 @@ static int blk_iocost_init(struct request_queue *q)
 {
 	struct ioc *ioc;
 	struct rq_qos *rqos;
-	int ret;
+	int i, cpu, ret;
 
 	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
 	if (!ioc)
@@ -1989,6 +1996,16 @@ static int blk_iocost_init(struct request_queue *q)
 		return -ENOMEM;
 	}
 
+	for_each_possible_cpu(cpu) {
+		struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu);
+
+		for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) {
+			local_set(&ccs->missed[i].nr_met, 0);
+			local_set(&ccs->missed[i].nr_missed, 0);
+		}
+		local64_set(&ccs->rq_wait_ns, 0);
+	}
+
 	rqos = &ioc->rqos;
 	rqos->id = RQ_QOS_COST;
 	rqos->ops = &ioc_rqos_ops;
-- 
2.26.2


WARNING: multiple messages have this Message-ID (diff)
From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org
Cc: linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	kernel-team-b10kYP2dOMg@public.gmane.org,
	newella-b10kYP2dOMg@public.gmane.org,
	Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Subject: [PATCH 03/27] blk-iocost: use local[64]_t for percpu stat
Date: Tue,  1 Sep 2020 14:52:33 -0400	[thread overview]
Message-ID: <20200901185257.645114-4-tj@kernel.org> (raw)
In-Reply-To: <20200901185257.645114-1-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

blk-iocost has been reading percpu stat counters from remote cpus which on
some archs can lead to torn reads on really rare occasions. Use local[64]_t
for those counters.

Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 block/blk-iocost.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index d37b55db2409..e2266e7692b4 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -179,6 +179,8 @@
 #include <linux/parser.h>
 #include <linux/sched/signal.h>
 #include <linux/blk-cgroup.h>
+#include <asm/local.h>
+#include <asm/local64.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 #include "blk-wbt.h"
@@ -373,8 +375,8 @@ struct ioc_params {
 };
 
 struct ioc_missed {
-	u32				nr_met;
-	u32				nr_missed;
+	local_t				nr_met;
+	local_t				nr_missed;
 	u32				last_met;
 	u32				last_missed;
 };
@@ -382,7 +384,7 @@ struct ioc_missed {
 struct ioc_pcpu_stat {
 	struct ioc_missed		missed[2];
 
-	u64				rq_wait_ns;
+	local64_t			rq_wait_ns;
 	u64				last_rq_wait_ns;
 };
 
@@ -1278,8 +1280,8 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
 		u64 this_rq_wait_ns;
 
 		for (rw = READ; rw <= WRITE; rw++) {
-			u32 this_met = READ_ONCE(stat->missed[rw].nr_met);
-			u32 this_missed = READ_ONCE(stat->missed[rw].nr_missed);
+			u32 this_met = local_read(&stat->missed[rw].nr_met);
+			u32 this_missed = local_read(&stat->missed[rw].nr_missed);
 
 			nr_met[rw] += this_met - stat->missed[rw].last_met;
 			nr_missed[rw] += this_missed - stat->missed[rw].last_missed;
@@ -1287,7 +1289,7 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
 			stat->missed[rw].last_missed = this_missed;
 		}
 
-		this_rq_wait_ns = READ_ONCE(stat->rq_wait_ns);
+		this_rq_wait_ns = local64_read(&stat->rq_wait_ns);
 		rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns;
 		stat->last_rq_wait_ns = this_rq_wait_ns;
 	}
@@ -1908,6 +1910,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
 static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
 {
 	struct ioc *ioc = rqos_to_ioc(rqos);
+	struct ioc_pcpu_stat *ccs;
 	u64 on_q_ns, rq_wait_ns, size_nsec;
 	int pidx, rw;
 
@@ -1931,13 +1934,17 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
 	rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
 	size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
 
+	ccs = get_cpu_ptr(ioc->pcpu_stat);
+
 	if (on_q_ns <= size_nsec ||
 	    on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
-		this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
+		local_inc(&ccs->missed[rw].nr_met);
 	else
-		this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
+		local_inc(&ccs->missed[rw].nr_missed);
+
+	local64_add(rq_wait_ns, &ccs->rq_wait_ns);
 
-	this_cpu_add(ioc->pcpu_stat->rq_wait_ns, rq_wait_ns);
+	put_cpu_ptr(ccs);
 }
 
 static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos)
@@ -1977,7 +1984,7 @@ static int blk_iocost_init(struct request_queue *q)
 {
 	struct ioc *ioc;
 	struct rq_qos *rqos;
-	int ret;
+	int i, cpu, ret;
 
 	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
 	if (!ioc)
@@ -1989,6 +1996,16 @@ static int blk_iocost_init(struct request_queue *q)
 		return -ENOMEM;
 	}
 
+	for_each_possible_cpu(cpu) {
+		struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu);
+
+		for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) {
+			local_set(&ccs->missed[i].nr_met, 0);
+			local_set(&ccs->missed[i].nr_missed, 0);
+		}
+		local64_set(&ccs->rq_wait_ns, 0);
+	}
+
 	rqos = &ioc->rqos;
 	rqos->id = RQ_QOS_COST;
 	rqos->ops = &ioc_rqos_ops;
-- 
2.26.2


  parent reply	other threads:[~2020-09-01 18:56 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-01 18:52 [PATCHSET for-5.10/block] blk-iocost: iocost: improve donation, debt and excess handling Tejun Heo
2020-09-01 18:52 ` [PATCH 01/27] blk-iocost: ioc_pd_free() shouldn't assume irq disabled Tejun Heo
2020-09-01 18:52 ` [PATCH 02/27] blk-stat: make q->stats->lock irqsafe Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` Tejun Heo [this message]
2020-09-01 18:52   ` [PATCH 03/27] blk-iocost: use local[64]_t for percpu stat Tejun Heo
2020-11-20 21:51   ` Stafford Horne
2020-11-20 22:13     ` Tejun Heo
2020-11-20 22:13       ` Tejun Heo
2020-09-01 18:52 ` [PATCH 04/27] blk-iocost: rename propagate_active_weights() to propagate_weights() Tejun Heo
2020-09-01 18:52 ` [PATCH 05/27] blk-iocost: clamp inuse and skip noops in __propagate_weights() Tejun Heo
2020-09-01 18:52 ` [PATCH 06/27] blk-iocost: move iocg_kick_delay() above iocg_kick_waitq() Tejun Heo
2020-09-01 18:52 ` [PATCH 07/27] blk-iocost: make iocg_kick_waitq() call iocg_kick_delay() after paying debt Tejun Heo
2020-09-01 18:52 ` [PATCH 08/27] blk-iocost: s/HWEIGHT_WHOLE/WEIGHT_ONE/g Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 09/27] blk-iocost: use WEIGHT_ONE based fixed point number for weights Tejun Heo
2020-09-01 18:52 ` [PATCH 10/27] blk-iocost: make ioc_now->now and ioc->period_at 64bit Tejun Heo
2020-09-01 18:52 ` [PATCH 11/27] blk-iocost: streamline vtime margin and timer slack handling Tejun Heo
2020-09-01 18:52 ` [PATCH 12/27] blk-iocost: grab ioc->lock for debt handling Tejun Heo
2020-09-01 18:52 ` [PATCH 13/27] blk-iocost: add absolute usage stat Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 14/27] blk-iocost: calculate iocg->usages[] from iocg->local_stat.usage_us Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 15/27] blk-iocost: replace iocg->has_surplus with ->surplus_list Tejun Heo
2020-09-01 18:52 ` [PATCH 16/27] blk-iocost: decouple vrate adjustment from surplus transfers Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 17/27] blk-iocost: restructure surplus donation logic Tejun Heo
2020-09-01 18:52 ` [PATCH 18/27] blk-iocost: implement Andy's method for donation weight updates Tejun Heo
2020-09-01 18:52 ` [PATCH 19/27] blk-iocost: revamp donation amount determination Tejun Heo
2020-09-01 18:52 ` [PATCH 20/27] blk-iocost: revamp in-period donation snapbacks Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 21/27] blk-iocost: revamp debt handling Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 22/27] blk-iocost: implement delay adjustment hysteresis Tejun Heo
2020-09-01 18:52 ` [PATCH 23/27] blk-iocost: halve debts if device stays idle Tejun Heo
2020-09-01 18:52 ` [PATCH 24/27] blk-iocost: implement vtime loss compensation Tejun Heo
2020-09-01 18:52 ` [PATCH 25/27] blk-iocost: restore inuse update tracepoints Tejun Heo
2020-09-01 18:52 ` [PATCH 26/27] blk-iocost: add three debug stat - cost.wait, indebt and indelay Tejun Heo
2020-09-01 18:52   ` Tejun Heo
2020-09-01 18:52 ` [PATCH 27/27] blk-iocost: update iocost_monitor.py Tejun Heo
2020-09-01 22:57 ` [PATCHSET for-5.10/block] blk-iocost: iocost: improve donation, debt and excess handling Jens Axboe
2020-09-01 22:57   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200901185257.645114-4-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=kernel-team@fb.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=newella@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.