From: Joonwoo Park <joonwoop@codeaurora.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	linux-kernel@vger.kernel.org, linux-arm-msm@vger.kernel.org,
	ohaugan@codeaurora.org, Joonwoo Park <joonwoop@codeaurora.org>
Subject: [PATCH v4] sched: fix incorrect wait time and wait count statistics
Date: Tue, 27 Oct 2015 21:46:53 -0700
Message-ID: <1446007613-6922-1-git-send-email-joonwoop@codeaurora.org>
In-Reply-To: <20151028024054.GA8839@codeaurora.org>

At present the scheduler resets a task's wait_start timestamp when the
task migrates to another rq.  This misleads the scheduler into
reporting less wait time than the task actually accrued, because the
time spent waiting prior to the migration is dropped, and into
reporting a higher wait count than actual, because the migration itself
is counted as a wait-end event.  Both errors are visible via trace or
/proc/<pid>/sched with CONFIG_SCHEDSTATS=y.
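
For illustration, with CONFIG_SCHEDSTATS=y the affected counters are
exported in /proc/<pid>/sched; a hypothetical reading for a task that
migrated once while still waiting might look like this (schedstats
field names; the values are made up):

  se.statistics.wait_sum    :        30.000000   (pre-migration wait omitted)
  se.statistics.wait_count  :                2   (migration counted as an extra wait end)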

Fix these statistics errors by carrying forward a migrating task's
wait time accrued prior to the migration, and by not counting the
migration itself as a wait-end event.

In order to determine whether a fair-class task is migrating, mark
task->on_rq with TASK_ON_RQ_MIGRATING while it is dequeued and
enqueued due to migration.
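
As a minimal userspace sketch of the arithmetic (not kernel code:
struct stats and the helpers below are simplified stand-ins for
sched_statistics and the patched update_stats_wait_{start,end}()), the
fix stashes the wait already elapsed at migration dequeue and rebases
wait_start against the destination rq's clock at enqueue:

#include <stdio.h>

typedef unsigned long long u64;

struct stats {
	u64 wait_start;
	u64 wait_sum;
	u64 wait_count;
};

/* Migration dequeue: stash the wait already elapsed instead of
 * treating the migration as a wait-end event. */
static void wait_end_migrating(struct stats *s, u64 src_clock)
{
	s->wait_start = src_clock - s->wait_start;
}

/* Migration enqueue: rebase wait_start against the destination rq's
 * clock so the stashed wait is carried forward.  (The real patch also
 * guards against the stashed delta exceeding the new clock.) */
static void wait_start_migrating(struct stats *s, u64 dst_clock)
{
	s->wait_start = dst_clock - s->wait_start;
}

/* Normal wait end: account one wait covering both sides of the
 * migration. */
static void wait_end(struct stats *s, u64 clock)
{
	s->wait_sum += clock - s->wait_start;
	s->wait_count++;
	s->wait_start = 0;
}

int main(void)
{
	struct stats s = { .wait_start = 100 };	/* waiting since t=100 on src rq */

	wait_end_migrating(&s, 130);	/* dequeued at src t=130: 30 stashed  */
	wait_start_migrating(&s, 500);	/* enqueued at dst t=500: start = 470 */
	wait_end(&s, 520);		/* picked to run at dst t=520         */

	/* Reports one wait of 50 (30 before + 20 after migration). */
	printf("wait_sum=%llu wait_count=%llu\n", s.wait_sum, s.wait_count);
	return 0;
}

Note that the source and destination rq clocks need not agree: since
only elapsed deltas are carried across the migration, the accounted
wait time is independent of any offset between the two clocks.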

To: Ingo Molnar <mingo@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
---
Changes in v2:
 * Set p->on_rq = TASK_ON_RQ_MIGRATING while doing the migration
   dequeue/enqueue, and check whether the task is migrating with
   task_on_rq_migrating().
Changes in v3:
 * Fixed "WARNING: CPU: 0 PID: 3 at kernel/sched/fair.c:260
   update_stats_wait_end+0x23/0x30()" caught by the Intel kernel test
   robot.
Changes in v4:
 * Made __migrate_swap_task() set p->on_rq = TASK_ON_RQ_MIGRATING.
 * Added a WARN_ON_ONCE() inside CONFIG_SCHED_DEBUG.
 * Added comments.
 * Cleaned up the ifdeffery.

 kernel/sched/core.c | 15 +++++++++++--
 kernel/sched/fair.c | 62 ++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bcd214e..1ddbabc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1069,8 +1069,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
 {
 	lockdep_assert_held(&rq->lock);
 
-	dequeue_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	dequeue_task(rq, p, 0);
 	set_task_cpu(p, new_cpu);
 	raw_spin_unlock(&rq->lock);
 
@@ -1078,8 +1078,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
 
 	raw_spin_lock(&rq->lock);
 	BUG_ON(task_cpu(p) != new_cpu);
-	p->on_rq = TASK_ON_RQ_QUEUED;
 	enqueue_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
 
 	return rq;
@@ -1272,6 +1272,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
 			!p->on_rq);
 
+	/*
+	 * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
+	 * because schedstat_wait_{start,end} rebase migrating task's wait_start
+	 * time relying on p->on_rq.
+	 */
+	WARN_ON_ONCE(p->state == TASK_RUNNING &&
+		     p->sched_class == &fair_sched_class &&
+		     (p->on_rq && !task_on_rq_migrating(p)));
+
 #ifdef CONFIG_LOCKDEP
 	/*
 	 * The caller should hold either p->pi_lock or rq->lock, when changing
@@ -1308,9 +1317,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
 		src_rq = task_rq(p);
 		dst_rq = cpu_rq(cpu);
 
+		p->on_rq = TASK_ON_RQ_MIGRATING;
 		deactivate_task(src_rq, p, 0);
 		set_task_cpu(p, cpu);
 		activate_task(dst_rq, p, 0);
+		p->on_rq = TASK_ON_RQ_QUEUED;
 		check_preempt_curr(dst_rq, p, 0);
 	} else {
 		/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9a5e60f..ce7e869 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -737,41 +737,69 @@ static void update_curr_fair(struct rq *rq)
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
+#ifdef CONFIG_SCHEDSTATS
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
-}
+	u64 wait_start = rq_clock(rq_of(cfs_rq));
 
-/*
- * Task is being enqueued - update stats:
- */
-static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	/*
-	 * Are we enqueueing a waiting task? (for current tasks
-	 * a dequeue/enqueue event is a NOP)
-	 */
-	if (se != cfs_rq->curr)
-		update_stats_wait_start(cfs_rq, se);
+	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+	    likely(wait_start > se->statistics.wait_start))
+		wait_start -= se->statistics.wait_start;
+
+	schedstat_set(se->statistics.wait_start, wait_start);
 }
 
 static void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	if (entity_is_task(se) && task_on_rq_migrating(task_of(se))) {
+		/*
+		 * Preserve migrating task's wait time so wait_start time stamp
+		 * can be adjusted to accumulate wait time prior to migration.
+		 */
+		schedstat_set(se->statistics.wait_start,
+			      rq_clock(rq_of(cfs_rq)) -
+			      se->statistics.wait_start);
+		return;
+	}
+
 	schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
 			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
 	schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
 	schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
 			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-#ifdef CONFIG_SCHEDSTATS
+
 	if (entity_is_task(se)) {
 		trace_sched_stat_wait(task_of(se),
 			rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
 	}
-#endif
 	schedstat_set(se->statistics.wait_start, 0);
 }
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+#endif
+
+/*
+ * Task is being enqueued - update stats:
+ */
+static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	/*
+	 * Are we enqueueing a waiting task? (for current tasks
+	 * a dequeue/enqueue event is a NOP)
+	 */
+	if (se != cfs_rq->curr)
+		update_stats_wait_start(cfs_rq, se);
+}
 
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -5656,8 +5684,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 {
 	lockdep_assert_held(&env->src_rq->lock);
 
-	deactivate_task(env->src_rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
+	deactivate_task(env->src_rq, p, 0);
 	set_task_cpu(p, env->dst_cpu);
 }
 
@@ -5790,8 +5818,8 @@ static void attach_task(struct rq *rq, struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 
 	BUG_ON(task_rq(p) != rq);
-	p->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_QUEUED;
 	check_preempt_curr(rq, p, 0);
 }
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


Thread overview: 13+ messages
2015-10-25  5:23 [PATCH] sched: fix incorrect wait time and wait count statistics Joonwoo Park
2015-10-25 10:08 ` Peter Zijlstra
2015-10-25 10:26 ` Peter Zijlstra
2015-10-27  1:44   ` Joonwoo Park
2015-10-27 12:57     ` Peter Zijlstra
2015-10-28  2:40       ` Joonwoo Park
2015-10-28  4:46         ` Joonwoo Park [this message]
2015-11-06 13:57           ` [PATCH v4] " Peter Zijlstra
2015-11-07  2:41             ` Joonwoo Park
2015-11-09 10:32               ` Peter Zijlstra
2015-11-13  3:38                 ` [PATCH v5] " Joonwoo Park
2015-11-23 16:20                   ` [tip:sched/core] sched/core: Fix " tip-bot for Joonwoo Park
2015-10-27 19:17 ` [PATCH v3] sched: fix " Joonwoo Park
