* [PATCH 1/2] sched/fair: rearm hrtick timer when it's senseful
@ 2016-10-19  1:41 Joonwoo Park
From: Joonwoo Park @ 2016-10-19  1:41 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Joonwoo Park, Ingo Molnar, Thomas Gleixner, linux-kernel

When a new cfs task is enqueued, the fair scheduler rearms the current
cfs task's hrtick with an expiration time based on the decreased slice.
But the slice stops decreasing once it reaches sched_min_granularity.
Consequently the cfs scheduler also stops rearming the hrtick timer,
because the hrtick expiration time of the current cfs task no longer
changes.  This is a legitimate optimization, but there is a subtle
off-by-one in the 'if' condition, so at present the cfs scheduler stops
rearming the hrtick timer one task too early, which causes a subtle
unfairness.

 When sched_latency = 6ms, sched_min_granularity = 0.75ms,
 sched_nr_latency = 8:

   nr_run  slice    period
   1       6.00 ms  6 ms
   2       3.00 ms  6 ms
   3       2.00 ms  6 ms
   4       1.50 ms  6 ms
   5       1.20 ms  6 ms
   6       1.00 ms  6 ms
   7       0.85 ms  6 ms
   8       0.75 ms  6 ms
   9       0.75 ms  6.75 ms

The first time sched_slice becomes equal to sched_min_granularity is
when cfs_rq's nr_running becomes equal to sched_nr_latency.  Fix the
condition so that the hrtick is rearmed for nr_running up to and
including sched_nr_latency: with sched_nr_latency = 8 and nr_running =
8, the old check '8 < 8' wrongly skips the rearm even though the slice
has just shrunk to sched_min_granularity, whereas '8 <= 8' rearms as
intended.
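
For reference, below is a minimal stand-alone model of the slice
computation that reproduces the table above.  __sched_period() is
patterned on kernel/sched/fair.c of this era; per-entity load weights
are ignored so every task gets an equal share, and slice_for() is a
made-up helper for illustration, not a kernel function.

#include <stdio.h>

typedef unsigned long long u64;

/* Default tunables, in nanoseconds, matching the table above. */
static const u64 sysctl_sched_latency = 6000000ULL;         /* 6 ms    */
static const u64 sysctl_sched_min_granularity = 750000ULL;  /* 0.75 ms */
static const unsigned long sched_nr_latency = 8;

static u64 __sched_period(unsigned long nr_running)
{
	/* Stretch the period once the latency target can't be met. */
	if (nr_running > sched_nr_latency)
		return nr_running * sysctl_sched_min_granularity;
	return sysctl_sched_latency;
}

static u64 slice_for(unsigned long nr_running)
{
	/* Equal weights: each task gets an equal share of the period. */
	return __sched_period(nr_running) / nr_running;
}

int main(void)
{
	unsigned long nr;

	/* Prints nr_run, slice and period in microseconds. */
	for (nr = 1; nr <= 9; nr++)
		printf("%lu\t%llu us\t%llu us\n", nr,
		       slice_for(nr) / 1000, __sched_period(nr) / 1000);
	return 0;
}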

Cc: Ingo Molnar <mingo@redhat.com> 
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 71c08a8..f465448 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4507,7 +4507,7 @@ static void hrtick_update(struct rq *rq)
 	if (!hrtick_enabled(rq) || curr->sched_class != &fair_sched_class)
 		return;
 
-	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+	if (cfs_rq_of(&curr->se)->nr_running <= sched_nr_latency)
 		hrtick_start_fair(rq, curr);
 }
 #else /* !CONFIG_SCHED_HRTICK */
-- 
2.9.3


* [PATCH 2/2] sched/fair: avoid unnecessary hrtick rearm when it's possible
@ 2016-10-19  1:41 ` Joonwoo Park
From: Joonwoo Park @ 2016-10-19  1:41 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Joonwoo Park, Ingo Molnar, Thomas Gleixner, linux-kernel

At present, the scheduler always rearms the hrtick when nr_running is
low enough to matter.  We can also skip rearming the hrtick when the
newly enqueued or dequeued task is in a different cgroup than the
current task, because as long as the enqueue/dequeue didn't change the
nr_running of the current task's cfs_rq, the current task's slice won't
change.

Find the top-most ancestor cfs_rq whose nr_running changed due to the
enqueue/dequeue and rearm the hrtick only when the current task runs
under that cfs_rq, as illustrated by the example below.
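
As a worked example, take a hypothetical two-cgroup hierarchy (names
made up) where the current task runs in cgroup A and a new task is
enqueued in cgroup B:

  root cfs_rq      (ses of A and B; its nr_running is unchanged)
    A's cfs_rq     <- current task runs here
    B's cfs_rq     <- new task enqueued here, nr_running 1 -> 2

Walking up from the new task stops at B's cfs_rq because it already had
another runnable entity, so nothing above that level changed.  The
current task's se hierarchy never passes through B's cfs_rq, so its
slice is unchanged and the hrtick rearm can be skipped.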

A modified hackbench which places the sender and receiver groups in
separate parent and child cgroups showed a 13% reduction in hrtick
rearms with this optimization, while there was no measurable throughput
degradation when both sender and receiver groups run in the same
cgroup.

Cc: Ingo Molnar <mingo@redhat.com> 
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
---
 kernel/sched/fair.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f465448..2eb091b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4495,19 +4495,72 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 	}
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Check whether 'p's slice changed when 'pnew' was enqueued or dequeued.
+ */
+static bool task_needs_new_slice(struct task_struct *p,
+				 struct task_struct *pnew)
+{
+	struct cfs_rq *cfs_rq;
+	struct sched_entity *se;
+
+	SCHED_WARN_ON(task_cpu(p) != task_cpu(pnew));
+
+	se = &pnew->se;
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+		/*
+		 * We can stop walking up the hierarchy at the first ancestor
+		 * level that has more than 1 nr_running, because the
+		 * enqueue/dequeue of the new task doesn't change nr_running
+		 * from that level up through the root cfs_rq.
+		 */
+		if (cfs_rq->nr_running > 1)
+			break;
+	}
+
+	/*
+	 * The new task enqueue/dequeue ended up adding or removing an se
+	 * in the root cfs_rq.  All the ses now have a new slice.
+	 */
+	if (!se)
+		return true;
+
+	se = &p->se;
+	for_each_sched_entity(se) {
+		/*
+		 * All the ses under 'cfs_rq' now have a new slice.  Check
+		 * whether 'cfs_rq' is an ancestor of 'p'.
+		 */
+		if (cfs_rq == cfs_rq_of(se))
+			return true;
+	}
+
+	return false;
+}
+#else /* !CONFIG_FAIR_GROUP_SCHED */
+static inline bool
+task_needs_new_slice(struct task_struct *p, struct task_struct *pnew)
+{
+	return true;
+}
+#endif
+
 /*
  * called from enqueue/dequeue and updates the hrtick when the
- * current task is from our class and nr_running is low enough
- * to matter.
+ * current task is from our class, nr_running is low enough to matter and
+ * current task's slice can be changed by enqueue or dequeue of 'p'.
  */
-static void hrtick_update(struct rq *rq)
+static void hrtick_update(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;
 
 	if (!hrtick_enabled(rq) || curr->sched_class != &fair_sched_class)
 		return;
 
-	if (cfs_rq_of(&curr->se)->nr_running <= sched_nr_latency)
+	if (task_cfs_rq(curr)->nr_running <= sched_nr_latency &&
+	    task_needs_new_slice(curr, p))
 		hrtick_start_fair(rq, curr);
 }
 #else /* !CONFIG_SCHED_HRTICK */
@@ -4516,7 +4569,7 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 }
 
-static inline void hrtick_update(struct rq *rq)
+static inline void hrtick_update(struct rq *rq, struct task_struct *p)
 {
 }
 #endif
@@ -4573,7 +4626,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!se)
 		add_nr_running(rq, 1);
 
-	hrtick_update(rq);
+	hrtick_update(rq, p);
 }
 
 static void set_next_buddy(struct sched_entity *se);
@@ -4632,7 +4685,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!se)
 		sub_nr_running(rq, 1);
 
-	hrtick_update(rq);
+	hrtick_update(rq, p);
 }
 
 #ifdef CONFIG_SMP
-- 
2.9.3

