All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Yinghai Lu <yinghai@kernel.org>
Cc: mingo@redhat.com, hpa@zytor.com, linux-kernel@vger.kernel.org,
	torvalds@linux-foundation.org, jes@sgi.com,
	jens.axboe@oracle.com, tglx@linutronix.de, mingo@elte.hu,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Arjan van de Ven <arjan@infradead.org>,
	linux-tip-commits@vger.kernel.org
Subject: [PATCH] sched: Avoid division by zero - really
Date: Thu, 27 Aug 2009 13:08:56 +0200	[thread overview]
Message-ID: <1251371336.18584.77.camel@twins> (raw)
In-Reply-To: <4A94FD58.8060207@kernel.org>

When re-computing the shares for each task group's cpu representation we
need the ratio of weight on each cpu vs the total weight of the sched
domain.

Since load-balancing is loosely (read: not) synchronized, the weight of
individual cpus can change between doing the sum and calculating the
ratio.

The previous patch dealt with only one of the race scenarios; this patch
sidesteps them all by saving a snapshot of all the individual cpu
weights, thereby always working on a consistent set.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched.c |   50 +++++++++++++++++++++++++++++---------------------
 1 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 0e76b17..4591054 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1515,30 +1515,29 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+struct update_shares_data {
+	unsigned long rq_weight[NR_CPUS];
+};
+
+static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
+
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
  * Calculate and set the cpu's group shares.
  */
-static void
-update_group_shares_cpu(struct task_group *tg, int cpu,
-			unsigned long sd_shares, unsigned long sd_rq_weight,
-			unsigned long sd_eff_weight)
+static void update_group_shares_cpu(struct task_group *tg, int cpu,
+				    unsigned long sd_shares,
+				    unsigned long sd_rq_weight,
+				    struct update_shares_data *usd)
 {
-	unsigned long rq_weight;
-	unsigned long shares;
+	unsigned long shares, rq_weight;
 	int boost = 0;
 
-	if (!tg->se[cpu])
-		return;
-
-	rq_weight = tg->cfs_rq[cpu]->rq_weight;
+	rq_weight = usd->rq_weight[cpu];
 	if (!rq_weight) {
 		boost = 1;
 		rq_weight = NICE_0_LOAD;
-		if (sd_rq_weight == sd_eff_weight)
-			sd_eff_weight += NICE_0_LOAD;
-		sd_rq_weight = sd_eff_weight;
 	}
 
 	/*
@@ -1555,6 +1554,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
+		tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
 		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1568,25 +1568,31 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long weight, rq_weight = 0, eff_weight = 0;
-	unsigned long shares = 0;
+	unsigned long weight, rq_weight = 0, shares = 0;
+	struct update_shares_data *usd;
 	struct sched_domain *sd = data;
+	unsigned long flags;
 	int i;
 
+	if (!tg->se[0])
+		return 0;
+
+	local_irq_save(flags);
+	usd = &__get_cpu_var(update_shares_data);
+
 	for_each_cpu(i, sched_domain_span(sd)) {
+		weight = tg->cfs_rq[i]->load.weight;
+		usd->rq_weight[i] = weight;
+
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
 		 * run here it will not get delayed by group starvation.
 		 */
-		weight = tg->cfs_rq[i]->load.weight;
-		tg->cfs_rq[i]->rq_weight = weight;
-		rq_weight += weight;
-
 		if (!weight)
 			weight = NICE_0_LOAD;
 
-		eff_weight += weight;
+		rq_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
@@ -1597,7 +1603,9 @@ static int tg_shares_up(struct task_group *tg, void *data)
 		shares = tg->shares;
 
 	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, eff_weight);
+		update_group_shares_cpu(tg, i, shares, rq_weight, usd);
+
+	local_irq_restore(flags);
 
 	return 0;
 }



  parent reply	other threads:[~2009-08-27 11:10 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-21 10:53 Latest Linus tree oopses on Nehalem box Jes Sorensen
2009-08-21 11:46 ` Ingo Molnar
2009-08-21 11:58   ` Peter Zijlstra
2009-08-21 14:42     ` [tip:sched/core] sched: Avoid division by zero tip-bot for Peter Zijlstra
2009-08-25 19:11       ` Peter Zijlstra
2009-08-26  9:16         ` Yinghai Lu
2009-08-26  9:25           ` Peter Zijlstra
2009-08-27 11:08           ` Peter Zijlstra [this message]
2009-08-27 12:19             ` [PATCH] sched: Avoid division by zero - really Eric Dumazet
2009-08-27 12:32               ` Peter Zijlstra
2009-08-28  6:30             ` [tip:sched/core] sched: Fix " tip-bot for Peter Zijlstra
2009-08-21 13:04   ` Latest Linus tree oopses on Nehalem box Jes Sorensen
2009-08-21 13:26     ` Ingo Molnar
2009-08-21 13:35       ` Jes Sorensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1251371336.18584.77.camel@twins \
    --to=a.p.zijlstra@chello.nl \
    --cc=arjan@infradead.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=hpa@zytor.com \
    --cc=jens.axboe@oracle.com \
    --cc=jes@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.