linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: Nick Piggin <piggin@cyberone.com.au>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	Anton Blanchard <anton@samba.org>, Ingo Molnar <mingo@redhat.com>,
	"Martin J. Bligh" <mbligh@aracnet.com>,
	"Nakajima, Jun" <jun.nakajima@intel.com>,
	Mark Wong <markw@osdl.org>
Subject: Re: [CFT][RFC] HT scheduler
Date: Mon, 15 Dec 2003 16:53:15 +1100	[thread overview]
Message-ID: <20031215060838.BF3D32C257@lists.samba.org> (raw)
In-Reply-To: Your message of "Sat, 13 Dec 2003 17:43:35 +1100." <3FDAB517.4000309@cyberone.com.au>

In message <3FDAB517.4000309@cyberone.com.au> you write:
> Rusty Russell wrote:
> 
> >In message <3FD9679A.1020404@cyberone.com.au> you write:
> >
> >>Thanks for having a look Rusty. I'll try to convince you :)

Actually, having produced the patch, I've changed my mind.

While it was spiritually rewarding to separate "struct runqueue" into
the stuff which was to do with the runqueue, and the stuff which was
per-cpu but there because it was convenient, I'm not sure the churn is
worthwhile since we will want the rest of your stuff anyway.

It (and lots of other things) might become worthwhile if single
processors with HT become the de-facto standard.  For these, lots of
our assumptions about CONFIG_SMP, such as the desirability of per-cpu
data, become bogus.

A few things need work:

1) There's a race between sys_sched_setaffinity() and
   sched_migrate_task() (this is nothing to do with your patch).

2) Please change those #defines into an enum for idle (patch follows,
   untested but trivial)

3) conditional locking in load_balance is v. bad idea.

4) load_balance returns "(!failed && !balanced)", but callers stop
   calling it when it returns true.  Why not simply return "balanced",
   or at least "balanced && !failed"?

Untested patch for #2 below...
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

D: Change #defines to enum, and use it.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal working-2.6.0-test11-w26/kernel/sched.c working-2.6.0-test11-w26-enum/kernel/sched.c
--- working-2.6.0-test11-w26/kernel/sched.c	2003-12-15 16:31:15.000000000 +1100
+++ working-2.6.0-test11-w26-enum/kernel/sched.c	2003-12-15 16:38:49.000000000 +1100
@@ -1268,9 +1268,12 @@ out:
 	return pulled;
 }
 
-#define TYPE_NOIDLE	0
-#define TYPE_IDLE	1
-#define TYPE_NEWIDLE	2
+enum idle_type
+{
+	NOT_IDLE,
+	AM_IDLE,
+	NEWLY_IDLE,
+};
 
 /*
  * find_busiest_group finds and returns the busiest CPU group within the
@@ -1279,11 +1282,11 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *domain, int this_cpu,
-				unsigned long *imbalance, int idle)
+				unsigned long *imbalance, enum idle_type idle)
 {
 	unsigned long max_load, avg_load, total_load, this_load;
 	int modify, total_nr_cpus;
-	int package_idle = idle;
+	enum idle_type package_idle = idle;
 	struct sched_group *busiest = NULL, *group = domain->groups;
 
 	max_load = 0;
@@ -1299,7 +1302,7 @@ find_busiest_group(struct sched_domain *
 	 * statistics: its triggered by some value of nr_running (ie. 0).
 	 * Timer based balancing is a good statistic though.
 	 */
-	if (idle == TYPE_NEWIDLE)
+	if (idle == NEWLY_IDLE)
 		modify = 0;
 	else
 		modify = 1;
@@ -1318,7 +1321,7 @@ find_busiest_group(struct sched_domain *
 			if (local_group) {
 				load = get_high_cpu_load(i, modify);
 				if (!idle_cpu(i))
-					package_idle = 0;
+					package_idle = NOT_IDLE;
 			} else
 				load = get_low_cpu_load(i, modify);
 
@@ -1403,11 +1406,11 @@ static runqueue_t *find_busiest_queue(st
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
  *
- * Called with this_rq unlocked unless idle == TYPE_NEWIDLE, in which case
+ * Called with this_rq unlocked unless idle == NEWLY_IDLE, in which case
  * it is called with this_rq locked.
  */
 static int load_balance(int this_cpu, runqueue_t *this_rq,
-				struct sched_domain *domain, int idle)
+			struct sched_domain *domain, enum idle_type idle)
 {
 	struct sched_group *group;
 	runqueue_t *busiest = NULL;
@@ -1415,7 +1418,7 @@ static int load_balance(int this_cpu, ru
 	int balanced = 0, failed = 0;
 	int wake_mthread = 0;
 
-	if (idle != TYPE_NEWIDLE)
+	if (idle != NEWLY_IDLE)
 		spin_lock(&this_rq->lock);
 
 	group = find_busiest_group(domain, this_cpu, &imbalance, idle);
@@ -1456,7 +1459,7 @@ static int load_balance(int this_cpu, ru
 	spin_unlock(&busiest->lock);
 
 out:
-	if (idle != TYPE_NEWIDLE) {
+	if (idle != NEWLY_IDLE) {
 		spin_unlock(&this_rq->lock);
 
 		if (failed)
@@ -1495,7 +1498,7 @@ static inline void idle_balance(int this
 			break;
 
 		if (domain->flags & SD_FLAG_NEWIDLE) {
-			if (load_balance(this_cpu, this_rq, domain, TYPE_NEWIDLE)) {
+			if (load_balance(this_cpu, this_rq, domain, NEWLY_IDLE)) {
 				/* We've pulled tasks over so stop searching */
 				break;
 			}
@@ -1537,7 +1540,7 @@ static void active_load_balance(runqueue
 /* Don't have all balancing operations going off at once */
 #define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
 
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq, int idle)
+static void rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle)
 {
 	unsigned long j = jiffies + CPU_OFFSET(this_cpu);
 	struct sched_domain *domain = this_sched_domain();
@@ -1556,7 +1559,7 @@ static void rebalance_tick(int this_cpu,
 		if (!(j % modulo)) {
 			if (load_balance(this_cpu, this_rq, domain, idle)) {
 				/* We've pulled tasks over so no longer idle */
-				idle = 0;
+				idle = NOT_IDLE;
 			}
 		}
 
@@ -1567,7 +1570,7 @@ static void rebalance_tick(int this_cpu,
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int this_cpu, runqueue_t *this_rq, int idle)
+static inline void rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle)
 {
 }
 #endif
@@ -1621,7 +1624,7 @@ void scheduler_tick(int user_ticks, int 
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
-		rebalance_tick(cpu, rq, 1);
+		rebalance_tick(cpu, rq, AM_IDLE);
 		return;
 	}
 	if (TASK_NICE(p) > 0)
@@ -1703,7 +1706,7 @@ void scheduler_tick(int user_ticks, int 
 out_unlock:
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(cpu, rq, 0);
+	rebalance_tick(cpu, rq, NOT_IDLE);
 }
 
 void scheduling_functions_start_here(void) { }




  parent reply	other threads:[~2003-12-15  6:11 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-12-08  4:25 [PATCH][RFC] make cpu_sibling_map a cpumask_t Nick Piggin
2003-12-08 15:59 ` Anton Blanchard
2003-12-08 23:08   ` Nick Piggin
2003-12-09  0:14     ` Anton Blanchard
2003-12-11  4:25       ` [CFT][RFC] HT scheduler Nick Piggin
2003-12-11  7:24         ` Nick Piggin
2003-12-11  8:57           ` Nick Piggin
2003-12-11 11:52             ` William Lee Irwin III
2003-12-11 13:09               ` Nick Piggin
2003-12-11 13:23                 ` William Lee Irwin III
2003-12-11 13:30                   ` Nick Piggin
2003-12-11 13:32                     ` William Lee Irwin III
2003-12-11 15:30                       ` Nick Piggin
2003-12-11 15:38                         ` William Lee Irwin III
2003-12-11 15:51                           ` Nick Piggin
2003-12-11 15:56                             ` William Lee Irwin III
2003-12-11 16:37                               ` Nick Piggin
2003-12-11 16:40                                 ` William Lee Irwin III
2003-12-12  1:52                         ` [PATCH] improve rwsem scalability (was Re: [CFT][RFC] HT scheduler) Nick Piggin
2003-12-12  2:02                           ` Nick Piggin
2003-12-12  9:41                           ` Ingo Molnar
2003-12-13  0:07                             ` Nick Piggin
2003-12-14  0:44                               ` Nick Piggin
2003-12-17  5:27                                 ` Nick Piggin
2003-12-19 11:52                                   ` Nick Piggin
2003-12-19 15:06                                     ` Martin J. Bligh
2003-12-20  0:08                                       ` Nick Piggin
2003-12-12  0:58             ` [CFT][RFC] HT scheduler Rusty Russell
2003-12-11 10:01           ` Rhino
2003-12-11  8:14             ` Nick Piggin
2003-12-11 16:49               ` Rhino
2003-12-11 15:16                 ` Nick Piggin
2003-12-11 11:40             ` William Lee Irwin III
2003-12-11 17:05               ` Rhino
2003-12-11 15:17                 ` William Lee Irwin III
2003-12-11 16:28         ` Kevin P. Fleming
2003-12-11 16:41           ` Nick Piggin
2003-12-12  2:24         ` Rusty Russell
2003-12-12  7:00           ` Nick Piggin
2003-12-12  7:23             ` Rusty Russell
2003-12-13  6:43               ` Nick Piggin
2003-12-14  1:35                 ` bill davidsen
2003-12-14  2:18                   ` Nick Piggin
2003-12-14  4:32                     ` Jamie Lokier
2003-12-14  9:40                       ` Nick Piggin
2003-12-14 10:46                         ` Arjan van de Ven
2003-12-16 17:46                         ` Bill Davidsen
2003-12-16 18:22                       ` Linus Torvalds
2003-12-17  0:24                         ` Davide Libenzi
2003-12-17  0:41                           ` Linus Torvalds
2003-12-17  0:54                             ` Davide Libenzi
2003-12-16 17:34                     ` Bill Davidsen
2003-12-15  5:53                 ` Rusty Russell [this message]
2003-12-15 23:08                   ` Nick Piggin
2003-12-19  4:57                     ` Nick Piggin
2003-12-19  5:13                       ` Nick Piggin
2003-12-20  2:43                       ` Rusty Russell
2003-12-21  2:56                         ` Nick Piggin
2004-01-03 18:57                   ` Bill Davidsen
2003-12-15 20:21                 ` Zwane Mwaikambo
2003-12-15 23:20                   ` Nick Piggin
2003-12-16  0:11                     ` Zwane Mwaikambo
2003-12-12  8:59             ` Nick Piggin
2003-12-12 15:14               ` Martin J. Bligh
2003-12-08 19:44 ` [PATCH][RFC] make cpu_sibling_map a cpumask_t James Cleverdon
2003-12-08 20:38 ` Ingo Molnar
2003-12-08 20:51 ` Zwane Mwaikambo
2003-12-08 20:55   ` Ingo Molnar
2003-12-08 23:17     ` Nick Piggin
2003-12-08 23:36       ` Ingo Molnar
2003-12-08 23:58         ` Nick Piggin
2003-12-08 23:46 ` Rusty Russell
2003-12-09 13:36   ` Nick Piggin
2003-12-11 21:41     ` bill davidsen
     [not found] <20031213022038.300B22C2C1@lists.samba.org.suse.lists.linux.kernel>
     [not found] ` <3FDAB517.4000309@cyberone.com.au.suse.lists.linux.kernel>
     [not found]   ` <brgeo7$huv$1@gatekeeper.tmr.com.suse.lists.linux.kernel>
     [not found]     ` <3FDBC876.3020603@cyberone.com.au.suse.lists.linux.kernel>
     [not found]       ` <20031214043245.GC21241@mail.shareable.org.suse.lists.linux.kernel>
     [not found]         ` <3FDC3023.9030708@cyberone.com.au.suse.lists.linux.kernel>
     [not found]           ` <1071398761.5233.1.camel@laptop.fenrus.com.suse.lists.linux.kernel>
2003-12-14 16:26             ` [CFT][RFC] HT scheduler Andi Kleen
2003-12-14 16:54               ` Arjan van de Ven
     [not found] <200312161127.13691.habanero@us.ibm.com>
2003-12-16 17:37 ` Andrew Theurer
2003-12-17  2:41   ` Nick Piggin
2003-12-16 19:03 Nakajima, Jun
2003-12-17  0:38 Nakajima, Jun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20031215060838.BF3D32C257@lists.samba.org \
    --to=rusty@rustcorp.com.au \
    --cc=anton@samba.org \
    --cc=jun.nakajima@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=markw@osdl.org \
    --cc=mbligh@aracnet.com \
    --cc=mingo@redhat.com \
    --cc=piggin@cyberone.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).