diff -urNp 2.6.0t1/kernel/sched.c 2.6.0t1-1cpufix/kernel/sched.c
--- 2.6.0t1/kernel/sched.c	2003-07-14 05:37:14.000000000 +0200
+++ 2.6.0t1-1cpufix/kernel/sched.c	2003-07-31 20:46:30.000000000 +0200
@@ -164,6 +164,7 @@ struct runqueue {
 	prio_array_t *active, *expired, arrays[2];
 	int prev_cpu_load[NR_CPUS];
 #ifdef CONFIG_NUMA
+	unsigned int nr_lb_failed;
 	atomic_t *node_nr_running;
 	int prev_node_load[MAX_NUMNODES];
 #endif
@@ -856,6 +857,35 @@ static int find_busiest_node(int this_no
 	return node;
 }
 
+/*
+ * Decide whether the scheduler should balance locally (inside the same node)
+ * or globally depending on the number of failed local balance attempts.
+ * The number of failed local balance attempts depends on the number of cpus
+ * in the current node. In case it's just one, go immediately for global
+ * balancing. On a busy cpu the number of retries is smaller.
+ */
+static inline unsigned long cpus_to_balance(int this_cpu, runqueue_t *this_rq)
+{
+	int node, retries, this_node = cpu_to_node(this_cpu);
+
+	retries = nr_cpus_node(this_node) - 1;
+	if (this_rq->curr != this_rq->idle)
+		retries >>= 1;
+	if (this_rq->nr_lb_failed >= retries) {
+		node = find_busiest_node(this_node);
+		this_rq->nr_lb_failed = 0;
+		if (node >= 0)
+			return (node_to_cpumask(node) | (1UL << this_cpu));
+	}
+	return node_to_cpumask(this_node);
+}
+
+#else /* !CONFIG_NUMA */
+
+static inline unsigned long cpus_to_balance(int this_cpu, runqueue_t *this_rq)
+{
+	return cpu_online_map;
+}
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_SMP
@@ -960,6 +990,12 @@ static inline runqueue_t *find_busiest_q
 		busiest = NULL;
 	}
 out:
+#ifdef CONFIG_NUMA
+	if (!busiest)
+		this_rq->nr_lb_failed++;
+	else
+		this_rq->nr_lb_failed = 0;
+#endif
 	return busiest;
 }
 
@@ -995,7 +1031,7 @@ static inline void pull_task(runqueue_t
  * We call this with the current runqueue locked,
  * irqs disabled.
  */
-static void load_balance(runqueue_t *this_rq, int idle, unsigned long cpumask)
+static void load_balance(runqueue_t *this_rq, int idle)
 {
 	int imbalance, idx, this_cpu = smp_processor_id();
 	runqueue_t *busiest;
@@ -1003,7 +1039,8 @@ static void load_balance(runqueue_t *thi
 	struct list_head *head, *curr;
 	task_t *tmp;
 
-	busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance, cpumask);
+	busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance,
+				     cpus_to_balance(this_cpu, this_rq));
 	if (!busiest)
 		goto out;
 
@@ -1085,29 +1122,9 @@ out:
  */
 #define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
 #define BUSY_REBALANCE_TICK (HZ/5 ?: 1)
-#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 5)
-#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2)
-
-#ifdef CONFIG_NUMA
-static void balance_node(runqueue_t *this_rq, int idle, int this_cpu)
-{
-	int node = find_busiest_node(cpu_to_node(this_cpu));
-	unsigned long cpumask, this_cpumask = 1UL << this_cpu;
-
-	if (node >= 0) {
-		cpumask = node_to_cpumask(node) | this_cpumask;
-		spin_lock(&this_rq->lock);
-		load_balance(this_rq, idle, cpumask);
-		spin_unlock(&this_rq->lock);
-	}
-}
-#endif
 
 static void rebalance_tick(runqueue_t *this_rq, int idle)
 {
-#ifdef CONFIG_NUMA
-	int this_cpu = smp_processor_id();
-#endif
 	unsigned long j = jiffies;
 
 	/*
@@ -1119,24 +1136,16 @@ static void rebalance_tick(runqueue_t *t
 	 * are not balanced.)
 	 */
 	if (idle) {
-#ifdef CONFIG_NUMA
-		if (!(j % IDLE_NODE_REBALANCE_TICK))
-			balance_node(this_rq, idle, this_cpu);
-#endif
 		if (!(j % IDLE_REBALANCE_TICK)) {
 			spin_lock(&this_rq->lock);
-			load_balance(this_rq, idle, cpu_to_node_mask(this_cpu));
+			load_balance(this_rq, idle);
 			spin_unlock(&this_rq->lock);
 		}
 		return;
 	}
-#ifdef CONFIG_NUMA
-	if (!(j % BUSY_NODE_REBALANCE_TICK))
-		balance_node(this_rq, idle, this_cpu);
-#endif
 	if (!(j % BUSY_REBALANCE_TICK)) {
 		spin_lock(&this_rq->lock);
-		load_balance(this_rq, idle, cpu_to_node_mask(this_cpu));
+		load_balance(this_rq, idle);
 		spin_unlock(&this_rq->lock);
 	}
 }
@@ -1306,7 +1315,7 @@ need_resched:
 pick_next_task:
 	if (unlikely(!rq->nr_running)) {
 #ifdef CONFIG_SMP
-		load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
+		load_balance(rq, 1);
 		if (rq->nr_running)
 			goto pick_next_task;
 #endif
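
For reference, here is a minimal user-space sketch (not part of the patch) of the retry heuristic that cpus_to_balance() applies: the retry budget is one less than the number of CPUs in the node, so a single-CPU node falls through to global balancing on the very first failed attempt, and a busy CPU halves its budget. The helper name retries_before_global() and the small driver are hypothetical, used only to show how the threshold scales.

/* Hypothetical illustration of the cpus_to_balance() retry threshold. */
#include <stdio.h>

/* Failed intra-node balance attempts tolerated before balancing globally. */
static int retries_before_global(int cpus_in_node, int cpu_is_busy)
{
	int retries = cpus_in_node - 1;	/* a 1-CPU node tolerates none */

	if (cpu_is_busy)
		retries >>= 1;		/* busy CPUs give up on local balancing sooner */
	return retries;
}

int main(void)
{
	int sizes[] = { 1, 2, 4, 8 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%d-CPU node: idle retries=%d, busy retries=%d\n",
		       sizes[i],
		       retries_before_global(sizes[i], 0),
		       retries_before_global(sizes[i], 1));
	return 0;
}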