diff -urNp 2.5.37-node-affine/kernel/sched.c 2.5.37-node-affine-mon/kernel/sched.c
--- 2.5.37-node-affine/kernel/sched.c	Sun Sep 22 11:13:59 2002
+++ 2.5.37-node-affine-mon/kernel/sched.c	Sun Sep 22 11:29:26 2002
@@ -677,6 +677,175 @@ static inline unsigned int double_lock_b
 	return nr_running;
 }
 
+#define HISTORY_RING_SIZE 25
+/* load balancing history entry */
+struct lb_hist_entry {
+	unsigned long time;	/* jiffy */
+	int pid;		/* stolen task (0 if none) */
+	int busiest_cpu;	/* busiest RQ */
+};
+/* load balancing history ring */
+struct lb_hist_ring {
+	int curr;		/* current pointer */
+	struct lb_hist_entry data[HISTORY_RING_SIZE];
+} ____cacheline_aligned;
+/* per CPU history ring array */
+struct lb_hist_ring lb_ring[NR_CPUS];
+
+/* initial load balancing decision entry */
+struct ilb_hist_entry {
+	unsigned long time;	/* jiffy */
+	int pid;
+	int node;		/* selected homenode */
+	int load[NR_NODES];	/* node loads at decision time */
+};
+/* initial load balancing history ring */
+struct ilb_hist_ring {
+	int curr;		/* current pointer */
+	struct ilb_hist_entry data[HISTORY_RING_SIZE];
+} ____cacheline_aligned;
+/* per CPU history ring array */
+struct ilb_hist_ring ilb_ring[NR_CPUS];
+
+/* add entry to lb_ring */
+void lb_ring_add(int cpu, int pid, int busiest_cpu)
+{
+	int next = (lb_ring[cpu].curr + 1) % HISTORY_RING_SIZE;
+
+	lb_ring[cpu].data[next].time = jiffies;
+	lb_ring[cpu].data[next].pid = pid;
+	lb_ring[cpu].data[next].busiest_cpu = busiest_cpu;
+	lb_ring[cpu].curr = next;
+}
+
+/* add entry to ilb_ring */
+void ilb_ring_add(int cpu, int pid, int node, int *load)
+{
+	int i, next = (ilb_ring[cpu].curr + 1) % HISTORY_RING_SIZE;
+
+	ilb_ring[cpu].data[next].time = jiffies;
+	ilb_ring[cpu].data[next].pid = pid;
+	ilb_ring[cpu].data[next].node = node;
+	for (i = 0; i < NR_NODES; i++)
+		ilb_ring[cpu].data[next].load[i] = load[i];
+	ilb_ring[cpu].curr = next;
+}
+
+/* print load balancing history ring buffer */
+int lb_ring_read_proc(char *page, char **start, off_t off,
+		      int count, int *eof, void *data)
+{
+	int i, len, entry;
+	char *buff = page;
+	int cpu = (int)(long)data;
+
+	buff += sprintf(buff, "    tick     pid  busiest_cpu\n");
+	entry = lb_ring[cpu].curr;
+	for (i = 0; i < HISTORY_RING_SIZE; i++) {
+		buff += sprintf(buff, "%8lu %7d %6d\n",
+				lb_ring[cpu].data[entry].time, lb_ring[cpu].data[entry].pid,
+				lb_ring[cpu].data[entry].busiest_cpu);
+		entry = (entry + HISTORY_RING_SIZE - 1) % HISTORY_RING_SIZE;
+	}
+
+	len = buff - page;
+	if (len <= off+count) *eof = 1;
+	len -= off;
+	if (len > count) len = count;
+	if (len < 0) len = 0;
+	return len;
+}
+
+/* print initial lb history ring buffer */
+int ilb_ring_read_proc(char *page, char **start, off_t off,
+		       int count, int *eof, void *data)
+{
+	int i, j, len, entry;
+	char *buff = page;
+	int cpu = (int)(long)data;
+
+	buff += sprintf(buff, "    tick     pid  node   node_loads\n");
+	entry = ilb_ring[cpu].curr;
+	for (i = 0; i < HISTORY_RING_SIZE; i++) {
+		buff += sprintf(buff, "%8lu %7d %5d ",
+				ilb_ring[cpu].data[entry].time,
+				ilb_ring[cpu].data[entry].pid,
+				ilb_ring[cpu].data[entry].node);
+		for (j = 0; j < NR_NODES; j++)
+			buff += sprintf(buff, " %5d", ilb_ring[cpu].data[entry].load[j]);
+		buff += sprintf(buff, "\n");
+		entry = (entry + HISTORY_RING_SIZE - 1) % HISTORY_RING_SIZE;
+	}
+
+	len = buff - page;
+	if (len <= off+count) *eof = 1;
+	len -= off;
+	if (len > count) len = count;
+	if (len < 0) len = 0;
+	return len;
+}
+
+/* print runqueue load */
+int rq_load_read_proc(char *page, char **start, off_t off,
+		      int count, int *eof, void *data)
+{
+	int i, len;
+	runqueue_t *rq;
+	char *buff = page;
+	int cpu = (int)(long)data;
+
+	rq = cpu_rq(cpu);
+	buff += sprintf(buff, "cpu %d : ", cpu);
+	buff += sprintf(buff, "curr: %d %s\n", rq->curr->pid, rq->curr->comm);
+	buff += sprintf(buff, "running uninter nr_homenode\n");
+	buff += sprintf(buff, "%7d %7d", rq->nr_running, rq->nr_uninterruptible);
+	for (i = 0; i < NR_NODES; i++)
+		buff += sprintf(buff, " %5d", rq->nr_homenode[i]);
+	buff += sprintf(buff, "\n");
+
+	len = buff - page;
+	if (len <= off+count) *eof = 1;
+	len -= off;
+	if (len > count) len = count;
+	if (len < 0) len = 0;
+	return len;
+}
+
+#include <linux/proc_fs.h>
+/* initialize /proc entries */
+void init_sched_proc(void)
+{
+	int i;
+	char name[12];
+	struct proc_dir_entry *p, *hist, *sched, *load;
+
+	sched = proc_mkdir("sched", &proc_root);
+	hist = proc_mkdir("history", sched);
+	for (i = 0; i < NR_CPUS; i++) {
+		sprintf(name, "lb%02d", i);
+		p = create_proc_entry(name, S_IRUGO, hist);
+		if (p) {
+			p->read_proc = lb_ring_read_proc;
+			p->data = (void *)(long)i;
+		}
+		sprintf(name, "ilb%02d", i);
+		p = create_proc_entry(name, S_IRUGO, hist);
+		if (p) {
+			p->read_proc = ilb_ring_read_proc;
+			p->data = (void *)(long)i;
+		}
+	}
+	load = proc_mkdir("load", sched);
+	for (i = 0; i < NR_CPUS; i++) {
+		sprintf(name, "rq%02d", i);
+		p = create_proc_entry(name, S_IRUGO, load);
+		if (p) {
+			p->read_proc = rq_load_read_proc;
+			p->data = (void *)(long)i;
+		}
+	}
+}
+
 /*
  * Calculate load of a CPU pool, store results in data[][NR_CPUS].
  * Return the index of the most loaded runqueue.
@@ -961,6 +1130,7 @@ static void load_balance(runqueue_t *thi
 	tmp = task_to_steal(busiest, this_cpu);
 	if (!tmp)
 		goto out_unlock;
+	lb_ring_add(smp_processor_id(), tmp->pid, tmp->thread_info->cpu);
 	pull_task(busiest, tmp->array, tmp, this_rq, this_cpu);
 out_unlock:
 	spin_unlock(&busiest->lock);
@@ -2051,7 +2221,7 @@ static int sched_best_cpu(struct task_st
  */
 static int sched_best_node(struct task_struct *p, int flag)
 {
-	int n, best_node=0, min_load, pool_load, min_pool=p->node;
+	int n, best_node=0, min_load, min_pool=p->node;
 	int pool, load[NR_NODES];
 	unsigned long mask = p->cpus_allowed & cpu_online_map;
 
@@ -2079,13 +2249,14 @@ static int sched_best_node(struct task_s
 	min_load = 100000000;
 	for (n = 0; n < numpools; n++) {
 		pool = (best_node + n) % numpools;
-		pool_load = (100*load[pool])/pool_nr_cpus[pool];
-		if ((pool_load < min_load) && (pool_mask[pool] & mask)) {
-			min_load = pool_load;
+		load[pool] = (100*load[pool])/pool_nr_cpus[pool];
+		if ((load[pool] < min_load) && (pool_mask[pool] & mask)) {
+			min_load = load[pool];
 			min_pool = pool;
 		}
 	}
 	atomic_set(&sched_node, min_pool);
+	ilb_ring_add(smp_processor_id(), p->pid, min_pool, load);
 	return min_pool;
 }
 
@@ -2282,6 +2453,7 @@ void bld_pools(void)
 	find_node_levels(numpools);
 	init_pool_weight();
 	init_pool_delay();
+	init_sched_proc();
 }
 
 void set_task_node(task_t *p, int node)
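
For reference, a minimal userspace sketch (not part of the patch) that dumps the monitoring files created by init_sched_proc() above. Only the /proc paths (/proc/sched/load/rqNN, /proc/sched/history/lbNN and .../ilbNN) come from the patch; the probe range, the dump() helper and everything else below are illustrative assumptions.

/*
 * Userspace sketch, NOT part of the patch: walk the per-CPU monitoring
 * files and print whatever the kernel exposes.  Entries for CPUs that
 * are not configured simply do not exist and are skipped.
 */
#include <stdio.h>

static void dump(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f)
		return;		/* no such entry: CPU not present */
	printf("==> %s\n", path);
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	char path[64];
	int cpu;

	for (cpu = 0; cpu < 64; cpu++) {	/* assumed probe range */
		snprintf(path, sizeof(path), "/proc/sched/load/rq%02d", cpu);
		dump(path);
		snprintf(path, sizeof(path), "/proc/sched/history/lb%02d", cpu);
		dump(path);
		snprintf(path, sizeof(path), "/proc/sched/history/ilb%02d", cpu);
		dump(path);
	}
	return 0;
}

Build with any C compiler and run it on a kernel with the patch applied to get a snapshot of the load-balance and initial-placement history rings plus the current runqueue loads.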