--- 2.6.0-t11-cvs/kernel/sched.c	Thu Dec 11 00:49:45 2003
+++ 2.6.0-t11-aib/kernel/sched.c	Thu Dec 11 14:32:39 2003
@@ -17,6 +17,9 @@
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  */
 
+/* idle balancing debugging: DEBUG enables the pr_debug() traces below */
+#define DEBUG 1
+
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
@@ -37,6 +40,7 @@
 #include <linux/rcupdate.h>
 #include <linux/cpu.h>
 #include <linux/percpu.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_NUMA
 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
@@ -204,6 +208,8 @@
 	struct mm_struct *prev_mm;
 	prio_array_t *active, *expired, arrays[2];
 	int prev_cpu_load[NR_CPUS];
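+	/* this runqueue's cpu id, for idle_list updates */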
+	unsigned long cpu;
 #ifdef CONFIG_NUMA
 	atomic_t *node_nr_running;
 	int prev_node_load[MAX_NUMNODES];
@@ -338,6 +344,93 @@
 	p->array = array;
 }
 
+/* idle balancing infrastructure */
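+/*
+ * Every cpu owns one statically allocated struct idle_cpu (cpu_is_idle
+ * below).  When a runqueue switches to its idle thread, schedule() links
+ * that cpu's node at the head of the global idle_list; switching away
+ * unlinks it again.  Wakeup-time balancing can then find an idle cpu by
+ * peeking at the list head instead of scanning all runqueues.
+ */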
+struct idle_cpus_list {
+	spinlock_t lock;
+	struct list_head list;
+};
+
+struct idle_cpu {
+	struct list_head list;
+	unsigned long cpuid;
+};
+
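+/* sentinel for idlest_cpu(): never a valid cpu id */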
+static const unsigned long all_cpus_busy = NR_CPUS + 1;
+
+static DEFINE_PER_CPU(struct idle_cpu, cpu_is_idle);
+/* global list of currently idle cpus, most recently idled at the head */
+static struct idle_cpus_list idle_list;
+
+static void dump_idle_list(const struct idle_cpus_list *ic)
+{
+	struct idle_cpu *tmp;
+
+	pr_debug("dumping idle list %p (head %p)\n",
+		 ic, &ic->list);
+	list_for_each_entry(tmp, &ic->list, list) {
+		pr_debug("cpu_idle struct at %p (head %p) - cpu %lu\n",
+			 tmp, &tmp->list, tmp->cpuid);
+	}
+}
+
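+/*
+ * The helpers below run with the caller's runqueue lock held and
+ * interrupts disabled, so a plain spin_lock on idle_list suffices.
+ */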
+static void set_cpu_idle(struct idle_cpus_list *ic, int cpu)
+{
+	spin_lock(&ic->lock);
+
+	list_add(&per_cpu(cpu_is_idle, cpu).list, &ic->list);
+
+	pr_debug("added %d\n", cpu);
+	dump_idle_list(ic);
+
+	spin_unlock(&ic->lock);
+}
+
+static inline void set_cpu_busy(struct idle_cpus_list *ic, int cpu)
+{
+	spin_lock(&ic->lock);
+
+	list_del(&per_cpu(cpu_is_idle, cpu).list);
+
+	pr_debug("removed %d\n", cpu);
+	dump_idle_list(ic);
+
+	spin_unlock(&ic->lock);
+}
+
+/* return the id of the idlest cpu, or 'all_cpus_busy' if no cpu is idle */
+static unsigned long idlest_cpu(struct idle_cpus_list *ic)
+{
+	struct idle_cpu *cpu;
+	unsigned long cpuid = all_cpus_busy;
+
+	spin_lock(&ic->lock);
+
+	if (!list_empty(&ic->list)) {
+		cpu = list_entry(ic->list.next, struct idle_cpu, list);
+		/*
+		 * Read cpuid before dropping the lock: the entry may be
+		 * unlinked by set_cpu_busy() the moment the lock is released.
+		 */
+		cpuid = cpu->cpuid;
+	}
+
+	spin_unlock(&ic->lock);
+
+	return cpuid;
+}
+
 /*
  * effective_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
@@ -369,6 +462,9 @@
 	return prio;
 }
 
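+/* resched_task() is defined further down; __activate_task() needs it early */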
+static inline void resched_task(task_t *p);
+
 /*
  * __activate_task - move a task to the runqueue.
  */
@@ -376,6 +472,21 @@
 {
 	enqueue_task(p, rq->active);
 	nr_running_inc(rq);
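+	/*
+	 * Wakeup-time idle balancing: if this runqueue now holds more than
+	 * one runnable task while another cpu sits idle, poke that cpu's
+	 * idle thread so its schedule() can pull a task over.
+	 */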
+#ifdef CONFIG_SMP
+	if (rq->nr_running > 1) {
+		unsigned long cpu = idlest_cpu(&idle_list);
+
+		if (cpu != all_cpus_busy) {
+			pr_debug("rescheduling idle task of cpu %lu\n", cpu);
+			resched_task(cpu_rq(cpu)->idle);
+		}
+	}
+#endif
 }
 
 static void recalc_task_prio(task_t *p, unsigned long long now)
@@ -1378,7 +1489,7 @@
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
-		rebalance_tick(rq, 1);
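+		/* idle cpus are now rebalanced at wakeup, from __activate_task() */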
 		return;
 	}
 	if (TASK_NICE(p) > 0)
@@ -1589,6 +1700,13 @@
 		rq->nr_switches++;
 		rq->curr = next;
 
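+		/* keep idle_list in sync: switching to the idle thread
+		 * enters idle, switching away from it leaves idle */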
+		if (unlikely(next == rq->idle))
+			set_cpu_idle(&idle_list, rq->cpu);
+		else if (prev == rq->idle)
+			set_cpu_busy(&idle_list, rq->cpu);
+
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
@@ -2801,6 +2919,25 @@
 	register_cpu_notifier(&kstat_nb);
 }
 
+static void init_idle_cpus_list(struct idle_cpus_list *ic)
+{
+	unsigned long i;
+
+	spin_lock_init(&ic->lock);
+
+	/* no CPU is currently idle */
+	INIT_LIST_HEAD(&ic->list);
+
+	/* initialize the per cpu list of idle_cpu structs */
+	for (i = 0; i < NR_CPUS; ++i) {
+		INIT_LIST_HEAD(&per_cpu(cpu_is_idle, i).list);
+		per_cpu(cpu_is_idle, i).cpuid = i;
+	}
+
+	pr_debug("after initializing\n");
+	dump_idle_list(ic);
+}
+
 void __init sched_init(void)
 {
 	runqueue_t *rq;
@@ -2808,10 +2945,13 @@
 
 	/* Init the kstat counters */
 	init_kstat();
+	init_idle_cpus_list(&idle_list);
+
 	for (i = 0; i < NR_CPUS; i++) {
 		prio_array_t *array;
 
 		rq = cpu_rq(i);
+		rq->cpu = i;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
