linux-kernel.vger.kernel.org archive mirror
* [patch] HT scheduler, sched-2.5.68-A9
@ 2003-04-21 19:13 Ingo Molnar
  2003-04-22  4:01 ` Dave Jones
                   ` (2 more replies)
  0 siblings, 3 replies; 41+ messages in thread
From: Ingo Molnar @ 2003-04-21 19:13 UTC (permalink / raw)
  To: linux-kernel


The attached patch (against 2.5.68 or BK-curr) is the latest
implementation of the "no compromises" HT scheduler. I fixed a couple of
bugs, and the scheduler is now stable and behaves properly on a
2-CPU-2-sibling HT testbox. The patch can also be downloaded from:

	http://redhat.com/~mingo/O(1)-scheduler/

Bug reports and suggestions welcome,

	Ingo
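
(For illustration, a minimal user-space sketch of the CPU-to-runqueue
remapping the patch introduces; names mirror the patch's rq_idx()/
cpu_idx() macros and sched_map_runqueue(), heavily simplified:)

	#define NR_CPUS 4

	static long __rq_idx[NR_CPUS];	/* which runqueue a CPU is on */
	static long __cpu_idx[NR_CPUS];	/* the CPU's slot in that runqueue */

	#define rq_idx(cpu)	(__rq_idx[(cpu)])
	#define cpu_idx(cpu)	(__cpu_idx[(cpu)])

	/* merge cpu2 into cpu1's runqueue, as done per physical package */
	static void map_runqueue_sketch(int cpu1, int cpu2)
	{
		rq_idx(cpu2) = cpu1;	/* cpu2 now resolves to cpu1's runqueue */
		cpu_idx(cpu2) = 1;	/* simplified; the patch uses rq->nr_cpus++ */
	}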

--- linux/include/linux/sched.h.orig	
+++ linux/include/linux/sched.h	
@@ -147,6 +147,24 @@ typedef struct task_struct task_t;
 extern void sched_init(void);
 extern void init_idle(task_t *idle, int cpu);
 
+/*
+ * Is there a way to do this via Kconfig?
+ */
+#if CONFIG_NR_SIBLINGS_2
+# define CONFIG_NR_SIBLINGS 2
+#elif CONFIG_NR_SIBLINGS_4
+# define CONFIG_NR_SIBLINGS 4
+#else
+# define CONFIG_NR_SIBLINGS 0
+#endif
+
+#if CONFIG_NR_SIBLINGS
+# define CONFIG_SHARE_RUNQUEUE 1
+#else
+# define CONFIG_SHARE_RUNQUEUE 0
+#endif
+extern void sched_map_runqueue(int cpu1, int cpu2);
+
 extern void show_state(void);
 extern void show_trace(unsigned long *stack);
 extern void show_stack(unsigned long *stack);
@@ -814,11 +832,6 @@ static inline unsigned int task_cpu(stru
 	return p->thread_info->cpu;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
-	p->thread_info->cpu = cpu;
-}
-
 #else
 
 static inline unsigned int task_cpu(struct task_struct *p)
@@ -826,10 +839,6 @@ static inline unsigned int task_cpu(stru
 	return 0;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
-}
-
 #endif /* CONFIG_SMP */
 
 #endif /* __KERNEL__ */
--- linux/include/asm-i386/apic.h.orig	
+++ linux/include/asm-i386/apic.h	
@@ -101,4 +101,6 @@ extern unsigned int nmi_watchdog;
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+extern int phys_proc_id[NR_CPUS];
+
 #endif /* __ASM_APIC_H */
--- linux/arch/i386/kernel/cpu/proc.c.orig	
+++ linux/arch/i386/kernel/cpu/proc.c	
@@ -1,4 +1,5 @@
 #include <linux/smp.h>
+#include <linux/sched.h>
 #include <linux/timex.h>
 #include <linux/string.h>
 #include <asm/semaphore.h>
@@ -114,6 +115,13 @@ static int show_cpuinfo(struct seq_file 
 		     fpu_exception ? "yes" : "no",
 		     c->cpuid_level,
 		     c->wp_works_ok ? "yes" : "no");
+#if CONFIG_SHARE_RUNQUEUE
+{
+	extern long __rq_idx[NR_CPUS];
+
+	seq_printf(m, "\nrunqueue\t: %ld\n", __rq_idx[n]);
+}
+#endif
 
 	for ( i = 0 ; i < 32*NCAPINTS ; i++ )
 		if ( test_bit(i, c->x86_capability) &&
--- linux/arch/i386/kernel/smpboot.c.orig	
+++ linux/arch/i386/kernel/smpboot.c	
@@ -38,6 +38,7 @@
 #include <linux/kernel.h>
 
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
@@ -933,6 +934,16 @@ void *xquad_portio;
 
 int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 
+static int test_ht;
+
+static int __init ht_setup(char *str)
+{
+	test_ht = 1;
+	return 1;
+}
+
+__setup("test_ht", ht_setup);
+
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
 	int apicid, cpu, bit;
@@ -1074,7 +1085,20 @@ static void __init smp_boot_cpus(unsigne
 	Dprintk("Boot done.\n");
 
 	/*
-	 * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so
+	 * Here we can be sure that there is an IO-APIC in the system. Let's
+	 * go and set it up:
+	 */
+	smpboot_setup_io_apic();
+
+	setup_boot_APIC_clock();
+
+	/*
+	 * Synchronize the TSC with the AP
+	 */
+	if (cpu_has_tsc && cpucount && cpu_khz)
+		synchronize_tsc_bp();
+	/*
+	 * If Hyper-Threading is available, construct cpu_sibling_map[], so
 	 * that we can tell the sibling CPU efficiently.
 	 */
 	if (cpu_has_ht && smp_num_siblings > 1) {
@@ -1083,7 +1107,8 @@ static void __init smp_boot_cpus(unsigne
 		
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 			int 	i;
-			if (!test_bit(cpu, &cpu_callout_map)) continue;
+			if (!test_bit(cpu, &cpu_callout_map))
+				continue;
 
 			for (i = 0; i < NR_CPUS; i++) {
 				if (i == cpu || !test_bit(i, &cpu_callout_map))
@@ -1099,17 +1124,41 @@ static void __init smp_boot_cpus(unsigne
 				printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
 			}
 		}
+#if CONFIG_SHARE_RUNQUEUE
+		/*
+		 * At this point the APs have synchronized their TSCs and
+		 * are waiting for smp_commenced, with their APIC timers
+		 * disabled. So the BP can go ahead and do some
+		 * initialization for Hyper-Threading (if needed).
+		 */
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			int i;
+			if (!test_bit(cpu, &cpu_callout_map))
+				continue;
+			for (i = 0; i < NR_CPUS; i++) {
+				if (i <= cpu)
+					continue;
+				if (!test_bit(i, &cpu_callout_map))
+					continue;
+  
+				if (phys_proc_id[cpu] != phys_proc_id[i])
+					continue;
+				/*
+				 * merge runqueues, resulting in one
+				 * runqueue per package:
+				 */
+				sched_map_runqueue(cpu, i);
+				break;
+			}
+		}
+#endif
 	}
-
-	smpboot_setup_io_apic();
-
-	setup_boot_APIC_clock();
-
-	/*
-	 * Synchronize the TSC with the AP
-	 */
-	if (cpu_has_tsc && cpucount && cpu_khz)
-		synchronize_tsc_bp();
+#if CONFIG_SHARE_RUNQUEUE
+	if (smp_num_siblings == 1 && test_ht) {
+		printk("Simulating shared runqueues - mapping CPU#1's runqueue to CPU#0's runqueue.\n");
+		sched_map_runqueue(0, 1);
+	}
+#endif
 }
 
 /* These are wrappers to interface to the new boot process.  Someone
--- linux/arch/i386/Kconfig.orig	
+++ linux/arch/i386/Kconfig	
@@ -413,6 +413,24 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+choice
+
+	prompt "Hyperthreading Support"
+	depends on SMP
+	default NR_SIBLINGS_0
+
+config NR_SIBLINGS_0
+	bool "off"
+
+config NR_SIBLINGS_2
+	bool "2 siblings"
+
+config NR_SIBLINGS_4
+	bool "4 siblings"
+
+endchoice
+
+
 config PREEMPT
 	bool "Preemptible Kernel"
 	help
--- linux/kernel/sched.c.orig	
+++ linux/kernel/sched.c	
@@ -73,6 +73,7 @@
 #define INTERACTIVE_DELTA	2
 #define MAX_SLEEP_AVG		(10*HZ)
 #define STARVATION_LIMIT	(10*HZ)
+#define AGRESSIVE_IDLE_STEAL	1
 #define NODE_THRESHOLD		125
 
 /*
@@ -147,6 +148,48 @@ struct prio_array {
 };
 
 /*
+ * It's possible for two CPUs to share the same runqueue.
+ * This makes sense if they eg. share caches.
+ *
+ * We take the common 1:1 (SMP, UP) case and optimize it,
+ * the rest goes via remapping: rq_idx(cpu) gives the
+ * runqueue that a particular cpu is on, cpu_idx(cpu)
+ * gives the rq-specific index of the cpu.
+ *
+ * (Note that the generic scheduler code does not impose any
+ *  restrictions on the mappings - there can be 4 CPUs per
+ *  runqueue or even asymmetric mappings.)
+ */
+#if CONFIG_SHARE_RUNQUEUE
+# define MAX_NR_SIBLINGS CONFIG_NR_SIBLINGS
+  long __rq_idx[NR_CPUS] __cacheline_aligned;
+  static long __cpu_idx[NR_CPUS] __cacheline_aligned;
+# define rq_idx(cpu) (__rq_idx[(cpu)])
+# define cpu_idx(cpu) (__cpu_idx[(cpu)])
+# define for_each_sibling(idx, rq) \
+		for ((idx) = 0; (idx) < (rq)->nr_cpus; (idx)++)
+# define rq_nr_cpus(rq) ((rq)->nr_cpus)
+# define cpu_active_balance(c) (cpu_rq(c)->cpu[0].active_balance)
+#else
+# define MAX_NR_SIBLINGS 1
+# define rq_idx(cpu) (cpu)
+# define cpu_idx(cpu) 0
+# define for_each_sibling(idx, rq) while (0)
+# define cpu_active_balance(c) 0
+# define do_active_balance(rq, cpu) do { } while (0)
+# define rq_nr_cpus(rq) 1
+  static inline void active_load_balance(runqueue_t *rq, int this_cpu) { }
+#endif
+
+typedef struct cpu_s {
+	task_t *curr, *idle;
+	task_t *migration_thread;
+	struct list_head migration_queue;
+	int active_balance;
+	int cpu;
+} cpu_t;
+
+/*
  * This is the main, per-CPU runqueue data structure.
  *
  * Locking rule: those places that want to lock multiple runqueues
@@ -157,7 +200,6 @@ struct runqueue {
 	spinlock_t lock;
 	unsigned long nr_running, nr_switches, expired_timestamp,
 			nr_uninterruptible;
-	task_t *curr, *idle;
 	struct mm_struct *prev_mm;
 	prio_array_t *active, *expired, arrays[2];
 	int prev_cpu_load[NR_CPUS];
@@ -165,27 +207,39 @@ struct runqueue {
 	atomic_t *node_nr_running;
 	int prev_node_load[MAX_NUMNODES];
 #endif
-	task_t *migration_thread;
-	struct list_head migration_queue;
+	int nr_cpus;
+	cpu_t cpu[MAX_NR_SIBLINGS];
 
 	atomic_t nr_iowait;
 } ____cacheline_aligned;
 
 static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
 
-#define cpu_rq(cpu)		(runqueues + (cpu))
+#define cpu_rq(cpu)		(runqueues + (rq_idx(cpu)))
+#define cpu_int(c)		((cpu_rq(c))->cpu + cpu_idx(c))
+#define cpu_curr_ptr(cpu)	(cpu_int(cpu)->curr)
+#define cpu_idle_ptr(cpu)	(cpu_int(cpu)->idle)
+
 #define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
 
+#define migration_thread(cpu)	(cpu_int(cpu)->migration_thread)
+#define migration_queue(cpu)	(&cpu_int(cpu)->migration_queue)
+
+#if NR_CPUS > 1
+# define task_allowed(p, cpu)	((p)->cpus_allowed & (1UL << (cpu)))
+#else
+# define task_allowed(p, cpu)	1
+#endif
+
 /*
  * Default context-switch locking:
  */
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(rq, next)	do { } while(0)
 # define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
-# define task_running(rq, p)		((rq)->curr == (p))
+# define task_running(p)		(cpu_curr_ptr(task_cpu(p)) == (p))
 #endif
 
 #ifdef CONFIG_NUMA
@@ -414,8 +468,18 @@ static inline void resched_task(task_t *
 #endif
 }
 
+static inline void resched_cpu(int cpu)
+{
+	resched_task(cpu_curr_ptr(cpu));
+}
+
 #ifdef CONFIG_SMP
 
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	p->thread_info->cpu = cpu;
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -433,7 +497,7 @@ void wait_task_inactive(task_t * p)
 repeat:
 	preempt_disable();
 	rq = task_rq(p);
-	if (unlikely(task_running(rq, p))) {
+	if (unlikely(task_running(p))) {
 		cpu_relax();
 		/*
 		 * enable/disable preemption just to make this
@@ -444,7 +508,7 @@ repeat:
 		goto repeat;
 	}
 	rq = task_rq_lock(p, &flags);
-	if (unlikely(task_running(rq, p))) {
+	if (unlikely(task_running(p))) {
 		task_rq_unlock(rq, &flags);
 		preempt_enable();
 		goto repeat;
@@ -452,6 +516,13 @@ repeat:
 	task_rq_unlock(rq, &flags);
 	preempt_enable();
 }
+
+#else
+
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
 #endif
 
 /*
@@ -466,10 +537,44 @@ repeat:
  */
 void kick_if_running(task_t * p)
 {
-	if ((task_running(task_rq(p), p)) && (task_cpu(p) != smp_processor_id()))
+	if ((task_running(p)) && (task_cpu(p) != smp_processor_id()))
 		resched_task(p);
 }
 
+static void wake_up_cpu(runqueue_t *rq, int cpu, task_t *p)
+{
+	cpu_t *curr_cpu;
+	task_t *curr;
+	int idx;
+
+	if (idle_cpu(cpu))
+		return resched_cpu(cpu);
+
+	for_each_sibling(idx, rq) {
+		curr_cpu = rq->cpu + idx;
+		if (!task_allowed(p, curr_cpu->cpu))
+			continue;
+		if (curr_cpu->idle == curr_cpu->curr)
+			return resched_cpu(curr_cpu->cpu);
+	}
+
+	if (p->prio < cpu_curr_ptr(cpu)->prio)
+		return resched_task(cpu_curr_ptr(cpu));
+	if (p->prio == cpu_curr_ptr(cpu)->prio &&
+			p->time_slice > cpu_curr_ptr(cpu)->time_slice)
+		return resched_task(cpu_curr_ptr(cpu));
+
+	for_each_sibling(idx, rq) {
+		curr_cpu = rq->cpu + idx;
+		if (!task_allowed(p, curr_cpu->cpu))
+			continue;
+		curr = curr_cpu->curr;
+		if (p->prio < curr->prio)
+			return resched_task(curr);
+		if (p->prio == curr->prio && p->time_slice > curr->time_slice)
+			return resched_task(curr);
+	}
+}
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -491,6 +596,7 @@ static int try_to_wake_up(task_t * p, un
 	long old_state;
 	runqueue_t *rq;
 
+	BUG_ON(!p);
 repeat_lock_task:
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
@@ -500,7 +606,7 @@ repeat_lock_task:
 			 * Fast-migrate the task if it's not running or runnable
 			 * currently. Do not violate hard affinity.
 			 */
-			if (unlikely(sync && !task_running(rq, p) &&
+			if (unlikely(sync && !task_running(p) &&
 				(task_cpu(p) != smp_processor_id()) &&
 				(p->cpus_allowed & (1UL << smp_processor_id())))) {
 
@@ -514,8 +620,7 @@ repeat_lock_task:
 				__activate_task(p, rq);
 			else {
 				requeue_waker = activate_task(p, rq);
-				if (p->prio < rq->curr->prio)
-					resched_task(rq->curr);
+				wake_up_cpu(rq, task_cpu(p), p);
 			}
 			success = 1;
 		}
@@ -692,8 +797,9 @@ unsigned long nr_running(void)
 	unsigned long i, sum = 0;
 
 	for (i = 0; i < NR_CPUS; i++)
-		sum += cpu_rq(i)->nr_running;
-
+		/* Shared runqueues are counted only once. */
+		if (!cpu_idx(i))
+			sum += cpu_rq(i)->nr_running;
 	return sum;
 }
 
@@ -704,7 +810,9 @@ unsigned long nr_uninterruptible(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_online(i))
 			continue;
-		sum += cpu_rq(i)->nr_uninterruptible;
+		/* Shared runqueues are counted only once. */
+		if (!cpu_idx(i))
+			sum += cpu_rq(i)->nr_uninterruptible;
 	}
 	return sum;
 }
@@ -863,7 +971,15 @@ static int find_busiest_node(int this_no
 
 #endif /* CONFIG_NUMA */
 
-#if CONFIG_SMP
+#if !CONFIG_SMP
+
+/*
+ * on UP we do not need to balance between CPUs:
+ */
+static inline void load_balance(runqueue_t *this_rq, int idle, unsigned long cpumask) { }
+static inline void rebalance_tick(runqueue_t *this_rq, int idle) { }
+
+#else
 
 /*
  * double_lock_balance - lock the busiest runqueue
@@ -979,17 +1095,7 @@ static inline void pull_task(runqueue_t 
 	set_task_cpu(p, this_cpu);
 	nr_running_inc(this_rq);
 	enqueue_task(p, this_rq->active);
-	/*
-	 * Note that idle threads have a prio of MAX_PRIO, for this test
-	 * to be always true for them.
-	 */
-	if (p->prio < this_rq->curr->prio)
-		set_need_resched();
-	else {
-		if (p->prio == this_rq->curr->prio &&
-				p->time_slice > this_rq->curr->time_slice)
-			set_need_resched();
-	}
+	wake_up_cpu(this_rq, this_cpu, p);
 }
 
 /*
@@ -1039,6 +1145,7 @@ skip_bitmap:
 		goto out_unlock;
 	}
 
+
 	head = array->queue + idx;
 	curr = head->prev;
 skip_queue:
@@ -1049,12 +1156,15 @@ skip_queue:
 	 * 1) running (obviously), or
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
+	 *
+	 * (except if we are in idle mode, which is a more aggressive
+	 *  form of rebalancing.)
 	 */
 
-#define CAN_MIGRATE_TASK(p,rq,this_cpu)					\
-	((jiffies - (p)->last_run > cache_decay_ticks) &&	\
-		!task_running(rq, p) &&					\
-			((p)->cpus_allowed & (1UL << (this_cpu))))
+#define CAN_MIGRATE_TASK(p,rq,cpu)		\
+	(((idle && AGRESSIVE_IDLE_STEAL) ||	\
+		(jiffies - (p)->last_run > cache_decay_ticks)) && \
+			!task_running(p) && task_allowed(p, cpu))
 
 	curr = curr->prev;
 
@@ -1077,6 +1187,133 @@ out:
 	;
 }
 
+#if CONFIG_SHARE_RUNQUEUE
+static void active_load_balance(runqueue_t *this_rq, int this_cpu)
+{
+	runqueue_t *rq;
+	int i, idx;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		rq = cpu_rq(i);
+		if (rq == this_rq)
+			continue;
+ 		/*
+		 * If there's only one CPU mapped to this runqueue
+		 * then there can be no SMT imbalance:
+		 */
+		if (rq->nr_cpus == 1)
+			continue;
+		/*
+		 * Any SMT-specific imbalance?
+		 */
+		for_each_sibling(idx, rq)
+			if (rq->cpu[idx].idle == rq->cpu[idx].curr)
+				goto next_cpu;
+
+		/*
+		 * At this point it's sure that we have a SMT
+		 * imbalance: this (physical) CPU is idle but
+		 * another CPU has two (or more) tasks running.
+		 *
+		 * We wake up one of the migration threads (it
+		 * doesn't matter which one) and let it fix things up:
+		 */
+		if (!cpu_active_balance(i)) {
+			cpu_active_balance(i) = 1;
+			spin_unlock(&this_rq->lock);
+			if (rq->cpu[0].migration_thread)
+				wake_up_process(rq->cpu[0].migration_thread);
+			spin_lock(&this_rq->lock);
+		}
+next_cpu:
+	}
+}
+
+static void do_active_balance(runqueue_t *this_rq, int this_cpu)
+{
+	runqueue_t *rq;
+	int i, idx;
+
+	spin_unlock(&this_rq->lock);
+
+	cpu_active_balance(this_cpu) = 0;
+
+	/*
+	 * Is the imbalance still present?
+	 */
+	for_each_sibling(idx, this_rq)
+		if (this_rq->cpu[idx].idle == this_rq->cpu[idx].curr)
+			goto out;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		rq = cpu_rq(i);
+		if (rq == this_rq)
+			continue;
+
+		/* completely idle CPU? */
+		if (rq->nr_running)
+			continue;
+
+		/*
+		 * At this point it's reasonably sure that we have an
+		 * imbalance. Since we are the migration thread, try to
+	 	 * balance a thread over to the target queue.
+		 */
+		spin_lock(&this_rq->lock);
+		this_rq->nr_running--;
+		spin_unlock(&this_rq->lock);
+
+		spin_lock(&rq->lock);
+		load_balance(rq, 1, cpu_to_node_mask(this_cpu));
+		spin_unlock(&rq->lock);
+
+		spin_lock(&this_rq->lock);
+		this_rq->nr_running++;
+		spin_unlock(&this_rq->lock);
+		goto out;
+	}
+out:
+	spin_lock(&this_rq->lock);
+}
+
+/*
+ * This routine is called to map a CPU into another CPU's runqueue.
+ *
+ * This must be called during bootup with the merged runqueue having
+ * no tasks.
+ */
+void sched_map_runqueue(int cpu1, int cpu2)
+{
+	runqueue_t *rq1 = cpu_rq(cpu1);
+	runqueue_t *rq2 = cpu_rq(cpu2);
+	int cpu2_idx_orig = cpu_idx(cpu2), cpu2_idx;
+
+	BUG_ON(rq1 == rq2 || rq2->nr_running || rq_idx(cpu1) != cpu1);
+	/*
+	 * At this point, we don't have anything in the runqueue yet, so
+	 * there is no need to move processes between the runqueues;
+	 * only the idle threads need to be combined and accessed
+	 * properly.
+	 */
+	cpu2_idx = rq1->nr_cpus++;
+
+	rq_idx(cpu2) = cpu1;
+	cpu_idx(cpu2) = cpu2_idx;
+	rq1->cpu[cpu2_idx].cpu = cpu2;
+	rq1->cpu[cpu2_idx].idle = rq2->cpu[cpu2_idx_orig].idle;
+	rq1->cpu[cpu2_idx].curr = rq2->cpu[cpu2_idx_orig].curr;
+	INIT_LIST_HEAD(&rq1->cpu[cpu2_idx].migration_queue);
+
+	/* just to be safe: */
+	rq2->cpu[cpu2_idx_orig].idle = NULL;
+	rq2->cpu[cpu2_idx_orig].curr = NULL;
+}
+#endif
+
 /*
  * One of the idle_cpu_tick() and busy_cpu_tick() functions will
  * get called every timer tick, on every CPU. Our balancing action
@@ -1145,15 +1382,9 @@ static void rebalance_tick(runqueue_t *t
 		spin_unlock(&this_rq->lock);
 	}
 }
-#else
-/*
- * on UP we do not need to balance between CPUs:
- */
-static inline void rebalance_tick(runqueue_t *this_rq, int idle)
-{
-}
 #endif
 
+
 DEFINE_PER_CPU(struct kernel_stat, kstat) = { { 0 } };
 
 /*
@@ -1181,12 +1412,13 @@ void scheduler_tick(int user_ticks, int 
 {
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
+	unsigned long j = jiffies;
 	task_t *p = current;
 
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
 
-	if (p == rq->idle) {
+	if (p == cpu_idle_ptr(cpu)) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
 			kstat_cpu(cpu).cpustat.system += sys_ticks;
@@ -1209,6 +1441,7 @@ void scheduler_tick(int user_ticks, int 
 		return;
 	}
 	spin_lock(&rq->lock);
+  	/* Task might have expired already, but not scheduled off yet */
 	/*
 	 * The task was running during this tick - update the
 	 * time slice counter and the sleep average. Note: we
@@ -1244,7 +1477,7 @@ void scheduler_tick(int user_ticks, int 
 
 		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
 			if (!rq->expired_timestamp)
-				rq->expired_timestamp = jiffies;
+				rq->expired_timestamp = j;
 			enqueue_task(p, rq->expired);
 		} else
 			enqueue_task(p, rq->active);
@@ -1261,11 +1494,11 @@ void scheduling_functions_start_here(voi
  */
 asmlinkage void schedule(void)
 {
+	int idx, this_cpu, retry = 0;
+	struct list_head *queue;
 	task_t *prev, *next;
-	runqueue_t *rq;
 	prio_array_t *array;
-	struct list_head *queue;
-	int idx;
+	runqueue_t *rq;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -1283,6 +1516,7 @@ asmlinkage void schedule(void)
 need_resched:
 	preempt_disable();
 	prev = current;
+	this_cpu = smp_processor_id();
 	rq = this_rq();
 
 	release_kernel_lock(prev);
@@ -1307,14 +1541,17 @@ need_resched:
 	case TASK_RUNNING:
 		;
 	}
+
 pick_next_task:
 	if (unlikely(!rq->nr_running)) {
-#if CONFIG_SMP
 		load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
 		if (rq->nr_running)
 			goto pick_next_task;
-#endif
-		next = rq->idle;
+		active_load_balance(rq, this_cpu);
+		if (rq->nr_running)
+			goto pick_next_task;
+pick_idle:
+		next = cpu_idle_ptr(this_cpu);
 		rq->expired_timestamp = 0;
 		goto switch_tasks;
 	}
@@ -1330,18 +1567,49 @@ pick_next_task:
 		rq->expired_timestamp = 0;
 	}
 
+new_array:
 	idx = sched_find_first_bit(array->bitmap);
-	queue = array->queue + idx;
-	next = list_entry(queue->next, task_t, run_list);
+  	queue = array->queue + idx;
+  	next = list_entry(queue->next, task_t, run_list);
+	if ((next != prev) && (rq_nr_cpus(rq) > 1)) {
+		struct list_head *tmp = queue->next;
+
+		while ((task_running(next) && (next != prev)) || !task_allowed(next, this_cpu)) {
+			tmp = tmp->next;
+			if (tmp != queue) {
+				next = list_entry(tmp, task_t, run_list);
+				continue;
+			}
+			idx = find_next_bit(array->bitmap, MAX_PRIO, ++idx);
+			if (idx == MAX_PRIO) {
+				if (retry || !rq->expired->nr_active) {
+					goto pick_idle;
+				}
+				/*
+				 * To avoid infinite changing of arrays,
+				 * when we have only tasks runnable by
+				 * sibling.
+				 */
+				retry = 1;
+
+				array = rq->expired;
+				goto new_array;
+			}
+			queue = array->queue + idx;
+			tmp = queue->next;
+			next = list_entry(tmp, task_t, run_list);
+		}
+	}
 
 switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
-	RCU_qsctr(prev->thread_info->cpu)++;
+	RCU_qsctr(task_cpu(prev))++;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
-		rq->curr = next;
+		cpu_curr_ptr(this_cpu) = next;
+		set_task_cpu(next, this_cpu);
 
 		prepare_arch_switch(rq, next);
 		prev = context_switch(rq, prev, next);
@@ -1604,9 +1872,8 @@ void set_user_nice(task_t *p, long nice)
 		 * If the task is running and lowered its priority,
 		 * or increased its priority then reschedule its CPU:
 		 */
-		if ((NICE_TO_PRIO(nice) < p->static_prio) ||
-							task_running(rq, p))
-			resched_task(rq->curr);
+		if ((NICE_TO_PRIO(nice) < p->static_prio) || task_running(p))
+			resched_task(cpu_curr_ptr(task_cpu(p)));
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -1684,7 +1951,7 @@ int task_nice(task_t *p)
  */
 int task_curr(task_t *p)
 {
-	return cpu_curr(task_cpu(p)) == p;
+	return cpu_curr_ptr(task_cpu(p)) == p;
 }
 
 /**
@@ -1693,7 +1960,7 @@ int task_curr(task_t *p)
  */
 int idle_cpu(int cpu)
 {
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+	return cpu_curr_ptr(cpu) == cpu_idle_ptr(cpu);
 }
 
 /**
@@ -2243,7 +2510,7 @@ void __init init_idle(task_t *idle, int 
 	local_irq_save(flags);
 	double_rq_lock(idle_rq, rq);
 
-	idle_rq->curr = idle_rq->idle = idle;
+	cpu_curr_ptr(cpu) = cpu_idle_ptr(cpu) = idle;
 	deactivate_task(idle, rq);
 	idle->array = NULL;
 	idle->prio = MAX_PRIO;
@@ -2298,6 +2565,7 @@ void set_cpus_allowed(task_t *p, unsigne
 	unsigned long flags;
 	migration_req_t req;
 	runqueue_t *rq;
+	int cpu;
 
 #if 0 /* FIXME: Grab cpu_lock, return error on this case. --RR */
 	new_mask &= cpu_online_map;
@@ -2319,32 +2587,32 @@ void set_cpus_allowed(task_t *p, unsigne
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
 	 */
-	if (!p->array && !task_running(rq, p)) {
+	if (!p->array && !task_running(p)) {
 		set_task_cpu(p, __ffs(p->cpus_allowed));
 		task_rq_unlock(rq, &flags);
 		return;
 	}
 	init_completion(&req.done);
 	req.task = p;
-	list_add(&req.list, &rq->migration_queue);
+	cpu = task_cpu(p);
+	list_add(&req.list, migration_queue(cpu));
 	task_rq_unlock(rq, &flags);
-
-	wake_up_process(rq->migration_thread);
+	wake_up_process(migration_thread(cpu));
 
 	wait_for_completion(&req.done);
 }
 
 /*
- * migration_thread - this is a highprio system thread that performs
+ * migration_task - this is a highprio system thread that performs
  * thread migration by 'pulling' threads into the target runqueue.
  */
-static int migration_thread(void * data)
+static int migration_task(void * data)
 {
 	/* Marking "param" __user is ok, since we do a set_fs(KERNEL_DS); */
 	struct sched_param __user param = { .sched_priority = MAX_RT_PRIO-1 };
 	int cpu = (long) data;
 	runqueue_t *rq;
-	int ret;
+	int ret, idx;
 
 	daemonize("migration/%d", cpu);
 	set_fs(KERNEL_DS);
@@ -2358,7 +2626,8 @@ static int migration_thread(void * data)
 	ret = setscheduler(0, SCHED_FIFO, &param);
 
 	rq = this_rq();
-	rq->migration_thread = current;
+	migration_thread(cpu) = current;
+	idx = cpu_idx(cpu);
 
 	for (;;) {
 		runqueue_t *rq_src, *rq_dest;
@@ -2369,8 +2638,10 @@ static int migration_thread(void * data)
 		task_t *p;
 
 		spin_lock_irqsave(&rq->lock, flags);
-		head = &rq->migration_queue;
-		current->state = TASK_INTERRUPTIBLE;
+		if (cpu_active_balance(cpu))
+			do_active_balance(rq, cpu);
+		head = migration_queue(cpu);
+		current->state = TASK_UNINTERRUPTIBLE;
 		if (list_empty(head)) {
 			spin_unlock_irqrestore(&rq->lock, flags);
 			schedule();
@@ -2399,8 +2670,7 @@ repeat:
 			if (p->array) {
 				deactivate_task(p, rq_src);
 				__activate_task(p, rq_dest);
-				if (p->prio < rq_dest->curr->prio)
-					resched_task(rq_dest->curr);
+				wake_up_cpu(rq_dest, cpu_dest, p);
 			}
 		}
 		double_rq_unlock(rq_src, rq_dest);
@@ -2418,12 +2688,13 @@ static int migration_call(struct notifie
 			  unsigned long action,
 			  void *hcpu)
 {
+	long cpu = (long) hcpu;
+
 	switch (action) {
 	case CPU_ONLINE:
-		printk("Starting migration thread for cpu %li\n",
-		       (long)hcpu);
-		kernel_thread(migration_thread, hcpu, CLONE_KERNEL);
-		while (!cpu_rq((long)hcpu)->migration_thread)
+		printk("Starting migration thread for cpu %li\n", cpu);
+		kernel_thread(migration_task, hcpu, CLONE_KERNEL);
+		while (!migration_thread(cpu))
 			yield();
 		break;
 	}
@@ -2488,6 +2759,7 @@ __init static void init_kstat(void) {
 	register_cpu_notifier(&kstat_nb);
 }
 
+
 void __init sched_init(void)
 {
 	runqueue_t *rq;
@@ -2498,11 +2770,20 @@ void __init sched_init(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		prio_array_t *array;
 
+		/*
+		 * Start with a 1:1 mapping between CPUs and runqueues:
+		 */
+#if CONFIG_SHARE_RUNQUEUE
+		rq_idx(i) = i;
+		cpu_idx(i) = 0;
+#endif
 		rq = cpu_rq(i);
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
-		INIT_LIST_HEAD(&rq->migration_queue);
+		INIT_LIST_HEAD(migration_queue(i));
+		rq->nr_cpus = 1;
+		rq->cpu[cpu_idx(i)].cpu = i;
 		atomic_set(&rq->nr_iowait, 0);
 		nr_running_init(rq);
 
@@ -2520,9 +2801,9 @@ void __init sched_init(void)
 	 * We have to do a little magic to get the first
 	 * thread right in SMP mode.
 	 */
-	rq = this_rq();
-	rq->curr = current;
-	rq->idle = current;
+	cpu_curr_ptr(smp_processor_id()) = current;
+	cpu_idle_ptr(smp_processor_id()) = current;
+
 	set_task_cpu(current, smp_processor_id());
 	wake_up_forked_process(current);
 
--- linux/init/main.c.orig	
+++ linux/init/main.c	
@@ -367,7 +367,14 @@ static void __init smp_init(void)
 
 static void rest_init(void)
 {
+	/* 
+	 *	We count on the initial thread going ok 
+	 *	Like idlers init is an unlocked kernel thread, which will
+	 *	make syscalls (and thus be locked).
+	 */
+	init_idle(current, smp_processor_id());
 	kernel_thread(init, NULL, CLONE_KERNEL);
+
 	unlock_kernel();
  	cpu_idle();
 } 
@@ -453,13 +460,6 @@ asmlinkage void __init start_kernel(void
 	check_bugs();
 	printk("POSIX conformance testing by UNIFIX\n");
 
-	/* 
-	 *	We count on the initial thread going ok 
-	 *	Like idlers init is an unlocked kernel thread, which will
-	 *	make syscalls (and thus be locked).
-	 */
-	init_idle(current, smp_processor_id());
-
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }


* [PATCH v2 0/3] x86/mm: INVPCID support
@ 2016-01-25 18:37 Andy Lutomirski
  2016-01-25 18:57 ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Andy Lutomirski @ 2016-01-25 18:37 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Brian Gerst, Dave Hansen, Linus Torvalds,
	Oleg Nesterov, linux-mm, Andrey Ryabinin, Andy Lutomirski

Ingo, before applying this, please apply these two KASAN fixes:

http://lkml.kernel.org/g/1452516679-32040-2-git-send-email-aryabinin@virtuozzo.com
http://lkml.kernel.org/g/1452516679-32040-3-git-send-email-aryabinin@virtuozzo.com

Without those fixes, this series will trigger a KASAN bug.

This is a straightforward speedup on Ivy Bridge and newer, IIRC.
(I tested on Skylake.  INVPCID is not available on Sandy Bridge.
I don't have Ivy Bridge, Haswell or Broadwell to test on, so I
could be wrong as to when the feature was introduced.)

I think we should consider these patches separately from the rest
of the PCID stuff -- they barely interact, and this part is much
simpler and is useful on its own.

This is exactly identical to patches 2-4 of the PCID RFC series.

Andy Lutomirski (3):
  x86/mm: Add INVPCID helpers
  x86/mm: Add a noinvpcid option to turn off INVPCID
  x86/mm: If INVPCID is available, use it to flush global mappings

 Documentation/kernel-parameters.txt |  2 ++
 arch/x86/include/asm/tlbflush.h     | 50 +++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c        | 16 ++++++++++++
 3 files changed, 68 insertions(+)
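
(For context, such a helper boils down to wrapping the instruction's
descriptor-plus-type form in inline asm; a sketch along these lines,
not necessarily the exact code in the series:)

	/* INVPCID takes a 128-bit {pcid, address} descriptor in memory
	 * and an invalidation type in a register */
	static inline void __invpcid(unsigned long pcid, unsigned long addr,
				     unsigned long type)
	{
		struct { u64 d[2]; } desc = { { pcid, addr } };

		/* raw opcode bytes; older assemblers lack the mnemonic */
		asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
			      : : "m" (desc), "a" (type), "c" (&desc)
			      : "memory");
	}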

-- 
2.5.0

* [PATCH 00/13] Add VT-d Posted-Interrupts support for KVM
@ 2014-11-10  6:26 Feng Wu
  2014-11-10  6:26 ` [PATCH 13/13] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts Feng Wu
  0 siblings, 1 reply; 41+ messages in thread
From: Feng Wu @ 2014-11-10  6:26 UTC (permalink / raw)
  To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
  Cc: kvm, iommu, linux-kernel, Feng Wu

VT-d Posted-Interrupts is an enhancement to the CPU-side posted-interrupt
mechanism. With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention while the guest is running in non-root mode.

You can find the VT-d Posted-Interrupts spec at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html
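
(For orientation, the 64-byte posted-interrupt descriptor that the CPU
and IOMMU share looks roughly like this; a simplified sketch, with field
names following the spec's PIR/ON/SN/NV/NDST terminology:)

	/* simplified sketch of the posted-interrupt descriptor */
	struct pi_desc {
		u32 pir[8];	/* posted requests, one bit per vector */
		u16 on : 1,	/* bit 256: outstanding notification */
		    sn : 1,	/* bit 257: suppress notification */
		    rsvd : 14;
		u8  nv;		/* notification vector */
		u8  rsvd_2;
		u32 ndst;	/* notification destination (APIC ID) */
		u32 rsvd_3[6];
	} __attribute__((aligned(64)));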

Feng Wu (13):
  iommu/vt-d: VT-d Posted-Interrupts feature detection
  KVM: Initialize VT-d Posted-Interrupts Descriptor
  KVM: Add KVM_CAP_PI to detect VT-d Posted-Interrupts
  iommu/vt-d: Adjust 'struct irte' to better suit for VT-d
    Posted-Interrupts
  KVM: Update IRTE according to guest interrupt configuration changes
  KVM: Add some helper functions for Posted-Interrupts
  x86, irq: Define a global vector for VT-d Posted-Interrupts
  KVM: Update Posted-Interrupts descriptor during VCPU scheduling
  KVM: Change NDST field after VCPU scheduling
  KVM: Add the handler for Wake-up Vector
  KVM: Suppress posted-interrupt when 'SN' is set
  iommu/vt-d: No need to migrate irq for VT-d Posted-Interrupts
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 arch/x86/include/asm/entry_arch.h    |    2 +
 arch/x86/include/asm/hardirq.h       |    1 +
 arch/x86/include/asm/hw_irq.h        |    2 +
 arch/x86/include/asm/irq_remapping.h |    7 +
 arch/x86/include/asm/irq_vectors.h   |    1 +
 arch/x86/include/asm/kvm_host.h      |    9 ++
 arch/x86/kernel/apic/apic.c          |    1 +
 arch/x86/kernel/entry_64.S           |    2 +
 arch/x86/kernel/irq.c                |   27 ++++
 arch/x86/kernel/irqinit.c            |    2 +
 arch/x86/kvm/vmx.c                   |  257 +++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c                   |   53 ++++++-
 drivers/iommu/amd_iommu.c            |    6 +
 drivers/iommu/intel_irq_remapping.c  |   83 +++++++++--
 drivers/iommu/irq_remapping.c        |   20 +++
 drivers/iommu/irq_remapping.h        |    8 +
 include/linux/dmar.h                 |   30 ++++-
 include/linux/intel-iommu.h          |    1 +
 include/linux/kvm_host.h             |   25 ++++
 include/uapi/linux/kvm.h             |    2 +
 virt/kvm/assigned-dev.c              |  141 +++++++++++++++++++
 virt/kvm/irq_comm.c                  |    4 +-
 virt/kvm/irqchip.c                   |   11 --
 virt/kvm/kvm_main.c                  |   14 ++
 24 files changed, 667 insertions(+), 42 deletions(-)


* [PATCHv2] netfilter: add CHECKSUM target
@ 2010-07-11 15:06 Michael S. Tsirkin
  2010-07-11 15:14 ` [PATCHv3] extensions: libxt_CHECKSUM extension Michael S. Tsirkin
  0 siblings, 1 reply; 41+ messages in thread
From: Michael S. Tsirkin @ 2010-07-11 15:06 UTC (permalink / raw)
  To: Patrick McHardy, Michael S. Tsirkin, David S. Miller,
	Jan Engelhardt, Randy Dunlap, netfilter-devel, netfilter,
	coreteam, linux-kernel, netdev, kvm, herbert

This adds a `CHECKSUM' target, which can be used in the iptables mangle
table.

You can use this target to compute and fill in the checksum in
a packet that lacks a checksum.  This is particularly useful
if you need to work around old applications, such as dhcp clients,
that do not work well with checksum offloads, but you don't want
to disable checksum offload in your device.

The problem happens in the field with virtualized applications.
For reference, see Red Hat bz 605555, as well as
http://www.spinics.net/lists/kvm/msg37660.html

Typical expected use (helps old dhclient binary running in a VM):
iptables -A POSTROUTING -t mangle -p udp --dport bootpc \
	-j CHECKSUM --checksum-fill

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

Changes from v1:
	moved from ipt to xt
	get rid of any ipv4 dependencies
	coding style tweaks

 include/linux/netfilter/xt_CHECKSUM.h |   18 ++++++++
 net/netfilter/Kconfig                 |   17 +++++++-
 net/netfilter/Makefile                |    1 +
 net/netfilter/xt_CHECKSUM.c           |   72 +++++++++++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/netfilter/xt_CHECKSUM.h
 create mode 100644 net/netfilter/xt_CHECKSUM.c

diff --git a/include/linux/netfilter/xt_CHECKSUM.h b/include/linux/netfilter/xt_CHECKSUM.h
new file mode 100644
index 0000000..56afe57
--- /dev/null
+++ b/include/linux/netfilter/xt_CHECKSUM.h
@@ -0,0 +1,18 @@
+/* Header file for iptables ipt_CHECKSUM target
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2010 Red Hat Inc
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This software is distributed under GNU GPL v2, 1991
+*/
+#ifndef _IPT_CHECKSUM_TARGET_H
+#define _IPT_CHECKSUM_TARGET_H
+
+#define XT_CHECKSUM_OP_FILL	0x01	/* fill in checksum in IP header */
+
+struct xt_CHECKSUM_info {
+	u_int8_t operation;	/* bitset of operations */
+};
+
+#endif /* _IPT_CHECKSUM_TARGET_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8593a77..1cf4852 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -294,7 +294,7 @@ endif # NF_CONNTRACK
 config NETFILTER_TPROXY
 	tristate "Transparent proxying support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on IP_NF_MANGLE
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NETFILTER_ADVANCED
 	help
 	  This option enables transparent proxying support, that is,
@@ -347,6 +347,21 @@ config NETFILTER_XT_CONNMARK
 
 comment "Xtables targets"
 
+config NETFILTER_XT_TARGET_CHECKSUM
+	tristate "CHECKSUM target support"
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option adds a `CHECKSUM' target, which can be used in the iptables mangle
+	  table.  
+
+	  You can use this target to compute and fill in the checksum in
+	  a packet that lacks a checksum.  This is particularly useful
+	  if you need to work around old applications, such as dhcp clients,
+	  that do not work well with checksum offloads, but you don't want
+	  to disable checksum offload in your device.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 14e3a8f..8eb541d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
new file mode 100644
index 0000000..0fee1a7
--- /dev/null
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -0,0 +1,72 @@
+/* iptables module for the packet checksum mangling
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * (C) 2010 Red Hat, Inc.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CHECKSUM.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
+MODULE_DESCRIPTION("Xtables: checksum modification");
+MODULE_ALIAS("ipt_CHECKSUM");
+MODULE_ALIAS("ip6t_CHECKSUM");
+
+static unsigned int
+checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb_checksum_help(skb);
+
+	return XT_CONTINUE;
+}
+
+static int checksum_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_CHECKSUM_info *einfo = par->targinfo;
+
+	if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
+		pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
+		return -EINVAL;
+	}
+	if (!einfo->operation) {
+		pr_info("no CHECKSUM operation enabled\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct xt_target checksum_tg_reg __read_mostly = {
+	.name		= "CHECKSUM",
+	.family		= NFPROTO_UNSPEC,
+	.target		= checksum_tg,
+	.targetsize	= sizeof(struct xt_CHECKSUM_info),
+	.table		= "mangle",
+	.checkentry	= checksum_tg_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init checksum_tg_init(void)
+{
+	return xt_register_target(&checksum_tg_reg);
+}
+
+static void __exit checksum_tg_exit(void)
+{
+	xt_unregister_target(&checksum_tg_reg);
+}
+
+module_init(checksum_tg_init);
+module_exit(checksum_tg_exit);
-- 
1.7.2.rc0.14.g41c1c

* [PATCH 0/1] HID: hid_apple is not used for apple alu wireless keyboards
@ 2008-11-26 14:33 Jan Scholz
  2008-11-26 14:33 ` [PATCH 1/1] HID: Apple alu wireless keyboards are bluetooth devices Jan Scholz
  0 siblings, 1 reply; 41+ messages in thread
From: Jan Scholz @ 2008-11-26 14:33 UTC (permalink / raw)
  To: jkosina; +Cc: jirislaby, linux-kernel, Jan Scholz

Hi Jiri,

While parsing 'hid_blacklist' in hid-core.c, my Apple alu wireless
keyboard is not found.  This happens because the blacklist declares
it with HID_USB_DEVICE, although these keyboards are really
bluetooth devices.  The same holds for the 'apple_devices' list in
hid-apple.c.

This patch fixes it by changing HID_USB_DEVICE to
HID_BLUETOOTH_DEVICE in those two lists.
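
(The two macros differ only in the bus type they record, which is what
device matching keys on; roughly, paraphrased from include/linux/hid.h
of that era:)

	#define HID_USB_DEVICE(ven, prod) \
		.bus = BUS_USB, .vendor = (ven), .product = (prod)
	#define HID_BLUETOOTH_DEVICE(ven, prod) \
		.bus = BUS_BLUETOOTH, .vendor = (ven), .product = (prod)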

Jan Scholz (1):
  HID: Apple alu wireless keyboards are bluetooth devices

 drivers/hid/hid-apple.c |    6 +++---
 drivers/hid/hid-core.c  |    6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)


* [PATCH 1/2] HID: add hid_type
@ 2008-10-19 14:15 Jiri Slaby
  2008-10-19 14:15 ` [PATCH 2/2] HID: fix appletouch regression Jiri Slaby
  0 siblings, 1 reply; 41+ messages in thread
From: Jiri Slaby @ 2008-10-19 14:15 UTC (permalink / raw)
  To: jkosina
  Cc: linux-input, linux-kernel, Steven Noonan, Justin Mattock,
	Sven Anders, Marcel Holtmann, linux-bluetooth, Jiri Slaby

Add a type field to the hid structure to distinguish which device type
(mouse/kbd) we are talking to. This is needed for per-device-type
ignore list support.

Note: this patch leaves the type as unknown for bluetooth devices;
there is no support for this in the hidp code.
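
(A hypothetical example of what this enables -- an ignore-list check
that only applies to one device type; the helper name is made up:)

	static int should_ignore(struct hid_device *hdev)
	{
		/* only consult the mouse ignore list for actual mice */
		if (hdev->type == HID_TYPE_MOUSE &&
		    hid_match_mouse_ignore_list(hdev)) /* hypothetical */
			return 1;
		return 0;
	}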

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
---
 drivers/hid/usbhid/hid-core.c |    8 ++++++++
 include/linux/hid.h           |    7 +++++++
 2 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 1d3b8a3..20617d8 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -972,6 +972,14 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	hid->vendor = le16_to_cpu(dev->descriptor.idVendor);
 	hid->product = le16_to_cpu(dev->descriptor.idProduct);
 	hid->name[0] = 0;
+	switch (intf->cur_altsetting->desc.bInterfaceProtocol) {
+	case USB_INTERFACE_PROTOCOL_KEYBOARD:
+		hid->type = HID_TYPE_KEYBOARD;
+		break;
+	case USB_INTERFACE_PROTOCOL_MOUSE:
+		hid->type = HID_TYPE_MOUSE;
+		break;
+	}
 
 	if (dev->manufacturer)
 		strlcpy(hid->name, dev->manufacturer, sizeof(hid->name));
diff --git a/include/linux/hid.h b/include/linux/hid.h
index f13bca2..36a3953 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -417,6 +417,12 @@ struct hid_input {
 	struct input_dev *input;
 };
 
+enum hid_type {
+	HID_TYPE_UNKNOWN = 0,
+	HID_TYPE_MOUSE,
+	HID_TYPE_KEYBOARD
+};
+
 struct hid_driver;
 struct hid_ll_driver;
 
@@ -431,6 +437,7 @@ struct hid_device {							/* device report descriptor */
 	__u32 vendor;							/* Vendor ID */
 	__u32 product;							/* Product ID */
 	__u32 version;							/* HID version */
+	enum hid_type type;						/* device type (mouse, kbd, ...) */
 	unsigned country;						/* HID country */
 	struct hid_report_enum report_enum[HID_REPORT_TYPES];
 
-- 
1.6.0.2


* Linux 2.6.16.4
@ 2006-04-11 17:33 Greg KH
  2006-04-11 19:04 ` several messages Jan Engelhardt
  0 siblings, 1 reply; 41+ messages in thread
From: Greg KH @ 2006-04-11 17:33 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: torvalds

We (the -stable team) are announcing the release of the 2.6.16.4 kernel.

The diffstat and short summary of the fixes are below.

I'll also be replying to this message with a copy of the patch between
2.6.16.3 and 2.6.16.4, as it is small enough to do so.

The updated 2.6.16.y git tree can be found at:
 	rsync://rsync.kernel.org/pub/scm/linux/kernel/git/stable/linux-2.6.16.y.git
and can be browsed at the normal kernel.org git web browser:
	www.kernel.org/git/

thanks,

greg k-h

--------

 Makefile        |    2 +-
 kernel/signal.c |    1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

Summary of changes from v2.6.16.3 to v2.6.16.4
==============================================


Greg Kroah-Hartman:
      Linux 2.6.16.4

Oleg Nesterov:
      RCU signal handling [CVE-2006-1523]


* ata over ethernet question
@ 2005-05-04 17:31 Maciej Soltysiak
  2005-05-04 19:48 ` David Hollis
  0 siblings, 1 reply; 41+ messages in thread
From: Maciej Soltysiak @ 2005-05-04 17:31 UTC (permalink / raw)
  To: linux-kernel

Hi,

AOE is a bit new for me.

Would it be possible to use the AoE driver to
attach an ATA drive in one host, over Ethernet, to another
host? Or does it support specific hardware devices only?

You know, something like:
# fdisk <device_on_another_host>
# mkfs.ext2 <device_on_another_host/partition1>
# mount <device_on_another_host/partition1> /mnt/part1
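
(For what it's worth, a typical setup assuming the userspace vblade
exporter from aoetools; shelf/slot numbers and device paths here are
only examples:

	# on the host that has the disk:
	vblade 0 0 eth0 /dev/hdb

	# on the other host, the aoe driver exposes a block device:
	fdisk /dev/etherd/e0.0
	mkfs.ext2 /dev/etherd/e0.0p1
	mount /dev/etherd/e0.0p1 /mnt/part1

so the exported drive can be partitioned, formatted and mounted like
any local disk.)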

--
Maciej



* Re: 2.6.6-rc3-mm2 (4KSTACK)
@ 2004-05-11  8:45 Helge Hafting
  2004-05-11 17:59 ` several messages Bill Davidsen
  0 siblings, 1 reply; 41+ messages in thread
From: Helge Hafting @ 2004-05-11  8:45 UTC (permalink / raw)
  To: Bill Davidsen; +Cc: linux-kernel

Bill Davidsen wrote:

> Arjan van de Ven wrote:
>
>>> It's probably a Bad Idea to push this to Linus before the vendors 
>>> that have
>>> significant market-impact issues (again - anybody other than NVidia 
>>> here?)
>>> have gotten their stuff cleaned up...
>>
>>
>>
>> Ok I don't want to start a flamewar but... Do we want to hold linux back
>> until all binary only module vendors have caught up ??
>
>
> My question is, hold it back from what? Having the 4k option is fine, 
> it's just eliminating the current default which I think is 
> undesirable. I tried 4k stack, I couldn't measure any improvement in 
> anything (as in no visible speedup or saving in memory). 

The memory saving is usually modest: 4k per thread. It might make a
difference for those with many thousands of threads. I believe this is
unswappable memory, which is much more valuable than ordinary process
memory.

More interesting is that it removes one way for fork() to fail. With 8k
stacks, the new process needs to allocate two consecutive pages for
those 8k. That might be impossible due to fragmentation, even if there
are megabytes of free memory. Such a problem usually only shows up after
a long time. Now we only need to allocate a single page, which always
works as long as there is any free memory at all.
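
(Concretely, the difference is an order-0 versus an order-1 page
allocation; a simplified sketch using 2.6-era i386 names:)

	#ifdef CONFIG_4KSTACKS
	# define THREAD_SIZE 4096 /* one page: order-0, no fragmentation risk */
	#else
	# define THREAD_SIZE 8192 /* two contiguous pages: order-1 */
	#endif

	/* fork() ends up doing roughly this for the new thread's stack: */
	stack = __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE));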

> For an embedded system, where space is tight and the code paths well 
> known, sure, but I haven't been able to find or generate any objective 
> improvement, other than some posts saying smaller is always better. 
> Nothing slows a system down like a crash, even if it isn't followed by 
> a restore from backup.

Consider the case when your server (web/mail/other) fails to fork, and
then you can't log in because that requires fork() too. 4k stacks remove
this scenario, which is a stability improvement.

Helge Hafting

* Re: ANN: LKMB (Linux Kernel Module Builder) version 0.1.16
@ 2003-01-23  0:20 Hal Duston
  2003-01-27 16:46 ` several messages Bill Davidsen
  0 siblings, 1 reply; 41+ messages in thread
From: Hal Duston @ 2003-01-23  0:20 UTC (permalink / raw)
  To: linux-kernel

I use "INSTALL_MOD_PATH=put/the/modules/here/instead/of/lib/modules" in my
.profile or whatever in order to drop the modules into another directory
at "make modules_install" time.  Is this one of the things folks are
talking about?
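
(For example, as a one-off on the command line, with an illustrative
staging path:

	make modules_install INSTALL_MOD_PATH=/tmp/staging
	# modules then land under /tmp/staging/lib/modules/<version>/

instead of exporting the variable from .profile.)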

Hal Duston




Thread overview: 41+ messages
2003-04-21 19:13 [patch] HT scheduler, sched-2.5.68-A9 Ingo Molnar
2003-04-22  4:01 ` Dave Jones
2003-04-22  7:08   ` Ingo Molnar
2003-04-22 10:18 ` Rick Lindsley
2003-04-22 10:34   ` Ingo Molnar
2003-04-22 11:10     ` Rick Lindsley
2003-04-22 11:27       ` Ingo Molnar
2003-04-22 22:16     ` several messages Bill Davidsen
2003-04-22 23:38       ` Rick Lindsley
2003-04-23  9:17         ` Ingo Molnar
2003-04-22 23:48 ` [patch] HT scheduler, sched-2.5.68-A9 Martin J. Bligh
2003-04-23  9:12   ` Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2016-01-25 18:37 [PATCH v2 0/3] x86/mm: INVPCID support Andy Lutomirski
2016-01-25 18:57 ` Ingo Molnar
2016-01-27 10:09   ` several messages Thomas Gleixner
2016-01-29 13:21     ` Borislav Petkov
2014-11-10  6:26 [PATCH 00/13] Add VT-d Posted-Interrupts support for KVM Feng Wu
2014-11-10  6:26 ` [PATCH 13/13] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts Feng Wu
2014-11-10 18:15   ` several messages Thomas Gleixner
2014-11-11  2:28     ` Jiang Liu
2014-11-11  6:37       ` Wu, Feng
2010-07-11 15:06 [PATCHv2] netfilter: add CHECKSUM target Michael S. Tsirkin
2010-07-11 15:14 ` [PATCHv3] extensions: libxt_CHECKSUM extension Michael S. Tsirkin
2010-07-15  9:39   ` Patrick McHardy
2010-07-15 10:17     ` several messages Jan Engelhardt
2008-11-26 14:33 [PATCH 0/1] HID: hid_apple is not used for apple alu wireless keyboards Jan Scholz
2008-11-26 14:33 ` [PATCH 1/1] HID: Apple alu wireless keyboards are bluetooth devices Jan Scholz
2008-11-26 14:54   ` Jiri Kosina
2008-11-26 15:17     ` Jan Scholz
2008-11-26 15:33       ` Jiri Kosina
2008-11-26 21:06         ` Tobias Müller
2008-11-27  0:57           ` several messages Jiri Kosina
2008-10-19 14:15 [PATCH 1/2] HID: add hid_type Jiri Slaby
2008-10-19 14:15 ` [PATCH 2/2] HID: fix appletouch regression Jiri Slaby
2008-10-19 19:40   ` several messages Jiri Kosina
2008-10-19 20:06     ` Justin Mattock
2008-10-19 22:09     ` Jiri Slaby
     [not found] <200702211929.17203.david-b@pacbell.net>
2007-02-22  3:50 ` [patch 6/6] rtc suspend()/resume() restores system clock David Brownell
2007-02-22 22:58   ` several messages Guennadi Liakhovetski
2007-02-23  1:15     ` David Brownell
2007-02-23 11:17     ` Johannes Berg
2006-04-11 17:33 Linux 2.6.16.4 Greg KH
2006-04-11 19:04 ` several messages Jan Engelhardt
2006-04-11 19:20   ` Boris B. Zhmurov
2006-04-11 20:30   ` Greg KH
2006-04-11 23:46     ` Jan Engelhardt
2006-04-12  0:36     ` Nix
2005-05-04 17:31 ata over ethernet question Maciej Soltysiak
2005-05-04 19:48 ` David Hollis
2005-05-04 21:17   ` Re[2]: " Maciej Soltysiak
2005-05-05 15:09     ` David Hollis
2005-05-07 15:05       ` Sander
2005-05-10 22:00         ` Guennadi Liakhovetski
2005-05-11  8:56           ` Vladislav Bolkhovitin
2005-05-11 21:26             ` several messages Guennadi Liakhovetski
2005-05-12  2:16               ` Ming Zhang
2005-05-12 18:32                 ` Dmitry Yusupov
2005-05-13  8:12                   ` Christoph Hellwig
2005-05-13 15:04                     ` Dmitry Yusupov
2005-05-13 15:07                       ` Christoph Hellwig
2005-05-13 15:38                         ` Dmitry Yusupov
2005-05-12 10:17               ` Vladislav Bolkhovitin
2004-05-11  8:45 2.6.6-rc3-mm2 (4KSTACK) Helge Hafting
2004-05-11 17:59 ` several messages Bill Davidsen
2003-01-23  0:20 ANN: LKMB (Linux Kernel Module Builder) version 0.1.16 Hal Duston
2003-01-27 16:46 ` several messages Bill Davidsen
2003-01-27 16:59   ` David Woodhouse
