* [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls
@ 2008-08-02  0:14 Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 1 of 8] x86-32: use smp_call_function_mask for SMP TLB invalidations Jeremy Fitzhardinge
                   ` (8 more replies)
  0 siblings, 9 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

Hi Ingo,

Here's the more polished series to:

1. use smp_call_function_mask to implement cross-cpu TLB flushes
2. bring tlb_32/64.c closer together, in preparation for
3. unifying tlb.c
4. add multi-queue capability for smp_call_function
5. implement multiple queues on x86
6. make the number of queues more configurable
7. add a kernel parameter to disable multiple queues in case
   it causes problems

Thanks,
	J



* [PATCH 1 of 8] x86-32: use smp_call_function_mask for SMP TLB invalidations
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 2 of 8] x86-64: " Jeremy Fitzhardinge
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

Now that smp_call_function_mask exists and is scalable, there's no
reason to have a special TLB flush IPI.  This saves a mass of code.
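
For readability, the resulting 32-bit code, in one piece, looks like
this (condensed from the diff below; the irq_tlb_count accounting is
omitted):

	struct tlb_flush {
		struct mm_struct *mm;
		unsigned long va;
	};

	/* IPI callback: flush if this cpu uses the mm, otherwise drop lazy mode */
	static void tlb_invalidate(void *arg)
	{
		struct tlb_flush *flush = arg;
		unsigned long cpu = get_cpu();

		if (flush->mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
			if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
				if (flush->va == TLB_FLUSH_ALL)
					local_flush_tlb();
				else
					__flush_tlb_one(flush->va);
			} else
				leave_mm(cpu);
		}

		put_cpu_no_resched();
	}

	void native_flush_tlb_others(const cpumask_t *cpumaskp,
				     struct mm_struct *mm, unsigned long va)
	{
		struct tlb_flush flush = { .mm = mm, .va = va };

		/* wait=1, so the on-stack argument is valid until all cpus finish */
		smp_call_function_mask(*cpumaskp, tlb_invalidate, &flush, 1);
	}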

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/apic_32.c                 |    3 -
 arch/x86/kernel/tlb_32.c                  |   86 +++++------------------------
 include/asm-x86/irq_vectors.h             |    1
 include/asm-x86/mach-default/entry_arch.h |    1
 4 files changed, 17 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1381,9 +1381,6 @@
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -1,14 +1,12 @@
-#include <linux/spinlock.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
 
 #include <asm/tlbflush.h>
 
 DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
 			____cacheline_aligned = { &init_mm, 0, };
-
-/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
 
 /*
  *	Smarter SMP flushing macros.
@@ -20,10 +18,10 @@
  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
  */
 
-static cpumask_t flush_cpumask;
-static struct mm_struct *flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+struct tlb_flush {
+	struct mm_struct *mm;
+	unsigned long va;
+};
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
@@ -87,37 +85,23 @@
  * 2) Leave the mm if we are in the lazy tlb mode.
  */
 
-void smp_invalidate_interrupt(struct pt_regs *regs)
+static void tlb_invalidate(void *arg)
 {
+	struct tlb_flush *flush = arg;
 	unsigned long cpu;
 
 	cpu = get_cpu();
 
-	if (!cpu_isset(cpu, flush_cpumask))
-		goto out;
-		/*
-		 * This was a BUG() but until someone can quote me the
-		 * line from the intel manual that guarantees an IPI to
-		 * multiple CPUs is retried _only_ on the erroring CPUs
-		 * its staying as a return
-		 *
-		 * BUG();
-		 */
-
-	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+	if (flush->mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
 		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-			if (flush_va == TLB_FLUSH_ALL)
+			if (flush->va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
-				__flush_tlb_one(flush_va);
+				__flush_tlb_one(flush->va);
 		} else
 			leave_mm(cpu);
 	}
-	ack_APIC_irq();
-	smp_mb__before_clear_bit();
-	cpu_clear(cpu, flush_cpumask);
-	smp_mb__after_clear_bit();
-out:
+
 	put_cpu_no_resched();
 	__get_cpu_var(irq_stat).irq_tlb_count++;
 }
@@ -125,48 +109,12 @@
 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 			     unsigned long va)
 {
-	cpumask_t cpumask = *cpumaskp;
+	struct tlb_flush flush = {
+		.mm = mm,
+		.va = va
+	};
 
-	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-	BUG_ON(!mm);
-
-#ifdef CONFIG_HOTPLUG_CPU
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (unlikely(cpus_empty(cpumask)))
-		return;
-#endif
-
-	/*
-	 * i'm not happy about this global shared spinlock in the
-	 * MM hot path, but we'll see how contended it is.
-	 * AK: x86-64 has a faster method that could be ported.
-	 */
-	spin_lock(&tlbstate_lock);
-
-	flush_mm = mm;
-	flush_va = va;
-	cpus_or(flush_cpumask, cpumask, flush_cpumask);
-	/*
-	 * We have to send the IPI only to
-	 * CPUs affected.
-	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
-	while (!cpus_empty(flush_cpumask))
-		/* nothing. lockup detection does not belong here */
-		cpu_relax();
-
-	flush_mm = NULL;
-	flush_va = 0;
-	spin_unlock(&tlbstate_lock);
+	smp_call_function_mask(*cpumaskp, tlb_invalidate, &flush, 1);
 }
 
 void flush_tlb_current_task(void)
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -61,7 +61,6 @@
 
 # define SPURIOUS_APIC_VECTOR		0xff
 # define ERROR_APIC_VECTOR		0xfe
-# define INVALIDATE_TLB_VECTOR		0xfd
 # define RESCHEDULE_VECTOR		0xfc
 # define CALL_FUNCTION_VECTOR		0xfb
 # define CALL_FUNCTION_SINGLE_VECTOR	0xfa
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -11,7 +11,6 @@
  */
 #ifdef CONFIG_X86_SMP
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 #endif




* [PATCH 2 of 8] x86-64: use smp_call_function_mask for SMP TLB invalidations
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 1 of 8] x86-32: use smp_call_function_mask for SMP TLB invalidations Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 3 of 8] x86: make tlb_32|64 closer Jeremy Fitzhardinge
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

Now that smp_call_function_mask exists and is scalable, there's no
reason to have a special TLB flush IPI.  This saves a mass of code.

In the process, I removed a copy of a cpumask_t.  The UV tlb flush
code relies on that copy, so I propagated it down.
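
Concretely, the copy now lives in the UV path itself (condensed from
the tlb_uv.c hunk below):

	int uv_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
				unsigned long va)
	{
		/* the UV code wants a mask it can modify, so take a private copy */
		cpumask_t cpumask = *cpumaskp;
		...
		return uv_flush_send_and_wait(cpu, this_blade, bau_desc, &cpumask);
	}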

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Cliff Wickman <cpw@sgi.com>
---
 arch/x86/kernel/entry_64.S    |   15 ----
 arch/x86/kernel/irqinit_64.c  |   10 ---
 arch/x86/kernel/tlb_64.c      |  125 ++++++-----------------------------------
 arch/x86/kernel/tlb_uv.c      |    5 -
 include/asm-x86/irq_vectors.h |    4 -
 include/asm-x86/uv/uv_bau.h   |    2
 6 files changed, 23 insertions(+), 138 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -869,21 +869,6 @@
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 END(reschedule_interrupt)
 
-	.macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
-	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt	
-END(invalidate_interrupt\num)
-	.endm
-
-	INVALIDATE_ENTRY 0
-	INVALIDATE_ENTRY 1
-	INVALIDATE_ENTRY 2
-	INVALIDATE_ENTRY 3
-	INVALIDATE_ENTRY 4
-	INVALIDATE_ENTRY 5
-	INVALIDATE_ENTRY 6
-	INVALIDATE_ENTRY 7
-
 ENTRY(call_function_interrupt)
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 END(call_function_interrupt)
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -187,16 +187,6 @@
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPIs for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -1,24 +1,16 @@
-#include <linux/init.h>
-
 #include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/interrupt.h>
+#include <linux/module.h>
 
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
+
+/* For UV tlb flush */
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
+#include <asm/genapic.h>	/* for is_uv_system */
 
-#include <mach_ipi.h>
 /*
  *	Smarter SMP flushing macros.
  *		c/o Linus Torvalds.
@@ -27,34 +19,12 @@
  *	writing to user space from interrupts. (Its not allowed anyway).
  *
  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- *	More scalable flush, from Andi Kleen
- *
- *	To avoid global state use 8 different call vectors.
- *	Each CPU uses a specific vector to trigger flushes on other
- *	CPUs. Depending on the received vector the target CPUs look into
- *	the right per cpu variable for the flush data.
- *
- *	With more than 8 CPUs they are hashed to the 8 available
- *	vectors. The limited global vector space forces us to this right now.
- *	In future when interrupts are split into per CPU domains this could be
- *	fixed, at the cost of triggering multiple IPIs in some cases.
  */
 
-union smp_flush_state {
-	struct {
-		cpumask_t flush_cpumask;
-		struct mm_struct *flush_mm;
-		unsigned long flush_va;
-		spinlock_t tlbstate_lock;
-	};
-	char pad[SMP_CACHE_BYTES];
-} ____cacheline_aligned;
-
-/* State is put into the per CPU data section, but padded
-   to a full cache line because other CPUs can access it and we don't
-   want false sharing in the per cpu data segment. */
-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
+struct tlb_flush {
+	struct mm_struct *mm;
+	unsigned long va;
+};
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
@@ -117,95 +87,38 @@
  * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+static void tlb_invalidate(void *arg)
 {
+	struct tlb_flush *f = arg;
 	int cpu;
-	int sender;
-	union smp_flush_state *f;
 
 	cpu = smp_processor_id();
-	/*
-	 * orig_rax contains the negated interrupt vector.
-	 * Use that to determine where the sender put the data.
-	 */
-	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
-	f = &per_cpu(flush_state, sender);
 
-	if (!cpu_isset(cpu, f->flush_cpumask))
-		goto out;
-		/*
-		 * This was a BUG() but until someone can quote me the
-		 * line from the intel manual that guarantees an IPI to
-		 * multiple CPUs is retried _only_ on the erroring CPUs
-		 * its staying as a return
-		 *
-		 * BUG();
-		 */
-
-	if (f->flush_mm == read_pda(active_mm)) {
+	if (f->mm == read_pda(active_mm)) {
 		if (read_pda(mmu_state) == TLBSTATE_OK) {
-			if (f->flush_va == TLB_FLUSH_ALL)
+			if (f->va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
-				__flush_tlb_one(f->flush_va);
+				__flush_tlb_one(f->va);
 		} else
 			leave_mm(cpu);
 	}
-out:
-	ack_APIC_irq();
-	cpu_clear(cpu, f->flush_cpumask);
 	add_pda(irq_tlb_count, 1);
 }
 
 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 			     unsigned long va)
 {
-	int sender;
-	union smp_flush_state *f;
-	cpumask_t cpumask = *cpumaskp;
+	struct tlb_flush flush = {
+		.mm = mm,
+		.va = va,
+	};
 
-	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
+	if (is_uv_system() && uv_flush_tlb_others(cpumaskp, mm, va))
 		return;
 
-	/* Caller has disabled preemption */
-	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
-	f = &per_cpu(flush_state, sender);
-
-	/*
-	 * Could avoid this lock when
-	 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
-	 * probably not worth checking this for a cache-hot lock.
-	 */
-	spin_lock(&f->tlbstate_lock);
-
-	f->flush_mm = mm;
-	f->flush_va = va;
-	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
-
-	/*
-	 * We have to send the IPI only to
-	 * CPUs affected.
-	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
-
-	while (!cpus_empty(f->flush_cpumask))
-		cpu_relax();
-
-	f->flush_mm = NULL;
-	f->flush_va = 0;
-	spin_unlock(&f->tlbstate_lock);
+	smp_call_function_mask(*cpumaskp, tlb_invalidate, &flush, 1);
 }
-
-static int __cpuinit init_smp_flush(void)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
-
-	return 0;
-}
-core_initcall(init_smp_flush);
 
 void flush_tlb_current_task(void)
 {
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -295,7 +295,7 @@
  * Returns 1 if all remote flushing was done.
  * Returns 0 if some remote flushing remains to be done.
  */
-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
+int uv_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 			unsigned long va)
 {
 	int i;
@@ -305,6 +305,7 @@
 	int this_blade;
 	int locals = 0;
 	struct bau_desc *bau_desc;
+	cpumask_t cpumask = *cpumaskp;
 
 	cpu = uv_blade_processor_id();
 	this_blade = uv_numa_blade_id();
@@ -339,7 +340,7 @@
 	bau_desc->payload.address = va;
 	bau_desc->payload.sending_cpu = smp_processor_id();
 
-	return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
+	return uv_flush_send_and_wait(cpu, this_blade, bau_desc, &cpumask);
 }
 
 /*
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -75,10 +75,6 @@
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
 #define THRESHOLD_APIC_VECTOR		0xf9
-#define INVALIDATE_TLB_VECTOR_END	0xf7
-#define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
-
-#define NUM_INVALIDATE_TLB_VECTORS	8
 
 #endif
 
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -330,7 +330,7 @@
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
+extern int uv_flush_tlb_others(const cpumask_t *, struct mm_struct *, unsigned long);
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 




* [PATCH 3 of 8] x86: make tlb_32|64 closer
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 1 of 8] x86-32: use smp_call_function_mask for SMP TLB invalidations Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 2 of 8] x86-64: " Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 4 of 8] x86: unify tlb.c Jeremy Fitzhardinge
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

Bring arch/x86/kernel/tlb_32.c and _64.c into closer alignment, so
that unification is more straightforward.  After this patch, the
remaining differences come down to UV support and the distinction
between percpu and pda variables.
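
For reference, the two flavours of per-cpu MMU state that remain are:

	/* 32-bit: ordinary per-cpu variables */
	per_cpu(cpu_tlbstate, cpu).state	/* TLBSTATE_OK / TLBSTATE_LAZY */
	per_cpu(cpu_tlbstate, cpu).active_mm

	/* 64-bit: PDA fields */
	read_pda(mmu_state)
	read_pda(active_mm)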

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/tlb_32.c |   39 +++++++++++++++++++--------------------
 arch/x86/kernel/tlb_64.c |   12 +++++++-----
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -1,7 +1,7 @@
-#include <linux/cpu.h>
+#include <linux/smp.h>
 #include <linux/interrupt.h>
-#include <linux/smp.h>
 #include <linux/percpu.h>
+#include <linux/module.h>
 
 #include <asm/tlbflush.h>
 
@@ -46,25 +46,25 @@
  * 1) switch_mm() either 1a) or 1b)
  * 1a) thread switch to a different mm
  * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * 	Stop ipi delivery for the old mm. This is not synchronized with
- * 	the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * 	for the wrong mm, and in the worst case we perform a superfluous
- * 	tlb flush.
+ *	Stop ipi delivery for the old mm. This is not synchronized with
+ *	the other cpus, but tlb_invalidate() ignores flush ipis
+ *	for the wrong mm, and in the worst case we perform a superfluous
+ *	tlb flush.
  * 1a2) set cpu_tlbstate to TLBSTATE_OK
  * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
  *	was in lazy tlb mode.
  * 1a3) update cpu_tlbstate[].active_mm
- * 	Now cpu0 accepts tlb flushes for the new mm.
+ *	Now cpu0 accepts tlb flushes for the new mm.
  * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * 	Now the other cpus will send tlb flush ipis.
+ *	Now the other cpus will send tlb flush ipis.
  * 1a4) change cr3.
  * 1b) thread switch without mm change
  *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
  *	flush ipis.
  * 1b1) set cpu_tlbstate to TLBSTATE_OK
  * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * 	Atomically set the bit [other cpus will start sending flush ipis],
- * 	and test the bit.
+ *	Atomically set the bit [other cpus will start sending flush ipis],
+ *	and test the bit.
  * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
  * 2) switch %%esp, ie current
  *
@@ -83,26 +83,27 @@
  *
  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
  * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
  */
 
 static void tlb_invalidate(void *arg)
 {
-	struct tlb_flush *flush = arg;
-	unsigned long cpu;
+	struct tlb_flush *f = arg;
+	int cpu;
 
-	cpu = get_cpu();
+	cpu = smp_processor_id();
 
-	if (flush->mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+	if (f->mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
 		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-			if (flush->va == TLB_FLUSH_ALL)
+			if (f->va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
-				__flush_tlb_one(flush->va);
+				__flush_tlb_one(f->va);
 		} else
 			leave_mm(cpu);
 	}
 
-	put_cpu_no_resched();
 	__get_cpu_var(irq_stat).irq_tlb_count++;
 }
 
@@ -164,7 +165,7 @@
 	if (current->active_mm == mm) {
 		if (current->mm)
 			__flush_tlb_one(va);
-		 else
+		else
 			leave_mm(smp_processor_id());
 	}
 
@@ -173,7 +174,6 @@
 
 	preempt_enable();
 }
-EXPORT_SYMBOL(flush_tlb_page);
 
 static void do_flush_tlb_all(void *info)
 {
@@ -188,4 +188,3 @@
 {
 	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
-
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -1,10 +1,8 @@
-#include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 
 #include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
 
 /* For UV tlb flush */
 #include <asm/uv/uv_hub.h>
@@ -29,6 +27,9 @@
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
  */
 void leave_mm(int cpu)
 {
@@ -47,7 +48,7 @@
  * 1a) thread switch to a different mm
  * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
  *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but smp_invalidate_interrupt ignore flush ipis
+ *	the other cpus, but tlb_invalidate() ignores flush ipis
  *	for the wrong mm, and in the worst case we perform a superfluous
  *	tlb flush.
  * 1a2) set cpu mmu_state to TLBSTATE_OK
@@ -103,6 +104,7 @@
 		} else
 			leave_mm(cpu);
 	}
+
 	add_pda(irq_tlb_count, 1);
 }
 
@@ -111,7 +113,7 @@
 {
 	struct tlb_flush flush = {
 		.mm = mm,
-		.va = va,
+		.va = va
 	};
 
 	if (is_uv_system() && uv_flush_tlb_others(cpumaskp, mm, va))




* [PATCH 4 of 8] x86: unify tlb.c
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
                   ` (2 preceding siblings ...)
  2008-08-02  0:14 ` [PATCH 3 of 8] x86: make tlb_32|64 closer Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 5 of 8] smp_function_call: add multiple queues for scalability Jeremy Fitzhardinge
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

arch/x86/kernel/tlb_*.c are functionally identical, so unify them.
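
The residual 32/64 difference (percpu vs. pda state) is hidden behind
a few trivial accessors in the new tlb.c (condensed from the diff
below):

	#ifdef CONFIG_X86_32
	static inline short get_mmu_state(void)
	{
		return __get_cpu_var(cpu_tlbstate).state;
	}

	static inline struct mm_struct *get_active_mm(void)
	{
		return __get_cpu_var(cpu_tlbstate).active_mm;
	}
	#else
	static inline short get_mmu_state(void)
	{
		return read_pda(mmu_state);
	}

	static inline struct mm_struct *get_active_mm(void)
	{
		return read_pda(active_mm);
	}
	#endif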

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/kernel/Makefile |    2
 arch/x86/kernel/tlb.c    |  229 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/tlb_32.c |  190 --------------------------------------
 arch/x86/kernel/tlb_64.c |  194 --------------------------------------
 4 files changed, 230 insertions(+), 385 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -59,7 +59,7 @@
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp.o
-obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb.o
 obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
diff --git a/arch/x86/kernel/tlb.c b/arch/x86/kernel/tlb.c
new file mode 100644
--- /dev/null
+++ b/arch/x86/kernel/tlb.c
@@ -0,0 +1,229 @@
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/tlbflush.h>
+
+/* For UV tlb flush */
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_bau.h>
+#include <asm/genapic.h>	/* for is_uv_system */
+
+/*
+ *	Smarter SMP flushing macros.
+ *		c/o Linus Torvalds.
+ *
+ *	These mean you can really definitely utterly forget about
+ *	writing to user space from interrupts. (Its not allowed anyway).
+ *
+ *	Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+#ifdef CONFIG_X86_32
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
+			____cacheline_aligned = { &init_mm, 0, };
+
+static inline short get_mmu_state(void)
+{
+	return __get_cpu_var(cpu_tlbstate).state;
+}
+
+static inline struct mm_struct *get_active_mm(void)
+{
+	return __get_cpu_var(cpu_tlbstate).active_mm;
+}
+
+static inline void inc_irq_count(void)
+{
+	__get_cpu_var(irq_stat).irq_tlb_count++;
+}
+#else
+static inline short get_mmu_state(void)
+{
+	return read_pda(mmu_state);
+}
+
+static inline struct mm_struct *get_active_mm(void)
+{
+	return read_pda(active_mm);
+}
+
+static inline void inc_irq_count(void)
+{
+	add_pda(irq_tlb_count, 1);
+}
+#endif
+
+struct tlb_flush {
+	struct mm_struct *mm;
+	unsigned long va;
+};
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+void leave_mm(int cpu)
+{
+	if (get_mmu_state() == TLBSTATE_OK)
+		BUG();
+	cpu_clear(cpu, get_active_mm()->cpu_vm_mask);
+	load_cr3(swapper_pg_dir);
+}
+EXPORT_SYMBOL_GPL(leave_mm);
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ *	Stop ipi delivery for the old mm. This is not synchronized with
+ *	the other cpus, but tlb_invalidate() ignores flush ipis
+ *	for the wrong mm, and in the worst case we perform a superfluous
+ *	tlb flush.
+ * 1a2) set cpu mmu_state to TLBSTATE_OK
+ *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *	was in lazy tlb mode.
+ * 1a3) update cpu active_mm
+ *	Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ *	Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ *	cpu active_mm is correct, cpu0 already handles
+ *	flush ipis.
+ * 1b1) set cpu mmu_state to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ *	Atomically set the bit [other cpus will start sending flush ipis],
+ *	and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu mmu_state is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
+ */
+
+static void tlb_invalidate(void *arg)
+{
+	struct tlb_flush *f = arg;
+	int cpu;
+
+	cpu = smp_processor_id();
+
+	if (f->mm == get_active_mm()) {
+		if (get_mmu_state() == TLBSTATE_OK) {
+			if (f->va == TLB_FLUSH_ALL)
+				local_flush_tlb();
+			else
+				__flush_tlb_one(f->va);
+		} else
+			leave_mm(cpu);
+	}
+
+	inc_irq_count();
+}
+
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+			     unsigned long va)
+{
+	struct tlb_flush flush = {
+		.mm = mm,
+		.va = va
+	};
+
+	if (is_uv_system() && uv_flush_tlb_others(cpumaskp, mm, va))
+		return;
+
+	smp_call_function_mask(*cpumaskp, tlb_invalidate, &flush, 1);
+}
+
+void flush_tlb_current_task(void)
+{
+	struct mm_struct *mm = current->mm;
+	cpumask_t cpu_mask;
+
+	preempt_disable();
+	cpu_mask = mm->cpu_vm_mask;
+	cpu_clear(smp_processor_id(), cpu_mask);
+
+	local_flush_tlb();
+	if (!cpus_empty(cpu_mask))
+		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	preempt_enable();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	cpumask_t cpu_mask;
+
+	preempt_disable();
+	cpu_mask = mm->cpu_vm_mask;
+	cpu_clear(smp_processor_id(), cpu_mask);
+
+	if (current->active_mm == mm) {
+		if (current->mm)
+			local_flush_tlb();
+		else
+			leave_mm(smp_processor_id());
+	}
+	if (!cpus_empty(cpu_mask))
+		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+
+	preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	cpumask_t cpu_mask;
+
+	preempt_disable();
+	cpu_mask = mm->cpu_vm_mask;
+	cpu_clear(smp_processor_id(), cpu_mask);
+
+	if (current->active_mm == mm) {
+		if (current->mm)
+			__flush_tlb_one(va);
+		else
+			leave_mm(smp_processor_id());
+	}
+
+	if (!cpus_empty(cpu_mask))
+		flush_tlb_others(cpu_mask, mm, va);
+
+	preempt_enable();
+}
+
+static void do_flush_tlb_all(void *info)
+{
+	unsigned long cpu = smp_processor_id();
+
+	__flush_tlb_all();
+	if (get_mmu_state() == TLBSTATE_LAZY)
+		leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu(do_flush_tlb_all, NULL, 1);
+}
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <linux/module.h>
-
-#include <asm/tlbflush.h>
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
-			____cacheline_aligned = { &init_mm, 0, };
-
-/*
- *	Smarter SMP flushing macros.
- *		c/o Linus Torvalds.
- *
- *	These mean you can really definitely utterly forget about
- *	writing to user space from interrupts. (Its not allowed anyway).
- *
- *	Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-struct tlb_flush {
-	struct mm_struct *mm;
-	unsigned long va;
-};
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
-	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-		BUG();
-	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but tlb_invalidate() ignores flush ipis
- *	for the wrong mm, and in the worst case we perform a superfluous
- *	tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *	was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- *	Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- *	Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *	flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *	Atomically set the bit [other cpus will start sending flush ipis],
- *	and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-static void tlb_invalidate(void *arg)
-{
-	struct tlb_flush *f = arg;
-	int cpu;
-
-	cpu = smp_processor_id();
-
-	if (f->mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
-		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-			if (f->va == TLB_FLUSH_ALL)
-				local_flush_tlb();
-			else
-				__flush_tlb_one(f->va);
-		} else
-			leave_mm(cpu);
-	}
-
-	__get_cpu_var(irq_stat).irq_tlb_count++;
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-			     unsigned long va)
-{
-	struct tlb_flush flush = {
-		.mm = mm,
-		.va = va
-	};
-
-	smp_call_function_mask(*cpumaskp, tlb_invalidate, &flush, 1);
-}
-
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-	preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			__flush_tlb_one(va);
-		else
-			leave_mm(smp_processor_id());
-	}
-
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, va);
-
-	preempt_enable();
-}
-
-static void do_flush_tlb_all(void *info)
-{
-	unsigned long cpu = smp_processor_id();
-
-	__flush_tlb_all();
-	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
-		leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-	on_each_cpu(do_flush_tlb_all, NULL, 1);
-}
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
deleted file mode 100644
--- a/arch/x86/kernel/tlb_64.c
+++ /dev/null
@@ -1,194 +0,0 @@
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#include <asm/tlbflush.h>
-
-/* For UV tlb flush */
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
-#include <asm/genapic.h>	/* for is_uv_system */
-
-/*
- *	Smarter SMP flushing macros.
- *		c/o Linus Torvalds.
- *
- *	These mean you can really definitely utterly forget about
- *	writing to user space from interrupts. (Its not allowed anyway).
- *
- *	Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-struct tlb_flush {
-	struct mm_struct *mm;
-	unsigned long va;
-};
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
-	if (read_pda(mmu_state) == TLBSTATE_OK)
-		BUG();
-	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but tlb_invalidate() ignores flush ipis
- *	for the wrong mm, and in the worst case we perform a superfluous
- *	tlb flush.
- * 1a2) set cpu mmu_state to TLBSTATE_OK
- *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *	was in lazy tlb mode.
- * 1a3) update cpu active_mm
- *	Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- *	Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *	cpu active_mm is correct, cpu0 already handles
- *	flush ipis.
- * 1b1) set cpu mmu_state to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *	Atomically set the bit [other cpus will start sending flush ipis],
- *	and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu mmu_state is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-static void tlb_invalidate(void *arg)
-{
-	struct tlb_flush *f = arg;
-	int cpu;
-
-	cpu = smp_processor_id();
-
-	if (f->mm == read_pda(active_mm)) {
-		if (read_pda(mmu_state) == TLBSTATE_OK) {
-			if (f->va == TLB_FLUSH_ALL)
-				local_flush_tlb();
-			else
-				__flush_tlb_one(f->va);
-		} else
-			leave_mm(cpu);
-	}
-
-	add_pda(irq_tlb_count, 1);
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-			     unsigned long va)
-{
-	struct tlb_flush flush = {
-		.mm = mm,
-		.va = va
-	};
-
-	if (is_uv_system() && uv_flush_tlb_others(cpumaskp, mm, va))
-		return;
-
-	smp_call_function_mask(*cpumaskp, tlb_invalidate, &flush, 1);
-}
-
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-	preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			__flush_tlb_one(va);
-		else
-			leave_mm(smp_processor_id());
-	}
-
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, va);
-
-	preempt_enable();
-}
-
-static void do_flush_tlb_all(void *info)
-{
-	unsigned long cpu = smp_processor_id();
-
-	__flush_tlb_all();
-	if (read_pda(mmu_state) == TLBSTATE_LAZY)
-		leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-	on_each_cpu(do_flush_tlb_all, NULL, 1);
-}




* [PATCH 5 of 8] smp_function_call: add multiple queues for scalability
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
                   ` (3 preceding siblings ...)
  2008-08-02  0:14 ` [PATCH 4 of 8] x86: unify tlb.c Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 6 of 8] x86: add multiple smp_call_function queues Jeremy Fitzhardinge
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

This patch allows an architecture to have multiple smp_call_function
queues, in order to reduce contention on a single queue and lock.  By
default there is still one queue, so this patch makes no functional
change.

However, an architecture can set CONFIG_GENERIC_SMP_QUEUES to enable a
given number of queues.  It is then expected to set up an IPI vector
for each queue, and to pass the appropriate queue number into
generic_smp_call_function_interrupt().
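
Condensed from the kernel/smp.c changes below: the single global list
and lock become an array of queues, and the interrupt handler is told
which queue its vector corresponds to.

	/* one list+lock per queue instead of a single global pair;
	   NQUEUES is CONFIG_GENERIC_SMP_QUEUES, or 1 if unset */
	struct queue {
		struct list_head list;
		spinlock_t lock;
	};
	static struct queue call_function_queues[NQUEUES];

	void generic_smp_call_function_interrupt(unsigned queue_no)
	{
		struct queue *queue = &call_function_queues[queue_no];

		/* ... walk queue->list under RCU exactly as before ... */
	}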

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/alpha/kernel/smp.c             |    2 -
 arch/arm/kernel/smp.c               |    2 -
 arch/ia64/kernel/smp.c              |    2 -
 arch/m32r/kernel/smp.c              |    2 -
 arch/mips/kernel/smp.c              |    2 -
 arch/parisc/kernel/smp.c            |    2 -
 arch/powerpc/kernel/smp.c           |    2 -
 arch/sparc64/kernel/smp.c           |    2 -
 arch/x86/kernel/smp.c               |    2 -
 arch/x86/mach-voyager/voyager_smp.c |    2 -
 arch/x86/xen/smp.c                  |    2 -
 include/linux/smp.h                 |    2 -
 kernel/smp.c                        |   70 ++++++++++++++++++++++++++++-------
 13 files changed, 69 insertions(+), 25 deletions(-)

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -588,7 +588,7 @@
 			break;
 
 		case IPI_CALL_FUNC:
-			generic_smp_call_function_interrupt();
+			generic_smp_call_function_interrupt(0);
 			break;
 
 		case IPI_CALL_FUNC_SINGLE:
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -481,7 +481,7 @@
 				break;
 
 			case IPI_CALL_FUNC:
-				generic_smp_call_function_interrupt();
+				generic_smp_call_function_interrupt(0);
 				break;
 
 			case IPI_CALL_FUNC_SINGLE:
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -114,7 +114,7 @@
 				stop_this_cpu();
 				break;
 			case IPI_CALL_FUNC:
-				generic_smp_call_function_interrupt();
+				generic_smp_call_function_interrupt(0);
 				break;
 			case IPI_CALL_FUNC_SINGLE:
 				generic_smp_call_function_single_interrupt();
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -576,7 +576,7 @@
 void smp_call_function_interrupt(void)
 {
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 	irq_exit();
 }
 
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -151,7 +151,7 @@
 {
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 	irq_exit();
 }
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -179,7 +179,7 @@
 
 			case IPI_CALL_FUNC:
 				smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC\n", this_cpu);
-				generic_smp_call_function_interrupt();
+				generic_smp_call_function_interrupt(0);
 				break;
 
 			case IPI_CALL_FUNC_SINGLE:
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -98,7 +98,7 @@
 {
 	switch(msg) {
 	case PPC_MSG_CALL_FUNCTION:
-		generic_smp_call_function_interrupt();
+		generic_smp_call_function_interrupt(0);
 		break;
 	case PPC_MSG_RESCHEDULE:
 		/* XXX Do we have to do this? */
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -811,7 +811,7 @@
 void smp_call_function_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 }
 
 void smp_call_function_single_client(int irq, struct pt_regs *regs)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -189,7 +189,7 @@
 {
 	ack_APIC_irq();
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -957,7 +957,7 @@
 static void smp_call_function_interrupt(void)
 {
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -391,7 +391,7 @@
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
diff --git a/include/linux/smp.h b/include/linux/smp.h
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -73,7 +73,7 @@
  */
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 void generic_smp_call_function_single_interrupt(void);
-void generic_smp_call_function_interrupt(void);
+void generic_smp_call_function_interrupt(unsigned queue);
 void ipi_call_lock(void);
 void ipi_call_unlock(void);
 void ipi_call_lock_irq(void);
diff --git a/kernel/smp.c b/kernel/smp.c
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -11,9 +11,19 @@
 #include <linux/rculist.h>
 #include <linux/smp.h>
 
+#ifdef CONFIG_GENERIC_SMP_QUEUES
+#define	NQUEUES	CONFIG_GENERIC_SMP_QUEUES
+#else
+#define	NQUEUES	1
+#endif
+
 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
-static LIST_HEAD(call_function_queue);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+struct ____cacheline_aligned queue {
+	struct list_head list;
+	spinlock_t lock;
+};
+
+static __cacheline_aligned struct queue call_function_queues[NQUEUES];
 
 enum {
 	CSD_FLAG_WAIT		= 0x01,
@@ -96,17 +106,20 @@
  * Invoked by arch to handle an IPI for call function. Must be called with
  * interrupts disabled.
  */
-void generic_smp_call_function_interrupt(void)
+void generic_smp_call_function_interrupt(unsigned queue_no)
 {
 	struct call_function_data *data;
+	struct queue *queue;
 	int cpu = get_cpu();
+
+	queue = &call_function_queues[queue_no];
 
 	/*
 	 * It's ok to use list_for_each_rcu() here even though we may delete
 	 * 'pos', since list_del_rcu() doesn't clear ->next
 	 */
 	rcu_read_lock();
-	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+	list_for_each_entry_rcu(data, &queue->list, csd.list) {
 		int refs;
 
 		if (!cpu_isset(cpu, data->cpumask))
@@ -124,9 +137,9 @@
 		if (refs)
 			continue;
 
-		spin_lock(&call_function_lock);
+		spin_lock(&queue->lock);
 		list_del_rcu(&data->csd.list);
-		spin_unlock(&call_function_lock);
+		spin_unlock(&queue->lock);
 
 		if (data->csd.flags & CSD_FLAG_WAIT) {
 			/*
@@ -285,6 +298,8 @@
 	cpumask_t allbutself;
 	unsigned long flags;
 	int cpu, num_cpus;
+	unsigned queue_no;
+	struct queue *queue;
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
@@ -294,6 +309,9 @@
 	cpu_clear(cpu, allbutself);
 	cpus_and(mask, mask, allbutself);
 	num_cpus = cpus_weight(mask);
+
+	queue_no = cpu % NQUEUES;
+	queue = &call_function_queues[queue_no];
 
 	/*
 	 * If zero CPUs, return. If just a single CPU, turn this request
@@ -323,9 +341,9 @@
 	data->refs = num_cpus;
 	data->cpumask = mask;
 
-	spin_lock_irqsave(&call_function_lock, flags);
-	list_add_tail_rcu(&data->csd.list, &call_function_queue);
-	spin_unlock_irqrestore(&call_function_lock, flags);
+	spin_lock_irqsave(&queue->lock, flags);
+	list_add_tail_rcu(&data->csd.list, &queue->list);
+	spin_unlock_irqrestore(&queue->lock, flags);
 
 	/* Send a message to all CPUs in the map */
 	arch_send_call_function_ipi(mask);
@@ -366,20 +384,46 @@
 
 void ipi_call_lock(void)
 {
-	spin_lock(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_lock(&call_function_queues[i].lock);
 }
 
 void ipi_call_unlock(void)
 {
-	spin_unlock(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_unlock(&call_function_queues[i].lock);
 }
 
 void ipi_call_lock_irq(void)
 {
-	spin_lock_irq(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_lock_irq(&call_function_queues[i].lock);
 }
 
 void ipi_call_unlock_irq(void)
 {
-	spin_unlock_irq(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_unlock_irq(&call_function_queues[i].lock);
 }
+
+
+static int __init init_smp_function_call(void)
+{
+	int i;
+
+	for(i = 0; i < NQUEUES; i++) {
+		INIT_LIST_HEAD(&call_function_queues[i].list);
+		spin_lock_init(&call_function_queues[i].lock);
+	}
+
+	return 0;
+}
+early_initcall(init_smp_function_call);




* [PATCH 6 of 8] x86: add multiple smp_call_function queues
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
                   ` (4 preceding siblings ...)
  2008-08-02  0:14 ` [PATCH 5 of 8] smp_function_call: add multiple queues for scalability Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 7 of 8] x86: make number of smp_call_function queues truly configurable Jeremy Fitzhardinge
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

This adds 8 queues for smp_call_function(), in order to avoid a
bottleneck on a single global lock and list for function calls.  When
initiating a function call, the sender chooses a queue based on its
own processor id (if there are more than 8 processors, they hash down
to 8 queues).  It then sends an IPI to the corresponding vector for
that queue to each target CPU.  The target CPUs use the vector number
to determine which queue they should scan for work.

This should give smp_call_function the same performance
characteristics as the original x86-64 cross-cpu tlb flush code, which
used the same scheme.
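
Condensed from the hunks below, the mapping between cpu, queue and
vector is:

	/* sender (arch/x86/kernel/smp.c): hash the sending cpu onto a vector */
	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
	send_IPI_mask(mask, CALL_FUNCTION_VECTOR_START + queue);

	/* receiver: orig_ax holds the negated vector, so the queue falls out */
	queue = ~regs->orig_ax - CALL_FUNCTION_VECTOR_START;
	generic_smp_call_function_interrupt(queue);

For example, CPU 13 uses queue 13 % 8 = 5, which is vector 0xf5 on
64-bit.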

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/Kconfig                          |    4 ++++
 arch/x86/kernel/apic_32.c                 |   11 ++++++++++-
 arch/x86/kernel/entry_32.S                |   25 ++++++++++++++-----------
 arch/x86/kernel/entry_64.S                |   19 ++++++++++++++++---
 arch/x86/kernel/irqinit_64.c              |   11 ++++++++++-
 arch/x86/kernel/smp.c                     |   10 +++++++---
 arch/x86/xen/smp.c                        |   27 ++++++++++++++++++++++++++-
 include/asm-x86/hw_irq.h                  |   20 +++++++++-----------
 include/asm-x86/irq_vectors.h             |    6 ++++--
 include/asm-x86/mach-default/entry_arch.h |   16 +++++++++++++++-
 10 files changed, 115 insertions(+), 34 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -178,6 +178,10 @@
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
 	select USE_GENERIC_SMP_HELPERS
 	default y
+
+config GENERIC_SMP_QUEUES
+       int
+       default "8"
 
 config X86_32_SMP
 	def_bool y
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1382,7 +1382,16 @@
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES !=
+		     (CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1));
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+0, call_function_interrupt0);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+1, call_function_interrupt1);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+2, call_function_interrupt2);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+3, call_function_interrupt3);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+4, call_function_interrupt4);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+5, call_function_interrupt5);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+6, call_function_interrupt6);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+7, call_function_interrupt7);
 
 	/* IPI for single call function */
 	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -688,18 +688,21 @@
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
+#define __BUILD_INTERRUPT(name, func, nr)	\
+ENTRY(name)					\
+	RING0_INT_FRAME;			\
+	pushl $~(nr);				\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	SAVE_ALL;				\
+	TRACE_IRQS_OFF				\
+	movl %esp,%eax;				\
+	call func;				\
+	jmp ret_from_intr;			\
+	CFI_ENDPROC;				\
+ENDPROC(name)
+
 #define BUILD_INTERRUPT(name, nr)	\
-ENTRY(name)				\
-	RING0_INT_FRAME;		\
-	pushl $~(nr);			\
-	CFI_ADJUST_CFA_OFFSET 4;	\
-	SAVE_ALL;			\
-	TRACE_IRQS_OFF			\
-	movl %esp,%eax;			\
-	call smp_##name;		\
-	jmp ret_from_intr;		\
-	CFI_ENDPROC;			\
-ENDPROC(name)
+	__BUILD_INTERRUPT(name, smp_##name, nr)
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -869,9 +869,22 @@
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 END(reschedule_interrupt)
 
-ENTRY(call_function_interrupt)
-	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
+
+	.macro CALLFUNCTION_ENTRY num
+ENTRY(call_function_interrupt\num)
+	apicinterrupt CALL_FUNCTION_VECTOR_START+\num,smp_call_function_interrupt
+END(call_function_interrupt\num)
+	.endm
+
+	CALLFUNCTION_ENTRY 0
+	CALLFUNCTION_ENTRY 1
+	CALLFUNCTION_ENTRY 2
+	CALLFUNCTION_ENTRY 3
+	CALLFUNCTION_ENTRY 4
+	CALLFUNCTION_ENTRY 5
+	CALLFUNCTION_ENTRY 6
+	CALLFUNCTION_ENTRY 7
+
 ENTRY(call_function_single_interrupt)
 	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
 END(call_function_single_interrupt)
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -188,7 +188,16 @@
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES !=
+		     (CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1));
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+0, call_function_interrupt0);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+1, call_function_interrupt1);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+2, call_function_interrupt2);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+3, call_function_interrupt3);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+4, call_function_interrupt4);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+5, call_function_interrupt5);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+6, call_function_interrupt6);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+7, call_function_interrupt7);
 
 	/* IPI for generic single function call */
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -129,15 +129,16 @@
 void native_send_call_func_ipi(cpumask_t mask)
 {
 	cpumask_t allbutself;
+	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
 	if (cpus_equal(mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
-		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+		send_IPI_allbutself(CALL_FUNCTION_VECTOR_START + queue);
 	else
-		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR_START + queue);
 }
 
 static void stop_this_cpu(void *dummy)
@@ -187,9 +188,12 @@
 
 void smp_call_function_interrupt(struct pt_regs *regs)
 {
+	unsigned queue;
+
 	ack_APIC_irq();
 	irq_enter();
-	generic_smp_call_function_interrupt(0);
+	queue = ~regs->orig_ax - CALL_FUNCTION_VECTOR_START;
+	generic_smp_call_function_interrupt(queue);
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -41,6 +41,8 @@
 static DEFINE_PER_CPU(int, callfunc_irq);
 static DEFINE_PER_CPU(int, callfuncsingle_irq);
 static DEFINE_PER_CPU(int, debug_irq) = -1;
+
+static DEFINE_PER_CPU(unsigned, callfunc_queue);
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -371,6 +373,21 @@
 static void xen_smp_send_call_function_ipi(cpumask_t mask)
 {
 	int cpu;
+	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
+
+	/*
+	 * We can't afford to allocate N callfunc vectors * M cpu
+	 * interrupts, so we just need to fake it for now.  We can fix
+	 * this when we integrate event channels at the vector level.
+	 * For now, we just leave a hint for the target cpus for which
+	 * queue to start on, but they still need to search them all.
+	 * (Which is not really much worse than having a single
+	 * queue.)
+	 */
+	for_each_cpu_mask_nr(cpu, mask)
+		per_cpu(callfunc_queue, cpu) = queue;
+
+	wmb();			/* set queues before sending interrupt */
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
@@ -390,8 +407,16 @@
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
+	unsigned start_queue = __get_cpu_var(callfunc_queue);
+	unsigned queue;
+
 	irq_enter();
-	generic_smp_call_function_interrupt(0);
+	queue = start_queue;
+	do {
+		generic_smp_call_function_interrupt(queue);
+		queue = (queue + 1) % CONFIG_GENERIC_SMP_QUEUES;
+	} while(queue != start_queue);
+
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -34,20 +34,18 @@
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
 
-extern void invalidate_interrupt(void);
-extern void invalidate_interrupt0(void);
-extern void invalidate_interrupt1(void);
-extern void invalidate_interrupt2(void);
-extern void invalidate_interrupt3(void);
-extern void invalidate_interrupt4(void);
-extern void invalidate_interrupt5(void);
-extern void invalidate_interrupt6(void);
-extern void invalidate_interrupt7(void);
-
 extern void irq_move_cleanup_interrupt(void);
 extern void threshold_interrupt(void);
 
-extern void call_function_interrupt(void);
+extern void call_function_interrupt0(void);
+extern void call_function_interrupt1(void);
+extern void call_function_interrupt2(void);
+extern void call_function_interrupt3(void);
+extern void call_function_interrupt4(void);
+extern void call_function_interrupt5(void);
+extern void call_function_interrupt6(void);
+extern void call_function_interrupt7(void);
+
 extern void call_function_single_interrupt(void);
 
 /* PIC specific functions */
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -62,8 +62,9 @@
 # define SPURIOUS_APIC_VECTOR		0xff
 # define ERROR_APIC_VECTOR		0xfe
 # define RESCHEDULE_VECTOR		0xfc
-# define CALL_FUNCTION_VECTOR		0xfb
 # define CALL_FUNCTION_SINGLE_VECTOR	0xfa
+# define CALL_FUNCTION_VECTOR_END	0xf8
+# define CALL_FUNCTION_VECTOR_START	0xf1 /* f1-f8 multiple callfunction queues */
 # define THERMAL_APIC_VECTOR		0xf0
 
 #else
@@ -71,10 +72,11 @@
 #define SPURIOUS_APIC_VECTOR		0xff
 #define ERROR_APIC_VECTOR		0xfe
 #define RESCHEDULE_VECTOR		0xfd
-#define CALL_FUNCTION_VECTOR		0xfc
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
 #define THRESHOLD_APIC_VECTOR		0xf9
+#define CALL_FUNCTION_VECTOR_END	0xf7
+#define CALL_FUNCTION_VECTOR_START	0xf0 /* f0-f7 multiple callfunction queues */
 
 #endif
 
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -11,8 +11,22 @@
  */
 #ifdef CONFIG_X86_SMP
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+
+#define BUILD_CALLFUNCTION(n)					\
+	__BUILD_INTERRUPT(call_function_interrupt##n,		\
+			  smp_call_function_interrupt,		\
+			  CALL_FUNCTION_VECTOR_START + n)
+BUILD_CALLFUNCTION(0)
+BUILD_CALLFUNCTION(1)
+BUILD_CALLFUNCTION(2)
+BUILD_CALLFUNCTION(3)
+BUILD_CALLFUNCTION(4)
+BUILD_CALLFUNCTION(5)
+BUILD_CALLFUNCTION(6)
+BUILD_CALLFUNCTION(7)
+
+#undef BUILD_CALLFUNCTION
 #endif
 
 /*



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 7 of 8] x86: make number of smp_call_function queues truly configurable
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
                   ` (5 preceding siblings ...)
  2008-08-02  0:14 ` [PATCH 6 of 8] x86: add multiple smp_call_function queues Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-02  0:14 ` [PATCH 8 of 8] smp function calls: add kernel parameter to disable multiple queues Jeremy Fitzhardinge
  2008-08-06  3:22 ` [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Andi Kleen
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

Previously, the config option GENERIC_SMP_QUEUES was never actually
adjustable because a moderate amount of static boilerplate code had to
match.

This patch makes it truly adjustable, up to the number of IPI vectors
we statically preallocate.  The various IPI handlers are generated by
looping assembler macros, and their addresses are collected into an
array that the C code walks to actually set the vectors up.

The number of queues is never more than NR_CPUS, and defaults to
NR_CPUS.  Ideally this would be a runtime value based on the number of
possible cpus, rather than a static compile-time option.
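
In outline, the C side just walks the assembler-generated table; a
minimal sketch of the pattern (the identifiers match the ones used in
the hunks below, only the wrapper function name is illustrative):

/* entry stubs emitted by the .rept loops in entry_64.S / entry_arch.h */
extern void *callfunction_interrupts[CONFIG_GENERIC_SMP_QUEUES];

static void __init setup_callfunction_vectors(void)
{
        int i;

        /* the configured queue count must fit in the reserved vector range */
        BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES > NUM_CALL_FUNCTION_VECTORS);

        for (i = 0; i < CONFIG_GENERIC_SMP_QUEUES; i++)
                alloc_intr_gate(CALL_FUNCTION_VECTOR_START + i,
                                callfunction_interrupts[i]);
}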

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/Kconfig                          |    8 +++++++-
 arch/x86/kernel/apic_32.c                 |   18 ++++++++----------
 arch/x86/kernel/entry_32.S                |   12 ++++++------
 arch/x86/kernel/entry_64.S                |   29 ++++++++++++++++-------------
 arch/x86/kernel/irqinit_64.c              |   16 ++++++----------
 include/asm-x86/hw_irq.h                  |    9 +--------
 include/asm-x86/irq_vectors.h             |    3 +++
 include/asm-x86/mach-default/entry_arch.h |   29 ++++++++++++++++-------------
 8 files changed, 63 insertions(+), 61 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -179,9 +179,15 @@
 	select USE_GENERIC_SMP_HELPERS
 	default y
 
+config MAX_GENERIC_SMP_QUEUES
+       int
+       default 8
+
 config GENERIC_SMP_QUEUES
        int
-       default "8"
+       range 1 NR_CPUS if NR_CPUS <= MAX_GENERIC_SMP_QUEUES
+       range 1 MAX_GENERIC_SMP_QUEUES
+       default NR_CPUS
 
 config X86_32_SMP
 	def_bool y
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1369,6 +1369,8 @@
 #ifdef CONFIG_SMP
 void __init smp_intr_init(void)
 {
+	int i;
+
 	/*
 	 * IRQ0 must be given a fixed assignment and initialized,
 	 * because it's used before the IO-APIC is set up.
@@ -1382,16 +1384,12 @@
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for generic function call */
-	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES !=
-		     (CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1));
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+0, call_function_interrupt0);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+1, call_function_interrupt1);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+2, call_function_interrupt2);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+3, call_function_interrupt3);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+4, call_function_interrupt4);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+5, call_function_interrupt5);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+6, call_function_interrupt6);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+7, call_function_interrupt7);
+	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES > NUM_CALL_FUNCTION_VECTORS);
+	BUILD_BUG_ON(CONFIG_MAX_GENERIC_SMP_QUEUES != NUM_CALL_FUNCTION_VECTORS);
+
+	for(i = 0; i < CONFIG_GENERIC_SMP_QUEUES; i++)
+		alloc_intr_gate(CALL_FUNCTION_VECTOR_START+i,
+				callfunction_interrupts[i]);
 
 	/* IPI for single call function */
 	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -688,8 +688,7 @@
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define __BUILD_INTERRUPT(name, func, nr)	\
-ENTRY(name)					\
+#define __BUILD_INTERRUPT(func, nr)		\
 	RING0_INT_FRAME;			\
 	pushl $~(nr);				\
 	CFI_ADJUST_CFA_OFFSET 4;		\
@@ -698,11 +697,12 @@
 	movl %esp,%eax;				\
 	call func;				\
 	jmp ret_from_intr;			\
-	CFI_ENDPROC;				\
+	CFI_ENDPROC;
+
+#define BUILD_INTERRUPT(name, nr)		\
+ENTRY(name)					\
+	__BUILD_INTERRUPT(smp_##name, nr)	\
 ENDPROC(name)
-
-#define BUILD_INTERRUPT(name, nr)	\
-	__BUILD_INTERRUPT(name, smp_##name, nr)
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -870,20 +870,23 @@
 END(reschedule_interrupt)
 
 
-	.macro CALLFUNCTION_ENTRY num
-ENTRY(call_function_interrupt\num)
-	apicinterrupt CALL_FUNCTION_VECTOR_START+\num,smp_call_function_interrupt
-END(call_function_interrupt\num)
-	.endm
+.globl callfunction_interrupts
+.pushsection .rodata,"a"
+callfunction_interrupts:
+.popsection
 
-	CALLFUNCTION_ENTRY 0
-	CALLFUNCTION_ENTRY 1
-	CALLFUNCTION_ENTRY 2
-	CALLFUNCTION_ENTRY 3
-	CALLFUNCTION_ENTRY 4
-	CALLFUNCTION_ENTRY 5
-	CALLFUNCTION_ENTRY 6
-	CALLFUNCTION_ENTRY 7
+vec=0
+.rept CONFIG_GENERIC_SMP_QUEUES
+	ALIGN
+993:	apicinterrupt CALL_FUNCTION_VECTOR_START+vec, smp_call_function_interrupt
+.pushsection .rodata,"a"
+	.quad 993b
+.popsection
+	vec = vec+1
+.endr
+.pushsection .rodata,"a"
+	.size callfunction_interrupts, . - callfunction_interrupts
+.popsection
 
 ENTRY(call_function_single_interrupt)
 	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -188,16 +188,12 @@
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for generic function call */
-	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES !=
-		     (CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1));
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+0, call_function_interrupt0);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+1, call_function_interrupt1);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+2, call_function_interrupt2);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+3, call_function_interrupt3);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+4, call_function_interrupt4);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+5, call_function_interrupt5);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+6, call_function_interrupt6);
-	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+7, call_function_interrupt7);
+	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES > NUM_CALL_FUNCTION_VECTORS);
+	BUILD_BUG_ON(CONFIG_MAX_GENERIC_SMP_QUEUES != NUM_CALL_FUNCTION_VECTORS);
+
+	for(i = 0; i < CONFIG_GENERIC_SMP_QUEUES; i++)
+		alloc_intr_gate(CALL_FUNCTION_VECTOR_START+i,
+				callfunction_interrupts[i]);
 
 	/* IPI for generic single function call */
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -37,14 +37,7 @@
 extern void irq_move_cleanup_interrupt(void);
 extern void threshold_interrupt(void);
 
-extern void call_function_interrupt0(void);
-extern void call_function_interrupt1(void);
-extern void call_function_interrupt2(void);
-extern void call_function_interrupt3(void);
-extern void call_function_interrupt4(void);
-extern void call_function_interrupt5(void);
-extern void call_function_interrupt6(void);
-extern void call_function_interrupt7(void);
+extern void *callfunction_interrupts[CONFIG_GENERIC_SMP_QUEUES];
 
 extern void call_function_single_interrupt(void);
 
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -79,6 +79,9 @@
 #define CALL_FUNCTION_VECTOR_START	0xf0 /* f0-f7 multiple callfunction queues */
 
 #endif
+
+#define NUM_CALL_FUNCTION_VECTORS	\
+	(CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1)
 
 /*
  * Local APIC timer IRQ vector is on a different priority level,
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -13,20 +13,23 @@
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 
-#define BUILD_CALLFUNCTION(n)					\
-	__BUILD_INTERRUPT(call_function_interrupt##n,		\
-			  smp_call_function_interrupt,		\
-			  CALL_FUNCTION_VECTOR_START + n)
-BUILD_CALLFUNCTION(0)
-BUILD_CALLFUNCTION(1)
-BUILD_CALLFUNCTION(2)
-BUILD_CALLFUNCTION(3)
-BUILD_CALLFUNCTION(4)
-BUILD_CALLFUNCTION(5)
-BUILD_CALLFUNCTION(6)
-BUILD_CALLFUNCTION(7)
+.pushsection .rodata,"a"
+.globl callfunction_interrupts
+callfunction_interrupts:
+.popsection
 
-#undef BUILD_CALLFUNCTION
+vec = 0
+.rept CONFIG_GENERIC_SMP_QUEUES
+	ALIGN
+1:	__BUILD_INTERRUPT(smp_call_function_interrupt, CALL_FUNCTION_VECTOR_START+vec)
+	vec = vec+1
+.pushsection .rodata,"a"
+	.long 1b
+.popsection
+.endr
+.pushsection .rodata,"a"
+	.size callfunction_interrupts, . - callfunction_interrupts
+.popsection
 #endif
 
 /*



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 8 of 8] smp function calls: add kernel parameter to disable multiple queues
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
                   ` (6 preceding siblings ...)
  2008-08-02  0:14 ` [PATCH 7 of 8] x86: make number of smp_call_function queues truly configurable Jeremy Fitzhardinge
@ 2008-08-02  0:14 ` Jeremy Fitzhardinge
  2008-08-06  3:22 ` [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Andi Kleen
  8 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-02  0:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Andi Kleen, Nick Piggin, Jens Axboe

There was some concern that using multiple queues - and their
associated APIC vectors - may trigger bugs in various dubious APIC
implementations.  This patch adds a command line option to force the
kernel to use a single queue, even if the architecture can support
more.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 Documentation/kernel-parameters.txt |    5 +++++
 arch/x86/kernel/smp.c               |    2 +-
 arch/x86/xen/smp.c                  |    6 ++++--
 include/linux/smp.h                 |   26 ++++++++++++++++++++++++++
 kernel/smp.c                        |   14 +++++++++++++-
 5 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1894,6 +1894,11 @@
 	simeth=		[IA-64]
 	simscsi=
 
+	single_ipi_queue [X86]
+			Force the use of a single queue for smp function calls.
+			This means that only a single vector is used, which may avoid
+			bugs in some APIC implementations.
+
 	slram=		[HW,MTD]
 
 	slub_debug[=options[,slabs]]	[MM, SLUB]
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -129,7 +129,7 @@
 void native_send_call_func_ipi(cpumask_t mask)
 {
 	cpumask_t allbutself;
-	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
+	unsigned queue = smp_ipi_choose_queue();
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -373,7 +373,7 @@
 static void xen_smp_send_call_function_ipi(cpumask_t mask)
 {
 	int cpu;
-	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
+	unsigned queue = smp_ipi_choose_queue();
 
 	/*
 	 * We can't afford to allocate N callfunc vectors * M cpu
@@ -411,10 +411,12 @@
 	unsigned queue;
 
 	irq_enter();
+
 	queue = start_queue;
 	do {
 		generic_smp_call_function_interrupt(queue);
-		queue = (queue + 1) % CONFIG_GENERIC_SMP_QUEUES;
+		if (++queue == smp_ipi_nqueues())
+			queue = 0;
 	} while(queue != start_queue);
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/smp.h b/include/linux/smp.h
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -168,4 +168,30 @@
 
 void smp_setup_processor_id(void);
 
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+extern bool __read_mostly smp_single_ipi_queue;
+
+static inline unsigned smp_ipi_nqueues(void)
+{
+	unsigned nqueues = 1;
+
+#ifdef CONFIG_GENERIC_SMP_QUEUES
+	nqueues = CONFIG_GENERIC_SMP_QUEUES;
+#endif
+
+	return nqueues;
+}
+
+static inline unsigned smp_ipi_choose_queue(void)
+{
+	unsigned cpu = smp_processor_id();
+	unsigned nqueues = smp_ipi_nqueues();
+
+	if (nqueues == 1 || smp_single_ipi_queue)
+		return 0;
+
+	return cpu % nqueues;
+}
+#endif
+
 #endif /* __LINUX_SMP_H */
diff --git a/kernel/smp.c b/kernel/smp.c
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -10,6 +10,8 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/smp.h>
+
+bool __read_mostly smp_single_ipi_queue = false;
 
 #ifdef CONFIG_GENERIC_SMP_QUEUES
 #define	NQUEUES	CONFIG_GENERIC_SMP_QUEUES
@@ -310,7 +312,7 @@
 	cpus_and(mask, mask, allbutself);
 	num_cpus = cpus_weight(mask);
 
-	queue_no = cpu % NQUEUES;
+	queue_no = smp_ipi_choose_queue();
 	queue = &call_function_queues[queue_no];
 
 	/*
@@ -424,6 +426,16 @@
 		spin_lock_init(&call_function_queues[i].lock);
 	}
 
+	printk(KERN_INFO "smp function calls: using %d/%d queues\n",
+	       smp_ipi_nqueues(), NQUEUES);
+
 	return 0;
 }
 early_initcall(init_smp_function_call);
+
+static __init int set_single_ipi_queue(char *str)
+{
+	smp_single_ipi_queue = true;
+	return 0;
+}
+early_param("single_ipi_queue", set_single_ipi_queue);



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls
  2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
                   ` (7 preceding siblings ...)
  2008-08-02  0:14 ` [PATCH 8 of 8] smp function calls: add kernel parameter to disable multiple queues Jeremy Fitzhardinge
@ 2008-08-06  3:22 ` Andi Kleen
  2008-08-06  3:33   ` Jeremy Fitzhardinge
  8 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2008-08-06  3:22 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Ingo Molnar, LKML, x86, Nick Piggin, Jens Axboe

Jeremy Fitzhardinge <jeremy@goop.org> writes:

> 1. use smp_call_function_mask to implement cross-cpu TLB flushes


Do you have any measurements comparing performance of the old and 
the new call? TLB flush is quite time critical so we should be very
careful here.

-Andi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls
  2008-08-06  3:22 ` [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Andi Kleen
@ 2008-08-06  3:33   ` Jeremy Fitzhardinge
  2008-08-06  3:37     ` Andi Kleen
  0 siblings, 1 reply; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-06  3:33 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Ingo Molnar, LKML, x86, Nick Piggin, Jens Axboe

Andi Kleen wrote:
> Jeremy Fitzhardinge <jeremy@goop.org> writes:
>
>   
>> 1. use smp_call_function_mask to implement cross-cpu TLB flushes
>>     
>
>
> Do you have any measurements comparing performance of the old and 
> the new call? TLB flush is quite time critical so we should be very
> careful here.

No.  What benchmark would you suggest?

    J


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls
  2008-08-06  3:33   ` Jeremy Fitzhardinge
@ 2008-08-06  3:37     ` Andi Kleen
  2008-08-11 23:57       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2008-08-06  3:37 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Andi Kleen, Ingo Molnar, LKML, x86, Nick Piggin, Jens Axboe

On Tue, Aug 05, 2008 at 08:33:24PM -0700, Jeremy Fitzhardinge wrote:
> Andi Kleen wrote:
> >Jeremy Fitzhardinge <jeremy@goop.org> writes:
> >
> >  
> >>1. use smp_call_function_mask to implement cross-cpu TLB flushes
> >>    
> >
> >
> >Do you have any measurements comparing performance of the old and 
> >the new call? TLB flush is quite time critical so we should be very
> >careful here.
> 
> No.  What benchmark would you suggest?

A micro-benchmark.  Just stick RDTSC around the calls and trigger a
few remote flushes, e.g. by doing mmap/munmap in a multithreaded
program whose threads run on different CPUs.
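
The triggering side could be as simple as the sketch below (a
userspace sketch, not part of the series; the real measurement would
be RDTSC in the kernel around the flush calls, this only generates
cross-CPU flushes via munmap and prints a coarse per-call cycle
count).  Build with something like "gcc -O2 -pthread".

/* Pin one busy thread per CPU so this mm stays active everywhere,
 * then mmap/munmap pages from the main thread; each munmap forces a
 * remote TLB flush IPI to the other CPUs. */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static volatile int stop;

static inline uint64_t rdtsc(void)
{
        uint32_t lo, hi;
        asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
}

static void *spinner(void *arg)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET((long)arg, &set);
        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
        while (!stop)
                ;               /* keep this mm loaded on this CPU */
        return NULL;
}

int main(void)
{
        enum { ITERS = 100000, MAXCPU = 64 };
        long ncpus = sysconf(_SC_NPROCESSORS_ONLN), i;
        pthread_t tid[MAXCPU];
        uint64_t total = 0;
        int n;

        for (i = 1; i < ncpus && i < MAXCPU; i++)
                pthread_create(&tid[i], NULL, spinner, (void *)i);

        for (n = 0; n < ITERS; n++) {
                char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                uint64_t t0;

                if (p == MAP_FAILED)
                        return 1;
                p[0] = 1;               /* fault the page in */
                t0 = rdtsc();
                munmap(p, 4096);        /* forces the remote flush */
                total += rdtsc() - t0;
        }

        stop = 1;
        for (i = 1; i < ncpus && i < MAXCPU; i++)
                pthread_join(tid[i], NULL);

        printf("avg cycles around munmap: %llu\n",
               (unsigned long long)(total / ITERS));
        return 0;
}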

-Andi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls
  2008-08-06  3:37     ` Andi Kleen
@ 2008-08-11 23:57       ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 13+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-11 23:57 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Ingo Molnar, LKML, x86, Nick Piggin, Jens Axboe

Andi Kleen wrote:
> A micro-benchmark.  Just stick RDTSC around the calls and trigger a
> few remote flushes, e.g. by doing mmap/munmap in a multithreaded
> program whose threads run on different CPUs.
>   

On a 3GHz Xeon with HT, I see a ~35% increase in time (1830ns -> 2500ns),
averaged over 1,000,000 interrupts.  Because it's only 2 CPUs, it always
ends up using the smp_call_function_single path (but via the
smp_call_function_mask path).

I need to get a machine with more CPUs running native to do a proper
test, especially to see whether putting kmalloc on that path is very
expensive or not.  (Is it worth using a kmem_cache?)
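
If it turns out to matter, a dedicated slab for the call data might
look roughly like the sketch below (inside kernel/smp.c, assuming the
data being kmalloc'd is the call_function_data structure; the cache
and helper names are made up for illustration):

/* sketch only: private slab for the cross-call data, kernel/smp.c */
static struct kmem_cache *cfd_cachep;

static int __init cfd_cache_init(void)
{
        cfd_cachep = kmem_cache_create("call_function_data",
                                       sizeof(struct call_function_data),
                                       0, SLAB_HWCACHE_ALIGN, NULL);
        return cfd_cachep ? 0 : -ENOMEM;
}
early_initcall(cfd_cache_init);

/* drop-in replacements for the kmalloc/kfree on the call path */
static struct call_function_data *cfd_alloc(gfp_t flags)
{
        return kmem_cache_alloc(cfd_cachep, flags);
}

static void cfd_free(struct call_function_data *cfd)
{
        kmem_cache_free(cfd_cachep, cfd);
}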

    J

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2008-08-11 23:57 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-02  0:14 [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 1 of 8] x86-32: use smp_call_function_mask for SMP TLB invalidations Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 2 of 8] x86-64: " Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 3 of 8] x86: make tlb_32|64 closer Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 4 of 8] x86: unify tlb.c Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 5 of 8] smp_function_call: add multiple queues for scalability Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 6 of 8] x86: add multiple smp_call_function queues Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 7 of 8] x86: make number of smp_call_function queues truly configurable Jeremy Fitzhardinge
2008-08-02  0:14 ` [PATCH 8 of 8] smp function calls: add kernel parameter to disable multiple queues Jeremy Fitzhardinge
2008-08-06  3:22 ` [PATCH 0 of 8] x86/smp function calls: convert x86 tlb flushes to use function calls Andi Kleen
2008-08-06  3:33   ` Jeremy Fitzhardinge
2008-08-06  3:37     ` Andi Kleen
2008-08-11 23:57       ` Jeremy Fitzhardinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).