linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions
@ 2008-09-06 23:50 Mike Travis
  2008-09-06 23:50 ` [RFC 01/13] smp: modify send_IPI_mask interface to accept cpumask_t pointers Mike Travis
                   ` (13 more replies)
  0 siblings, 14 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel


[Note: all these changes require some more testing but I wanted to solicit
comments before then, hence the "RFC" in the subject line. -thanks! Mike]

  * Change the genapic->send_IPI_mask function to accept cpumask_t pointer.

  * Add for_each_online_cpu_mask_nr to eliminate a common case of needing
    a temporary on-stack cpumask_t variable.

  * Change send_IPI_mask function in xen to use for_each_online_cpu_mask_nr().

  * Add cpumask_ptr operations.

  * Add get_cpumask_var debug operations.

  * Add global allbutself PER_CPUMASK variable.

  * Remove as many on-stack cpumask_t variables in kernel/sched.c

  * Remove as many on-stack cpumask_t variables in acpi-cpufreq.c

  * Remove as many on-stack cpumask_t variables in io_apic.c


Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 01/13] smp: modify send_IPI_mask interface to accept cpumask_t pointers
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 02/13] cpumask: add for_each_online_cpu_mask_nr function Mike Travis
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: send_IPI_mask --]
[-- Type: text/plain, Size: 8075 bytes --]

  * Change the send_IPI_mask interface for x86_64 to accept a pointer
    to the cpumask_t argument.


Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genapic_flat_64.c       |   16 ++++++++--------
 arch/x86/kernel/genx2apic_cluster.c     |    8 ++++----
 arch/x86/kernel/genx2apic_phys.c        |    8 ++++----
 arch/x86/kernel/genx2apic_uv_x.c        |    8 ++++----
 arch/x86/xen/smp.c                      |    9 ++++++---
 include/asm-x86/genapic_64.h            |    2 +-
 include/asm-x86/mach-default/mach_ipi.h |    2 +-
 include/asm-x86/mach-generic/mach_ipi.h |    4 ++++
 8 files changed, 32 insertions(+), 25 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6.tip/arch/x86/kernel/genapic_flat_64.c
@@ -69,9 +69,9 @@ static void flat_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned long mask = cpus_addr(*cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int
 		cpu_clear(smp_processor_id(), allbutme);
 
 		if (!cpus_empty(allbutme))
-			flat_send_IPI_mask(allbutme, vector);
+			flat_send_IPI_mask(&allbutme, vector);
 	} else if (num_online_cpus() > 1) {
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 	}
@@ -101,7 +101,7 @@ static void flat_send_IPI_allbutself(int
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(cpu_online_map, vector);
+		flat_send_IPI_mask(&cpu_online_map, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -196,9 +196,9 @@ static cpumask_t physflat_vector_allocat
 	return cpumask_of_cpu(cpu);
 }
 
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
 {
-	send_IPI_mask_sequence(cpumask, vector);
+	send_IPI_mask_sequence(*cpumask, vector);
 }
 
 static void physflat_send_IPI_allbutself(int vector)
@@ -206,12 +206,12 @@ static void physflat_send_IPI_allbutself
 	cpumask_t allbutme = cpu_online_map;
 
 	cpu_clear(smp_processor_id(), allbutme);
-	physflat_send_IPI_mask(allbutme, vector);
+	physflat_send_IPI_mask(&allbutme, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(cpu_online_map, vector);
+	physflat_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_cluster.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_cluster.c
@@ -56,13 +56,13 @@ static void __x2apic_send_IPI_dest(unsig
  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, *mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 				       vector, APIC_DEST_LOGICAL);
 	}
@@ -76,12 +76,12 @@ static void x2apic_send_IPI_allbutself(i
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+		x2apic_send_IPI_mask(&mask, vector);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int x2apic_apic_id_registered(void)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_phys.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_phys.c
@@ -54,13 +54,13 @@ static void __x2apic_send_IPI_dest(unsig
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, *mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
@@ -74,12 +74,12 @@ static void x2apic_send_IPI_allbutself(i
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+		x2apic_send_IPI_mask(&mask, vector);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int x2apic_apic_id_registered(void)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -124,12 +124,12 @@ static void uv_send_IPI_one(int cpu, int
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
-static void uv_send_IPI_mask(cpumask_t mask, int vector)
+static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned int cpu;
 
 	for_each_possible_cpu(cpu)
-		if (cpu_isset(cpu, mask))
+		if (cpu_isset(cpu, *mask))
 			uv_send_IPI_one(cpu, vector);
 }
 
@@ -140,12 +140,12 @@ static void uv_send_IPI_allbutself(int v
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		uv_send_IPI_mask(mask, vector);
+		uv_send_IPI_mask(&mask, vector);
 }
 
 static void uv_send_IPI_all(int vector)
 {
-	uv_send_IPI_mask(cpu_online_map, vector);
+	uv_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int uv_apic_id_registered(void)
--- linux-2.6.tip.orig/arch/x86/xen/smp.c
+++ linux-2.6.tip/arch/x86/xen/smp.c
@@ -358,10 +358,12 @@ static void xen_smp_send_reschedule(int 
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const cpumask_t *inmask, enum ipi_vector vector)
 {
 	unsigned cpu;
+	cpumask_t mask;
 
+	mask = *inmask;
 	cpus_and(mask, mask, cpu_online_map);
 
 	for_each_cpu_mask_nr(cpu, mask)
@@ -372,7 +374,7 @@ static void xen_smp_send_call_function_i
 {
 	int cpu;
 
-	xen_send_IPI_mask(*mask, XEN_CALL_FUNCTION_VECTOR);
+	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
 	/* Make sure other vcpus get a chance to run if they need to. */
 	for_each_cpu_mask_nr(cpu, *mask) {
@@ -385,7 +387,8 @@ static void xen_smp_send_call_function_i
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+	xen_send_IPI_mask(&cpumask_of_cpu(cpu),
+			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
--- linux-2.6.tip.orig/include/asm-x86/genapic_64.h
+++ linux-2.6.tip/include/asm-x86/genapic_64.h
@@ -22,7 +22,7 @@ struct genapic {
 	cpumask_t (*vector_allocation_domain)(int cpu);
 	void (*init_apic_ldr)(void);
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
--- linux-2.6.tip.orig/include/asm-x86/mach-default/mach_ipi.h
+++ linux-2.6.tip/include/asm-x86/mach-default/mach_ipi.h
@@ -11,7 +11,7 @@ extern int no_broadcast;
 
 #ifdef CONFIG_X86_64
 #include <asm/genapic.h>
-#define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask(mask, vector) (genapic->send_IPI_mask)(&(mask), vector)
 #else
 static inline void send_IPI_mask(cpumask_t mask, int vector)
 {
--- linux-2.6.tip.orig/include/asm-x86/mach-generic/mach_ipi.h
+++ linux-2.6.tip/include/asm-x86/mach-generic/mach_ipi.h
@@ -3,7 +3,11 @@
 
 #include <asm/genapic.h>
 
+#ifdef CONFIG_X86_64
+#define send_IPI_mask(mask, vector) (genapic->send_IPI_mask)(&(mask), vector)
+#else
 #define send_IPI_mask (genapic->send_IPI_mask)
+#endif
 #define send_IPI_allbutself (genapic->send_IPI_allbutself)
 #define send_IPI_all (genapic->send_IPI_all)
 

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 02/13] cpumask: add for_each_online_cpu_mask_nr function
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
  2008-09-06 23:50 ` [RFC 01/13] smp: modify send_IPI_mask interface to accept cpumask_t pointers Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 03/13] xen: use new " Mike Travis
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: for_each_online_cpu_mask_nr --]
[-- Type: text/plain, Size: 4303 bytes --]

  * Add for_each_online_cpu_mask_nr() function to eliminate need for
    a common use of a temporary cpumask_t variable.  When the following
    procedure is being used:

    funcproto(cpumask_t *mask, ...)
	cpumask_t temp;

	cpus_and(temp, *mask, cpu_online_map);
	for_each_cpu_mask_nr(cpu, temp)
		...

    It then becomes:

	for_each_online_cpu_mask_nr(cpu, *mask)
		...

  * Note the generic __next_cpu_and (and __next_cpu_and_nr) functions
    allowing AND'ing with any cpumask_t variable, not just the cpu_online_map.


Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/cpumask.h |   26 +++++++++++++++++++++++---
 lib/cpumask.c           |   26 ++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)

--- linux-2.6.tip.orig/include/linux/cpumask.h
+++ linux-2.6.tip/include/linux/cpumask.h
@@ -404,23 +404,32 @@ static inline void __cpus_fold(cpumask_t
 #define first_cpu(src)		({ (void)(src); 0; })
 #define next_cpu(n, src)	({ (void)(src); 1; })
 #define any_online_cpu(mask)	0
-#define for_each_cpu_mask(cpu, mask)	\
+
+#define for_each_cpu_mask(cpu, mask)		\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_online_cpu_mask(cpu, mask)	\
+	for_each_cpu_mask(cpu, mask)
 
 #else /* NR_CPUS > 1 */
 
 extern int nr_cpu_ids;
 int __first_cpu(const cpumask_t *srcp);
 int __next_cpu(int n, const cpumask_t *srcp);
+int __next_cpu_and(int n, const cpumask_t *srcp, const cpumask_t *andp);
 int __any_online_cpu(const cpumask_t *mask);
 
 #define first_cpu(src)		__first_cpu(&(src))
 #define next_cpu(n, src)	__next_cpu((n), &(src))
 #define any_online_cpu(mask) __any_online_cpu(&(mask))
+
 #define for_each_cpu_mask(cpu, mask)			\
 	for ((cpu) = -1;				\
 		(cpu) = next_cpu((cpu), (mask)),	\
 		(cpu) < NR_CPUS; )
+#define for_each_online_cpu_mask(cpu, mask)				   \
+	for ((cpu) = -1;						   \
+		(cpu) = __next_cpu_and((cpu), &(mask), &(cpu_online_map)), \
+		(cpu) < NR_CPUS; )
 #endif
 
 #if NR_CPUS <= 64
@@ -428,17 +437,28 @@ int __any_online_cpu(const cpumask_t *ma
 #define next_cpu_nr(n, src)		next_cpu(n, src)
 #define cpus_weight_nr(cpumask)		cpus_weight(cpumask)
 #define for_each_cpu_mask_nr(cpu, mask)	for_each_cpu_mask(cpu, mask)
+#define for_each_online_cpu_mask_nr(cpu, mask)	\
+					for_each_online_cpu_mask(cpu, mask)
 
 #else /* NR_CPUS > 64 */
 
 int __next_cpu_nr(int n, const cpumask_t *srcp);
-#define next_cpu_nr(n, src)	__next_cpu_nr((n), &(src))
-#define cpus_weight_nr(cpumask)	__cpus_weight(&(cpumask), nr_cpu_ids)
+int __next_cpu_and_nr(int n, const cpumask_t *srcp, const cpumask_t *andp);
+
+#define next_cpu_nr(n, src)		__next_cpu_nr((n), &(src))
+#define next_cpu_and_nr(n, src, and)	__next_cpu_and_nr((n), &(src), &(and))
+#define cpus_weight_nr(cpumask)		__cpus_weight(&(cpumask), nr_cpu_ids)
+
 #define for_each_cpu_mask_nr(cpu, mask)			\
 	for ((cpu) = -1;				\
 		(cpu) = next_cpu_nr((cpu), (mask)),	\
 		(cpu) < nr_cpu_ids; )
 
+#define for_each_online_cpu_mask_nr(cpu, mask)				  \
+	for ((cpu) = -1;						  \
+		(cpu) = next_cpu_and_nr((cpu), (mask), (cpu_online_map)), \
+		(cpu) < nr_cpu_ids; )
+
 #endif /* NR_CPUS > 64 */
 
 /*
--- linux-2.6.tip.orig/lib/cpumask.c
+++ linux-2.6.tip/lib/cpumask.c
@@ -14,6 +14,19 @@ int __next_cpu(int n, const cpumask_t *s
 	return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
 }
 EXPORT_SYMBOL(__next_cpu);
+int __next_cpu_and(int n, const cpumask_t *srcp, const cpumask_t *andp)
+{
+	int cpu;
+
+	for (cpu = n + 1; cpu < NR_CPUS; cpu++) {
+		cpu = find_next_bit(srcp->bits, NR_CPUS, cpu);
+
+		if (cpu < NR_CPUS && cpu_isset(cpu, *andp))
+			return cpu;
+	}
+	return NR_CPUS;
+}
+EXPORT_SYMBOL(__next_cpu_and);
 
 #if NR_CPUS > 64
 int __next_cpu_nr(int n, const cpumask_t *srcp)
@@ -22,6 +35,19 @@ int __next_cpu_nr(int n, const cpumask_t
 				find_next_bit(srcp->bits, nr_cpu_ids, n+1));
 }
 EXPORT_SYMBOL(__next_cpu_nr);
+int __next_cpu_and_nr(int n, const cpumask_t *srcp, const cpumask_t *andp)
+{
+	int cpu;
+
+	for (cpu = n + 1; cpu < nr_cpu_ids; cpu++) {
+		cpu = find_next_bit(srcp->bits, nr_cpu_ids, cpu);
+
+		if (cpu < nr_cpu_ids && cpu_isset(cpu, *andp))
+			return cpu;
+	}
+	return nr_cpu_ids;
+}
+EXPORT_SYMBOL(__next_cpu_and_nr);
 #endif
 
 int __any_online_cpu(const cpumask_t *mask)

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 03/13] xen: use new for_each_online_cpu_mask_nr function
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
  2008-09-06 23:50 ` [RFC 01/13] smp: modify send_IPI_mask interface to accept cpumask_t pointers Mike Travis
  2008-09-06 23:50 ` [RFC 02/13] cpumask: add for_each_online_cpu_mask_nr function Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 04/13] cpumask: add cpumask_ptr operations Mike Travis
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: xen-send_IPI_mask --]
[-- Type: text/plain, Size: 833 bytes --]

  * Change the send_IPI_mask function in xen to not require a local
    cpumask_t variable.

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/xen/smp.c |    8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

--- linux-2.6.tip.orig/arch/x86/xen/smp.c
+++ linux-2.6.tip/arch/x86/xen/smp.c
@@ -358,15 +358,11 @@ static void xen_smp_send_reschedule(int 
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(const cpumask_t *inmask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
 {
 	unsigned cpu;
-	cpumask_t mask;
 
-	mask = *inmask;
-	cpus_and(mask, mask, cpu_online_map);
-
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_online_cpu_mask_nr(cpu, *mask)
 		xen_send_IPI_one(cpu, vector);
 }
 

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 04/13] cpumask: add cpumask_ptr operations
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (2 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 03/13] xen: use new " Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 05/13] cpumask: add get_cpumask_var debug operations Mike Travis
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: get_cpumask_ptr --]
[-- Type: text/plain, Size: 4671 bytes --]

 * Define a generic cpumask_ptr type and some helper functions that
 * do not add any overhead to low count NR_CPUS systems.  Essentially
 * for NR_CPUS <= BITS_PER_LONG, an array of one cpumask is created on
 * the stack.  For larger count systems, a pointer is defined.  In both
 * cases the pointer variable can be used as C will optimize out the
 * pointer dereference in the small system case.
 *
 * Add a simple kmalloc to allocate a temporary cpumask variable.
 * (Note, kmalloc should be defined before including this file.)
 *
 * Add a simple per_cpu variable to facilitate those cases where a
 * kmalloc failure cannot be reasonably handled (or we're using it
 * before the mm system is initialized.)
 *
 * This is useful for getting temporary per_cpu cpumask_t variables.
 * Note there are no locks, so these are _extremely_ temporary cpumask
 * vars, with only preemption disabled, while in use.
 *
 * Many thanks to Linus Torvalds for the typedef/alloc idea.
 *
 * Adds the following cpumask_ptr operations for NR_CPUS > BITS_PER_LONG:
 *
 *   cpumask_ptr p;		Declares p to be a cpumask_t pointer
 *
 *   alloc_cpumask_ptr(p);	allocates and assigns memory to p
 *   free_cpumask_ptr(p);	frees memory used by p
 *
 *   DEFINE_PER_CPUMASK(p, v)	Defines a PER_CPUMASK variable v
 *   DECLARE_PER_CPUMASK(p, v)	Declares a PER_CPUMASK variable v
 *   get_cpumask_var(p, v)	Obtains possession of PER_CPUMASK variable v
 *   				and assigns it to p
 *   put_cpumask_var(p, v)	Relinquishes possession of PER_CPUMASK variable v
 *
 * If NR_CPUS <= BITS_PER_LONG then the above are all essentially NOP's.

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/cpumask_ptr.h |   78 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

--- /dev/null
+++ linux-2.6.tip/include/linux/cpumask_ptr.h
@@ -0,0 +1,78 @@
+#ifndef __LINUX_CPUMASK_PTR_H
+#define __LINUX_CPUMASK_PTR_H
+
+/*
+ * Define a generic cpumask_ptr type and some helper functions that
+ * do not add any overhead to low count NR_CPUS systems.  Essentially
+ * for NR_CPUS <= BITS_PER_LONG, an array of one cpumask is created on
+ * the stack.  For larger count systems, a pointer is defined.  In both
+ * cases the pointer variable can be used as C will optimize out the
+ * pointer derefence in the small system case.
+ *
+ * Add a simple kmalloc to allocate a temporary cpumask variable.
+ * (Note, kmalloc should be defined before including this file.)
+ *
+ * Add a simple per_cpu variable to facilitate those cases where a
+ * kmalloc failure cannot be reasonable handled (or we're using it
+ * before the mm system is initialized.)
+ *
+ * This is useful for getting temporary per_cpu cpumask_t variables.
+ * Note there are no locks, so these are _extremely_ temporary cpumask
+ * vars, with only preemption disabled, while in use.
+ *
+ * Many thanks to Linus Torvalds for the typedef/alloc idea.
+ *
+ * Adds the following cpumask_ptr operations for NR_CPUS > BITS_PER_LONG:
+ *
+ *   cpumask_ptr p;		Declares p to be a cpumask_t pointer
+ *
+ *   alloc_cpumask_ptr(p);	allocates and assigns memory to p
+ *   free_cpumask_ptr(p);	frees memory used by p
+ *
+ *   DEFINE_PER_CPUMASK(p, v)	Defines a PER_CPUMASK variable v
+ *   DECLARE_PER_CPUMASK(p, v)	Declares a PER_CPUMASK variable v
+ *   get_cpumask_var(p, v)	Obtains possession of PER_CPUMASK variable v
+ *   				and assignes it to p
+ *   put_cpumask_var(p, v)	Relenquishs possession of PER_CPUMASK variable v
+ *
+ * If NR_CPUS <= BITS_PER_LONG then the above are all essentially NOP's.
+ */
+
+#if NR_CPUS <= BITS_PER_LONG
+
+typedef cpumask_t cpumask_ptr[1];
+static inline void alloc_cpumask_ptr(cpumask_ptr *p)
+{
+}
+static inline void free_cpumask_ptr(cpumask_ptr *p)
+{
+}
+#define	DEFINE_PER_CPUMASK(v)	void *unused_##v __maybe_unused
+#define	DECLARE_PER_CPUMASK(v)
+#define get_cpumask_var(p, v)	do { } while(0)
+#define put_cpumask_var(p, v)	do { } while(0)
+
+#else /* NR_CPUS > BITS_PER_LONG */
+
+#include <linux/slab.h>
+typedef cpumask_t *cpumask_ptr;
+static inline void _get_cpumask_ptr(cpumask_ptr *p, cpumask_t *m)
+{
+	*p = m;
+}
+static inline void alloc_cpumask_ptr(cpumask_ptr *p)
+{
+	_get_cpumask_ptr(p, kmalloc(sizeof(cpumask_t), GFP_KERNEL));
+}
+static inline void free_cpumask_ptr(cpumask_ptr *p)
+{
+	kfree(*p);
+}
+#define	DEFINE_PER_CPUMASK(v)	DEFINE_PER_CPU(cpumask_t, v)
+#define	DECLARE_PER_CPUMASK(v)	DECLARE_PER_CPU(cpumask_t, v)
+#define	get_cpumask_var(p, v)	_get_cpumask_ptr(&(p), &get_cpu_var(v))
+#define	put_cpumask_var(p, v)	put_cpu_var(v)
+
+#endif /* NR_CPUS > BITS_PER_LONG */
+
+#endif /* __LINUX_CPUMASK_PTR_H */

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 05/13] cpumask: add get_cpumask_var debug operations
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (3 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 04/13] cpumask: add cpumask_ptr operations Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 06/13] genapic: use get_cpumask_var operations for allbutself cpumask_ts Mike Travis
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: get_cpumask_var-debug --]
[-- Type: text/plain, Size: 2459 bytes --]

  * Adds debugging to the get_cpumask_var operations.  It uses a primitive
    per_cpu lock variable and will only discover nested get_cpumask_var
    operations on the same variable.

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/Kconfig.debug      |   12 ++++++++++++
 include/linux/cpumask_ptr.h |   34 ++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

--- linux-2.6.tip.orig/arch/x86/Kconfig.debug
+++ linux-2.6.tip/arch/x86/Kconfig.debug
@@ -92,6 +92,18 @@ config DEBUG_PER_CPU_MAPS
 
 	  Say N if unsure.
 
+config DEBUG_PER_CPUMASK_VAR
+	bool "Debug per_cpumask variables"
+	depends on DEBUG_KERNEL
+	depends on X86_SMP
+	default n
+	help
+	  Say Y to verify that the per_cpumask variables being accessed
+	  are available.  Adds a fair amount of code to kernel memory
+	  and decreases performance.
+
+	  Say N if unsure.
+
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
--- linux-2.6.tip.orig/include/linux/cpumask_ptr.h
+++ linux-2.6.tip/include/linux/cpumask_ptr.h
@@ -53,11 +53,45 @@ static inline void free_cpumask_ptr(cpum
 {
 	kfree(*p);
 }
+
+#ifndef CONFIG_DEBUG_CPUMASK_VAR
 #define	DEFINE_PER_CPUMASK(v)	DEFINE_PER_CPU(cpumask_t, v)
 #define	DECLARE_PER_CPUMASK(v)	DECLARE_PER_CPU(cpumask_t, v)
 #define	get_cpumask_var(p, v)	_get_cpumask_ptr(&(p), &get_cpu_var(v))
 #define	put_cpumask_var(p, v)	put_cpu_var(v)
 
+#else /* CONFIG_DEBUG_CPUMASK_VAR */
+#define	DEFINE_PER_CPUMASK(v)	DEFINE_PER_CPU(cpumask_t, v);	\
+				DEFINE_PER_CPU(char, lock_##v)
+#define	DECLARE_PER_CPUMASK(v)	DECLARE_PER_CPU(cpumask_t, v);	\
+				DECLARE_PER_CPU(char, lock_##v)
+
+#define	get_cpumask_var(p, v)	{				\
+	char *lock = &get_cpu_var(lock_##v);			\
+	if (*lock) {						\
+		printk(KERN_NOTICE "get_cpumask_var(" v "): "	\
+			"already locked!\n");			\
+		dump_stack();					\
+		BUG();						\
+	}							\
+	*lock = 1;						\
+	_get_cpumask_ptr(&(p), &get_cpu_var(v))			\
+	put_cpu_var(lock_##v);					\
+}
+
+#define	put_cpumask_var(p, v)	{				\
+	char *lock = &get_cpu_var(lock_##v);			\
+	if (!*lock) {						\
+		printk(KERN_NOTICE "put_cpumask_var(" v "): "	\
+			"not locked!\n");			\
+		dump_stack();					\
+	}							\
+	*lock = 0;						\
+	put_cpu_var(lock_##v);					\
+	put_cpu_var(v);						\
+}
+#endif /* CONFIG_DEBUG_CPUMASK_VAR */
+
 #endif /* NR_CPUS > BITS_PER_LONG */
 
 #endif /* __LINUX_CPUMASK_PTR_H */

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 06/13] genapic: use get_cpumask_var operations for allbutself cpumask_ts
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (4 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 05/13] cpumask: add get_cpumask_var debug operations Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c Mike Travis
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: add-percpu-allbutself --]
[-- Type: text/plain, Size: 8622 bytes --]

  * Adds a global allbutself PER_CPUMASK variable and modifies functions
    that create a temp cpumask variable for the following procedure:

	cpumask_t mask = cpu_online_map;
 
	cpu_clear(smp_processor_id(), mask);
 
	if (!cpus_empty(mask))
		XXX_send_IPI_mask(&mask, vector);

    This then becomes:

	cpumask_ptr mask;
 
	get_cpumask_var(mask, allbutself);
	*mask = cpu_online_map;
	cpu_clear(smp_processor_id(), *mask);
 
	if (!cpus_empty(*mask))
		XXX_send_IPI_mask(mask, vector);

	put_cpumask_var(mask, allbutself);

  * Note: this "allbutself" per_cpumask variable could be pre-built during
    system startup saving execution time, but would require that all the
    per_cpumask variables are updated whenever a cpu goes off- or on-line.


Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/apic.c                  |    3 +++
 arch/x86/kernel/genapic_flat_64.c       |   26 +++++++++++++++++++-------
 arch/x86/kernel/genx2apic_cluster.c     |   13 +++++++++----
 arch/x86/kernel/genx2apic_uv_x.c        |   13 +++++++++----
 arch/x86/kernel/smp.c                   |   28 ++++++++++++++++++++++++----
 include/asm-x86/genapic_32.h            |    2 ++
 include/asm-x86/genapic_64.h            |    4 ++++
 include/asm-x86/mach-default/mach_ipi.h |   14 ++++++++++----
 8 files changed, 80 insertions(+), 23 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/apic.c
+++ linux-2.6.tip/arch/x86/kernel/apic.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/dmar.h>
+#include <linux/cpumask_ptr.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -162,6 +163,8 @@ static DEFINE_PER_CPU(struct clock_event
 
 static unsigned long apic_phys;
 
+DEFINE_PER_CPUMASK(allbutself);
+
 /*
  * Get the LAPIC version
  */
--- linux-2.6.tip.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6.tip/arch/x86/kernel/genapic_flat_64.c
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
+#include <linux/cpumask_ptr.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
@@ -87,12 +88,17 @@ static void flat_send_IPI_allbutself(int
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		cpumask_t allbutme = cpu_online_map;
+		cpumask_ptr mask;
 
-		cpu_clear(smp_processor_id(), allbutme);
+		get_cpumask_var(mask, allbutself);
+		*mask = cpu_online_map;
+		cpu_clear(smp_processor_id(), *mask);
+
+		if (!cpus_empty(*mask))
+			flat_send_IPI_mask(mask, vector);
+
+		put_cpumask_var(mask, allbutself);
 
-		if (!cpus_empty(allbutme))
-			flat_send_IPI_mask(&allbutme, vector);
 	} else if (num_online_cpus() > 1) {
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 	}
@@ -205,10 +211,16 @@ static void physflat_send_IPI_mask(const
 
 static void physflat_send_IPI_allbutself(int vector)
 {
-	cpumask_t allbutme = cpu_online_map;
+	cpumask_ptr mask;
+
+	get_cpumask_var(mask, allbutself);
+	*mask = cpu_online_map;
+	cpu_clear(smp_processor_id(), *mask);
+
+	if (!cpus_empty(*mask))
+		physflat_send_IPI_mask(mask, vector);
 
-	cpu_clear(smp_processor_id(), allbutme);
-	physflat_send_IPI_mask(&allbutme, vector);
+	put_cpumask_var(mask, allbutself);
 }
 
 static void physflat_send_IPI_all(int vector)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_cluster.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_cluster.c
@@ -1,5 +1,6 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
+#include <linux/cpumask_ptr.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
@@ -71,12 +72,16 @@ static void x2apic_send_IPI_mask(const c
 
 static void x2apic_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	cpumask_ptr mask;
 
-	cpu_clear(smp_processor_id(), mask);
+	get_cpumask_var(mask, allbutself);
+	*mask = cpu_online_map;
+	cpu_clear(smp_processor_id(), *mask);
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(&mask, vector);
+	if (!cpus_empty(*mask))
+		x2apic_send_IPI_mask(mask, vector);
+
+	put_cpumask_var(mask, allbutself);
 }
 
 static void x2apic_send_IPI_all(int vector)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
+#include <linux/cpumask_ptr.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
@@ -135,12 +136,16 @@ static void uv_send_IPI_mask(const cpuma
 
 static void uv_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	cpumask_ptr mask;
 
-	cpu_clear(smp_processor_id(), mask);
+	get_cpumask_var(mask, allbutself);
+	*mask = cpu_online_map;
+	cpu_clear(smp_processor_id(), *mask);
 
-	if (!cpus_empty(mask))
-		uv_send_IPI_mask(&mask, vector);
+	if (!cpus_empty(*mask))
+		uv_send_IPI_mask(mask, vector);
+
+	put_cpumask_var(mask, allbutself);
 }
 
 static void uv_send_IPI_all(int vector)
--- linux-2.6.tip.orig/arch/x86/kernel/smp.c
+++ linux-2.6.tip/arch/x86/kernel/smp.c
@@ -21,6 +21,7 @@
 #include <linux/cache.h>
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
+#include <linux/cpumask_ptr.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -128,16 +129,35 @@ void native_send_call_func_single_ipi(in
 
 void native_send_call_func_ipi(const cpumask_t *mask)
 {
-	cpumask_t allbutself;
 
-	allbutself = cpu_online_map;
-	cpu_clear(smp_processor_id(), allbutself);
+#ifndef	ARCH_DOES_NOT_HAVE_ALLBUTSELF_IPI_CALL
+	/*
+	 * Can't use global "allbutself" because many of the
+	 * genapic->send_IPI_allbutself functions use it to
+	 * expand to a mask to send to each cpu as they do not
+	 * have a single IPI call to send to "allbutself".
+	 *
+	 * This is here only for the small count NR_CPUS that actually
+	 * have one of those handy functions.
+	 *
+	 * There might be an ifdef I could use to short-circuit this out...?
+	 */
+	static DEFINE_PER_CPUMASK(smp_temp_cpumask);
+	cpumask_ptr allbutself;
+
+	get_cpumask_var(allbutself, smp_temp_cpumask);
+	*allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), *allbutself);
 
-	if (cpus_equal(*mask, allbutself) &&
+	if (cpus_equal(*mask, *allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
 		send_IPI_mask(*mask, CALL_FUNCTION_VECTOR);
+	put_cpumask_var(allbutself, smp_temp_cpumask);
+#else
+	send_IPI_mask(*mask, CALL_FUNCTION_VECTOR);
+#endif
 }
 
 static void stop_this_cpu(void *dummy)
--- linux-2.6.tip.orig/include/asm-x86/genapic_32.h
+++ linux-2.6.tip/include/asm-x86/genapic_32.h
@@ -2,6 +2,7 @@
 #define ASM_X86__GENAPIC_32_H
 
 #include <asm/mpspec.h>
+#include <linux/cpumask_ptr.h>
 
 /*
  * Generic APIC driver interface.
@@ -122,5 +123,6 @@ enum uv_system_type {UV_NONE, UV_LEGACY_
 #define uv_wakeup_secondary(a, b)	1
 #define uv_system_init()		do {} while (0)
 
+DECLARE_PER_CPUMASK(allbutself);
 
 #endif /* ASM_X86__GENAPIC_32_H */
--- linux-2.6.tip.orig/include/asm-x86/genapic_64.h
+++ linux-2.6.tip/include/asm-x86/genapic_64.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__GENAPIC_64_H
 #define ASM_X86__GENAPIC_64_H
 
+#include <linux/cpumask_ptr.h>
+
 /*
  * Copyright 2004 James Cleverdon, IBM.
  * Subject to the GNU Public License, v.2
@@ -55,4 +57,6 @@ extern int uv_wakeup_secondary(int phys_
 
 extern void setup_apic_routing(void);
 
+DECLARE_PER_CPUMASK(allbutself);
+
 #endif /* ASM_X86__GENAPIC_64_H */
--- linux-2.6.tip.orig/include/asm-x86/mach-default/mach_ipi.h
+++ linux-2.6.tip/include/asm-x86/mach-default/mach_ipi.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__MACH_DEFAULT__MACH_IPI_H
 #define ASM_X86__MACH_DEFAULT__MACH_IPI_H
 
+#include <linux/cpumask_ptr.h>
+
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
@@ -9,8 +11,9 @@ void __send_IPI_shortcut(unsigned int sh
 
 extern int no_broadcast;
 
-#ifdef CONFIG_X86_64
 #include <asm/genapic.h>
+
+#ifdef CONFIG_X86_64
 #define send_IPI_mask(mask, vector) (genapic->send_IPI_mask)(&(mask), vector)
 #else
 static inline void send_IPI_mask(cpumask_t mask, int vector)
@@ -22,10 +25,13 @@ static inline void send_IPI_mask(cpumask
 static inline void __local_send_IPI_allbutself(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR) {
-		cpumask_t mask = cpu_online_map;
+		cpumask_ptr mask;
 
-		cpu_clear(smp_processor_id(), mask);
-		send_IPI_mask(mask, vector);
+		get_cpumask_var(mask, allbutself);
+		*mask = cpu_online_map;
+		cpu_clear(smp_processor_id(), *mask);
+		send_IPI_mask(*mask, vector);
+		put_cpumask_var(mask, allbutself);
 	} else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (5 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 06/13] genapic: use get_cpumask_var operations for allbutself cpumask_ts Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-07 10:24   ` Peter Zijlstra
  2008-09-06 23:50 ` [RFC 08/13] cpufreq: Reduce stack size requirements in acpi-cpufreq.c Mike Travis
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-kernel_sched_c --]
[-- Type: text/plain, Size: 13839 bytes --]

  * Make the following changes to kernel/sched.c functions:

    - use node_to_cpumask_ptr in place of node_to_cpumask
    - use get_cpumask_var for temporary cpumask_t variables
    - use alloc_cpumask_ptr where available

  * Remove special code for SCHED_CPUMASK_ALLOC and use CPUMASK_ALLOC
    from linux/cpumask.h.

  * The resultant stack savings are:

    ====== Stack (-l 100)

	1 - initial
	2 - stack-hogs-kernel_sched_c
	'.' is less than the limit(100)

       .1.    .2.    ..final..
      2216  -1536 680   -69%  __build_sched_domains
      1592  -1592   .  -100%  move_task_off_dead_cpu
      1096  -1096   .  -100%  sched_balance_self
      1032  -1032   .  -100%  sched_setaffinity
       616   -616   .  -100%  rebalance_domains
       552   -552   .  -100%  free_sched_groups
       512   -512   .  -100%  cpu_to_allnodes_group
      7616  -6936 680   -91%  Totals


Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 kernel/sched.c |  151 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 81 insertions(+), 70 deletions(-)

--- linux-2.6.tip.orig/kernel/sched.c
+++ linux-2.6.tip/kernel/sched.c
@@ -70,6 +70,7 @@
 #include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
+#include <linux/cpumask_ptr.h>
 #include <linux/ftrace.h>
 #include <trace/sched.h>
 
@@ -117,6 +118,12 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
+/*
+ * temp cpumask variables
+ */
+static DEFINE_PER_CPUMASK(temp_cpumask_1);
+static DEFINE_PER_CPUMASK(temp_cpumask_2);
+
 #ifdef CONFIG_SMP
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -2141,7 +2148,11 @@ static int sched_balance_self(int cpu, i
 {
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
+	cpumask_ptr span;
+	cpumask_ptr tmpmask;
 
+	get_cpumask_var(span, temp_cpumask_1);
+	get_cpumask_var(tmpmask, temp_cpumask_2);
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If power savings logic is enabled for a domain, stop there.
@@ -2156,7 +2167,6 @@ static int sched_balance_self(int cpu, i
 		update_shares(sd);
 
 	while (sd) {
-		cpumask_t span, tmpmask;
 		struct sched_group *group;
 		int new_cpu, weight;
 
@@ -2165,14 +2175,14 @@ static int sched_balance_self(int cpu, i
 			continue;
 		}
 
-		span = sd->span;
+		*span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
 		if (!group) {
 			sd = sd->child;
 			continue;
 		}
 
-		new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+		new_cpu = find_idlest_cpu(group, t, cpu, tmpmask);
 		if (new_cpu == -1 || new_cpu == cpu) {
 			/* Now try balancing at a lower domain level of cpu */
 			sd = sd->child;
@@ -2182,7 +2192,7 @@ static int sched_balance_self(int cpu, i
 		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
 		sd = NULL;
-		weight = cpus_weight(span);
+		weight = cpus_weight(*span);
 		for_each_domain(cpu, tmp) {
 			if (weight <= cpus_weight(tmp->span))
 				break;
@@ -2192,6 +2202,9 @@ static int sched_balance_self(int cpu, i
 		/* while loop will break here if sd == NULL */
 	}
 
+	put_cpumask_var(span, temp_cpumask_1);
+	put_cpumask_var(tmpmask, temp_cpumask_2);
+
 	return cpu;
 }
 
@@ -3865,8 +3878,9 @@ static void rebalance_domains(int cpu, e
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
 	int need_serialize;
-	cpumask_t tmp;
+	cpumask_ptr tmp;
 
+	get_cpumask_var(tmp, temp_cpumask_1);
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
@@ -3890,7 +3904,7 @@ static void rebalance_domains(int cpu, e
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) {
+			if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -3924,6 +3938,8 @@ out:
 	 */
 	if (likely(update_next_balance))
 		rq->next_balance = next_balance;
+
+	put_cpumask_var(tmp, temp_cpumask_1);
 }
 
 /*
@@ -5384,11 +5400,14 @@ out_unlock:
 
 long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 {
-	cpumask_t cpus_allowed;
-	cpumask_t new_mask = *in_mask;
+	cpumask_ptr cpus_allowed;
+	cpumask_ptr new_mask;
 	struct task_struct *p;
 	int retval;
 
+	get_cpumask_var(cpus_allowed, temp_cpumask_1);
+	get_cpumask_var(new_mask, temp_cpumask_2);
+	*new_mask = *in_mask;
 	get_online_cpus();
 	read_lock(&tasklist_lock);
 
@@ -5416,24 +5435,26 @@ long sched_setaffinity(pid_t pid, const 
 	if (retval)
 		goto out_unlock;
 
-	cpuset_cpus_allowed(p, &cpus_allowed);
-	cpus_and(new_mask, new_mask, cpus_allowed);
+	cpuset_cpus_allowed(p, cpus_allowed);
+	cpus_and(*new_mask, *new_mask, *cpus_allowed);
  again:
-	retval = set_cpus_allowed_ptr(p, &new_mask);
+	retval = set_cpus_allowed_ptr(p, new_mask);
 
 	if (!retval) {
-		cpuset_cpus_allowed(p, &cpus_allowed);
-		if (!cpus_subset(new_mask, cpus_allowed)) {
+		cpuset_cpus_allowed(p, cpus_allowed);
+		if (!cpus_subset(*new_mask, *cpus_allowed)) {
 			/*
 			 * We must have raced with a concurrent cpuset
 			 * update. Just reset the cpus_allowed to the
 			 * cpuset's cpus_allowed
 			 */
-			new_mask = cpus_allowed;
+			*new_mask = *cpus_allowed;
 			goto again;
 		}
 	}
 out_unlock:
+	put_cpumask_var(cpus_allowed, temp_cpumask_1);
+	put_cpumask_var(new_mask, temp_cpumask_2);
 	put_task_struct(p);
 	put_online_cpus();
 	return retval;
@@ -6107,15 +6128,19 @@ static int __migrate_task_irq(struct tas
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	unsigned long flags;
-	cpumask_t mask;
+	cpumask_ptr mask;
+	cpumask_ptr cpus_allowed;
 	struct rq *rq;
 	int dest_cpu;
 
+	get_cpumask_var(mask, temp_cpumask_1);
+	get_cpumask_var(cpus_allowed, temp_cpumask_2);
 	do {
 		/* On same node? */
-		mask = node_to_cpumask(cpu_to_node(dead_cpu));
-		cpus_and(mask, mask, p->cpus_allowed);
-		dest_cpu = any_online_cpu(mask);
+		node_to_cpumask_ptr(pnodemask, cpu_to_node(dead_cpu));
+		*mask = *pnodemask;
+		cpus_and(*mask, *mask, p->cpus_allowed);
+		dest_cpu = any_online_cpu(*mask);
 
 		/* On any allowed CPU? */
 		if (dest_cpu >= nr_cpu_ids)
@@ -6123,9 +6148,8 @@ static void move_task_off_dead_cpu(int d
 
 		/* No more Mr. Nice Guy. */
 		if (dest_cpu >= nr_cpu_ids) {
-			cpumask_t cpus_allowed;
+			cpuset_cpus_allowed_locked(p, cpus_allowed);
 
-			cpuset_cpus_allowed_locked(p, &cpus_allowed);
 			/*
 			 * Try to stay on the same cpuset, where the
 			 * current cpuset may be a subset of all cpus.
@@ -6134,7 +6158,7 @@ static void move_task_off_dead_cpu(int d
 			 * called within calls to cpuset_lock/cpuset_unlock.
 			 */
 			rq = task_rq_lock(p, &flags);
-			p->cpus_allowed = cpus_allowed;
+			p->cpus_allowed = *cpus_allowed;
 			dest_cpu = any_online_cpu(p->cpus_allowed);
 			task_rq_unlock(rq, &flags);
 
@@ -6150,6 +6174,9 @@ static void move_task_off_dead_cpu(int d
 			}
 		}
 	} while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
+
+	put_cpumask_var(mask, temp_cpumask_1);
+	put_cpumask_var(cpus_allowed, temp_cpumask_2);
 }
 
 /*
@@ -6710,7 +6737,7 @@ static int sched_domain_debug_one(struct
 
 static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
-	cpumask_t *groupmask;
+	cpumask_ptr groupmask;
 	int level = 0;
 
 	if (!sd) {
@@ -6720,7 +6747,7 @@ static void sched_domain_debug(struct sc
 
 	printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
 
-	groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+	alloc_cpumask_ptr(&groupmask);
 	if (!groupmask) {
 		printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
 		return;
@@ -6734,7 +6761,7 @@ static void sched_domain_debug(struct sc
 		if (!sd)
 			break;
 	}
-	kfree(groupmask);
+	free_cpumask_ptr(groupmask);
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
@@ -7120,9 +7147,9 @@ static int cpu_to_allnodes_group(int cpu
 				 struct sched_group **sg, cpumask_t *nodemask)
 {
 	int group;
+	node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
 
-	*nodemask = node_to_cpumask(cpu_to_node(cpu));
-	cpus_and(*nodemask, *nodemask, *cpu_map);
+	cpus_and(*nodemask, *pnodemask, *cpu_map);
 	group = first_cpu(*nodemask);
 
 	if (sg)
@@ -7172,9 +7199,9 @@ static void free_sched_groups(const cpum
 
 		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
+			node_to_cpumask_ptr(pnodemask, i);
 
-			*nodemask = node_to_cpumask(i);
-			cpus_and(*nodemask, *nodemask, *cpu_map);
+			cpus_and(*nodemask, *pnodemask, *cpu_map);
 			if (cpus_empty(*nodemask))
 				continue;
 
@@ -7297,19 +7324,6 @@ struct allmasks {
 #endif
 };
 
-#if	NR_CPUS > 128
-#define	SCHED_CPUMASK_ALLOC		1
-#define	SCHED_CPUMASK_FREE(v)		kfree(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
-#else
-#define	SCHED_CPUMASK_ALLOC		0
-#define	SCHED_CPUMASK_FREE(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
-#endif
-
-#define	SCHED_CPUMASK_VAR(v, a) 	cpumask_t *v = (cpumask_t *) \
-			((unsigned long)(a) + offsetof(struct allmasks, v))
-
 static int default_relax_domain_level = -1;
 
 static int __init setup_relax_domain_level(char *str)
@@ -7354,8 +7368,9 @@ static int __build_sched_domains(const c
 {
 	int i;
 	struct root_domain *rd;
-	SCHED_CPUMASK_DECLARE(allmasks);
-	cpumask_t *tmpmask;
+	CPUMASK_ALLOC(allmasks);
+	CPUMASK_PTR(tmpmask, allmasks);
+
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
@@ -7367,6 +7382,7 @@ static int __build_sched_domains(const c
 				    GFP_KERNEL);
 	if (!sched_group_nodes) {
 		printk(KERN_WARNING "Can not alloc sched group node list\n");
+		CPUMASK_FREE(allmasks);
 		return -ENOMEM;
 	}
 #endif
@@ -7377,13 +7393,11 @@ static int __build_sched_domains(const c
 #ifdef CONFIG_NUMA
 		kfree(sched_group_nodes);
 #endif
+		CPUMASK_FREE(allmasks);
 		return -ENOMEM;
 	}
 
-#if SCHED_CPUMASK_ALLOC
-	/* get space for all scratch cpumask variables */
-	allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
-	if (!allmasks) {
+	if (allmasks == NULL) {
 		printk(KERN_WARNING "Cannot alloc cpumask array\n");
 		kfree(rd);
 #ifdef CONFIG_NUMA
@@ -7391,9 +7405,6 @@ static int __build_sched_domains(const c
 #endif
 		return -ENOMEM;
 	}
-#endif
-	tmpmask = (cpumask_t *)allmasks;
-
 
 #ifdef CONFIG_NUMA
 	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
@@ -7404,10 +7415,10 @@ static int __build_sched_domains(const c
 	 */
 	for_each_cpu_mask_nr(i, *cpu_map) {
 		struct sched_domain *sd = NULL, *p;
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
+		CPUMASK_PTR(nodemask, allmasks);
+		node_to_cpumask_ptr(pnodemask, cpu_to_node(i));
 
-		*nodemask = node_to_cpumask(cpu_to_node(i));
-		cpus_and(*nodemask, *nodemask, *cpu_map);
+		cpus_and(*nodemask, *pnodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
 		if (cpus_weight(*cpu_map) >
@@ -7470,8 +7481,8 @@ static int __build_sched_domains(const c
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu_mask_nr(i, *cpu_map) {
-		SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
+		CPUMASK_PTR(this_sibling_map, allmasks);
+		CPUMASK_PTR(send_covered, allmasks);
 
 		*this_sibling_map = per_cpu(cpu_sibling_map, i);
 		cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
@@ -7487,8 +7498,8 @@ static int __build_sched_domains(const c
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
 	for_each_cpu_mask_nr(i, *cpu_map) {
-		SCHED_CPUMASK_VAR(this_core_map, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
+		CPUMASK_PTR(this_core_map, allmasks);
+		CPUMASK_PTR(send_covered, allmasks);
 
 		*this_core_map = cpu_coregroup_map(i);
 		cpus_and(*this_core_map, *this_core_map, *cpu_map);
@@ -7503,11 +7514,11 @@ static int __build_sched_domains(const c
 
 	/* Set up physical groups */
 	for (i = 0; i < nr_node_ids; i++) {
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
+		CPUMASK_PTR(nodemask, allmasks);
+		CPUMASK_PTR(send_covered, allmasks);
+		node_to_cpumask_ptr(pnodemask, i);
 
-		*nodemask = node_to_cpumask(i);
-		cpus_and(*nodemask, *nodemask, *cpu_map);
+		cpus_and(*nodemask, *pnodemask, *cpu_map);
 		if (cpus_empty(*nodemask))
 			continue;
 
@@ -7519,7 +7530,7 @@ static int __build_sched_domains(const c
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
 	if (sd_allnodes) {
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
+		CPUMASK_PTR(send_covered, allmasks);
 
 		init_sched_build_groups(cpu_map, cpu_map,
 					&cpu_to_allnodes_group,
@@ -7529,15 +7540,15 @@ static int __build_sched_domains(const c
 	for (i = 0; i < nr_node_ids; i++) {
 		/* Set up node groups */
 		struct sched_group *sg, *prev;
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
-		SCHED_CPUMASK_VAR(domainspan, allmasks);
-		SCHED_CPUMASK_VAR(covered, allmasks);
+		CPUMASK_PTR(nodemask, allmasks);
+		CPUMASK_PTR(domainspan, allmasks);
+		CPUMASK_PTR(covered, allmasks);
+		node_to_cpumask_ptr(pnodemask, i);
 		int j;
 
-		*nodemask = node_to_cpumask(i);
 		cpus_clear(*covered);
 
-		cpus_and(*nodemask, *nodemask, *cpu_map);
+		cpus_and(*nodemask, *pnodemask, *cpu_map);
 		if (cpus_empty(*nodemask)) {
 			sched_group_nodes[i] = NULL;
 			continue;
@@ -7566,7 +7577,7 @@ static int __build_sched_domains(const c
 		prev = sg;
 
 		for (j = 0; j < nr_node_ids; j++) {
-			SCHED_CPUMASK_VAR(notcovered, allmasks);
+			CPUMASK_PTR(notcovered, allmasks);
 			int n = (i + j) % nr_node_ids;
 			node_to_cpumask_ptr(pnodemask, n);
 
@@ -7645,13 +7656,13 @@ static int __build_sched_domains(const c
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	CPUMASK_FREE(allmasks);
 	return 0;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	CPUMASK_FREE(allmasks);
 	return -ENOMEM;
 #endif
 }

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 08/13] cpufreq: Reduce stack size requirements in acpi-cpufreq.c
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (6 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 09/13] genapic: reduce stack pressure in io_apic.c step 1 temp cpumask_ts Mike Travis
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-acpi-cpufreq_c --]
[-- Type: text/plain, Size: 7299 bytes --]

  * Make the following changes to acpi-cpufreq.c functions:

    - use node_to_cpumask_ptr in place of node_to_cpumask
    - use get_cpumask_var for temporary cpumask_t variables
    - use alloc_cpumask_ptr where available
    - use a per_cpu temp variable for drv_cmd which contains an
	embedded cpumask_t variable.

  * The resultant stack savings are:

    ====== Stack (-l 100)

	1 - initial
	2 - stack-hogs-acpi-cpufreq_c
	'.' is less than the limit(100)

       .1.    .2.    ..final..
      1608  -1024 584   -63%  acpi_cpufreq_target
      1096  -1096   .  -100%  sched_balance_self
      1048   -536 512   -51%  get_cur_val
       520   -520   .  -100%  get_measured_perf

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c |   93 +++++++++++++++++------------
 1 file changed, 57 insertions(+), 36 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ linux-2.6.tip/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/cpumask_ptr.h>
 
 #include <linux/acpi.h>
 #include <acpi/processor.h>
@@ -68,6 +69,8 @@ struct acpi_cpufreq_data {
 };
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+static DEFINE_PER_CPU(struct drv_cmd, temp_cmd);
+static DEFINE_PER_CPUMASK(temp_cpumask);
 
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
@@ -214,33 +217,39 @@ static void drv_write(struct drv_cmd *cm
 static u32 get_cur_val(const cpumask_t *mask)
 {
 	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
+	struct drv_cmd *cmd;
+	u32 temp_val;
 
 	if (unlikely(cpus_empty(*mask)))
 		return 0;
 
+	cmd = &get_cpu_var(temp_cmd);
 	switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
+		cmd->type = SYSTEM_INTEL_MSR_CAPABLE;
+		cmd->addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
 	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
+		cmd->type = SYSTEM_IO_CAPABLE;
 		perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
+		cmd->addr.io.port = perf->control_register.address;
+		cmd->addr.io.bit_width = perf->control_register.bit_width;
 		break;
 	default:
+		put_cpu_var(temp_cmd);
 		return 0;
 	}
 
-	cmd.mask = *mask;
+	cmd->mask = *mask;
 
-	drv_read(&cmd);
+	drv_read(cmd);
 
-	dprintk("get_cur_val = %u\n", cmd.val);
+	temp_val = cmd->val;
+	put_cpu_var(temp_cmd);
 
-	return cmd.val;
+	dprintk("get_cur_val = %u\n", temp_val);
+
+	return temp_val;
 }
 
 /*
@@ -266,11 +275,12 @@ static unsigned int get_measured_perf(un
 		u64 whole;
 	} aperf_cur, mperf_cur;
 
-	cpumask_t saved_mask;
+	cpumask_ptr saved_mask;
 	unsigned int perf_percent;
 	unsigned int retval;
 
-	saved_mask = current->cpus_allowed;
+	get_cpumask_var(saved_mask, temp_cpumask);
+	*saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (get_cpu() != cpu) {
 		/* We were not able to run on requested processor */
@@ -329,7 +339,8 @@ static unsigned int get_measured_perf(un
 	retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
 
 	put_cpu();
-	set_cpus_allowed_ptr(current, &saved_mask);
+	set_cpus_allowed_ptr(current, saved_mask);
+	put_cpumask_var(saved_mask, temp_cpumask);
 
 	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
 	return retval;
@@ -384,8 +395,8 @@ static int acpi_cpufreq_target(struct cp
 	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
 	struct acpi_processor_performance *perf;
 	struct cpufreq_freqs freqs;
-	cpumask_t online_policy_cpus;
-	struct drv_cmd cmd;
+	cpumask_ptr online_policy_cpus;
+	struct drv_cmd *cmd;
 	unsigned int next_state = 0; /* Index into freq_table */
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	unsigned int i;
@@ -394,9 +405,8 @@ static int acpi_cpufreq_target(struct cp
 	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
 	if (unlikely(data == NULL ||
-	     data->acpi_data == NULL || data->freq_table == NULL)) {
+	     data->acpi_data == NULL || data->freq_table == NULL))
 		return -ENODEV;
-	}
 
 	perf = data->acpi_data;
 	result = cpufreq_frequency_table_target(policy,
@@ -406,11 +416,15 @@ static int acpi_cpufreq_target(struct cp
 	if (unlikely(result))
 		return -ENODEV;
 
+	/* obtain temp variables */
+	get_cpumask_var(online_policy_cpus, temp_cpumask);
+	cmd = &get_cpu_var(temp_cmd);
+
 #ifdef CONFIG_HOTPLUG_CPU
 	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+	cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
 #else
-	online_policy_cpus = policy->cpus;
+	*online_policy_cpus = policy->cpus;
 #endif
 
 	next_perf_state = data->freq_table[next_state].index;
@@ -422,56 +436,63 @@ static int acpi_cpufreq_target(struct cp
 		} else {
 			dprintk("Already at target state (P%d)\n",
 				next_perf_state);
-			return 0;
+			result = 0;
+			goto out;
 		}
 	}
 
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
+		cmd->type = SYSTEM_INTEL_MSR_CAPABLE;
+		cmd->addr.msr.reg = MSR_IA32_PERF_CTL;
+		cmd->val = (u32) perf->states[next_perf_state].control;
 		break;
 	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		cmd.val = (u32) perf->states[next_perf_state].control;
+		cmd->type = SYSTEM_IO_CAPABLE;
+		cmd->addr.io.port = perf->control_register.address;
+		cmd->addr.io.bit_width = perf->control_register.bit_width;
+		cmd->val = (u32) perf->states[next_perf_state].control;
 		break;
 	default:
-		return -ENODEV;
+		result = -ENODEV;
+		goto out;
 	}
 
-	cpus_clear(cmd.mask);
+	cpus_clear(cmd->mask);
 
 	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cmd.mask = online_policy_cpus;
+		cmd->mask = *online_policy_cpus;
 	else
-		cpu_set(policy->cpu, cmd.mask);
+		cpu_set(policy->cpu, cmd->mask);
 
 	freqs.old = perf->states[perf->state].core_frequency * 1000;
 	freqs.new = data->freq_table[next_state].frequency;
-	for_each_cpu_mask_nr(i, cmd.mask) {
+	for_each_cpu_mask_nr(i, cmd->mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
 
-	drv_write(&cmd);
+	drv_write(cmd);
 
 	if (acpi_pstate_strict) {
-		if (!check_freqs(&cmd.mask, freqs.new, data)) {
+		if (!check_freqs(&cmd->mask, freqs.new, data)) {
 			dprintk("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
-			return -EAGAIN;
+			result = -EAGAIN;
+			goto out;
 		}
 	}
 
-	for_each_cpu_mask_nr(i, cmd.mask) {
+	for_each_cpu_mask_nr(i, cmd->mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
 	perf->state = next_perf_state;
 
+out:
+	put_cpumask_var(online_policy_cpus, temp_cpumask);
+	put_cpu_var(temp_cmd);
+
 	return result;
 }
 

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 09/13] genapic: reduce stack pressure in io_apic.c step 1 temp cpumask_ts
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (7 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 08/13] cpufreq: Reduce stack size requirements in acpi-cpufreq.c Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-08 11:01   ` Andi Kleen
  2008-09-06 23:50 ` [RFC 10/13] genapic: reduce stack pressure in io_apic.c step 2 internal abi Mike Travis
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-io_apic_c-step-1-temp-cpumask_ts --]
[-- Type: text/plain, Size: 19048 bytes --]

  * Step 1 of cleaning up io_apic.c removes local cpumask_t variables
    from the stack.

  - Method 1: remove unnecessary "extra" cpumask variables.

  - Method 2: use for_each_online_cpu_mask_nr() to logically AND
              the passed in mask with cpu_online_map, eliminating
	      the need for a temp cpumask variable.

  - Method 3: use get_cpumask_var variables where possible. The current
    assignment of temp variables is:
	

    /*
     * Temporary cpumask variables
     *
     * (XXX - would be _MUCH_ better as a "stack" of temp cpumasks.)
     *
     * level 4:
     *     irq_complete_move()
     *     check_timer()
     *     msi_compose_msg()
     *     set_msi_irq_affinity()
     *     ir_set_msi_irq_affinity()
     *     dmar_msi_set_affinity()
     *     set_ht_irq_affinity()
     *     arch_setup_ht_irq()
     *     setup_ioapic_dest()
     *
     * level 3:
     *     set_ioapic_affinity_irq()
     *     setup_IO_APIC_irq()
     *     migrate_ioapic_irq()
     *
     * level 2:
     *     create_irq_nr()
     *
     * level 1:
     *     __assign_irq_vector()
     *     setup_timer_IRQ0_pin()
     */

  * Addition of temp cpumask variables for the "target" of TARGET_CPUS
    is in preparation of changing the TARGET_CPUS for x86_64.  I've
    kept those changes here to document which routines get which temp
    cpumask variables.

  * Total stack size savings are in the last step.

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/io_apic.c |  268 ++++++++++++++++++++++++++++++----------------
 1 file changed, 175 insertions(+), 93 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6.tip/arch/x86/kernel/io_apic.c
@@ -41,6 +41,7 @@
 #endif
 #include <linux/bootmem.h>
 #include <linux/dmar.h>
+#include <linux/cpumask_ptr.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -93,6 +94,39 @@ int mp_bus_id_to_type[MAX_MP_BUSSES];
 
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
+/*
+ * Temporary cpumask variables
+ *
+ * (XXX - would be _MUCH_ better as a "stack" of temp cpumasks.)
+ *
+ * level 4:
+ *     irq_complete_move()
+ *     check_timer()
+ *     msi_compose_msg()
+ *     set_msi_irq_affinity()
+ *     ir_set_msi_irq_affinity()
+ *     dmar_msi_set_affinity()
+ *     set_ht_irq_affinity()
+ *     arch_setup_ht_irq()
+ *     setup_ioapic_dest()
+ *
+ * level 3:
+ *     set_ioapic_affinity_irq()
+ *     setup_IO_APIC_irq()
+ *     migrate_ioapic_irq()
+ *
+ * level 2:
+ *     create_irq_nr()
+ *
+ * level 1:
+ *     __assign_irq_vector()
+ *     setup_timer_IRQ0_pin()
+ */
+static DEFINE_PER_CPUMASK(cpumask_irq_level_4);
+static DEFINE_PER_CPUMASK(cpumask_irq_level_3);
+static DEFINE_PER_CPUMASK(cpumask_irq_level_2);
+static DEFINE_PER_CPUMASK(cpumask_irq_level_1);
+
 int skip_ioapic_setup;
 
 static int __init parse_noapic(char *str)
@@ -551,19 +585,24 @@ static void set_ioapic_affinity_irq(unsi
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
-	cpumask_t tmp;
+	cpumask_ptr tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	get_cpumask_var(tmp, cpumask_irq_level_3);
+	cpus_and(*tmp, mask, cpu_online_map);
+	if (cpus_empty(*tmp)) {
+		put_cpumask_var(tmp, cpumask_irq_level_3);
 		return;
+	}
 
 	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, mask)) {
+		put_cpumask_var(tmp, cpumask_irq_level_3);
 		return;
+	}
 
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(*tmp);
 	/*
 	 * Only the high 8 bits are valid.
 	 */
@@ -574,6 +613,7 @@ static void set_ioapic_affinity_irq(unsi
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
 	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
+	put_cpumask_var(tmp, cpumask_irq_level_3);
 }
 #endif /* CONFIG_SMP */
 
@@ -1206,7 +1246,7 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, cpumask_t inmask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1223,37 +1263,37 @@ static int __assign_irq_vector(int irq, 
 	unsigned int old_vector;
 	int cpu;
 	struct irq_cfg *cfg;
+	cpumask_ptr tmpmask;
 
 	cfg = irq_cfg(irq);
-
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
-
 	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
 		return -EBUSY;
 
+	/* Only try and allocate irqs on cpus that are present */
+	get_cpumask_var(tmpmask, cpumask_irq_level_1);
+
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
+		cpus_and(*tmpmask, inmask, cpu_online_map);
+		cpus_and(*tmpmask, cfg->domain, *tmpmask);
+		if (!cpus_empty(*tmpmask)) {
+			put_cpumask_var(tmpmask, cpumask_irq_level_1);
 			return 0;
+		}
 	}
 
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
+	for_each_online_cpu_mask_nr(cpu, inmask) {
 		int new_cpu;
 		int vector, offset;
 
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
+		*tmpmask = vector_allocation_domain(cpu);
 
 		vector = current_vector;
 		offset = current_offset;
 next:
 		vector += 8;
 		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
+			/* If no more vectors on large boxen, must share them */
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
 		}
@@ -1266,7 +1306,7 @@ next:
 		if (vector == SYSCALL_VECTOR)
 			goto next;
 #endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_online_cpu_mask_nr(new_cpu, *tmpmask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
@@ -1276,12 +1316,14 @@ next:
 			cfg->move_in_progress = 1;
 			cfg->old_domain = cfg->domain;
 		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_online_cpu_mask_nr(new_cpu, *tmpmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
-		cfg->domain = domain;
+		cfg->domain = *tmpmask;
+		put_cpumask_var(tmpmask, cpumask_irq_level_1);
 		return 0;
 	}
+	put_cpumask_var(tmpmask, cpumask_irq_level_1);
 	return -ENOSPC;
 }
 
@@ -1299,15 +1341,13 @@ static int assign_irq_vector(int irq, cp
 static void __clear_irq_vector(int irq)
 {
 	struct irq_cfg *cfg;
-	cpumask_t mask;
 	int cpu, vector;
 
 	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_online_cpu_mask_nr(cpu, cfg->domain)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
@@ -1478,18 +1518,19 @@ static void setup_IO_APIC_irq(int apic, 
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
+	cpumask_ptr mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = irq_cfg(irq);
 
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
-		return;
+	get_cpumask_var(mask, cpumask_irq_level_3);
+	*mask = TARGET_CPUS;
+	if (assign_irq_vector(irq, *mask))
+		goto out;
 
-	cpus_and(mask, cfg->domain, mask);
+	cpus_and(*mask, cfg->domain, *mask);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1499,12 +1540,12 @@ static void setup_IO_APIC_irq(int apic, 
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cpu_mask_to_apicid(*mask), trigger, polarity,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
 		__clear_irq_vector(irq);
-		return;
+		goto out;
 	}
 
 	ioapic_register_intr(irq, trigger);
@@ -1512,6 +1553,8 @@ static void setup_IO_APIC_irq(int apic, 
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic, pin, entry);
+out:
+	put_cpumask_var(mask, cpumask_irq_level_3);
 }
 
 static void __init setup_IO_APIC_irqs(void)
@@ -1560,6 +1603,7 @@ static void __init setup_timer_IRQ0_pin(
 					int vector)
 {
 	struct IO_APIC_route_entry entry;
+	cpumask_ptr tgt_cpus;
 
 #ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled)
@@ -1567,6 +1611,8 @@ static void __init setup_timer_IRQ0_pin(
 #endif
 
 	memset(&entry, 0, sizeof(entry));
+	get_cpumask_var(tgt_cpus, cpumask_irq_level_1);
+	*tgt_cpus = TARGET_CPUS;
 
 	/*
 	 * We use logical delivery to get the timer IRQ
@@ -1574,7 +1620,7 @@ static void __init setup_timer_IRQ0_pin(
 	 */
 	entry.dest_mode = INT_DEST_MODE;
 	entry.mask = 1;					/* mask IRQ now */
-	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.dest = cpu_mask_to_apicid(*tgt_cpus);
 	entry.delivery_mode = INT_DELIVERY_MODE;
 	entry.polarity = 0;
 	entry.trigger = 0;
@@ -1590,6 +1636,7 @@ static void __init setup_timer_IRQ0_pin(
 	 * Add it to the IO-APIC irq-routing table:
 	 */
 	ioapic_write_entry(apic, pin, entry);
+	put_cpumask_var(tgt_cpus, cpumask_irq_level_1);
 }
 
 
@@ -2250,25 +2297,26 @@ static void migrate_ioapic_irq(int irq, 
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
-	cpumask_t tmp, cleanup_mask;
+	cpumask_ptr tmpmask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
+	get_cpumask_var(tmpmask, cpumask_irq_level_3);
+	cpus_and(*tmpmask, mask, cpu_online_map);
+	if (cpus_empty(*tmpmask))
+		goto out;
 
 	if (get_irte(irq, &irte))
-		return;
+		goto out;
 
 	if (assign_irq_vector(irq, mask))
-		return;
+		goto out;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tmpmask, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(*tmpmask);
 
 	desc = irq_to_desc(irq);
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
@@ -2287,13 +2335,15 @@ static void migrate_ioapic_irq(int irq, 
 	modify_irte(irq, &irte);
 
 	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cpus_and(*tmpmask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(*tmpmask);
+		send_IPI_mask(*tmpmask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 
 	desc->affinity = mask;
+out:
+	put_cpumask_var(tmpmask, cpumask_irq_level_3);
 }
 
 static int migrate_irq_remapped_level(int irq)
@@ -2415,11 +2465,13 @@ static void irq_complete_move(unsigned i
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
+		cpumask_ptr cleanup_mask;
 
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		get_cpumask_var(cleanup_mask, cpumask_irq_level_4);
+		cpus_and(*cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(*cleanup_mask);
+		send_IPI_mask(*cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		put_cpumask_var(cleanup_mask, cpumask_irq_level_4);
 		cfg->move_in_progress = 0;
 	}
 }
@@ -2749,6 +2801,7 @@ static inline void __init check_timer(vo
 	unsigned long flags;
 	unsigned int ver;
 	int no_pin1 = 0;
+	cpumask_ptr tgt_cpus;
 
 	local_irq_save(flags);
 
@@ -2759,7 +2812,10 @@ static inline void __init check_timer(vo
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
+	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
+	*tgt_cpus = TARGET_CPUS;
+	assign_irq_vector(0, *tgt_cpus);
+	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -3059,12 +3115,15 @@ unsigned int create_irq_nr(unsigned int 
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new;
+	cpumask_ptr tgt_cpus;
 
 #ifndef CONFIG_HAVE_SPARSE_IRQ
 	irq_want = nr_irqs - 1;
 #endif
 
 	irq = 0;
+	get_cpumask_var(tgt_cpus, cpumask_irq_level_2);
+	*tgt_cpus = TARGET_CPUS;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
@@ -3075,11 +3134,12 @@ unsigned int create_irq_nr(unsigned int 
 		/* check if need to create one */
 		if (!cfg_new)
 			cfg_new = irq_cfg_alloc(new);
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+		if (__assign_irq_vector(new, *tgt_cpus) == 0)
 			irq = new;
 		break;
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
+	put_cpumask_var(tgt_cpus, cpumask_irq_level_2);
 
 	if (irq > 0) {
 		dynamic_irq_init(irq);
@@ -3122,16 +3182,18 @@ static int msi_compose_msg(struct pci_de
 	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
-	cpumask_t tmp;
+	cpumask_ptr tgt_cpus;
 
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
+	*tgt_cpus = TARGET_CPUS;
+	err = assign_irq_vector(irq, *tgt_cpus);
 	if (err)
 		return err;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tgt_cpus, cfg->domain, *tgt_cpus);
+	dest = cpu_mask_to_apicid(*tgt_cpus);
+	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
@@ -3190,19 +3252,20 @@ static void set_msi_irq_affinity(unsigne
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
+	cpumask_ptr tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
+	get_cpumask_var(tmp, cpumask_irq_level_4);
+	cpus_and(*tmp, mask, cpu_online_map);
+	if (cpus_empty(*tmp))
+		goto out;
 
 	if (assign_irq_vector(irq, mask))
-		return;
+		goto out;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(*tmp);
 
 	read_msi_msg(irq, &msg);
 
@@ -3214,6 +3277,8 @@ static void set_msi_irq_affinity(unsigne
 	write_msi_msg(irq, &msg);
 	desc = irq_to_desc(irq);
 	desc->affinity = mask;
+out:
+	put_cpumask_var(tmp, cpumask_irq_level_4);
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -3225,12 +3290,13 @@ static void ir_set_msi_irq_affinity(unsi
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
+	cpumask_ptr tmp;
 	struct irte irte;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	get_cpumask_var(tmp, cpumask_irq_level_4);
+	cpus_and(*tmp, mask, cpu_online_map);
+	if (cpus_empty(*tmp))
 		return;
 
 	if (get_irte(irq, &irte))
@@ -3240,8 +3306,8 @@ static void ir_set_msi_irq_affinity(unsi
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(*tmp);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3257,14 +3323,15 @@ static void ir_set_msi_irq_affinity(unsi
 	 * vector allocation.
 	 */
 	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cpus_and(*tmp, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(*tmp);
+		send_IPI_mask(*tmp, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 
 	desc = irq_to_desc(irq);
 	desc->affinity = mask;
+	put_cpumask_var(tmp, cpumask_irq_level_4);
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -3469,19 +3536,20 @@ static void dmar_msi_set_affinity(unsign
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
+	cpumask_ptr tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
+	get_cpumask_var(tmp, cpumask_irq_level_4);
+	cpus_and(*tmp, mask, cpu_online_map);
+	if (cpus_empty(*tmp))
+		goto out;
 
 	if (assign_irq_vector(irq, mask))
-		return;
+		goto out;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(*tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3493,6 +3561,8 @@ static void dmar_msi_set_affinity(unsign
 	dmar_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
 	desc->affinity = mask;
+out:
+	put_cpumask_var(tmp, cpumask_irq_level_4);
 }
 #endif /* CONFIG_SMP */
 
@@ -3548,23 +3618,26 @@ static void set_ht_irq_affinity(unsigned
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp;
+	cpumask_ptr tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
+	get_cpumask_var(tmp, cpumask_irq_level_4);
+	cpus_and(*tmp, mask, cpu_online_map);
+	if (cpus_empty(*tmp))
+		goto out;
 
 	if (assign_irq_vector(irq, mask))
-		return;
+		goto out;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(*tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(*tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	desc = irq_to_desc(irq);
 	desc->affinity = mask;
+out:
+	put_cpumask_var(tmp, cpumask_irq_level_4);
 }
 #endif
 
@@ -3583,17 +3656,18 @@ int arch_setup_ht_irq(unsigned int irq, 
 {
 	struct irq_cfg *cfg;
 	int err;
-	cpumask_t tmp;
+	cpumask_ptr tgt_cpus;
 
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
+	*tgt_cpus = TARGET_CPUS;
+	err = assign_irq_vector(irq, *tgt_cpus);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
 		cfg = irq_cfg(irq);
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+		cpus_and(*tgt_cpus, cfg->domain, *tgt_cpus);
+		dest = cpu_mask_to_apicid(*tgt_cpus);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3615,6 +3689,7 @@ int arch_setup_ht_irq(unsigned int irq, 
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
 	}
+	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 	return err;
 }
 #endif /* CONFIG_HT_IRQ */
@@ -3799,10 +3874,12 @@ void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
 	struct irq_cfg *cfg;
+	cpumask_ptr tgt_cpus;
 
 	if (skip_ioapic_setup == 1)
 		return;
 
+	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
 		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
 			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
@@ -3820,14 +3897,19 @@ void __init setup_ioapic_dest(void)
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
 #ifdef CONFIG_INTR_REMAP
-			else if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+			else if (intr_remapping_enabled) {
+				*tgt_cpus = TARGET_CPUS;
+				set_ir_ioapic_affinity_irq(irq, *tgt_cpus);
+			}
 #endif
-			else
-				set_ioapic_affinity_irq(irq, TARGET_CPUS);
+			else {
+				*tgt_cpus = TARGET_CPUS;
+				set_ioapic_affinity_irq(irq, *tgt_cpus);
+			}
 		}
 
 	}
+	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 }
 #endif
 

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 10/13] genapic: reduce stack pressure in io_apic.c step 2 internal abi
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (8 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 09/13] genapic: reduce stack pressure in io_apic.c step 1 temp cpumask_ts Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 11/13] genapic: reduce stack pressure in io_apic.c step 3 target_cpus Mike Travis
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-io_apic_c-step-2-internal-abi --]
[-- Type: text/plain, Size: 7922 bytes --]

  * Step 2 of cleaning up io_apic.c removes cpumask_t variables
    passed as arguments on the stack for internal functions:

	assign_irq_vector()
	__assign_irq_vector()
	migrate_ioapic_irq()
	set_ioapic_affinity_irq()
	set_ir_ioapic_affinity_irq()

   The last two functions are used both internally and externally so
   there are now intermediate functions to maintain the external ABI.

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/io_apic.c |   66 +++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 27 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6.tip/arch/x86/kernel/io_apic.c
@@ -578,9 +578,9 @@ static void __target_IO_APIC_irq(unsigne
 	}
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, cpumask_t *mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq_p(unsigned int irq, cpumask_t *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
@@ -589,7 +589,7 @@ static void set_ioapic_affinity_irq(unsi
 	struct irq_desc *desc;
 
 	get_cpumask_var(tmp, cpumask_irq_level_3);
-	cpus_and(*tmp, mask, cpu_online_map);
+	cpus_and(*tmp, *mask, cpu_online_map);
 	if (cpus_empty(*tmp)) {
 		put_cpumask_var(tmp, cpumask_irq_level_3);
 		return;
@@ -601,7 +601,7 @@ static void set_ioapic_affinity_irq(unsi
 		return;
 	}
 
-	cpus_and(*tmp, cfg->domain, mask);
+	cpus_and(*tmp, cfg->domain, *mask);
 	dest = cpu_mask_to_apicid(*tmp);
 	/*
 	 * Only the high 8 bits are valid.
@@ -611,10 +611,16 @@ static void set_ioapic_affinity_irq(unsi
 	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
+	desc->affinity = *mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 	put_cpumask_var(tmp, cpumask_irq_level_3);
 }
+
+/* for struct irq_chip.set_affinity compatibility */
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	set_ioapic_affinity_irq_p(irq, &mask);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1246,7 +1252,7 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, cpumask_t inmask)
+static int __assign_irq_vector(int irq, cpumask_t *inmask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1274,7 +1280,7 @@ static int __assign_irq_vector(int irq, 
 
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpus_and(*tmpmask, inmask, cpu_online_map);
+		cpus_and(*tmpmask, *inmask, cpu_online_map);
 		cpus_and(*tmpmask, cfg->domain, *tmpmask);
 		if (!cpus_empty(*tmpmask)) {
 			put_cpumask_var(tmpmask, cpumask_irq_level_1);
@@ -1282,7 +1288,7 @@ static int __assign_irq_vector(int irq, 
 		}
 	}
 
-	for_each_online_cpu_mask_nr(cpu, inmask) {
+	for_each_online_cpu_mask_nr(cpu, *inmask) {
 		int new_cpu;
 		int vector, offset;
 
@@ -1327,7 +1333,7 @@ next:
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, cpumask_t *mask)
 {
 	int err;
 	unsigned long flags;
@@ -1527,7 +1533,7 @@ static void setup_IO_APIC_irq(int apic, 
 
 	get_cpumask_var(mask, cpumask_irq_level_3);
 	*mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, *mask))
+	if (assign_irq_vector(irq, mask))
 		goto out;
 
 	cpus_and(*mask, cfg->domain, *mask);
@@ -2293,7 +2299,7 @@ static DECLARE_DELAYED_WORK(ir_migration
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq(int irq, cpumask_t *mask)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -2304,7 +2310,7 @@ static void migrate_ioapic_irq(int irq, 
 	unsigned long flags;
 
 	get_cpumask_var(tmpmask, cpumask_irq_level_3);
-	cpus_and(*tmpmask, mask, cpu_online_map);
+	cpus_and(*tmpmask, *mask, cpu_online_map);
 	if (cpus_empty(*tmpmask))
 		goto out;
 
@@ -2315,7 +2321,7 @@ static void migrate_ioapic_irq(int irq, 
 		goto out;
 
 	cfg = irq_cfg(irq);
-	cpus_and(*tmpmask, cfg->domain, mask);
+	cpus_and(*tmpmask, cfg->domain, *mask);
 	dest = cpu_mask_to_apicid(*tmpmask);
 
 	desc = irq_to_desc(irq);
@@ -2341,7 +2347,7 @@ static void migrate_ioapic_irq(int irq, 
 		cfg->move_in_progress = 0;
 	}
 
-	desc->affinity = mask;
+	desc->affinity = *mask;
 out:
 	put_cpumask_var(tmpmask, cpumask_irq_level_3);
 }
@@ -2365,7 +2371,7 @@ static int migrate_irq_remapped_level(in
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
+	migrate_ioapic_irq(irq, &desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
@@ -2402,19 +2408,25 @@ static void ir_irq_migration(struct work
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_p(unsigned int irq, cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
+		desc->pending_mask = *mask;
 		migrate_irq_remapped_level(irq);
 		return;
 	}
 
 	migrate_ioapic_irq(irq, mask);
 }
+
+/* for struct irq_chip.set_affinity compatibility */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	set_ir_ioapic_affinity_irq_p(irq, &mask);
+}
 #endif
 
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
@@ -2814,7 +2826,7 @@ static inline void __init check_timer(vo
 	disable_8259A_irq(0);
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 	*tgt_cpus = TARGET_CPUS;
-	assign_irq_vector(0, *tgt_cpus);
+	assign_irq_vector(0, tgt_cpus);
 	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 
 	/*
@@ -3134,7 +3146,7 @@ unsigned int create_irq_nr(unsigned int 
 		/* check if need to create one */
 		if (!cfg_new)
 			cfg_new = irq_cfg_alloc(new);
-		if (__assign_irq_vector(new, *tgt_cpus) == 0)
+		if (__assign_irq_vector(new, tgt_cpus) == 0)
 			irq = new;
 		break;
 	}
@@ -3186,7 +3198,7 @@ static int msi_compose_msg(struct pci_de
 
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 	*tgt_cpus = TARGET_CPUS;
-	err = assign_irq_vector(irq, *tgt_cpus);
+	err = assign_irq_vector(irq, tgt_cpus);
 	if (err)
 		return err;
 
@@ -3260,7 +3272,7 @@ static void set_msi_irq_affinity(unsigne
 	if (cpus_empty(*tmp))
 		goto out;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, &mask))
 		goto out;
 
 	cfg = irq_cfg(irq);
@@ -3302,7 +3314,7 @@ static void ir_set_msi_irq_affinity(unsi
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, &mask))
 		return;
 
 	cfg = irq_cfg(irq);
@@ -3544,7 +3556,7 @@ static void dmar_msi_set_affinity(unsign
 	if (cpus_empty(*tmp))
 		goto out;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, &mask))
 		goto out;
 
 	cfg = irq_cfg(irq);
@@ -3626,7 +3638,7 @@ static void set_ht_irq_affinity(unsigned
 	if (cpus_empty(*tmp))
 		goto out;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, &mask))
 		goto out;
 
 	cfg = irq_cfg(irq);
@@ -3660,7 +3672,7 @@ int arch_setup_ht_irq(unsigned int irq, 
 
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 	*tgt_cpus = TARGET_CPUS;
-	err = assign_irq_vector(irq, *tgt_cpus);
+	err = assign_irq_vector(irq, tgt_cpus);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
@@ -3899,12 +3911,12 @@ void __init setup_ioapic_dest(void)
 #ifdef CONFIG_INTR_REMAP
 			else if (intr_remapping_enabled) {
 				*tgt_cpus = TARGET_CPUS;
-				set_ir_ioapic_affinity_irq(irq, *tgt_cpus);
+				set_ir_ioapic_affinity_irq_p(irq, tgt_cpus);
 			}
 #endif
 			else {
 				*tgt_cpus = TARGET_CPUS;
-				set_ioapic_affinity_irq(irq, *tgt_cpus);
+				set_ioapic_affinity_irq_p(irq, tgt_cpus);
 			}
 		}
 

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 11/13] genapic: reduce stack pressure in io_apic.c step 3 target_cpus
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (9 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 10/13] genapic: reduce stack pressure in io_apic.c step 2 internal abi Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-07  7:55   ` Bert Wesarg
  2008-09-06 23:50 ` [RFC 12/13] genapic: reduce stack pressure in io_apic.c step 4 vector allocation Mike Travis
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-io_apic_c-step-3-target_cpus --]
[-- Type: text/plain, Size: 8323 bytes --]

  * Step 3 "target_cpus" of cleaning up io_apic.c modifies the TARGET_CPUS
    interface to pass a pointer to the returned mask for arch X86_64,
    removing yet another "cpumask_t variable on the stack".

	target_cpus = TARGET_CPUS;

    becomes:

	TARGET_CPUS(target_cpus);

    For x86_32 this is expanded to:

	target_cpus = (genapic->target_cpus());

    For x86_64 this is expanded to:

	target_cpus = (genapic->target_cpus)(&(target_cpus));

  * All the appropriate genapic "target_cpus" functions are modified
    to use this new interface.

  * Note that arch-i386-gcc had trouble with the name of the variables
    being "target_cpus" (conflicted with the macro TARGET_CPUS expanding
    to (target_cpus()), so they are now "tgt_cpus".

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genapic_flat_64.c        |    8 ++++----
 arch/x86/kernel/genx2apic_cluster.c      |    4 ++--
 arch/x86/kernel/genx2apic_phys.c         |    4 ++--
 arch/x86/kernel/genx2apic_uv_x.c         |    4 ++--
 arch/x86/kernel/io_apic.c                |   16 ++++++++--------
 include/asm-x86/genapic_64.h             |    2 +-
 include/asm-x86/mach-default/mach_apic.h |   28 +++++++++++++++++-----------
 include/asm-x86/mach-generic/mach_apic.h |    6 +++++-
 8 files changed, 41 insertions(+), 31 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6.tip/arch/x86/kernel/genapic_flat_64.c
@@ -31,9 +31,9 @@ static int __init flat_acpi_madt_oem_che
 	return 1;
 }
 
-static cpumask_t flat_target_cpus(void)
+static void flat_target_cpus(cpumask_t *retmask)
 {
-	return cpu_online_map;
+	*retmask = cpu_online_map;
 }
 
 static cpumask_t flat_vector_allocation_domain(int cpu)
@@ -194,9 +194,9 @@ static int __init physflat_acpi_madt_oem
 	return 0;
 }
 
-static cpumask_t physflat_target_cpus(void)
+static void physflat_target_cpus(cpumask_t *retmask)
 {
-	return cpu_online_map;
+	*retmask = cpu_online_map;
 }
 
 static cpumask_t physflat_vector_allocation_domain(int cpu)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_cluster.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_cluster.c
@@ -23,9 +23,9 @@ static int __init x2apic_acpi_madt_oem_c
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static void x2apic_target_cpus(cpumask_t *retmask)
 {
-	return cpumask_of_cpu(0);
+	*retmask = cpumask_of_cpu(0);
 }
 
 /*
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_phys.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_phys.c
@@ -29,9 +29,9 @@ static int __init x2apic_acpi_madt_oem_c
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static void x2apic_target_cpus(cpumask_t *retmask)
 {
-	return cpumask_of_cpu(0);
+	*retmask = cpumask_of_cpu(0);
 }
 
 static cpumask_t x2apic_vector_allocation_domain(int cpu)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -77,9 +77,9 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t uv_target_cpus(void)
+static void uv_target_cpus(cpumask_t *retmask)
 {
-	return cpumask_of_cpu(0);
+	*retmask = cpumask_of_cpu(0);
 }
 
 static cpumask_t uv_vector_allocation_domain(int cpu)
--- linux-2.6.tip.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6.tip/arch/x86/kernel/io_apic.c
@@ -1532,7 +1532,7 @@ static void setup_IO_APIC_irq(int apic, 
 	cfg = irq_cfg(irq);
 
 	get_cpumask_var(mask, cpumask_irq_level_3);
-	*mask = TARGET_CPUS;
+	TARGET_CPUS(*mask);
 	if (assign_irq_vector(irq, mask))
 		goto out;
 
@@ -1618,7 +1618,7 @@ static void __init setup_timer_IRQ0_pin(
 
 	memset(&entry, 0, sizeof(entry));
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_1);
-	*tgt_cpus = TARGET_CPUS;
+	TARGET_CPUS(*tgt_cpus);
 
 	/*
 	 * We use logical delivery to get the timer IRQ
@@ -2825,7 +2825,7 @@ static inline void __init check_timer(vo
 	 */
 	disable_8259A_irq(0);
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
-	*tgt_cpus = TARGET_CPUS;
+	TARGET_CPUS(*tgt_cpus);
 	assign_irq_vector(0, tgt_cpus);
 	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 
@@ -3135,7 +3135,7 @@ unsigned int create_irq_nr(unsigned int 
 
 	irq = 0;
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_2);
-	*tgt_cpus = TARGET_CPUS;
+	TARGET_CPUS(*tgt_cpus);
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
@@ -3197,7 +3197,7 @@ static int msi_compose_msg(struct pci_de
 	cpumask_ptr tgt_cpus;
 
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
-	*tgt_cpus = TARGET_CPUS;
+	TARGET_CPUS(*tgt_cpus);
 	err = assign_irq_vector(irq, tgt_cpus);
 	if (err)
 		return err;
@@ -3671,7 +3671,7 @@ int arch_setup_ht_irq(unsigned int irq, 
 	cpumask_ptr tgt_cpus;
 
 	get_cpumask_var(tgt_cpus, cpumask_irq_level_4);
-	*tgt_cpus = TARGET_CPUS;
+	TARGET_CPUS(*tgt_cpus);
 	err = assign_irq_vector(irq, tgt_cpus);
 	if (!err) {
 		struct ht_irq_msg msg;
@@ -3910,12 +3910,12 @@ void __init setup_ioapic_dest(void)
 						  irq_polarity(irq_entry));
 #ifdef CONFIG_INTR_REMAP
 			else if (intr_remapping_enabled) {
-				*tgt_cpus = TARGET_CPUS;
+				TARGET_CPUS(*tgt_cpus);
 				set_ir_ioapic_affinity_irq_p(irq, tgt_cpus);
 			}
 #endif
 			else {
-				*tgt_cpus = TARGET_CPUS;
+				TARGET_CPUS(*tgt_cpus);
 				set_ioapic_affinity_irq_p(irq, tgt_cpus);
 			}
 		}
--- linux-2.6.tip.orig/include/asm-x86/genapic_64.h
+++ linux-2.6.tip/include/asm-x86/genapic_64.h
@@ -20,7 +20,7 @@ struct genapic {
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
+	void (*target_cpus)(cpumask_t *retmask);
 	cpumask_t (*vector_allocation_domain)(int cpu);
 	void (*init_apic_ldr)(void);
 	/* ipi */
--- linux-2.6.tip.orig/include/asm-x86/mach-default/mach_apic.h
+++ linux-2.6.tip/include/asm-x86/mach-default/mach_apic.h
@@ -7,24 +7,22 @@
 #include <asm/smp.h>
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
+#define NO_BALANCE_IRQ (0)
+#define esr_disable (0)
 
-static inline cpumask_t target_cpus(void)
+#ifdef CONFIG_X86_64
+#include <asm/genapic.h>
+static inline void target_cpus(cpumask_t *retmask)
 { 
 #ifdef CONFIG_SMP
-	return cpu_online_map;
+	*retmask = cpu_online_map;
 #else
-	return cpumask_of_cpu(0);
+	*retmask = cpumask_of_cpu(0);
 #endif
 } 
-
-#define NO_BALANCE_IRQ (0)
-#define esr_disable (0)
-
-#ifdef CONFIG_X86_64
-#include <asm/genapic.h>
 #define INT_DELIVERY_MODE (genapic->int_delivery_mode)
 #define INT_DEST_MODE (genapic->int_dest_mode)
-#define TARGET_CPUS	  (genapic->target_cpus())
+#define TARGET_CPUS(retval) (genapic->target_cpus)(&(retval))
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
@@ -34,9 +32,17 @@ static inline cpumask_t target_cpus(void
 #define send_IPI_self (genapic->send_IPI_self)
 extern void setup_apic_routing(void);
 #else
+static inline cpumask_t target_cpus(void)
+{
+#ifdef CONFIG_SMP
+	return cpu_online_map;
+#else
+	return cpumask_of_cpu(0);
+#endif
+}
 #define INT_DELIVERY_MODE dest_LowestPrio
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
-#define TARGET_CPUS (target_cpus())
+#define TARGET_CPUS(retval)	retval = (target_cpus())
 /*
  * Set up the logical destination ID.
  *
--- linux-2.6.tip.orig/include/asm-x86/mach-generic/mach_apic.h
+++ linux-2.6.tip/include/asm-x86/mach-generic/mach_apic.h
@@ -3,13 +3,17 @@
 
 #include <asm/genapic.h>
 
+#ifdef CONFIG_X86_64
+#define TARGET_CPUS(retval)	  (genapic->target_cpus)(&(retval))
+#else
+#define TARGET_CPUS(retval)	  retval = (genapic->target_cpus())
+#endif
 #define esr_disable (genapic->ESR_DISABLE)
 #define NO_BALANCE_IRQ (genapic->no_balance_irq)
 #define INT_DELIVERY_MODE (genapic->int_delivery_mode)
 #define INT_DEST_MODE (genapic->int_dest_mode)
 #undef APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL (genapic->apic_destination_logical)
-#define TARGET_CPUS	  (genapic->target_cpus())
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define ioapic_phys_id_map (genapic->ioapic_phys_id_map)

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 12/13] genapic: reduce stack pressure in io_apic.c step 4 vector allocation
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (10 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 11/13] genapic: reduce stack pressure in io_apic.c step 3 target_cpus Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-06 23:50 ` [RFC 13/13] genapic: reduce stack pressure in io_apic.c step 5 cpu_mask_to_apicid
  2008-09-07  7:36 ` [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Ingo Molnar
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-io_apic_c-step-4-vector_allocation_domain --]
[-- Type: text/plain, Size: 9183 bytes --]

  * Step 4 "vector allocation" of cleaning up io_apic.c modifies the
    vector_allocation_domain genapic interface to pass a pointer to
    the returned mask removing yet another "cpumask_t variable on the stack".

	domain = vector_allocation_domain(cpu);

    becomes:

	vector_allocation_domain(cpu, &domain);

  * All the appropriate genapic "vector_allocation_domain" functions
    are modified to use this new interface.


Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genapic_flat_64.c        |    9 ++++-----
 arch/x86/kernel/genx2apic_cluster.c      |    6 ++----
 arch/x86/kernel/genx2apic_phys.c         |    6 ++----
 arch/x86/kernel/genx2apic_uv_x.c         |    6 ++----
 arch/x86/kernel/io_apic.c                |    2 +-
 arch/x86/mach-generic/bigsmp.c           |    4 ++--
 arch/x86/mach-generic/es7000.c           |    5 ++---
 arch/x86/mach-generic/numaq.c            |    5 ++---
 arch/x86/mach-generic/summit.c           |    5 ++---
 include/asm-x86/genapic_32.h             |    4 ++--
 include/asm-x86/genapic_64.h             |    2 +-
 include/asm-x86/mach-default/mach_apic.h |    5 ++---
 12 files changed, 24 insertions(+), 35 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6.tip/arch/x86/kernel/genapic_flat_64.c
@@ -36,7 +36,7 @@ static void flat_target_cpus(cpumask_t *
 	*retmask = cpu_online_map;
 }
 
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -46,8 +46,7 @@ static cpumask_t flat_vector_allocation_
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 /*
@@ -199,9 +198,9 @@ static void physflat_target_cpus(cpumask
 	*retmask = cpu_online_map;
 }
 
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	return cpumask_of_cpu(cpu);
+	*retmask = cpumask_of_cpu(cpu);
 }
 
 static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_cluster.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_cluster.c
@@ -31,11 +31,9 @@ static void x2apic_target_cpus(cpumask_t
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	*retmask = cpumask_of_cpu(cpu);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_phys.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_phys.c
@@ -34,11 +34,9 @@ static void x2apic_target_cpus(cpumask_t
 	*retmask = cpumask_of_cpu(0);
 }
 
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	*retmask = cpumask_of_cpu(cpu);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -82,11 +82,9 @@ static uv_target_cpus(cpumask_t *retmask
 	*retmask = cpumask_of_cpu(0);
 }
 
-static cpumask_t uv_vector_allocation_domain(int cpu)
+static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	*retmask = cpumask_of_cpu(cpu);
 }
 
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
--- linux-2.6.tip.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6.tip/arch/x86/kernel/io_apic.c
@@ -1292,7 +1292,7 @@ static int __assign_irq_vector(int irq, 
 		int new_cpu;
 		int vector, offset;
 
-		*tmpmask = vector_allocation_domain(cpu);
+		vector_allocation_domain(cpu, tmpmask);
 
 		vector = current_vector;
 		offset = current_offset;
--- linux-2.6.tip.orig/arch/x86/mach-generic/bigsmp.c
+++ linux-2.6.tip/arch/x86/mach-generic/bigsmp.c
@@ -41,9 +41,9 @@ static const struct dmi_system_id bigsmp
 	 { }
 };
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-        return cpumask_of_cpu(cpu);
+	*retmask = cpumask_of_cpu(cpu);
 }
 
 static int probe_bigsmp(void)
--- linux-2.6.tip.orig/arch/x86/mach-generic/es7000.c
+++ linux-2.6.tip/arch/x86/mach-generic/es7000.c
@@ -65,7 +65,7 @@ static int __init acpi_madt_oem_check(ch
 }
 #endif
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -75,8 +75,7 @@ static cpumask_t vector_allocation_domai
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
--- linux-2.6.tip.orig/arch/x86/mach-generic/numaq.c
+++ linux-2.6.tip/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domai
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
--- linux-2.6.tip.orig/arch/x86/mach-generic/summit.c
+++ linux-2.6.tip/arch/x86/mach-generic/summit.c
@@ -23,7 +23,7 @@ static int probe_summit(void)
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -33,8 +33,7 @@ static cpumask_t vector_allocation_domai
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
--- linux-2.6.tip.orig/include/asm-x86/genapic_32.h
+++ linux-2.6.tip/include/asm-x86/genapic_32.h
@@ -58,7 +58,7 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
 	/* ipi */
@@ -106,7 +106,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
-	APICFUNC(vector_allocation_domain)			\
+	APICFUNC(vector_allocation_domain)		\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
 	IPIFUNC(send_IPI_allbutself)			\
--- linux-2.6.tip.orig/include/asm-x86/genapic_64.h
+++ linux-2.6.tip/include/asm-x86/genapic_64.h
@@ -21,7 +21,7 @@ struct genapic {
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
 	void (*target_cpus)(cpumask_t *retmask);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 	void (*init_apic_ldr)(void);
 	/* ipi */
 	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
--- linux-2.6.tip.orig/include/asm-x86/mach-default/mach_apic.h
+++ linux-2.6.tip/include/asm-x86/mach-default/mach_apic.h
@@ -92,7 +92,7 @@ static inline int apicid_to_node(int log
 #endif
 }
 
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
         /* Careful. Some cpus do not strictly honor the set of cpus
          * specified in the interrupt destination when using lowest
@@ -102,8 +102,7 @@ static inline cpumask_t vector_allocatio
          * deliver interrupts to the wrong hyperthread when only one
          * hyperthread was specified in the interrupt desitination.
          */
-        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-        return domain;
+        *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 #endif
 

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC 13/13] genapic: reduce stack pressure in io_apic.c step 5 cpu_mask_to_apicid
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (11 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 12/13] genapic: reduce stack pressure in io_apic.c step 4 vector allocation Mike Travis
@ 2008-09-06 23:50 ` Mike Travis
  2008-09-07  7:36 ` [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Ingo Molnar
  13 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-06 23:50 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton
  Cc: davej, David Miller, Eric Dumazet, Eric W. Biederman,
	Jack Steiner, Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel

[-- Attachment #1: stack-hogs-io_apic_c-step-5-cpu_mask_to_apicid --]
[-- Type: text/plain, Size: 13470 bytes --]

  * Step 5 "cpu_mask_to_apicid" of cleaning up io_apic.c modifies the
    cpu_mask_to_apicid genapic interface to pass a pointer to incoming
    cpumask_t argument removing yet another "cpumask_t variable on the stack".

	apicid = cpu_mask_to_apicid(cpumask);

    becomes:

	apicid = cpu_mask_to_apicid(&cpumask);

  * All the appropriate genapic "cpu_mask_to_apicid" functions
    are modified to use this new interface.

  * Total stack savings are:

    ====== Stack (-l 100)

	1 - initial
	2 - stack-hogs-io_apic_c-step-1-temp-cpumask_ts
	3 - stack-hogs-io_apic_c-step-2-internal-abi
	4 - stack-hogs-io_apic_c-step-3-target_cpus
	5 - stack-hogs-io_apic_c-step-4-vector_allocation_domain
	6 - stack-hogs-io_apic_c-step-5-cpu_mask_to_apicid

       .1.    .2.   .3.   .4.    .5.   .6.    ..final..
      1576   -512     .  -512      .  -552   .  -100%  setup_IO_APIC_irq
      1560   -512     .  -528      .  -520   .  -100%  msi_compose_msg
      1560   -512     .  -512      .  -536   .  -100%  arch_setup_ht_irq
      1560  -1008     .     .   -552     .   .  -100%  __assign_irq_vector
      1144      .  -512  -632      .     .   .  -100%  setup_IO_APIC
      1048      .     .  -512      .  -536   .  -100%  setup_timer_IRQ0_pin
      1048      .  -512  -536      .     .   .  -100%  setup_ioapic_dest
      1048   -512  -536     .      .     .   .  -100%  set_ioapic_affinity_irq
      1040   -512     .     .      .  -528   .  -100%  set_msi_irq_affinity
      1040   -512     .     .      .  -528   .  -100%  set_ht_irq_affinity
      1040   -512     .     .      .  -528   .  -100%  dmar_msi_set_affinity
      1032     -8  -512  -512      .     .   .  -100%  create_irq_nr
      1024      .     .     .  -1024     .   .  -100%  x2apic_vector_allocation_domain
      1024      .     .     .  -1024     .   .  -100%  uv_vector_allocation_domain
       520   -520     .     .      .     .   .  -100%  __clear_irq_vector
       512   -512     .     .      .     .   .  -100%  irq_complete_move
       512      .  -512     .      .     .   .  -100%  assign_irq_vector
	 0      .  +536     .      .  -536   .      .  set_ioapic_affinity_irq_p
    18288 -5632 -2048 -3744 -2600 -4264   . -100%  Totals

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genapic_flat_64.c        |    8 ++++----
 arch/x86/kernel/genx2apic_cluster.c      |    4 ++--
 arch/x86/kernel/genx2apic_phys.c         |    4 ++--
 arch/x86/kernel/genx2apic_uv_x.c         |    4 ++--
 arch/x86/kernel/io_apic.c                |   20 ++++++++++----------
 include/asm-x86/bigsmp/apic.h            |    4 ++--
 include/asm-x86/es7000/apic.h            |    8 ++++----
 include/asm-x86/genapic_32.h             |    2 +-
 include/asm-x86/genapic_64.h             |    2 +-
 include/asm-x86/mach-default/mach_apic.h |    4 ++--
 include/asm-x86/numaq/apic.h             |    2 +-
 include/asm-x86/summit/apic.h            |    8 ++++----
 12 files changed, 35 insertions(+), 35 deletions(-)

--- linux-2.6.tip.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6.tip/arch/x86/kernel/genapic_flat_64.c
@@ -140,9 +140,9 @@ static int flat_apic_id_registered(void)
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -227,7 +227,7 @@ static void physflat_send_IPI_all(int ve
 	physflat_send_IPI_mask(&cpu_online_map, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -235,7 +235,7 @@ static unsigned int physflat_cpu_mask_to
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_cluster.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_cluster.c
@@ -92,7 +92,7 @@ static int x2apic_apic_id_registered(voi
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -100,7 +100,7 @@ static unsigned int x2apic_cpu_mask_to_a
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < NR_CPUS)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_phys.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_phys.c
@@ -85,7 +85,7 @@ static int x2apic_apic_id_registered(voi
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -93,7 +93,7 @@ static unsigned int x2apic_cpu_mask_to_a
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < NR_CPUS)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -160,7 +160,7 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -168,7 +168,7 @@ static unsigned int uv_cpu_mask_to_apici
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
--- linux-2.6.tip.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6.tip/arch/x86/kernel/io_apic.c
@@ -602,7 +602,7 @@ static void set_ioapic_affinity_irq_p(un
 	}
 
 	cpus_and(*tmp, cfg->domain, *mask);
-	dest = cpu_mask_to_apicid(*tmp);
+	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
 	 */
@@ -1546,7 +1546,7 @@ static void setup_IO_APIC_irq(int apic, 
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(*mask), trigger, polarity,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
@@ -1626,7 +1626,7 @@ static void __init setup_timer_IRQ0_pin(
 	 */
 	entry.dest_mode = INT_DEST_MODE;
 	entry.mask = 1;					/* mask IRQ now */
-	entry.dest = cpu_mask_to_apicid(*tgt_cpus);
+	entry.dest = cpu_mask_to_apicid(tgt_cpus);
 	entry.delivery_mode = INT_DELIVERY_MODE;
 	entry.polarity = 0;
 	entry.trigger = 0;
@@ -2322,7 +2322,7 @@ static void migrate_ioapic_irq(int irq, 
 
 	cfg = irq_cfg(irq);
 	cpus_and(*tmpmask, cfg->domain, *mask);
-	dest = cpu_mask_to_apicid(*tmpmask);
+	dest = cpu_mask_to_apicid(tmpmask);
 
 	desc = irq_to_desc(irq);
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
@@ -3204,7 +3204,7 @@ static int msi_compose_msg(struct pci_de
 
 	cfg = irq_cfg(irq);
 	cpus_and(*tgt_cpus, cfg->domain, *tgt_cpus);
-	dest = cpu_mask_to_apicid(*tgt_cpus);
+	dest = cpu_mask_to_apicid(tgt_cpus);
 	put_cpumask_var(tgt_cpus, cpumask_irq_level_4);
 
 #ifdef CONFIG_INTR_REMAP
@@ -3277,7 +3277,7 @@ static void set_msi_irq_affinity(unsigne
 
 	cfg = irq_cfg(irq);
 	cpus_and(*tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(*tmp);
+	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg(irq, &msg);
 
@@ -3319,7 +3319,7 @@ static void ir_set_msi_irq_affinity(unsi
 
 	cfg = irq_cfg(irq);
 	cpus_and(*tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(*tmp);
+	dest = cpu_mask_to_apicid(tmp);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3561,7 +3561,7 @@ static void dmar_msi_set_affinity(unsign
 
 	cfg = irq_cfg(irq);
 	cpus_and(*tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(*tmp);
+	dest = cpu_mask_to_apicid(tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3643,7 +3643,7 @@ static void set_ht_irq_affinity(unsigned
 
 	cfg = irq_cfg(irq);
 	cpus_and(*tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(*tmp);
+	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	desc = irq_to_desc(irq);
@@ -3679,7 +3679,7 @@ int arch_setup_ht_irq(unsigned int irq, 
 
 		cfg = irq_cfg(irq);
 		cpus_and(*tgt_cpus, cfg->domain, *tgt_cpus);
-		dest = cpu_mask_to_apicid(*tgt_cpus);
+		dest = cpu_mask_to_apicid(tgt_cpus);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
--- linux-2.6.tip.orig/include/asm-x86/bigsmp/apic.h
+++ linux-2.6.tip/include/asm-x86/bigsmp/apic.h
@@ -121,12 +121,12 @@ static inline int check_phys_apicid_pres
 }
 
 /* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 	int apicid;	
 
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	return apicid;
 }
--- linux-2.6.tip.orig/include/asm-x86/es7000/apic.h
+++ linux-2.6.tip/include/asm-x86/es7000/apic.h
@@ -144,14 +144,14 @@ static inline int check_phys_apicid_pres
 	return (1);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 #if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -163,10 +163,10 @@ static inline unsigned int cpu_mask_to_a
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
--- linux-2.6.tip.orig/include/asm-x86/genapic_32.h
+++ linux-2.6.tip/include/asm-x86/genapic_32.h
@@ -57,7 +57,7 @@ struct genapic {
 
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
 	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
--- linux-2.6.tip.orig/include/asm-x86/genapic_64.h
+++ linux-2.6.tip/include/asm-x86/genapic_64.h
@@ -29,7 +29,7 @@ struct genapic {
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
 	/* */
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
--- linux-2.6.tip.orig/include/asm-x86/mach-default/mach_apic.h
+++ linux-2.6.tip/include/asm-x86/mach-default/mach_apic.h
@@ -65,9 +65,9 @@ static inline int apic_id_registered(voi
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	return cpus_addr(cpumask)[0];
+	return cpus_addr(*cpumask)[0];
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
--- linux-2.6.tip.orig/include/asm-x86/numaq/apic.h
+++ linux-2.6.tip/include/asm-x86/numaq/apic.h
@@ -122,7 +122,7 @@ static inline void enable_apic_mode(void
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	return (int) 0xF;
 }
--- linux-2.6.tip.orig/include/asm-x86/summit/apic.h
+++ linux-2.6.tip/include/asm-x86/summit/apic.h
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void
 {
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_a
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){

-- 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions
  2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
                   ` (12 preceding siblings ...)
  2008-09-06 23:50 ` [RFC 13/13] genapic: reduce stack pressuge in io_apic.c step 5 cpu_mask_to_apicid Mike Travis
@ 2008-09-07  7:36 ` Ingo Molnar
  2008-09-08 15:17   ` Mike Travis
  13 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2008-09-07  7:36 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel


* Mike Travis <travis@sgi.com> wrote:

> [Note: all these changes require some more testing but I wanted to 
> solicit comments before then, hence the "RFC" in the subject line. 
> -thanks! Mike]

these changes are certainly looking very nice.

They do interact with a ton of high-flux topics in -tip, so i'd prefer 
to start integrating/testing this straight away (today-ish), before the 
target moves yet again. Are there any showstoppers that speak against 
that?

The plan would be to not keep this in a single topic but to spread them 
into their closest topic (tip/x86/x2apic, tip/irq/sparseirq etc.) - are 
there any cross-topic dependencies to be careful about? Most of the 
commits seem to be standalone. The debug patch would go into 
tip/cpus4096.

And more generally: how far away are we from being able to introduce 
struct cpumask and hide its implementation from most .c files? That 
would be rather efficient in preventing it from being put on the stack 
spuriously in one of the 30+ thousand Linux kernel source code files. 
Just like we hide the true structure of things like 'struct kmem_cache' 
and force them to always be used as pointers.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 11/13] genapic: reduce stack pressure in io_apic.c step 3 target_cpus
  2008-09-06 23:50 ` [RFC 11/13] genapic: reduce stack pressure in io_apic.c step 3 target_cpus Mike Travis
@ 2008-09-07  7:55   ` Bert Wesarg
  2008-09-07  9:13     ` Ingo Molnar
  2008-09-08 15:29     ` Mike Travis
  0 siblings, 2 replies; 41+ messages in thread
From: Bert Wesarg @ 2008-09-07  7:55 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

On Sun, Sep 7, 2008 at 01:50, Mike Travis <travis@sgi.com> wrote:
>  * Step 3 "target_cpus" of cleaning up io_apic.c modifies the TARGET_CPUS
>    interface to pass a pointer to the returned mask for arch X86_64,
>    removing yet another "cpumask_t variable on the stack".
>
>        target_cpus = TARGET_CPUS;
>
>    becomes:
>
>        TARGET_CPUS(target_cpus);
>
>    For x86_32 this is expanded to:
>
>        target_cpus = (genapic->target_cpus());
>
>    For x86_64 this is expanded to:
>
>        target_cpus = (genapic->target_cpus)(&(target_cpus));
But it's expanded to:
         (genapic->target_cpus)(&(target_cpus));

> -#define TARGET_CPUS      (genapic->target_cpus())
> +#define TARGET_CPUS(retval) (genapic->target_cpus)(&(retval))


> +#ifdef CONFIG_X86_64
> +#define TARGET_CPUS(retval)      (genapic->target_cpus)(&(retval))
> +#else
> +#define TARGET_CPUS(retval)      retval = (genapic->target_cpus())
> +#endif

Bert

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 11/13] genapic: reduce stack pressure in io_apic.c step 3 target_cpus
  2008-09-07  7:55   ` Bert Wesarg
@ 2008-09-07  9:13     ` Ingo Molnar
  2008-09-08 15:01       ` Mike Travis
  2008-09-08 15:29     ` Mike Travis
  1 sibling, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2008-09-07  9:13 UTC (permalink / raw)
  To: Bert Wesarg
  Cc: Mike Travis, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel


> > +#ifdef CONFIG_X86_64
> > +#define TARGET_CPUS(retval)      (genapic->target_cpus)(&(retval))
> > +#else
> > +#define TARGET_CPUS(retval)      retval = (genapic->target_cpus())
> > +#endif

hm, this should be unified.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-06 23:50 ` [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c Mike Travis
@ 2008-09-07 10:24   ` Peter Zijlstra
  2008-09-07 11:00     ` Andrew Morton
  2008-09-08 14:54     ` Mike Travis
  0 siblings, 2 replies; 41+ messages in thread
From: Peter Zijlstra @ 2008-09-07 10:24 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

On Sat, 2008-09-06 at 16:50 -0700, Mike Travis wrote:
> plain text document attachment (stack-hogs-kernel_sched_c)
> * Make the following changes to kernel/sched.c functions:
> 
>     - use node_to_cpumask_ptr in place of node_to_cpumask
>     - use get_cpumask_var for temporary cpumask_t variables
>     - use alloc_cpumask_ptr where available
> 
>   * Remove special code for SCHED_CPUMASK_ALLOC and use CPUMASK_ALLOC
>     from linux/cpumask.h.
> 
>   * The resultant stack savings are:
> 
>     ====== Stack (-l 100)
> 
> 	1 - initial
> 	2 - stack-hogs-kernel_sched_c
> 	'.' is less than the limit(100)
> 
>        .1.    .2.    ..final..
>       2216  -1536 680   -69%  __build_sched_domains
>       1592  -1592   .  -100%  move_task_off_dead_cpu
>       1096  -1096   .  -100%  sched_balance_self
>       1032  -1032   .  -100%  sched_setaffinity
>        616   -616   .  -100%  rebalance_domains
>        552   -552   .  -100%  free_sched_groups
>        512   -512   .  -100%  cpu_to_allnodes_group
>       7616  -6936 680   -91%  Totals
> 
> 
> Applies to linux-2.6.tip/master.
> 
> Signed-off-by: Mike Travis <travis@sgi.com>
> ---
>  kernel/sched.c |  151 ++++++++++++++++++++++++++++++---------------------------
>  1 file changed, 81 insertions(+), 70 deletions(-)
> 
> --- linux-2.6.tip.orig/kernel/sched.c
> +++ linux-2.6.tip/kernel/sched.c
> @@ -70,6 +70,7 @@
>  #include <linux/bootmem.h>
>  #include <linux/debugfs.h>
>  #include <linux/ctype.h>
> +#include <linux/cpumask_ptr.h>
>  #include <linux/ftrace.h>
>  #include <trace/sched.h>
>  
> @@ -117,6 +118,12 @@
>   */
>  #define RUNTIME_INF	((u64)~0ULL)
>  
> +/*
> + * temp cpumask variables
> + */
> +static DEFINE_PER_CPUMASK(temp_cpumask_1);
> +static DEFINE_PER_CPUMASK(temp_cpumask_2);

Yuck, that relies on turning preemption off everywhere you want to use
those.


> @@ -5384,11 +5400,14 @@ out_unlock:
>  
>  long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
>  {
> -	cpumask_t cpus_allowed;
> -	cpumask_t new_mask = *in_mask;
> +	cpumask_ptr cpus_allowed;
> +	cpumask_ptr new_mask;
>  	struct task_struct *p;
>  	int retval;
>  
> +	get_cpumask_var(cpus_allowed, temp_cpumask_1);
> +	get_cpumask_var(new_mask, temp_cpumask_2);
> +	*new_mask = *in_mask;
>  	get_online_cpus();
>  	read_lock(&tasklist_lock);

BUG!

get_online_cpus() can sleep, but you just disabled preemption with those
get_cpumask_var() horribles!

Couldn't be arsed to look through the rest, but I really hate this
cpumask_ptr() stuff that relies on disabling preemption.

NAK


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-07 10:24   ` Peter Zijlstra
@ 2008-09-07 11:00     ` Andrew Morton
  2008-09-07 13:05       ` Peter Zijlstra
  2008-09-07 20:28       ` Peter Zijlstra
  2008-09-08 14:54     ` Mike Travis
  1 sibling, 2 replies; 41+ messages in thread
From: Andrew Morton @ 2008-09-07 11:00 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mike Travis, Ingo Molnar, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

On Sun, 07 Sep 2008 12:24:47 +0200 Peter Zijlstra <peterz@infradead.org> wrote:

> get_online_cpus() can sleep, but you just disabled preemption with those
> get_cpumask_var() horribles!

make cpu_hotplug.refcount an atomic_t.

> Couldn't be arsed to look through the rest, but I really hate this
> cpumask_ptr() stuff that relies on disabling preemption.

that's harder to fix ;)

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-07 11:00     ` Andrew Morton
@ 2008-09-07 13:05       ` Peter Zijlstra
  2008-09-08 14:56         ` Mike Travis
  2008-09-07 20:28       ` Peter Zijlstra
  1 sibling, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2008-09-07 13:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Mike Travis, Ingo Molnar, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

On Sun, 2008-09-07 at 04:00 -0700, Andrew Morton wrote:
> On Sun, 07 Sep 2008 12:24:47 +0200 Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > get_online_cpus() can sleep, but you just disabled preemption with those
> > get_cpumask_var() horribles!
> 
> make cpu_hotplug.refcount an atomic_t.

A much easier fix is just re-ordering those operations and do the
get_online_cpus() before disabling preemption. But it does indicate this
patch series isn't carefully constructed.

> > Couldn't be arsed to look through the rest, but I really hate this
> > cpumask_ptr() stuff that relies on disabling preemption.
> 
> that's harder to fix ;)

Looking at more patches than just the sched one convinced me more that
this approach isn't a good one. It seems to make code much more
fragile. 

See patch 9, there it was needed to draw out the callgraph in order to
map stuff to these global variables - we're adding global dependencies
to code that didn't have any, increasing complexity.




^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-07 11:00     ` Andrew Morton
  2008-09-07 13:05       ` Peter Zijlstra
@ 2008-09-07 20:28       ` Peter Zijlstra
  1 sibling, 0 replies; 41+ messages in thread
From: Peter Zijlstra @ 2008-09-07 20:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Mike Travis, Ingo Molnar, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel,
	Gautham R Shenoy

On Sun, 2008-09-07 at 04:00 -0700, Andrew Morton wrote:

> make cpu_hotplug.refcount an atomic_t.

That might actually be a worthwhile idea, but it will not make
get_online_cpus() atomic. The whole point of get_online_cpus() is to
serialize against actual hotplug operations, so it will have to sleep at
some point.

Now, turning cpu_hotplug.refcount into an atomic_t might be worthwhile
because it will reduce the amount of atomic operations in its fastpath
from 2 to 1.

You'd have to make refcount==1 the stable situation and use
atomic_inc_unless() and atomic_dec_and_test() in get_online_cpus() and
put_online_cpus() resp. that way !refcount can signify a hotplug
operation and we'd fall back into the slow paths.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 09/13] genapic: reduce stack pressuge in io_apic.c step 1 temp cpumask_ts
  2008-09-06 23:50 ` [RFC 09/13] genapic: reduce stack pressuge in io_apic.c step 1 temp cpumask_ts Mike Travis
@ 2008-09-08 11:01   ` Andi Kleen
  2008-09-08 16:03     ` Mike Travis
  0 siblings, 1 reply; 41+ messages in thread
From: Andi Kleen @ 2008-09-08 11:01 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Mike Travis <travis@sgi.com> writes:

>   * Step 1 of cleaning up io_apic.c removes local cpumask_t variables
>     from the stack.

Sorry that patch seems incredibly messy. Global variables
and a tricky ordering and while it's at least commented it's still a mess
and maintenance unfriendly.

Also I think set_affinity is the only case where a truly arbitrary cpu
mask can be passed in anyways. And it's passed in from elsewhere. 

The other cases generally just want to handle a subset of CPUs which
are nearby. How about you define a new cpumask like type that 
consists of a start/stop CPU and a mask for that range only 
and is not larger than a few words?

I think with that the nearby assignments could be handled 
reasonably cleanly with arguments and local variables.

And I suspect with some restructuring set_affinity could
be also made to support such a model.

-Andi

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-07 10:24   ` Peter Zijlstra
  2008-09-07 11:00     ` Andrew Morton
@ 2008-09-08 14:54     ` Mike Travis
  2008-09-08 15:05       ` Peter Zijlstra
  1 sibling, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-08 14:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Peter Zijlstra wrote:
> On Sat, 2008-09-06 at 16:50 -0700, Mike Travis wrote:
>> plain text document attachment (stack-hogs-kernel_sched_c)
>> * Make the following changes to kernel/sched.c functions:
>>
>>     - use node_to_cpumask_ptr in place of node_to_cpumask
>>     - use get_cpumask_var for temporary cpumask_t variables
>>     - use alloc_cpumask_ptr where available
>>
>>   * Remove special code for SCHED_CPUMASK_ALLOC and use CPUMASK_ALLOC
>>     from linux/cpumask.h.
>>
>>   * The resultant stack savings are:
>>
>>     ====== Stack (-l 100)
>>
>> 	1 - initial
>> 	2 - stack-hogs-kernel_sched_c
>> 	'.' is less than the limit(100)
>>
>>        .1.    .2.    ..final..
>>       2216  -1536 680   -69%  __build_sched_domains
>>       1592  -1592   .  -100%  move_task_off_dead_cpu
>>       1096  -1096   .  -100%  sched_balance_self
>>       1032  -1032   .  -100%  sched_setaffinity
>>        616   -616   .  -100%  rebalance_domains
>>        552   -552   .  -100%  free_sched_groups
>>        512   -512   .  -100%  cpu_to_allnodes_group
>>       7616  -6936 680   -91%  Totals
>>
>>
>> Applies to linux-2.6.tip/master.
>>
>> Signed-off-by: Mike Travis <travis@sgi.com>
>> ---
>>  kernel/sched.c |  151 ++++++++++++++++++++++++++++++---------------------------
>>  1 file changed, 81 insertions(+), 70 deletions(-)
>>
>> --- linux-2.6.tip.orig/kernel/sched.c
>> +++ linux-2.6.tip/kernel/sched.c
>> @@ -70,6 +70,7 @@
>>  #include <linux/bootmem.h>
>>  #include <linux/debugfs.h>
>>  #include <linux/ctype.h>
>> +#include <linux/cpumask_ptr.h>
>>  #include <linux/ftrace.h>
>>  #include <trace/sched.h>
>>  
>> @@ -117,6 +118,12 @@
>>   */
>>  #define RUNTIME_INF	((u64)~0ULL)
>>  
>> +/*
>> + * temp cpumask variables
>> + */
>> +static DEFINE_PER_CPUMASK(temp_cpumask_1);
>> +static DEFINE_PER_CPUMASK(temp_cpumask_2);
> 
> Yuck, that relies on turning preemption off everywhere you want to use
> those.
> 
> 
>> @@ -5384,11 +5400,14 @@ out_unlock:
>>  
>>  long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
>>  {
>> -	cpumask_t cpus_allowed;
>> -	cpumask_t new_mask = *in_mask;
>> +	cpumask_ptr cpus_allowed;
>> +	cpumask_ptr new_mask;
>>  	struct task_struct *p;
>>  	int retval;
>>  
>> +	get_cpumask_var(cpus_allowed, temp_cpumask_1);
>> +	get_cpumask_var(new_mask, temp_cpumask_2);
>> +	*new_mask = *in_mask;
>>  	get_online_cpus();
>>  	read_lock(&tasklist_lock);
> 
> BUG!
> 
> get_online_cpus() can sleep, but you just disabled preemption with those
> get_cpumask_var() horribles!
> 
> Couldn't be arsed to look through the rest, but I really hate this
> cpumask_ptr() stuff that relies on disabling preemption.
> 
> NAK

Yeah, I really agree as well.  But I wanted to start playing with using
cpumask_t pointers in some fairly straight forward manner.  Linus's and
Ingo's suggestion to just bite the bullet and redefine the cpumask_t 
would force a lot of changes to be made, but perhaps that's really the
way to go.

As to obtaining temp cpumask_t's (both early and late), perhaps a pool of
them would be better?  I believe it could be done similar to alloc_bootmem
(but much simpler), and I don't think there's enough nesting to require a
very large pool.  (4 was the largest depth I could find in io_apic.c.)  Of
course, with preemption enabled then other problems arise...

One other really big use was for the "allbutself" cpumask in the send_IPI
functions.  I think here, preemption is ok because the ownership of the
cpumask temp is very short lived.

But thanks for pointing out the get_online_cpus problem.  I did try and
chase down as many call trees as I could, but I obviously missed one
important one.

And thanks for looking it over!
Mike


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-07 13:05       ` Peter Zijlstra
@ 2008-09-08 14:56         ` Mike Travis
  0 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-08 14:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andrew Morton, Ingo Molnar, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Peter Zijlstra wrote:
> On Sun, 2008-09-07 at 04:00 -0700, Andrew Morton wrote:
>> On Sun, 07 Sep 2008 12:24:47 +0200 Peter Zijlstra <peterz@infradead.org> wrote:
>>
>>> get_online_cpus() can sleep, but you just disabled preemption with those
>>> get_cpumask_var() horribles!
>> make cpu_hotplug.refcount an atomic_t.
> 
> A much easier fix is just re-ordering those operations and do the
> get_online_cpus() before disabling preemption. But it does indicate this
> patch series isn't carefully constructed.

Yes, it's mostly a hunt for comments on my part... ;-)
> 
>>> Couldn't be arsed to look through the rest, but I really hate this
>>> cpumask_ptr() stuff that relies on disabling preemption.
>> that's harder to fix ;)
> 
> Looking at more patches than just the sched one convinced me more that
> this approach isn't a good one. It seems to make code much more
> fragile. 
> 
> See patch 9, there it was needed to draw out the callgraph in order to
> map stuff to these global variables - we're adding global dependencies
> to code that didn't have any, increasing complexity.

Again, yes, as I got farther into that one, it became clear that having
static cpumask_t temps over too large a range was ending up very messy.

Thanks,
Mike


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 11/13] genapic: reduce stack pressuge in io_apic.c step 3 target_cpus
  2008-09-07  9:13     ` Ingo Molnar
@ 2008-09-08 15:01       ` Mike Travis
  0 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-08 15:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Bert Wesarg, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Ingo Molnar wrote:
>>> +#ifdef CONFIG_X86_64
>>> +#define TARGET_CPUS(retval)      (genapic->target_cpus)(&(retval))
>>> +#else
>>> +#define TARGET_CPUS(retval)      retval = (genapic->target_cpus())
>>> +#endif
> 
> hm, this should be unified.
> 
> 	Ingo

As I did that one first, I didn't want to muddle through too much i386 code,
but when I did the vector_allocation_domain, it became more clear that making
them common would be better.

I also tripped myself up because one of my test i386 configs had 64 cpus and
it fired up the "NR_CPUS > BITS_PER_LONG" code. ;-)  [And I'm supposing it's
probably justified to believe that there may be "fairly large" 32-bit systems,
for those applications that need horsepower but not a lot of memory.]

Thanks,
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-08 14:54     ` Mike Travis
@ 2008-09-08 15:05       ` Peter Zijlstra
  2008-09-08 18:38         ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2008-09-08 15:05 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

On Mon, 2008-09-08 at 07:54 -0700, Mike Travis wrote:
> Peter Zijlstra wrote:

> > get_online_cpus() can sleep, but you just disabled preemption with those
> > get_cpumask_var() horribles!
> > 
> > Couldn't be arsed to look through the rest, but I really hate this
> > cpumask_ptr() stuff that relies on disabling preemption.
> > 
> > NAK
> 
> Yeah, I really agree as well.  But I wanted to start playing with using
> cpumask_t pointers in some fairly straight forward manner.  Linus's and
> Ingo's suggestion to just bite the bullet and redefine the cpumask_t 
> would force a lot of changes to be made, but perhaps that's really the
> way to go.

I much prefer that approach!

> As to obtaining temp cpumask_t's (both early and late), perhaps a pool of
> them would be better?  I believe it could be done similar to alloc_bootmem
> (but much simpler), and I don't think there's enough nesting to require a
> very large pool.  (4 was the largest depth I could find in io_apic.c.)  Of
> course, with preemption enabled then other problems arise...
> 
> One other really big use was for the "allbutself" cpumask in the send_IPI
> functions.  I think here, preemption is ok because the ownership of the
> cpumask temp is very short lived.

The thing is, you add serialization requirements (be it preempt_disable,
or a lock for some preemptable form) to code that didn't had any for a
case that hardly anyone will ever encounter in real life - I mean,
really, who has 4096 cpus?

Stuffing the cpumap_t in an already existing structure that has suitable
serialization requirements is of course the preferred situation, but
lacking that a dynamic cpumap_t is best, since it keeps the references
local, and thus doesn't add requirements to the existing code.

You could also consider adding 1 cpumap_t to task_struct and use that as
temporary scratch pad - but seeing you needed at least 4 that might not
be a feasible solution either.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions
  2008-09-07  7:36 ` [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Ingo Molnar
@ 2008-09-08 15:17   ` Mike Travis
  0 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-08 15:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>> [Note: all these changes require some more testing but I wanted to 
>> solicit comments before then, hence the "RFC" in the subject line. 
>> -thanks! Mike]
> 
> these changes are certainly looking very nice.
> 
> They do interact with a ton of high-flux topics in -tip, so i'd prefer 
> to start integrating/testing this straight away (today-ish), before the 
> target moves yet again. Are there any showstoppers that speak against 
> that?
> 
> The plan would be to not keep this in a single topic but to spread them 
> into their closest topic (tip/x86/x2apic, tip/irq/sparseirq etc.) - are 
> there any cross-topic dependencies to be careful about? Most of the 
> commits seem to be standalone. The debug patch would go into 
> tip/cpus4096.
> 
> And more generally: how far away are we from being able to introduce 
> struct cpumask and hide its implementation from most .c files? That 
> would be rather efficient in preventing it from being put on the stack 
> spuriously in one of the 30+ thousand Linux kernel source code files. 
> Just like we hide the true structure of things like 'struct kmem_cache' 
> and force them to always be used as pointers.
> 
> 	Ingo

What I'll do is resubmit the changes that have nothing to do with
cpumask_ptr's first as they are mostly just "cleaning up extraneous
temp cpumask variables".  Then I'll try the redefine of cpumask_t to
see what kind of hornet's nest is opened up.

What do you think of a pool of temp cpumask_t's?  That way, they
could be made available early (before kmalloc is available).  An
atomic op could be used for reservation which when the zero-based
percpu variables finally get completed, become very low cost.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 11/13] genapic: reduce stack pressuge in io_apic.c step 3 target_cpus
  2008-09-07  7:55   ` Bert Wesarg
  2008-09-07  9:13     ` Ingo Molnar
@ 2008-09-08 15:29     ` Mike Travis
  1 sibling, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-08 15:29 UTC (permalink / raw)
  To: Bert Wesarg
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Bert Wesarg wrote:
> On Sun, Sep 7, 2008 at 01:50, Mike Travis <travis@sgi.com> wrote:
>>  * Step 3 "target_cpus" of cleaning up io_apic.c modifies the TARGET_CPUS
>>    interface to pass a pointer to the returned mask for arch X86_64,
>>    removing yet another "cpumask_t variable on the stack".
>>
>>        target_cpus = TARGET_CPUS;
>>
>>    becomes:
>>
>>        TARGET_CPUS(target_cpus);
>>
>>    For x86_32 this is expanded to:
>>
>>        target_cpus = (genapic->target_cpus());
>>
>>    For x86_64 this is expanded to:
>>
>>        target_cpus = (genapic->target_cpus)(&(target_cpus));
> But it's expanded to:
>          (genapic->target_cpus)(&(target_cpus));

Umm, right, my cut and paste error...

Thanks,
Mike
> 
>> -#define TARGET_CPUS      (genapic->target_cpus())
>> +#define TARGET_CPUS(retval) (genapic->target_cpus)(&(retval))
> 
> 
>> +#ifdef CONFIG_X86_64
>> +#define TARGET_CPUS(retval)      (genapic->target_cpus)(&(retval))
>> +#else
>> +#define TARGET_CPUS(retval)      retval = (genapic->target_cpus())
>> +#endif
> 
> Bert


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 09/13] genapic: reduce stack pressuge in io_apic.c step 1 temp cpumask_ts
  2008-09-08 11:01   ` Andi Kleen
@ 2008-09-08 16:03     ` Mike Travis
  0 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-08 16:03 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel

Andi Kleen wrote:
> Mike Travis <travis@sgi.com> writes:
> 
>>   * Step 1 of cleaning up io_apic.c removes local cpumask_t variables
>>     from the stack.
> 
> Sorry that patch seems incredibly messy. Global variables
> and a tricky ordering and while it's at least commented it's still a mess
> and maintenance unfriendly.
> 
> Also I think set_affinity is the only case where a truly arbitrary cpu
> mask can be passed in anyways. And it's passed in from elsewhere. 
> 
> The other cases generally just want to handle a subset of CPUs which
> are nearby. How about you define a new cpumask like type that 
> consists of a start/stop CPU and a mask for that range only 
> and is not larger than a few words?
> 
> I think with that the nearby assignments could be handled 
> reasonably cleanly with arguments and local variables.
> 
> And I suspect with some restructuring set_affinity could
> be also made to support such a model.
> 
> -Andi

Thanks for the comments.  I did mull over something like this early on
in researching this "cpumask" problem, but the problem of having different
cpumask operators didn't seem worthwhile.  But perhaps for a very limited
use (with very few ops), it would be worthwhile.

But how big to make these?  Variable sized?  Config option?  Should I
introduce some kind of MAX_CPUS_PER_NODE constant?  (NR_CPUS/MAX_NUMNODES
I don't think is the right answer.)

Thanks,
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c
  2008-09-08 15:05       ` Peter Zijlstra
@ 2008-09-08 18:38         ` Ingo Molnar
  2008-09-10 22:47           ` [RFC] CPUMASK: proposal for replacing cpumask_t Mike Travis
  0 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2008-09-08 18:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mike Travis, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel


* Peter Zijlstra <peterz@infradead.org> wrote:

> > > NAK
> > 
> > Yeah, I really agree as well.  But I wanted to start playing with 
> > using cpumask_t pointers in some fairly straight forward manner.  
> > Linus's and Ingo's suggestion to just bite the bullet and redefine 
> > the cpumask_t would force a lot of changes to be made, but perhaps 
> > that's really the way to go.
> 
> I much prefer that approach!

seconded. Mike, since none of this is v2.6.27 material, lets do it right 
with a v2.6.28 target. You know all the cpumask_t using code sites 
inside out already, so the know-how is all available already :-) Please 
make it finegrained series of patches so that we can resolve conflicts 
with other trees more easily.

perhaps propose the new cpumask_t API early (in this thread?), so that 
people can comment on it before NAKs come flying against a full patchset 
;-)

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-08 18:38         ` Ingo Molnar
@ 2008-09-10 22:47           ` Mike Travis
  2008-09-10 22:53             ` Andi Kleen
                               ` (2 more replies)
  0 siblings, 3 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-10 22:47 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel,
	Jack Steiner, Christoph Lameter, Andi Kleen

Ingo Molnar wrote:
> * Peter Zijlstra <peterz@infradead.org> wrote:
> 
>>>> NAK
...
> 
> seconded. Mike, since none of this is v2.6.27 material, lets do it right 
> with a v2.6.28 target. You know all the cpumask_t using code sites 
> inside out already, so the know-how is all available already :-) Please 
> make it finegrained series of patches so that we can resolve conflicts 
> with other trees more easily.
> 
> perhaps propose the new cpumask_t API early (in this thread?), so that 
> people can comment on it before NAKs come flying against a full patchset 
> ;-)
> 
> 	Ingo


Here's an initial proposal for abstracting cpumask_t to be either
an array of 1 or a pointer to an array...   Hopefully this will
minimize the amount of code changes while providing the capabilities
this change is attempting to do.

Comments most welcome. ;-)

Thanks,
Mike
--

Basically, linux/cpumask.h has the following defines:

	typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_data;

	#if NR_CPUS > BITS_PER_LONG
	typedef const cpumask_data	*cpumask_t;
	typedef cpumask_data		*cpumask_var;
	typedef cpumask_t		cpumask_val;
	#define	_NR_CPUS		nr_cpu_ids
	#else
	typedef const cpumask_data	cpumask_t[1];
	typedef cpumask_data		cpumask_var[1];
	typedef cpumask_data		cpumask_val;
	#define	_NR_CPUS		NR_CPUS
	#endif

So in function prototypes:

	cpumask_t function(const cpumask_t *A,
			   cpumask_t *B,
			   cpumask_t cpumask_C)

becomes:

	cpumask_val function(cpumask_t A,
			     cpumask_var B,
			     cpumask_t cpumask_C)

And in local variables:

	cpumask_t ==> cpumask_var  IFF variable is to be written.

This:

	cpumask_t mask = cpu_online_map 
	<change mask>

becomes:

	#include <linux/cpumask_alloc.h>

	cpumask_var mask;

	alloc_cpumask(&mask);
	*mask = *cpu_online_map;
	<change mask>
	free_cpumask(&mask);

Currently, alloc_cpumask is:

	#define BYTES_PER_CPUMASK (BITS_TO_LONGS(nr_cpu_ids)/sizeof(long))

	static inline bool no_cpumask(cpumask_t *m)
	{
		return (*m == NULL);
	}

	static inline void alloc_cpumask(cpumask_t *m)
	{
		cpumask_t d = kmalloc(BYTES_PER_CPUMASK, GFP_KERNEL);
		if (no_cpumask(&d))
			BUG();
		*m = d;
	}

	static inline void alloc_cpumask_nopanic(cpumask_t *m)
	{
		cpumask_t d = kmalloc(BYTES_PER_CPUMASK, GFP_KERNEL);

		*m = d;
	}

	static inline void free_cpumask(cpumask_t *m)
	{
		kfree(*m);
	}

Other means of obtaining a temporary cpumask_t variable will be provided
for those cases where kmalloc() is not available.

Furthermore, system-wide maps become:

	extern cpumask_data _cpu_possible_map;			 /* read/write */
	extern cpumask_data _cpu_online_map;
	extern cpumask_data _cpu_present_map;
	extern cpumask_data _cpu_active_map;
	#define	cpu_possible_map ((cpumask_t)&_cpu_possible_map) /* read only */
	#define	cpu_online_map ((cpumask_t)&_cpu_online_map)
	#define	cpu_present_map ((cpumask_t)&_cpu_present_map)
	#define	cpu_active_map ((cpumask_t)&_cpu_active_map)


So code to set these bits would be:

	cpu_set(cpu, &_cpu_online_map);
	cpu_set(cpu, &_cpu_present_map);
	cpu_set(cpu, &_cpu_possible_map);

Arrays that contain a fixed cpumask would have:

	struct xxx {
		cpumask_data	cpumask;
	};

... though we should probably encourage the map to be allocated:

	struct xxx {
		cpumask_t	readonly_cpumask;
		cpumask_var	readwrite_cpumask;
	};

	alloc_cpumask(&xxx->readonly_cpumask);
	alloc_cpumask(&xxx->readwrite_cpumask);


All the cpu operators become:

	#define cpu_XXX(dst, src) _cpu_XXX(dst, src, _NR_CPUS)
	static inline void __cpu_XXX(cpumask_var dstp, cpumask_t srcp, int count)
	{
		XXX_bit(dstp->bits, srcp->bits, count);
	}

(_NR_CPUS being defined to be nr_cpu_ids allows us to allocate variable-length arrays.)


Cpumask initializers become:


	#if NR_CPUS <= BITS_PER_LONG

	#define INIT_CPU_MASK_ALL					\
	(cpumask_t) { {							\
		[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
	} }

	#else
	#define INIT_CPU_MASK_ALL					\
	(cpumask_data) { {						\
		[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
		[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
	} }
	#endif

	#define INIT_CPU_MASK_NONE					\
	(cpumask_data) { {						\
		[0 ... BITS_TO_LONGS(NR_CPUS)-1] =  0UL			\
	} }

	#define INIT_CPU_MASK_CPU0					\
	(cpumask_data) { {						\
		[0] =  1UL						\
	} }

	#if NR_CPUS > BITS_PER_LONG
	extern cpumask_t cpu_mask_all, cpu_mask_none, cpu_mask_cpu0;
	#define CPU_MASK_ALL		(cpu_mask_all)
	#define CPU_MASK_NONE		(cpu_mask_none)
	#define CPU_MASK_CPU0		(cpu_mask_cpu0)
	#else
	#define CPU_MASK_ALL		((cpumask_t)&INIT_CPU_MASK_ALL)
	#define CPU_MASK_NONE		((cpumask_t)&INIT_CPU_MASK_NONE)
	#define CPU_MASK_CPU0		((cpumask_t)&INIT_CPU_MASK_CPU0)
	#endif

And in kernel/cpu.c:

	/*
	 * provide const cpumask_t's
	 */
	#if NR_CPUS > BITS_PER_LONG
	cpumask_data cpu_mask_all __read_mostly = INIT_CPU_MASK_ALL;
	EXPORT_SYMBOL(cpu_mask_all);

	cpumask_data cpu_mask_none __read_mostly = INIT_CPU_MASK_NONE;
	EXPORT_SYMBOL(cpu_mask_none);

	cpumask_data cpu_mask_cpu0 __read_mostly = INIT_CPU_MASK_CPU0;
	EXPORT_SYMBOL(cpu_mask_cpu0);
	#endif


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-10 22:47           ` [RFC] CPUMASK: proposal for replacing cpumask_t Mike Travis
@ 2008-09-10 22:53             ` Andi Kleen
  2008-09-10 23:33               ` Mike Travis
  2008-09-11  9:00             ` Peter Zijlstra
  2008-09-12  4:55             ` Rusty Russell
  2 siblings, 1 reply; 41+ messages in thread
From: Andi Kleen @ 2008-09-10 22:53 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, davej, David Miller,
	Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter, Andi Kleen

> Here's an initial proposal for abstracting cpumask_t to be either

At least for some cases I don't think you'll get around defining
a "nearby subset of CPUs that can be handled together" type. Handling 1K 
objects all the time in one piece is simply not a good idea.

-Andi

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-10 22:53             ` Andi Kleen
@ 2008-09-10 23:33               ` Mike Travis
  2008-09-11  5:21                 ` Andi Kleen
  0 siblings, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-10 23:33 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, davej, David Miller,
	Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter

Andi Kleen wrote:
>> Here's an initial proposal for abstracting cpumask_t to be either
> 
> At least for some cases I don't think you'll get around defining
> a "nearby subset of CPUs that can be handled together" type. Handling 1K 
> objects all the time in one piece is simply not a good idea.
> 
> -Andi


Every time I stop to think about this, the problems with the cpu
operators come to mind.  Should there be a separate set?  Or simply
conversion functions to/from a "cpumask_subset" type?  

Thanks,
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-10 23:33               ` Mike Travis
@ 2008-09-11  5:21                 ` Andi Kleen
  0 siblings, 0 replies; 41+ messages in thread
From: Andi Kleen @ 2008-09-11  5:21 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andi Kleen, Ingo Molnar, Peter Zijlstra, Andrew Morton, davej,
	David Miller, Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter

On Wed, Sep 10, 2008 at 04:33:16PM -0700, Mike Travis wrote:
> Andi Kleen wrote:
> >> Here's an initial proposal for abstracting cpumask_t to be either
> > 
> > At least for some cases I don't think you'll get around defining
> > a "nearby subset of CPUs that can be handled together" type. Handling 1K 
> > objects all the time in one piece is simply not a good idea.
> > 
> > -Andi
> 
> 
> Every time I stop to think about this, the problems with the cpu
> operators come to mind.  Should there be a separate set?  Or simply
> conversion functions to/from a "cpumask_subset" type?  

A subset would be hopefully enough (set/isset etc.) plus conversion
operators.

-Andi

-- 
ak@linux.intel.com

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-10 22:47           ` [RFC] CPUMASK: proposal for replacing cpumask_t Mike Travis
  2008-09-10 22:53             ` Andi Kleen
@ 2008-09-11  9:00             ` Peter Zijlstra
  2008-09-11 15:04               ` Mike Travis
  2008-09-12  4:55             ` Rusty Russell
  2 siblings, 1 reply; 41+ messages in thread
From: Peter Zijlstra @ 2008-09-11  9:00 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel,
	Christoph Lameter, Andi Kleen

On Wed, 2008-09-10 at 15:47 -0700, Mike Travis wrote:
> Ingo Molnar wrote:
> > * Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> >>>> NAK
> ....
> > 
> > seconded. Mike, since none of this is v2.6.27 material, lets do it right 
> > with a v2.6.28 target. You know all the cpumask_t using code sites 
> > inside out already, so the know-how is all available already :-) Please 
> > make it finegrained series of patches so that we can resolve conflicts 
> > with other trees more easily.
> > 
> > perhaps propose the new cpumask_t API early (in this thread?), so that 
> > people can comment on it before NAKs come flying against a full patchset 
> > ;-)
> > 
> > 	Ingo
> 
> 
> Here's an initial proposal for abstracting cpumask_t to be either
> an array of 1 or a pointer to an array...   Hopefully this will
> minimize the amount of code changes while providing the capabilities
> this change is attempting to do.
> 
> Comments most welcome. ;-)
> 
> Thanks,
> Mike
> --
> 
> Basically, linux/cpumask.h has the following defines:
> 
> 	typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_data;
> 
> 	#if NR_CPUS > BITS_PER_LONG
> 	typedef const cpumask_data	*cpumask_t;
> 	typedef cpumask_data		*cpumask_var;
> 	typedef cpumask_t		cpumask_val;
> 	#define	_NR_CPUS		nr_cpu_ids
> 	#else
> 	typedef const cpumask_data	cpumask_t[1];
> 	typedef cpumask_data		cpumask_var[1];
> 	typedef cpumask_data		cpumask_val;
> 	#define	_NR_CPUS		NR_CPUS
> 	#endif
> 
> So in function prototypes:
> 
> 	cpumask_t function(const cpumask_t *A,
> 			   cpumask_t *B,
> 			   cpumask_t cpumask_C)
> 
> becomes:
> 
> 	cpumask_val function(cpumask_t A,
> 			     cpumask_var B,
> 			     cpumask_t cpumask_C)

I guess we have to stick the const into the typedef otherwise we get a
const pointer instead of a const array member, right?

In which case I much prefer the following names:

 cpumask_data_t  - value

 const_cpumask_t - pointer to constant value
 cpumask_t       - pointer to value

> And in local variables:
> 
> 	cpumask_t ==> cpumask_var  IFF variable is to be written.
> 
> This:
> 
> 	cpumask_t mask = cpu_online_map 
> 	<change mask>
> 
> becomes:
> 
> 	#include <linux/cpumask_alloc.h>
> 
> 	cpumask_var mask;
> 
> 	alloc_cpumask(&mask);

Don't you have to deal with allocation errors?

> 	*mask = *cpu_online_map;
> 	<change mask>
> 	free_cpumask(&mask);
> 
> Currently, alloc_cpumask is:
> 
> 	#define BYTES_PER_CPUMASK (BITS_TO_LONGS(nr_cpu_ids)/sizeof(long))
> 
> 	static inline bool no_cpumask(cpumask_t *m)
> 	{
> 		return (*m == NULL);
> 	}
> 
> 	static inline void alloc_cpumask(cpumask_t *m)
> 	{
> 		cpumask_t d = kmalloc(BYTES_PER_CPUMASK, GFP_KERNEL);
> 		if (no_cpumask(&d))
> 			BUG();

yuckery yuck yuck!

> 		*m = d;
> 	}
> 
> 	static inline void alloc_cpumask_nopanic(cpumask_t *m)
> 	{
> 		cpumask_t d = kmalloc(BYTES_PER_CPUMASK, GFP_KERNEL);
> 
> 		*m = d;
> 	}

gah - at the very least you got the naming wrong, methinks the one
panic-ing should have panic in its name - if you really want to persist
with that variant.

> 	static inline void free_cpumask(cpumask_t *m)
> 	{
> 		kfree(*m);
> 	}
> 
> Other means of obtaining a temporary cpumask_t variable will be provided
> for those cases where kmalloc() is not available.
> 
> Furthermore, system-wide maps become:
> 
> 	extern cpumask_data _cpu_possible_map;			 /* read/write */
> 	extern cpumask_data _cpu_online_map;
> 	extern cpumask_data _cpu_present_map;
> 	extern cpumask_data _cpu_active_map;
> 	#define	cpu_possible_map ((cpumask_t)&_cpu_possible_map) /* read only */
> 	#define	cpu_online_map ((cpumask_t)&_cpu_online_map)
> 	#define	cpu_present_map ((cpumask_t)&_cpu_present_map)
> 	#define	cpu_active_map ((cpumask_t)&_cpu_active_map)
> 
> 
> So code to set these bits would be:
> 
> 	cpu_set(cpu, &_cpu_online_map);
> 	cpu_set(cpu, &_cpu_present_map);
> 	cpu_set(cpu, &_cpu_possible_map);
> 
> Arrays that contain a fixed cpumask would have:
> 
> 	struct xxx {
> 		cpumask_data	cpumask;
> 	};
> 
> .... though we should probably encourage the map to be allocated:
> 
> 	struct xxx {
> 		cpumask_t	readonly_cpumask;
> 		cpumask_var	readwrite_cpumask;
> 	};
> 
> 	alloc_cpumask(&xxx->readonly_cpumask);
> 	alloc_cpumask(&xxx->readwrite_cpumask);
> 
> 
> All the cpu operators become:
> 
> 	#define cpu_XXX(dst, src) _cpu_XXX(dst, src, _NR_CPUS)
> 	static inline void __cpu_XXX(cpumask_var dstp, cpumask_t srcp, int count)
> 	{
> 		XXX_bit(dstp->bits, srcp->bits, count);
> 	}
> 
> (_NR_CPUS being defined to be nr_cpu_ids allows us to allocate variable lengthed arrays.)
> 
> 
> Cpumask initializers become:
> 
> 
> 	#if NR_CPUS <= BITS_PER_LONG
> 
> 	#define INIT_CPU_MASK_ALL					\
> 	(cpumask_t) { {							\
> 		[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
> 	} }
> 
> 	#else
> 	#define INIT_CPU_MASK_ALL					\
> 	(cpumask_data) { {						\
> 		[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
> 		[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
> 	} }
> 	#endif
> 
> 	#define INIT_CPU_MASK_NONE					\
> 	(cpumask_data) { {						\
> 		[0 ... BITS_TO_LONGS(NR_CPUS)-1] =  0UL			\
> 	} }
> 
> 	#define INIT_CPU_MASK_CPU0					\
> 	(cpumask_data) { {						\
> 		[0] =  1UL						\
> 	} }
> 
> 	#if NR_CPUS > BITS_PER_LONG
> 	extern cpumask_t cpu_mask_all, cpu_mask_none, cpu_mask_cpu0;
> 	#define CPU_MASK_ALL		(cpu_mask_all)
> 	#define CPU_MASK_NONE		(cpu_mask_none)
> 	#define CPU_MASK_CPU0		(cpu_mask_cpu0)
> 	#else
> 	#define CPU_MASK_ALL		((cpumask_t)&INIT_CPU_MASK_ALL)
> 	#define CPU_MASK_NONE		((cpumask_t)&INIT_CPU_MASK_NONE)
> 	#define CPU_MASK_CPU0		((cpumask_t)&INIT_CPU_MASK_CPU0)
> 	#endif
> 
> And in kernel/cpu.c:
> 
> 	/*
> 	 * provide const cpumask_t's
> 	 */
> 	#if NR_CPUS > BITS_PER_LONG
> 	cpumask_data cpu_mask_all __read_mostly = INIT_CPU_MASK_ALL;
> 	EXPORT_SYMBOL(cpu_mask_all);
> 
> 	cpumask_data cpu_mask_none __read_mostly = INIT_CPU_MASK_NONE;
> 	EXPORT_SYMBOL(cpu_mask_none);
> 
> 	cpumask_data cpu_mask_cpu0 __read_mostly = INIT_CPU_MASK_CPU0;
> 	EXPORT_SYMBOL(cpu_mask_cpu0);
> 	#endif
> 


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-11  9:00             ` Peter Zijlstra
@ 2008-09-11 15:04               ` Mike Travis
  0 siblings, 0 replies; 41+ messages in thread
From: Mike Travis @ 2008-09-11 15:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Andrew Morton, davej, David Miller, Eric Dumazet,
	Eric W. Biederman, Jack Steiner, Jeremy Fitzhardinge,
	Jes Sorensen, H. Peter Anvin, Thomas Gleixner, linux-kernel,
	Christoph Lameter, Andi Kleen

Peter Zijlstra wrote:
...
>> So in function prototypes:
>>
>> 	cpumask_t function(const cpumask_t *A,
>> 			   cpumask_t *B,
>> 			   cpumask_t cpumask_C)
>>
>> becomes:
>>
>> 	cpumask_val function(cpumask_t A,
>> 			     cpumask_var B,
>> 			     cpumask_t cpumask_C)
> 
> I guess we have to stick the const into the typedef otherwise we get a
> const pointer instead of a const array member, right?
> 
> In which case I much prefer the following names:
> 
>  cpumask_data_t  - value
> 
>  const_cpumask_t - pointer to constant value
>  cpumask_t       - pointer to value

There were some comments previously such that we should "imply" that the
incoming cpumask_t args are const, so the compiler would flag those
who arbitrarily modify it.

> 
...
>> 	alloc_cpumask(&mask);
> 
> Don't you have to deal with allocation errors?

In a perfect world, no... ;-)
...
>> 	static inline void alloc_cpumask(cpumask_t *m)
>> 	{
>> 		cpumask_t d = kmalloc(BYTES_PER_CPUMASK, GFP_KERNEL);
>> 		if (no_cpumask(&d))
>> 			BUG();
> 
> yuckery yuck yuck!
> 
>> 		*m = d;
>> 	}
>>
>> 	static inline void alloc_cpumask_nopanic(cpumask_t *m)
>> 	{
>> 		cpumask_t d = kmalloc(BYTES_PER_CPUMASK, GFP_KERNEL);
>>
>> 		*m = d;
>> 	}
> 
> gah - at the very least you got the naming wrong, methinks the one
> panic-ing should have panic in its name - if you really want to persist
> with that variant.

Yeah, I rather rushed through the allocation part (yuck indeed ;-).

There are some other alternatives:

	- reserve one or more of these in the task struct
	- reserve one or more in a per-cpu area
	- setup some kind of allocation pool similar to alloc_bootmem
	- ???

Thanks,
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-10 22:47           ` [RFC] CPUMASK: proposal for replacing cpumask_t Mike Travis
  2008-09-10 22:53             ` Andi Kleen
  2008-09-11  9:00             ` Peter Zijlstra
@ 2008-09-12  4:55             ` Rusty Russell
  2008-09-12 14:28               ` Mike Travis
  2 siblings, 1 reply; 41+ messages in thread
From: Rusty Russell @ 2008-09-12  4:55 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, davej, David Miller,
	Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter, Andi Kleen

On Thursday 11 September 2008 08:47:58 Mike Travis wrote:
> Here's an initial proposal for abstracting cpumask_t to be either
> an array of 1 or a pointer to an array...   Hopefully this will
> minimize the amount of code changes while providing the capabilities
> this change is attempting to do.
>
> Comments most welcome. ;-)

I think this is still "wrong way go back".

I'm yet to be convinced that we really need to allocate cpumasks in any fast 
paths.  And if not, we should simply allocate them everywhere.  I'd rather 
see one #ifdef around a place where we can show a perf issue.

Get rid of CPU_MASK_ALL et al in favour of cpu_mask_all.  And cpu_mask_any_one 
instead of CPU_MASK_CPU0 since that's usually what they want.

API looks like so (look Ma, no typedefs!)

	struct cpumask *cpus;

	cpus = cpumask_alloc();
	if (!cpus)
		return -ENOMEM;

	cpumask_init_single(cpunum);
	OR
	cpumask_init(cpu_mask_all);
	...
	cpumask_free(cpus);

Unmistakable and really hard to screw up.  You can even be clever and not 
reveal the struct cpumask definition so noone can declare one by accident...

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-12  4:55             ` Rusty Russell
@ 2008-09-12 14:28               ` Mike Travis
  2008-09-12 22:02                 ` Rusty Russell
  0 siblings, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-12 14:28 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, davej, David Miller,
	Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter, Andi Kleen

Rusty Russell wrote:
> On Thursday 11 September 2008 08:47:58 Mike Travis wrote:
>> Here's an initial proposal for abstracting cpumask_t to be either
>> an array of 1 or a pointer to an array...   Hopefully this will
>> minimize the amount of code changes while providing the capabilities
>> this change is attempting to do.
>>
>> Comments most welcome. ;-)
> 
> I think this is still "wrong way go back".
> 
> I'm yet to be convinced that we really need to allocate cpumasks in any fast 
> paths.  And if not, we should simply allocate them everywhere.  I'd rather 
> see one #ifdef around a place where we can show a perf issue.
> 
> Get rid of CPU_MASK_ALL et al in favour of cpu_mask_all.  And cpu_mask_any_one 
> instead of CPU_MASK_CPU0 since that's usually what they want.
> 
> API looks like so (look Ma, no typedefs!)
> 
> 	struct cpumask *cpus;
> 
> 	cpus = cpumask_alloc();
> 	if (!cpus)
> 		return -ENOMEM;
> 
> 	cpumask_init_single(cpunum);
> 	OR
> 	cpumask_init(cpu_mask_all);
> 	...
> 	cpumask_free(cpus);
> 
> Unmistakable and really hard to screw up.  You can even be clever and not 
> reveal the struct cpumask definition so noone can declare one by accident...
> 
> Cheers,
> Rusty.

Using a typedef came from Linus, and the idea is basically if NR_CPUS fits
into a long, then it's carried as an array of one (ie., local variable).
If it's bigger, then it's a pointer to a remote array.  The references can
all be pointers (*cpumask), though most of the references use the cpu_XXX
operators which already treat the references correctly (in my proposal,
that is).  That way, small systems can optimize out the indirect reference
and the overhead becomes zero.

Also, cpumask_alloc/free() becomes nop's for small systems.

But I like the idea of dumping some of the initializers.  I should have
made CPU0 "cpumask_of_cpu(0)".  I'll have to look at where they are used to
see if this is feasible.

Thanks!
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-12 14:28               ` Mike Travis
@ 2008-09-12 22:02                 ` Rusty Russell
  2008-09-12 22:50                   ` Mike Travis
  0 siblings, 1 reply; 41+ messages in thread
From: Rusty Russell @ 2008-09-12 22:02 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, davej, David Miller,
	Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter, Andi Kleen

On Saturday 13 September 2008 00:28:56 Mike Travis wrote:
> Rusty Russell wrote:
> > I'm yet to be convinced that we really need to allocate cpumasks in any
> > fast paths.  And if not, we should simply allocate them everywhere.  I'd
> > rather see one #ifdef around a place where we can show a perf issue.
>
> Using a typedef came from Linus, and the idea is basically if NR_CPUS fits
> into a long, then it's carried as an array of one (ie., local variable).

Sure it's clever.  ie. nice and confusing.

But do we have any code paths where we care?  Unless we do, let's just keep it 
simple...

Cheers,
Rusty.


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-12 22:02                 ` Rusty Russell
@ 2008-09-12 22:50                   ` Mike Travis
  2008-09-12 22:58                     ` H. Peter Anvin
  0 siblings, 1 reply; 41+ messages in thread
From: Mike Travis @ 2008-09-12 22:50 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Ingo Molnar, Peter Zijlstra, Andrew Morton, davej, David Miller,
	Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, H. Peter Anvin,
	Thomas Gleixner, linux-kernel, Christoph Lameter, Andi Kleen

Rusty Russell wrote:
> On Saturday 13 September 2008 00:28:56 Mike Travis wrote:
>> Rusty Russell wrote:
>>> I'm yet to be convinced that we really need to allocate cpumasks in any
>>> fast paths.  And if not, we should simply allocate them everywhere.  I'd
>>> rather see one #ifdef around a place where we can show a perf issue.
>> Using a typedef came from Linus, and the idea is basically if NR_CPUS fits
>> into a long, then it's carried as an array of one (ie., local variable).
> 
> Sure it's clever.  ie. nice and confusing.
> 
> But do we have any code paths where we care?  Unless we do, let's just keep it 
> simple...
> 
> Cheers,
> Rusty.

Here's the thread:

	http://marc.info/?l=linux-kernel&m=121976977901223&w=4

It doesn't seem worthwhile to force all systems to deal with large cpumask's
if they don't need to.  Passing the value on the stack (actually usually in a
reg) if it fits into a long makes a lot of sense.

And I don't think it's that abstract, but I'm willing to hear other opinions.

Btw, most likely only distros that distribute an "Enterprise" edition of
Linux will ever set NR_CPUS so high, so the actual number of systems making
use of this will be a very small percentage (big $$-wise though... ;-) 

I even think that perhaps BITS_PER_LONG might be too low a threshold to kick
in this extra code.  A Larrabee chip will have 128 cpus so maybe 128 or 256 is
a better metric...?

As soon as I get a working kernel with the proposed changes, I will definitely
be doing perf testing.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC] CPUMASK: proposal for replacing cpumask_t
  2008-09-12 22:50                   ` Mike Travis
@ 2008-09-12 22:58                     ` H. Peter Anvin
  0 siblings, 0 replies; 41+ messages in thread
From: H. Peter Anvin @ 2008-09-12 22:58 UTC (permalink / raw)
  To: Mike Travis
  Cc: Rusty Russell, Ingo Molnar, Peter Zijlstra, Andrew Morton, davej,
	David Miller, Eric Dumazet, Eric W. Biederman, Jack Steiner,
	Jeremy Fitzhardinge, Jes Sorensen, Thomas Gleixner, linux-kernel,
	Christoph Lameter, Andi Kleen

Mike Travis wrote:
> 
> Here's the thread:
> 
> 	http://marc.info/?l=linux-kernel&m=121976977901223&w=4
> 
> It doesn't seem worthwhile to force all systems to deal with large cpumask's
> if they don't need to.  Passing the value on the stack (actually usually in a
> reg) if it fits into a long makes a lot of sense.
> 
> And I don't think it's that abstract, but I'm willing to hear other opinions.
> 
> Btw, most likely only distros that distribute an "Enterprise" edition of
> Linux will ever set NR_CPUS so high, so the actual number of systems making
> use of this will be a very small percentage (big $$-wise though... ;-) 
> 
> I even think that perhaps BITS_PER_LONG might be too low a threshold to kick
> in this extra code.  A Larabee chip will have 128 cpus so maybe 128 or 256 is
> a better metric...?
> 
> As soon as I get a working kernel with the proposed changes, I will definitely
> be doing perf testing.
> 

If the performance difference isn't significant, then there is a major 
advantage to getting rid of a configuration option.  At that point we 
can basically scale to an arbitrary number of processors in a stock 
configuration.

	-hpa


^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2008-09-12 23:05 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-06 23:50 [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Mike Travis
2008-09-06 23:50 ` [RFC 01/13] smp: modify send_IPI_mask interface to accept cpumask_t pointers Mike Travis
2008-09-06 23:50 ` [RFC 02/13] cpumask: add for_each_online_cpu_mask_nr function Mike Travis
2008-09-06 23:50 ` [RFC 03/13] xen: use new " Mike Travis
2008-09-06 23:50 ` [RFC 04/13] cpumask: add cpumask_ptr operations Mike Travis
2008-09-06 23:50 ` [RFC 05/13] cpumask: add get_cpumask_var debug operations Mike Travis
2008-09-06 23:50 ` [RFC 06/13] genapic: use get_cpumask_var operations for allbutself cpumask_ts Mike Travis
2008-09-06 23:50 ` [RFC 07/13] sched: Reduce stack size requirements in kernel/sched.c Mike Travis
2008-09-07 10:24   ` Peter Zijlstra
2008-09-07 11:00     ` Andrew Morton
2008-09-07 13:05       ` Peter Zijlstra
2008-09-08 14:56         ` Mike Travis
2008-09-07 20:28       ` Peter Zijlstra
2008-09-08 14:54     ` Mike Travis
2008-09-08 15:05       ` Peter Zijlstra
2008-09-08 18:38         ` Ingo Molnar
2008-09-10 22:47           ` [RFC] CPUMASK: proposal for replacing cpumask_t Mike Travis
2008-09-10 22:53             ` Andi Kleen
2008-09-10 23:33               ` Mike Travis
2008-09-11  5:21                 ` Andi Kleen
2008-09-11  9:00             ` Peter Zijlstra
2008-09-11 15:04               ` Mike Travis
2008-09-12  4:55             ` Rusty Russell
2008-09-12 14:28               ` Mike Travis
2008-09-12 22:02                 ` Rusty Russell
2008-09-12 22:50                   ` Mike Travis
2008-09-12 22:58                     ` H. Peter Anvin
2008-09-06 23:50 ` [RFC 08/13] cpufreq: Reduce stack size requirements in acpi-cpufreq.c Mike Travis
2008-09-06 23:50 ` [RFC 09/13] genapic: reduce stack pressuge in io_apic.c step 1 temp cpumask_ts Mike Travis
2008-09-08 11:01   ` Andi Kleen
2008-09-08 16:03     ` Mike Travis
2008-09-06 23:50 ` [RFC 10/13] genapic: reduce stack pressuge in io_apic.c step 2 internal abi Mike Travis
2008-09-06 23:50 ` [RFC 11/13] genapic: reduce stack pressuge in io_apic.c step 3 target_cpus Mike Travis
2008-09-07  7:55   ` Bert Wesarg
2008-09-07  9:13     ` Ingo Molnar
2008-09-08 15:01       ` Mike Travis
2008-09-08 15:29     ` Mike Travis
2008-09-06 23:50 ` [RFC 12/13] genapic: reduce stack pressuge in io_apic.c step 4 vector allocation Mike Travis
2008-09-06 23:50 ` [RFC 13/13] genapic: reduce stack pressuge in io_apic.c step 5 cpu_mask_to_apicid Mike Travis
2008-09-07  7:36 ` [RFC 00/13] smp: reduce stack requirements for genapic send_IPI_mask functions Ingo Molnar
2008-09-08 15:17   ` Mike Travis

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).