All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] irq: remove not need bootmem code
@ 2010-01-08 11:53 Yinghai Lu
  2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

mem_init is moved early already.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c |   14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -19,7 +19,6 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <linux/bootmem.h>
 #include <trace/events/irq.h>
 
 #include "internals.h"
@@ -87,12 +86,8 @@ void __ref init_kstat_irqs(struct irq_de
 {
 	void *ptr;
 
-	if (slab_is_available())
-		ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
-				   GFP_ATOMIC, node);
-	else
-		ptr = alloc_bootmem_node(NODE_DATA(node),
-				nr * sizeof(*desc->kstat_irqs));
+	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+			   GFP_ATOMIC, node);
 
 	/*
 	 * don't overwite if can not get new one
@@ -219,10 +214,7 @@ struct irq_desc * __ref irq_to_desc_allo
 	if (desc)
 		goto out_unlock;
 
-	if (slab_is_available())
-		desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	else
-		desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
 
 	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/5] radix: move radix init early
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 11:53 ` [PATCH 3/5] sparseirq: change irq_desc_ptrs to static Yinghai Lu
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

prepare to use it in early_irq_init()

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 init/main.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6/init/main.c
===================================================================
--- linux-2.6.orig/init/main.c
+++ linux-2.6/init/main.c
@@ -584,6 +584,7 @@ asmlinkage void __init start_kernel(void
 		local_irq_disable();
 	}
 	rcu_init();
+	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
@@ -659,7 +660,6 @@ asmlinkage void __init start_kernel(void
 	key_init();
 	security_init();
 	vfs_caches_init(totalram_pages);
-	radix_tree_init();
 	signals_init();
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3/5] sparseirq: change irq_desc_ptrs to static
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
  2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

add replace_irq_desc()

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c       |    8 +++++++-
 kernel/irq/internals.h    |    6 +-----
 kernel/irq/numa_migrate.c |    4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -127,7 +127,7 @@ static void init_one_irq_desc(int irq, s
  */
 DEFINE_RAW_SPINLOCK(sparse_irq_lock);
 
-struct irq_desc **irq_desc_ptrs __read_mostly;
+static struct irq_desc **irq_desc_ptrs __read_mostly;
 
 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS_LEGACY-1] = {
@@ -192,6 +192,12 @@ struct irq_desc *irq_to_desc(unsigned in
 	return NULL;
 }
 
+void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	if (irq_desc_ptrs && irq < nr_irqs)
+		irq_desc_ptrs[irq] = desc;
+}
+
 struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
Index: linux-2.6/kernel/irq/internals.h
===================================================================
--- linux-2.6.orig/kernel/irq/internals.h
+++ linux-2.6/kernel/irq/internals.h
@@ -21,11 +21,7 @@ extern void clear_kstat_irqs(struct irq_
 extern raw_spinlock_t sparse_irq_lock;
 
 #ifdef CONFIG_SPARSE_IRQ
-/* irq_desc_ptrs allocated at boot time */
-extern struct irq_desc **irq_desc_ptrs;
-#else
-/* irq_desc_ptrs is a fixed size array */
-extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
+void replace_irq_desc(unsigned int irq, struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_PROC_FS
Index: linux-2.6/kernel/irq/numa_migrate.c
===================================================================
--- linux-2.6.orig/kernel/irq/numa_migrate.c
+++ linux-2.6/kernel/irq/numa_migrate.c
@@ -70,7 +70,7 @@ static struct irq_desc *__real_move_irq_
 	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
 
 	/* We have to check it to avoid races with another CPU */
-	desc = irq_desc_ptrs[irq];
+	desc = irq_to_desc(irq);
 
 	if (desc && old_desc != desc)
 		goto out_unlock;
@@ -90,7 +90,7 @@ static struct irq_desc *__real_move_irq_
 		goto out_unlock;
 	}
 
-	irq_desc_ptrs[irq] = desc;
+	replace_irq_desc(irq, desc);
 	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	/* free the old one */

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
  2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
  2010-01-08 11:53 ` [PATCH 3/5] sparseirq: change irq_desc_ptrs to static Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 12:14   ` Cyrill Gorcunov
  2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
  2010-01-08 18:52 ` [PATCH 1/5] irq: remove not need bootmem code Eric W. Biederman
  4 siblings, 1 reply; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

use radix_tree irq_desc_tree instead of irq_desc_ptrs.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c |   47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -19,6 +19,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
+#include <linux/radix-tree.h>
 #include <trace/events/irq.h>
 
 #include "internals.h"
@@ -127,7 +128,23 @@ static void init_one_irq_desc(int irq, s
  */
 DEFINE_RAW_SPINLOCK(sparse_irq_lock);
 
-static struct irq_desc **irq_desc_ptrs __read_mostly;
+static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
+
+static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	radix_tree_insert(&irq_desc_tree, irq, desc);
+}
+
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return radix_tree_lookup(&irq_desc_tree, irq);
+}
+
+void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	radix_tree_delete(&irq_desc_tree, irq);
+	radix_tree_insert(&irq_desc_tree, irq, desc);
+}
 
 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS_LEGACY-1] = {
@@ -159,9 +176,6 @@ int __init early_irq_init(void)
 	legacy_count = ARRAY_SIZE(irq_desc_legacy);
 	node = first_online_node;
 
-	/* allocate irq_desc_ptrs array based on nr_irqs */
-	irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT);
-
 	/* allocate based on nr_cpu_ids */
 	kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
 					  sizeof(int), GFP_NOWAIT, node);
@@ -175,29 +189,12 @@ int __init early_irq_init(void)
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 		alloc_desc_masks(&desc[i], node, true);
 		init_desc_masks(&desc[i]);
-		irq_desc_ptrs[i] = desc + i;
+		set_irq_desc(i, &desc[i]);
 	}
 
-	for (i = legacy_count; i < nr_irqs; i++)
-		irq_desc_ptrs[i] = NULL;
-
 	return arch_early_irq_init();
 }
 
-struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	if (irq_desc_ptrs && irq < nr_irqs)
-		return irq_desc_ptrs[irq];
-
-	return NULL;
-}
-
-void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
-{
-	if (irq_desc_ptrs && irq < nr_irqs)
-		irq_desc_ptrs[irq] = desc;
-}
-
 struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
@@ -209,14 +206,14 @@ struct irq_desc * __ref irq_to_desc_allo
 		return NULL;
 	}
 
-	desc = irq_desc_ptrs[irq];
+	desc = irq_to_desc(irq);
 	if (desc)
 		return desc;
 
 	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
 
 	/* We have to check it to avoid races with another CPU */
-	desc = irq_desc_ptrs[irq];
+	desc = irq_to_desc(irq);
 	if (desc)
 		goto out_unlock;
 
@@ -229,7 +226,7 @@ struct irq_desc * __ref irq_to_desc_allo
 	}
 	init_one_irq_desc(irq, desc, node);
 
-	irq_desc_ptrs[irq] = desc;
+	set_irq_desc(irq, desc);
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
                   ` (2 preceding siblings ...)
  2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 19:11   ` Eric W. Biederman
  2010-01-08 18:52 ` [PATCH 1/5] irq: remove not need bootmem code Eric W. Biederman
  4 siblings, 1 reply; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

that is max number on run time.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/kernel/apic/io_apic.c |   16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -3840,16 +3840,14 @@ int __init arch_probe_nr_irqs(void)
 {
 	int nr;
 
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-	nr = nr_irqs_gsi + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-	/*
-	 * for MSI and HT dyn irq
-	 */
-	nr += nr_irqs_gsi * 64;
+	nr = NR_VECTORS - FIRST_DEVICE_VECTOR;
+#ifdef CONFIG_SMP
+	/* system vectors */
+	nr -= 0x10;
 #endif
+	nr *= nr_cpu_ids;
+	nr += FIRST_DEVICE_VECTOR - IRQ0_VECTOR;
+
 	if (nr < nr_irqs)
 		nr_irqs = nr;
 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array
  2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
@ 2010-01-08 12:14   ` Cyrill Gorcunov
  2010-01-08 18:43     ` Eric W. Biederman
  0 siblings, 1 reply; 18+ messages in thread
From: Cyrill Gorcunov @ 2010-01-08 12:14 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg, linux-kernel

On Fri, Jan 08, 2010 at 03:53:14AM -0800, Yinghai Lu wrote:
> use radix_tree irq_desc_tree instead of irq_desc_ptrs.
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  kernel/irq/handle.c |   47 ++++++++++++++++++++++-------------------------
>  1 file changed, 22 insertions(+), 25 deletions(-)
> 
> Index: linux-2.6/kernel/irq/handle.c
> ===================================================================
> --- linux-2.6.orig/kernel/irq/handle.c
> +++ linux-2.6/kernel/irq/handle.c
> @@ -19,6 +19,7 @@
>  #include <linux/kernel_stat.h>
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
> +#include <linux/radix-tree.h>
>  #include <trace/events/irq.h>
>  
>  #include "internals.h"
> @@ -127,7 +128,23 @@ static void init_one_irq_desc(int irq, s
>   */
>  DEFINE_RAW_SPINLOCK(sparse_irq_lock);
>  
> -static struct irq_desc **irq_desc_ptrs __read_mostly;
> +static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
> +
> +static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
> +{
> +	radix_tree_insert(&irq_desc_tree, irq, desc);
> +}
> +
> +struct irq_desc *irq_to_desc(unsigned int irq)
> +{
> +	return radix_tree_lookup(&irq_desc_tree, irq);
> +}
> +
> +void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
> +{
> +	radix_tree_delete(&irq_desc_tree, irq);
> +	radix_tree_insert(&irq_desc_tree, irq, desc);
> +}
>  
...

Hi Yinghai,

should not we printk\warn if radix_tree_insert() is get failed?
This is hardly (if ever) happen on machines with small number
of interrupts allocated but anyway.

Or I miss something?

	-- Cyrill

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array
  2010-01-08 12:14   ` Cyrill Gorcunov
@ 2010-01-08 18:43     ` Eric W. Biederman
  0 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2010-01-08 18:43 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Jesse Brandeburg, linux-kernel

Cyrill Gorcunov <gorcunov@gmail.com> writes:

> On Fri, Jan 08, 2010 at 03:53:14AM -0800, Yinghai Lu wrote:
>> use radix_tree irq_desc_tree instead of irq_desc_ptrs.
>> 
>> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>> 
>> ---
>>  kernel/irq/handle.c |   47 ++++++++++++++++++++++-------------------------
>>  1 file changed, 22 insertions(+), 25 deletions(-)
>> 
>> Index: linux-2.6/kernel/irq/handle.c
>> ===================================================================
>> --- linux-2.6.orig/kernel/irq/handle.c
>> +++ linux-2.6/kernel/irq/handle.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/kernel_stat.h>
>>  #include <linux/rculist.h>
>>  #include <linux/hash.h>
>> +#include <linux/radix-tree.h>
>>  #include <trace/events/irq.h>
>>  
>>  #include "internals.h"
>> @@ -127,7 +128,23 @@ static void init_one_irq_desc(int irq, s
>>   */
>>  DEFINE_RAW_SPINLOCK(sparse_irq_lock);
>>  
>> -static struct irq_desc **irq_desc_ptrs __read_mostly;
>> +static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
>> +
>> +static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
>> +{
>> +	radix_tree_insert(&irq_desc_tree, irq, desc);
>> +}
>> +
>> +struct irq_desc *irq_to_desc(unsigned int irq)
>> +{
>> +	return radix_tree_lookup(&irq_desc_tree, irq);
>> +}
>> +
>> +void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
>> +{
>> +	radix_tree_delete(&irq_desc_tree, irq);
>> +	radix_tree_insert(&irq_desc_tree, irq, desc);
>> +}
>>  
> ...
>
> Hi Yinghai,
>
> should not we printk\warn if radix_tree_insert() is get failed?
> This is hardly (if ever) happen on machines with small number
> of interrupts allocated but anyway.
>
> Or I miss something?

It looks to me like we can use radix_tree_lookup_slot and
radix_tree_replace_slot here.  Since we don't have to allocate
memory radix_tree_replace_slot can not fail.  Using a case
that can never fail seems better than worry about a case that
can rarely fail.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/5] irq: remove not need bootmem code
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
                   ` (3 preceding siblings ...)
  2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
@ 2010-01-08 18:52 ` Eric W. Biederman
  4 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2010-01-08 18:52 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

Yinghai Lu <yinghai@kernel.org> writes:

YH overall this looks like a good patch series, that enables
additional cleanups beyond what it does.  Thank you for putting this
together.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
@ 2010-01-08 19:11   ` Eric W. Biederman
  2010-01-08 19:21     ` H. Peter Anvin
  2010-01-08 19:49     ` Yinghai Lu
  0 siblings, 2 replies; 18+ messages in thread
From: Eric W. Biederman @ 2010-01-08 19:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

Yinghai Lu <yinghai@kernel.org> writes:

> that is max number on run time.

Ouch!  Unless I misread this code this will leave nr_irqs at
NR_IRQS_LEGACY. aka 16.

Let's do something stupid and simple.
nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */

Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
places using nr_irqs that I think those loops would get painful if we
were to do that.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:11   ` Eric W. Biederman
@ 2010-01-08 19:21     ` H. Peter Anvin
  2010-01-08 20:06       ` Yinghai Lu
  2010-01-08 20:10       ` Eric W. Biederman
  2010-01-08 19:49     ` Yinghai Lu
  1 sibling, 2 replies; 18+ messages in thread
From: H. Peter Anvin @ 2010-01-08 19:21 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On 01/08/2010 11:11 AM, Eric W. Biederman wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
> 
>> that is max number on run time.
> 
> Ouch!  Unless I misread this code this will leave nr_irqs at
> NR_IRQS_LEGACY. aka 16.
> 
> Let's do something stupid and simple.
> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */

This would be 1048576 on the biggest machines we currently support.
Now, the number of IRQ *vectors* is limited to
(224-system vectors)*(cpu count), so one could argue that if there is
anything that is not semi-arbitrary it would be that number, but that
doesn't account for vector sharing.

Do we have any place which requires nr_irqs to be *stable*, or can we
simply treat it as a high water mark for IRQ numbers used?

> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
> places using nr_irqs that I think those loops would get painful if we
> were to do that.

Ideally we should presumably get rid of nr_irqs completely?

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:11   ` Eric W. Biederman
  2010-01-08 19:21     ` H. Peter Anvin
@ 2010-01-08 19:49     ` Yinghai Lu
  2010-01-08 20:20       ` Eric W. Biederman
  1 sibling, 1 reply; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 19:49 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On Fri, Jan 8, 2010 at 11:11 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>
>> that is max number on run time.
>
> Ouch!  Unless I misread this code this will leave nr_irqs at
> NR_IRQS_LEGACY. aka 16.

nr_irqs is set to NR_IRQS before.

>
> Let's do something stupid and simple.
> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>
> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
> places using nr_irqs that I think those loops would get painful if we
> were to do that.

or you need have NR_IRQS = NR_CPUS * 256 at first,

and then make nr_irqs = nr_cpu_ids * 224 ?

YH

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:21     ` H. Peter Anvin
@ 2010-01-08 20:06       ` Yinghai Lu
  2010-01-08 21:11         ` H. Peter Anvin
  2010-01-08 20:10       ` Eric W. Biederman
  1 sibling, 1 reply; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 20:06 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Eric W. Biederman, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On Fri, Jan 8, 2010 at 11:21 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 01/08/2010 11:11 AM, Eric W. Biederman wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> that is max number on run time.
>>
>> Ouch!  Unless I misread this code this will leave nr_irqs at
>> NR_IRQS_LEGACY. aka 16.
>>
>> Let's do something stupid and simple.
>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>
> This would be 1048576 on the biggest machines we currently support.
> Now, the number of IRQ *vectors* is limited to
> (224-system vectors)*(cpu count), so one could argue that if there is
> anything that is not semi-arbitrary it would be that number, but that
> doesn't account for vector sharing.

(256 - 32 - 16 - system_vectors) * cpu_count + 16

the 16 is legacy from IRQ0 to 15 has domain with all cpus set.

later could move arch_probe_nr_irqs out of early_irq_init,
call it after init_IRQ(), so could use first_system_vector exactly
instead of 0x10

>
> Do we have any place which requires nr_irqs to be *stable*, or can we
> simply treat it as a high water mark for IRQ numbers used?
>
>> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
>> places using nr_irqs that I think those loops would get painful if we
>> were to do that.
yes, those loops
>
> Ideally we should presumably get rid of nr_irqs completely?

some interface in /proc/interrupts need it to make sure sth in sequence.

YH

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:21     ` H. Peter Anvin
  2010-01-08 20:06       ` Yinghai Lu
@ 2010-01-08 20:10       ` Eric W. Biederman
  2010-01-08 21:15         ` H. Peter Anvin
  1 sibling, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2010-01-08 20:10 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

"H. Peter Anvin" <hpa@zytor.com> writes:

> On 01/08/2010 11:11 AM, Eric W. Biederman wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>> 
>>> that is max number on run time.
>> 
>> Ouch!  Unless I misread this code this will leave nr_irqs at
>> NR_IRQS_LEGACY. aka 16.

I goofed and misread this.  I was looking at nr_irqs_gsi which
is initialized to 16.

We actually initialize nr_irqs to NR_IRQS, which has an unfortunately
convoluted formula, that winds up being 8*NR_CPUS or 32 *MAX_IO_APICS.
in the extreme cases.

Since there are still arrays sized at NR_IRQS (bleh) we can not
increase nr_irqs to be greater than NR_IRQS.

So YN can you do the simple thing here and simply remove
arch_probe_nr_irqs().  Sane code doesn't care how big nr_irqs is and
code that does care needs to be fixed.

>> Let's do something stupid and simple.
>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>
> This would be 1048576 on the biggest machines we currently support.
> Now, the number of IRQ *vectors* is limited to
> (224-system vectors)*(cpu count), so one could argue that if there is
> anything that is not semi-arbitrary it would be that number, but that
> doesn't account for vector sharing.

Except we have irq sources that we know about that are never utilized,
Think of unconnected inputs to ioapics.

I don't know if we ever actually perform vector sharing.  The only case
I recall where the code could share vectors is if the firmware tables
told us to irq sources were the same interrupt.  I don't think that
happens.  We do have the remains of support for vector sharing
in the code but I don't think it is utilized.  MSI interrupts certainly
can not share vectors.

My point with the semi-arbitrary number is that we should not think of
nr_irqs as something defined by the resources of the receivers of
interrupts.  NR_IRQS has never been that.  nr_irqs really is a limit
on how many interrupt sources we have.

> Do we have any place which requires nr_irqs to be *stable*, or can we
> simply treat it as a high water mark for IRQ numbers used?

We have several loops that walk through the irq descriptors and look for
an unbound irq.  Which means having nr_irqs as a high water mark is not
going to work today.

>> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
>> places using nr_irqs that I think those loops would get painful if we
>> were to do that.
>
> Ideally we should presumably get rid of nr_irqs completely?

Yes.  It was enough of a pain the first pass at it that we wound
up with nr_irqs, a value that can vary at boot time.

Once YH's radix tree changes get it in.  A war on NR_IRQS and nr_irqs
seems appropriate.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:49     ` Yinghai Lu
@ 2010-01-08 20:20       ` Eric W. Biederman
  2010-01-08 20:43         ` Yinghai Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Eric W. Biederman @ 2010-01-08 20:20 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

Yinghai Lu <yinghai@kernel.org> writes:

> On Fri, Jan 8, 2010 at 11:11 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> that is max number on run time.
>>
>> Ouch!  Unless I misread this code this will leave nr_irqs at
>> NR_IRQS_LEGACY. aka 16.
>
> nr_irqs is set to NR_IRQS before.

Yep my mistake.

>> Let's do something stupid and simple.
>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>>
>> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
>> places using nr_irqs that I think those loops would get painful if we
>> were to do that.
>
> or you need have NR_IRQS = NR_CPUS * 256 at first,
>
> and then make nr_irqs = nr_cpu_ids * 224 ?

The important part is that NR_IRQS become an arbitrary number larger
than we can strictly support.

Based on my quick look the bad offenders (aka static sized arrays of
NR_IRQS) all look at NR_IRQS not nr_irqs.  So I don't see a point
in having nr_irqs < NR_IRQS.

So let's just kill arch_probe_nr_irqs() on x86.

Then we can worry about things like fixing xen and the interrupt
remapping code to not having NR_IRQS sized arrays.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 20:20       ` Eric W. Biederman
@ 2010-01-08 20:43         ` Yinghai Lu
  0 siblings, 0 replies; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 20:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On Fri, Jan 8, 2010 at 12:20 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Based on my quick look the bad offenders (aka static sized arrays of
> NR_IRQS) all look at NR_IRQS not nr_irqs.  So I don't see a point
> in having nr_irqs < NR_IRQS.
>
> So let's just kill arch_probe_nr_irqs() on x86.
>
> Then we can worry about things like fixing xen and the interrupt
> remapping code to not having NR_IRQS sized arrays.

arch/sh/kernel/irq.c:int __init arch_probe_nr_irqs(void)
arch/x86/kernel/apic/io_apic.c:int __init arch_probe_nr_irqs(void)
arch/x86/kernel/apic/io_apic.c:  *  if we move calling of
arch_probe_nr_irqs() after init_IRQ()
include/linux/interrupt.h:extern int arch_probe_nr_irqs(void);
kernel/irq/handle.c:    arch_probe_nr_irqs();
kernel/softirq.c:int __init __weak arch_probe_nr_irqs(void)

sh have sparse_irq now...it has it's own copy for arch_probe_nr_irqs...

YH

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 20:06       ` Yinghai Lu
@ 2010-01-08 21:11         ` H. Peter Anvin
  0 siblings, 0 replies; 18+ messages in thread
From: H. Peter Anvin @ 2010-01-08 21:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Eric W. Biederman, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On 01/08/2010 12:06 PM, Yinghai Lu wrote:
>>>
>>> Ouch!  Unless I misread this code this will leave nr_irqs at
>>> NR_IRQS_LEGACY. aka 16.
>>>
>>> Let's do something stupid and simple.
>>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>>
>> This would be 1048576 on the biggest machines we currently support.
>> Now, the number of IRQ *vectors* is limited to
>> (224-system vectors)*(cpu count), so one could argue that if there is
>> anything that is not semi-arbitrary it would be that number, but that
>> doesn't account for vector sharing.
> 
> (256 - 32 - 16 - system_vectors) * cpu_count + 16
> 
> the 16 is legacy from IRQ0 to 15 has domain with all cpus set.
> 

... assuming you're on a platform with a legacy PIC.  I would like to
see the legacy PIC hard-coded assumptions to go away, and instead be
done as runtime allocations on the relevant platforms.

> some interface in /proc/interrupts need it to make sure sth in sequence.

I can't even parse this sentence, never mind figuring out what it would
mean.  Certainly there is a better way to do that?

	-hpa


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 20:10       ` Eric W. Biederman
@ 2010-01-08 21:15         ` H. Peter Anvin
  0 siblings, 0 replies; 18+ messages in thread
From: H. Peter Anvin @ 2010-01-08 21:15 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On 01/08/2010 12:10 PM, Eric W. Biederman wrote:
> 
> I don't know if we ever actually perform vector sharing.  The only case
> I recall where the code could share vectors is if the firmware tables
> told us to irq sources were the same interrupt.  I don't think that
> happens.  We do have the remains of support for vector sharing
> in the code but I don't think it is utilized.  MSI interrupts certainly
> can not share vectors.
> 

We probably will need to support that at some point, though, simply
because it's not all that hard to construct a system in which there are
more MSI-X interrupts than there are possible vectors in the system.

Until then we can do 224 * nr_cpus_ids, but it's still not the right thing.

> Yes.  It was enough of a pain the first pass at it that we wound
> up with nr_irqs, a value that can vary at boot time.
> 
> Once YH's radix tree changes get it in.  A war on NR_IRQS and nr_irqs
> seems appropriate.

Agreed.

	-hpa


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/5] irq: remove not need bootmem code
@ 2010-01-08 23:46 Yinghai Lu
  0 siblings, 0 replies; 18+ messages in thread
From: Yinghai Lu @ 2010-01-08 23:46 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

mem_init is moved early already.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c |   14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -19,7 +19,6 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <linux/bootmem.h>
 #include <trace/events/irq.h>
 
 #include "internals.h"
@@ -87,12 +86,8 @@ void __ref init_kstat_irqs(struct irq_de
 {
 	void *ptr;
 
-	if (slab_is_available())
-		ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
-				   GFP_ATOMIC, node);
-	else
-		ptr = alloc_bootmem_node(NODE_DATA(node),
-				nr * sizeof(*desc->kstat_irqs));
+	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+			   GFP_ATOMIC, node);
 
 	/*
 	 * don't overwite if can not get new one
@@ -219,10 +214,7 @@ struct irq_desc * __ref irq_to_desc_allo
 	if (desc)
 		goto out_unlock;
 
-	if (slab_is_available())
-		desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	else
-		desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
 
 	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2010-01-08 23:47 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
2010-01-08 11:53 ` [PATCH 3/5] sparseirq: change irq_desc_ptrs to static Yinghai Lu
2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
2010-01-08 12:14   ` Cyrill Gorcunov
2010-01-08 18:43     ` Eric W. Biederman
2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
2010-01-08 19:11   ` Eric W. Biederman
2010-01-08 19:21     ` H. Peter Anvin
2010-01-08 20:06       ` Yinghai Lu
2010-01-08 21:11         ` H. Peter Anvin
2010-01-08 20:10       ` Eric W. Biederman
2010-01-08 21:15         ` H. Peter Anvin
2010-01-08 19:49     ` Yinghai Lu
2010-01-08 20:20       ` Eric W. Biederman
2010-01-08 20:43         ` Yinghai Lu
2010-01-08 18:52 ` [PATCH 1/5] irq: remove not need bootmem code Eric W. Biederman
2010-01-08 23:46 Yinghai Lu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.