All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] irq: remove not need bootmem code
@ 2010-01-08 11:53 Yinghai Lu
  2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

mem_init is moved early already.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c |   14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -19,7 +19,6 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <linux/bootmem.h>
 #include <trace/events/irq.h>
 
 #include "internals.h"
@@ -87,12 +86,8 @@ void __ref init_kstat_irqs(struct irq_de
 {
 	void *ptr;
 
-	if (slab_is_available())
-		ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
-				   GFP_ATOMIC, node);
-	else
-		ptr = alloc_bootmem_node(NODE_DATA(node),
-				nr * sizeof(*desc->kstat_irqs));
+	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+			   GFP_ATOMIC, node);
 
 	/*
 	 * don't overwite if can not get new one
@@ -219,10 +214,7 @@ struct irq_desc * __ref irq_to_desc_allo
 	if (desc)
 		goto out_unlock;
 
-	if (slab_is_available())
-		desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	else
-		desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
 
 	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2/5] radix: move radix init early
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 11:53 ` [PATCH 3/5] sparseirq: change irq_desc_ptrs to static Yinghai Lu
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

prepare to use it in early_irq_init()

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 init/main.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6/init/main.c
===================================================================
--- linux-2.6.orig/init/main.c
+++ linux-2.6/init/main.c
@@ -584,6 +584,7 @@ asmlinkage void __init start_kernel(void
 		local_irq_disable();
 	}
 	rcu_init();
+	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
@@ -659,7 +660,6 @@ asmlinkage void __init start_kernel(void
 	key_init();
 	security_init();
 	vfs_caches_init(totalram_pages);
-	radix_tree_init();
 	signals_init();
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3/5] sparseirq: change irq_desc_ptrs to static
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
  2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

add replace_irq_desc()

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c       |    8 +++++++-
 kernel/irq/internals.h    |    6 +-----
 kernel/irq/numa_migrate.c |    4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -127,7 +127,7 @@ static void init_one_irq_desc(int irq, s
  */
 DEFINE_RAW_SPINLOCK(sparse_irq_lock);
 
-struct irq_desc **irq_desc_ptrs __read_mostly;
+static struct irq_desc **irq_desc_ptrs __read_mostly;
 
 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS_LEGACY-1] = {
@@ -192,6 +192,12 @@ struct irq_desc *irq_to_desc(unsigned in
 	return NULL;
 }
 
+void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	if (irq_desc_ptrs && irq < nr_irqs)
+		irq_desc_ptrs[irq] = desc;
+}
+
 struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
Index: linux-2.6/kernel/irq/internals.h
===================================================================
--- linux-2.6.orig/kernel/irq/internals.h
+++ linux-2.6/kernel/irq/internals.h
@@ -21,11 +21,7 @@ extern void clear_kstat_irqs(struct irq_
 extern raw_spinlock_t sparse_irq_lock;
 
 #ifdef CONFIG_SPARSE_IRQ
-/* irq_desc_ptrs allocated at boot time */
-extern struct irq_desc **irq_desc_ptrs;
-#else
-/* irq_desc_ptrs is a fixed size array */
-extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
+void replace_irq_desc(unsigned int irq, struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_PROC_FS
Index: linux-2.6/kernel/irq/numa_migrate.c
===================================================================
--- linux-2.6.orig/kernel/irq/numa_migrate.c
+++ linux-2.6/kernel/irq/numa_migrate.c
@@ -70,7 +70,7 @@ static struct irq_desc *__real_move_irq_
 	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
 
 	/* We have to check it to avoid races with another CPU */
-	desc = irq_desc_ptrs[irq];
+	desc = irq_to_desc(irq);
 
 	if (desc && old_desc != desc)
 		goto out_unlock;
@@ -90,7 +90,7 @@ static struct irq_desc *__real_move_irq_
 		goto out_unlock;
 	}
 
-	irq_desc_ptrs[irq] = desc;
+	replace_irq_desc(irq, desc);
 	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	/* free the old one */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
  2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
  2010-01-08 11:53 ` [PATCH 3/5] sparseirq: change irq_desc_ptrs to static Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 12:14   ` Cyrill Gorcunov
  2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
  2010-01-08 18:52 ` [PATCH 1/5] irq: remove not need bootmem code Eric W. Biederman
  4 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

use radix_tree irq_desc_tree instead of irq_desc_ptrs.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 kernel/irq/handle.c |   47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -19,6 +19,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
+#include <linux/radix-tree.h>
 #include <trace/events/irq.h>
 
 #include "internals.h"
@@ -127,7 +128,23 @@ static void init_one_irq_desc(int irq, s
  */
 DEFINE_RAW_SPINLOCK(sparse_irq_lock);
 
-static struct irq_desc **irq_desc_ptrs __read_mostly;
+static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
+
+static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	radix_tree_insert(&irq_desc_tree, irq, desc);
+}
+
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return radix_tree_lookup(&irq_desc_tree, irq);
+}
+
+void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	radix_tree_delete(&irq_desc_tree, irq);
+	radix_tree_insert(&irq_desc_tree, irq, desc);
+}
 
 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS_LEGACY-1] = {
@@ -159,9 +176,6 @@ int __init early_irq_init(void)
 	legacy_count = ARRAY_SIZE(irq_desc_legacy);
 	node = first_online_node;
 
-	/* allocate irq_desc_ptrs array based on nr_irqs */
-	irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT);
-
 	/* allocate based on nr_cpu_ids */
 	kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
 					  sizeof(int), GFP_NOWAIT, node);
@@ -175,29 +189,12 @@ int __init early_irq_init(void)
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 		alloc_desc_masks(&desc[i], node, true);
 		init_desc_masks(&desc[i]);
-		irq_desc_ptrs[i] = desc + i;
+		set_irq_desc(i, &desc[i]);
 	}
 
-	for (i = legacy_count; i < nr_irqs; i++)
-		irq_desc_ptrs[i] = NULL;
-
 	return arch_early_irq_init();
 }
 
-struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	if (irq_desc_ptrs && irq < nr_irqs)
-		return irq_desc_ptrs[irq];
-
-	return NULL;
-}
-
-void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
-{
-	if (irq_desc_ptrs && irq < nr_irqs)
-		irq_desc_ptrs[irq] = desc;
-}
-
 struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
@@ -209,14 +206,14 @@ struct irq_desc * __ref irq_to_desc_allo
 		return NULL;
 	}
 
-	desc = irq_desc_ptrs[irq];
+	desc = irq_to_desc(irq);
 	if (desc)
 		return desc;
 
 	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
 
 	/* We have to check it to avoid races with another CPU */
-	desc = irq_desc_ptrs[irq];
+	desc = irq_to_desc(irq);
 	if (desc)
 		goto out_unlock;
 
@@ -229,7 +226,7 @@ struct irq_desc * __ref irq_to_desc_allo
 	}
 	init_one_irq_desc(irq, desc, node);
 
-	irq_desc_ptrs[irq] = desc;
+	set_irq_desc(irq, desc);
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
                   ` (2 preceding siblings ...)
  2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
@ 2010-01-08 11:53 ` Yinghai Lu
  2010-01-08 19:11   ` Eric W. Biederman
  2010-01-08 18:52 ` [PATCH 1/5] irq: remove not need bootmem code Eric W. Biederman
  4 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 11:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg
  Cc: linux-kernel, Yinghai Lu

that is max number on run time.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/kernel/apic/io_apic.c |   16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -3840,16 +3840,14 @@ int __init arch_probe_nr_irqs(void)
 {
 	int nr;
 
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-	nr = nr_irqs_gsi + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-	/*
-	 * for MSI and HT dyn irq
-	 */
-	nr += nr_irqs_gsi * 64;
+	nr = NR_VECTORS - FIRST_DEVICE_VECTOR;
+#ifdef CONFIG_SMP
+	/* system vectors */
+	nr -= 0x10;
 #endif
+	nr *= nr_cpu_ids;
+	nr += FIRST_DEVICE_VECTOR - IRQ0_VECTOR;
+
 	if (nr < nr_irqs)
 		nr_irqs = nr;
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array
  2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
@ 2010-01-08 12:14   ` Cyrill Gorcunov
  2010-01-08 18:43     ` Eric W. Biederman
  0 siblings, 1 reply; 17+ messages in thread
From: Cyrill Gorcunov @ 2010-01-08 12:14 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Eric W. Biederman, Jesse Brandeburg, linux-kernel

On Fri, Jan 08, 2010 at 03:53:14AM -0800, Yinghai Lu wrote:
> use radix_tree irq_desc_tree instead of irq_desc_ptrs.
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> 
> ---
>  kernel/irq/handle.c |   47 ++++++++++++++++++++++-------------------------
>  1 file changed, 22 insertions(+), 25 deletions(-)
> 
> Index: linux-2.6/kernel/irq/handle.c
> ===================================================================
> --- linux-2.6.orig/kernel/irq/handle.c
> +++ linux-2.6/kernel/irq/handle.c
> @@ -19,6 +19,7 @@
>  #include <linux/kernel_stat.h>
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
> +#include <linux/radix-tree.h>
>  #include <trace/events/irq.h>
>  
>  #include "internals.h"
> @@ -127,7 +128,23 @@ static void init_one_irq_desc(int irq, s
>   */
>  DEFINE_RAW_SPINLOCK(sparse_irq_lock);
>  
> -static struct irq_desc **irq_desc_ptrs __read_mostly;
> +static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
> +
> +static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
> +{
> +	radix_tree_insert(&irq_desc_tree, irq, desc);
> +}
> +
> +struct irq_desc *irq_to_desc(unsigned int irq)
> +{
> +	return radix_tree_lookup(&irq_desc_tree, irq);
> +}
> +
> +void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
> +{
> +	radix_tree_delete(&irq_desc_tree, irq);
> +	radix_tree_insert(&irq_desc_tree, irq, desc);
> +}
>  
...

Hi Yinghai,

should not we printk\warn if radix_tree_insert() is get failed?
This is hardly (if ever) happen on machines with small number
of interrupts allocated but anyway.

Or I miss something?

	-- Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array
  2010-01-08 12:14   ` Cyrill Gorcunov
@ 2010-01-08 18:43     ` Eric W. Biederman
  0 siblings, 0 replies; 17+ messages in thread
From: Eric W. Biederman @ 2010-01-08 18:43 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andrew Morton, Jesse Brandeburg, linux-kernel

Cyrill Gorcunov <gorcunov@gmail.com> writes:

> On Fri, Jan 08, 2010 at 03:53:14AM -0800, Yinghai Lu wrote:
>> use radix_tree irq_desc_tree instead of irq_desc_ptrs.
>> 
>> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>> 
>> ---
>>  kernel/irq/handle.c |   47 ++++++++++++++++++++++-------------------------
>>  1 file changed, 22 insertions(+), 25 deletions(-)
>> 
>> Index: linux-2.6/kernel/irq/handle.c
>> ===================================================================
>> --- linux-2.6.orig/kernel/irq/handle.c
>> +++ linux-2.6/kernel/irq/handle.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/kernel_stat.h>
>>  #include <linux/rculist.h>
>>  #include <linux/hash.h>
>> +#include <linux/radix-tree.h>
>>  #include <trace/events/irq.h>
>>  
>>  #include "internals.h"
>> @@ -127,7 +128,23 @@ static void init_one_irq_desc(int irq, s
>>   */
>>  DEFINE_RAW_SPINLOCK(sparse_irq_lock);
>>  
>> -static struct irq_desc **irq_desc_ptrs __read_mostly;
>> +static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
>> +
>> +static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
>> +{
>> +	radix_tree_insert(&irq_desc_tree, irq, desc);
>> +}
>> +
>> +struct irq_desc *irq_to_desc(unsigned int irq)
>> +{
>> +	return radix_tree_lookup(&irq_desc_tree, irq);
>> +}
>> +
>> +void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
>> +{
>> +	radix_tree_delete(&irq_desc_tree, irq);
>> +	radix_tree_insert(&irq_desc_tree, irq, desc);
>> +}
>>  
> ...
>
> Hi Yinghai,
>
> should not we printk\warn if radix_tree_insert() is get failed?
> This is hardly (if ever) happen on machines with small number
> of interrupts allocated but anyway.
>
> Or I miss something?

It looks to me like we can use radix_tree_lookup_slot and
radix_tree_replace_slot here.  Since we don't have to allocate
memory radix_tree_replace_slot can not fail.  Using a case
that can never fail seems better than worry about a case that
can rarely fail.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/5] irq: remove not need bootmem code
  2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
                   ` (3 preceding siblings ...)
  2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
@ 2010-01-08 18:52 ` Eric W. Biederman
  4 siblings, 0 replies; 17+ messages in thread
From: Eric W. Biederman @ 2010-01-08 18:52 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

Yinghai Lu <yinghai@kernel.org> writes:

YH overall this looks like a good patch series, that enables
additional cleanups beyond what it does.  Thank you for putting this
together.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
@ 2010-01-08 19:11   ` Eric W. Biederman
  2010-01-08 19:21     ` H. Peter Anvin
  2010-01-08 19:49     ` Yinghai Lu
  0 siblings, 2 replies; 17+ messages in thread
From: Eric W. Biederman @ 2010-01-08 19:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

Yinghai Lu <yinghai@kernel.org> writes:

> that is max number on run time.

Ouch!  Unless I misread this code this will leave nr_irqs at
NR_IRQS_LEGACY. aka 16.

Let's do something stupid and simple.
nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */

Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
places using nr_irqs that I think those loops would get painful if we
were to do that.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:11   ` Eric W. Biederman
@ 2010-01-08 19:21     ` H. Peter Anvin
  2010-01-08 20:06       ` Yinghai Lu
  2010-01-08 20:10       ` Eric W. Biederman
  2010-01-08 19:49     ` Yinghai Lu
  1 sibling, 2 replies; 17+ messages in thread
From: H. Peter Anvin @ 2010-01-08 19:21 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On 01/08/2010 11:11 AM, Eric W. Biederman wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
> 
>> that is max number on run time.
> 
> Ouch!  Unless I misread this code this will leave nr_irqs at
> NR_IRQS_LEGACY. aka 16.
> 
> Let's do something stupid and simple.
> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */

This would be 1048576 on the biggest machines we currently support.
Now, the number of IRQ *vectors* is limited to
(224-system vectors)*(cpu count), so one could argue that if there is
anything that is not semi-arbitrary it would be that number, but that
doesn't account for vector sharing.

Do we have any place which requires nr_irqs to be *stable*, or can we
simply treat it as a high water mark for IRQ numbers used?

> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
> places using nr_irqs that I think those loops would get painful if we
> were to do that.

Ideally we should presumably get rid of nr_irqs completely?

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:11   ` Eric W. Biederman
  2010-01-08 19:21     ` H. Peter Anvin
@ 2010-01-08 19:49     ` Yinghai Lu
  2010-01-08 20:20       ` Eric W. Biederman
  1 sibling, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 19:49 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On Fri, Jan 8, 2010 at 11:11 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yinghai@kernel.org> writes:
>
>> that is max number on run time.
>
> Ouch!  Unless I misread this code this will leave nr_irqs at
> NR_IRQS_LEGACY. aka 16.

nr_irqs is set to NR_IRQS before.

>
> Let's do something stupid and simple.
> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>
> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
> places using nr_irqs that I think those loops would get painful if we
> were to do that.

or you need have NR_IRQS = NR_CPUS * 256 at first,

and then make nr_irqs = nr_cpu_ids * 224 ?

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:21     ` H. Peter Anvin
@ 2010-01-08 20:06       ` Yinghai Lu
  2010-01-08 21:11         ` H. Peter Anvin
  2010-01-08 20:10       ` Eric W. Biederman
  1 sibling, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 20:06 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Eric W. Biederman, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On Fri, Jan 8, 2010 at 11:21 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 01/08/2010 11:11 AM, Eric W. Biederman wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> that is max number on run time.
>>
>> Ouch!  Unless I misread this code this will leave nr_irqs at
>> NR_IRQS_LEGACY. aka 16.
>>
>> Let's do something stupid and simple.
>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>
> This would be 1048576 on the biggest machines we currently support.
> Now, the number of IRQ *vectors* is limited to
> (224-system vectors)*(cpu count), so one could argue that if there is
> anything that is not semi-arbitrary it would be that number, but that
> doesn't account for vector sharing.

(256 - 32 - 16 - system_vectors) * cpu_count + 16

the 16 is legacy from IRQ0 to 15 has domain with all cpus set.

later could move arch_probe_nr_irqs out of early_irq_init,
call it after init_IRQ(), so could use first_system_vector exactly
instead of 0x10

>
> Do we have any place which requires nr_irqs to be *stable*, or can we
> simply treat it as a high water mark for IRQ numbers used?
>
>> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
>> places using nr_irqs that I think those loops would get painful if we
>> were to do that.
yes, those loops
>
> Ideally we should presumably get rid of nr_irqs completely?

some interface in /proc/interrupts need it to make sure sth in sequence.

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:21     ` H. Peter Anvin
  2010-01-08 20:06       ` Yinghai Lu
@ 2010-01-08 20:10       ` Eric W. Biederman
  2010-01-08 21:15         ` H. Peter Anvin
  1 sibling, 1 reply; 17+ messages in thread
From: Eric W. Biederman @ 2010-01-08 20:10 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

"H. Peter Anvin" <hpa@zytor.com> writes:

> On 01/08/2010 11:11 AM, Eric W. Biederman wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>> 
>>> that is max number on run time.
>> 
>> Ouch!  Unless I misread this code this will leave nr_irqs at
>> NR_IRQS_LEGACY. aka 16.

I goofed and misread this.  I was looking at nr_irqs_gsi which
is initialized to 16.

We actually initialize nr_irqs to NR_IRQS, which has an unfortunately
convoluted formula, that winds up being 8*NR_CPUS or 32 *MAX_IO_APICS.
in the extreme cases.

Since there are still arrays sized at NR_IRQS (bleh) we can not
increase nr_irqs to be greater than NR_IRQS.

So YN can you do the simple thing here and simply remove
arch_probe_nr_irqs().  Sane code doesn't care how big nr_irqs is and
code that does care needs to be fixed.

>> Let's do something stupid and simple.
>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>
> This would be 1048576 on the biggest machines we currently support.
> Now, the number of IRQ *vectors* is limited to
> (224-system vectors)*(cpu count), so one could argue that if there is
> anything that is not semi-arbitrary it would be that number, but that
> doesn't account for vector sharing.

Except we have irq sources that we know about that are never utilized,
Think of unconnected inputs to ioapics.

I don't know if we ever actually perform vector sharing.  The only case
I recall where the code could share vectors is if the firmware tables
told us to irq sources were the same interrupt.  I don't think that
happens.  We do have the remains of support for vector sharing
in the code but I don't think it is utilized.  MSI interrupts certainly
can not share vectors.

My point with the semi-arbitrary number is that we should not think of
nr_irqs as something defined by the resources of the receivers of
interrupts.  NR_IRQS has never been that.  nr_irqs really is a limit
on how many interrupt sources we have.

> Do we have any place which requires nr_irqs to be *stable*, or can we
> simply treat it as a high water mark for IRQ numbers used?

We have several loops that walk through the irq descriptors and look for
an unbound irq.  Which means having nr_irqs as a high water mark is not
going to work today.

>> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
>> places using nr_irqs that I think those loops would get painful if we
>> were to do that.
>
> Ideally we should presumably get rid of nr_irqs completely?

Yes.  It was enough of a pain the first pass at it that we wound
up with nr_irqs, a value that can vary at boot time.

Once YH's radix tree changes get it in.  A war on NR_IRQS and nr_irqs
seems appropriate.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 19:49     ` Yinghai Lu
@ 2010-01-08 20:20       ` Eric W. Biederman
  2010-01-08 20:43         ` Yinghai Lu
  0 siblings, 1 reply; 17+ messages in thread
From: Eric W. Biederman @ 2010-01-08 20:20 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

Yinghai Lu <yinghai@kernel.org> writes:

> On Fri, Jan 8, 2010 at 11:11 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Yinghai Lu <yinghai@kernel.org> writes:
>>
>>> that is max number on run time.
>>
>> Ouch!  Unless I misread this code this will leave nr_irqs at
>> NR_IRQS_LEGACY. aka 16.
>
> nr_irqs is set to NR_IRQS before.

Yep my mistake.

>> Let's do something stupid and simple.
>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>>
>> Ideally we would set "nr_irqs = 0x7fffffff;" but we have just enough
>> places using nr_irqs that I think those loops would get painful if we
>> were to do that.
>
> or you need have NR_IRQS = NR_CPUS * 256 at first,
>
> and then make nr_irqs = nr_cpu_ids * 224 ?

The important part is that NR_IRQS become an arbitrary number larger
than we can strictly support.

Based on my quick look the bad offenders (aka static sized arrays of
NR_IRQS) all look at NR_IRQS not nr_irqs.  So I don't see a point
in having nr_irqs < NR_IRQS.

So let's just kill arch_probe_nr_irqs() on x86.

Then we can worry about things like fixing xen and the interrupt
remapping code to not having NR_IRQS sized arrays.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 20:20       ` Eric W. Biederman
@ 2010-01-08 20:43         ` Yinghai Lu
  0 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2010-01-08 20:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On Fri, Jan 8, 2010 at 12:20 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Based on my quick look the bad offenders (aka static sized arrays of
> NR_IRQS) all look at NR_IRQS not nr_irqs.  So I don't see a point
> in having nr_irqs < NR_IRQS.
>
> So let's just kill arch_probe_nr_irqs() on x86.
>
> Then we can worry about things like fixing xen and the interrupt
> remapping code to not having NR_IRQS sized arrays.

arch/sh/kernel/irq.c:int __init arch_probe_nr_irqs(void)
arch/x86/kernel/apic/io_apic.c:int __init arch_probe_nr_irqs(void)
arch/x86/kernel/apic/io_apic.c:  *  if we move calling of
arch_probe_nr_irqs() after init_IRQ()
include/linux/interrupt.h:extern int arch_probe_nr_irqs(void);
kernel/irq/handle.c:    arch_probe_nr_irqs();
kernel/softirq.c:int __init __weak arch_probe_nr_irqs(void)

sh have sparse_irq now...it has it's own copy for arch_probe_nr_irqs...

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 20:06       ` Yinghai Lu
@ 2010-01-08 21:11         ` H. Peter Anvin
  0 siblings, 0 replies; 17+ messages in thread
From: H. Peter Anvin @ 2010-01-08 21:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Eric W. Biederman, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On 01/08/2010 12:06 PM, Yinghai Lu wrote:
>>>
>>> Ouch!  Unless I misread this code this will leave nr_irqs at
>>> NR_IRQS_LEGACY. aka 16.
>>>
>>> Let's do something stupid and simple.
>>> nr_irqs = nr_cpus_ids * 256;  /* Semi-arbitrary number */
>>
>> This would be 1048576 on the biggest machines we currently support.
>> Now, the number of IRQ *vectors* is limited to
>> (224-system vectors)*(cpu count), so one could argue that if there is
>> anything that is not semi-arbitrary it would be that number, but that
>> doesn't account for vector sharing.
> 
> (256 - 32 - 16 - system_vectors) * cpu_count + 16
> 
> the 16 is legacy from IRQ0 to 15 has domain with all cpus set.
> 

... assuming you're on a platform with a legacy PIC.  I would like to
see the legacy PIC hard-coded assumptions to go away, and instead be
done as runtime allocations on the relevant platforms.

> some interface in /proc/interrupts need it to make sure sth in sequence.

I can't even parse this sentence, never mind figuring out what it would
mean.  Certainly there is a better way to do that?

	-hpa


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] x86: update nr_irqs according cpu num
  2010-01-08 20:10       ` Eric W. Biederman
@ 2010-01-08 21:15         ` H. Peter Anvin
  0 siblings, 0 replies; 17+ messages in thread
From: H. Peter Anvin @ 2010-01-08 21:15 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, Andrew Morton,
	Jesse Brandeburg, linux-kernel

On 01/08/2010 12:10 PM, Eric W. Biederman wrote:
> 
> I don't know if we ever actually perform vector sharing.  The only case
> I recall where the code could share vectors is if the firmware tables
> told us to irq sources were the same interrupt.  I don't think that
> happens.  We do have the remains of support for vector sharing
> in the code but I don't think it is utilized.  MSI interrupts certainly
> can not share vectors.
> 

We probably will need to support that at some point, though, simply
because it's not all that hard to construct a system in which there are
more MSI-X interrupts than there are possible vectors in the system.

Until then we can do 224 * nr_cpus_ids, but it's still not the right thing.

> Yes.  It was enough of a pain the first pass at it that we wound
> up with nr_irqs, a value that can vary at boot time.
> 
> Once YH's radix tree changes get it in.  A war on NR_IRQS and nr_irqs
> seems appropriate.

Agreed.

	-hpa


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2010-01-08 21:24 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-08 11:53 [PATCH 1/5] irq: remove not need bootmem code Yinghai Lu
2010-01-08 11:53 ` [PATCH 2/5] radix: move radix init early Yinghai Lu
2010-01-08 11:53 ` [PATCH 3/5] sparseirq: change irq_desc_ptrs to static Yinghai Lu
2010-01-08 11:53 ` [PATCH 4/5] sparseirq: use radix_tree instead of ptrs array Yinghai Lu
2010-01-08 12:14   ` Cyrill Gorcunov
2010-01-08 18:43     ` Eric W. Biederman
2010-01-08 11:53 ` [PATCH 5/5] x86: update nr_irqs according cpu num Yinghai Lu
2010-01-08 19:11   ` Eric W. Biederman
2010-01-08 19:21     ` H. Peter Anvin
2010-01-08 20:06       ` Yinghai Lu
2010-01-08 21:11         ` H. Peter Anvin
2010-01-08 20:10       ` Eric W. Biederman
2010-01-08 21:15         ` H. Peter Anvin
2010-01-08 19:49     ` Yinghai Lu
2010-01-08 20:20       ` Eric W. Biederman
2010-01-08 20:43         ` Yinghai Lu
2010-01-08 18:52 ` [PATCH 1/5] irq: remove not need bootmem code Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.