All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] irq: Add node_affinity CPU masks for smarter irqbalance hints
@ 2009-11-23  6:46 Peter P Waskiewicz Jr
  2009-11-23  7:32   ` Yong Zhang
  0 siblings, 1 reply; 68+ messages in thread
From: Peter P Waskiewicz Jr @ 2009-11-23  6:46 UTC (permalink / raw)
  To: linux-kernel, arjan; +Cc: davem, netdev

This patchset adds a new CPU mask for SMP systems to the irq_desc
struct.  It also exposes an API for underlying device drivers to
assist irqbalance in making smarter decisions when balancing, especially
in a NUMA environment.  For example, an ethernet driver with MSI-X may
wish to limit the CPUs that an interrupt can be balanced within to
stay on a single NUMA node.  Current irqbalance operation can move the
interrupt off the node, resulting in cross-node memory accesses and
locks.

The API is a get/set API within the kernel, along with a /proc entry
for the interrupt.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---

 include/linux/interrupt.h |    8 ++++++
 include/linux/irq.h       |    2 ++
 kernel/irq/manage.c       |   32 +++++++++++++++++++++++++
 kernel/irq/proc.c         |   57 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00..9fd08aa 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -208,6 +208,8 @@ extern cpumask_var_t irq_default_affinity;
 extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
+extern int irq_set_node_affinity(unsigned int irq,
+                                 const struct cpumask *cpumask);
 
 #else /* CONFIG_SMP */
 
@@ -223,6 +225,12 @@ static inline int irq_can_set_affinity(unsigned int irq)
 
 static inline int irq_select_affinity(unsigned int irq)  { return 0; }
 
+static inline int irq_set_node_affinity(unsigned int irq,
+                                        const struct cpumask *m)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index ae9653d..26d7d07 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -166,6 +166,7 @@ struct irq_2_iommu;
  * @lock:		locking for SMP
  * @affinity:		IRQ affinity on SMP
  * @node:		node index useful for balancing
+ * @node_affinity:	irq mask hints for irqbalance
  * @pending_mask:	pending rebalanced interrupts
  * @threads_active:	number of irqaction threads currently running
  * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
@@ -196,6 +197,7 @@ struct irq_desc {
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
 	unsigned int		node;
+	cpumask_var_t		node_affinity;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7305b29..9e80783 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,38 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	return 0;
 }
 
+/**
+ *	irq_set_node_affinity - Set the CPU mask this interrupt can run on
+ *	@irq:		Interrupt to modify
+ *	@cpumask:	CPU mask to assign to the interrupt
+ *
+ */
+int irq_set_node_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	cpumask_copy(desc->node_affinity, cpumask);
+	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(irq_set_node_affinity);
+
+/**
+ *	irq_get_node_affinity - Get the CPU mask this interrupt can run on
+ *	@irq:		Interrupt to get information
+ *
+ */
+struct cpumask *irq_get_node_affinity(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc->node_affinity;
+}
+EXPORT_SYMBOL(irq_get_node_affinity);
+
 #ifndef CONFIG_AUTO_IRQ_AFFINITY
 /*
  * Generic version of the affinity autoselector.
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145..192e3fb 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -31,6 +31,16 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int irq_node_affinity_proc_show(struct seq_file *m, void *v)
+{
+	struct irq_desc *desc = irq_to_desc((long)m->private);
+	const struct cpumask *mask = desc->node_affinity;
+
+	seq_cpumask(m, mask);
+	seq_putc(m, '\n');
+	return 0;
+}
+
 #ifndef is_affinity_mask_valid
 #define is_affinity_mask_valid(val) 1
 #endif
@@ -78,11 +88,46 @@ free_cpumask:
 	return err;
 }
 
+static ssize_t irq_node_affinity_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
+	cpumask_var_t new_value;
+	int err;
+
+	if (no_irq_affinity || irq_balancing_disabled(irq))
+		return -EIO;
+
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpumask_parse_user(buffer, count, new_value);
+	if (err)
+		goto free_cpumask;
+
+	if (!is_affinity_mask_valid(new_value)) {
+		err = -EINVAL;
+		goto free_cpumask;
+	}
+
+	irq_set_node_affinity(irq, new_value);
+	err = count;
+
+free_cpumask:
+	free_cpumask_var(new_value);
+	return err;
+}
+
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_node_affinity_proc_open(struct inode *inode, struct file *f)
+{
+	return single_open(f, irq_node_affinity_proc_show, PDE(inode)->data);
+}
+
 static const struct file_operations irq_affinity_proc_fops = {
 	.open		= irq_affinity_proc_open,
 	.read		= seq_read,
@@ -91,6 +136,14 @@ static const struct file_operations irq_affinity_proc_fops = {
 	.write		= irq_affinity_proc_write,
 };
 
+static const struct file_operations irq_node_affinity_proc_fops = {
+	.open		= irq_node_affinity_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= irq_node_affinity_proc_write,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -230,6 +283,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 	/* create /proc/irq/<irq>/smp_affinity */
 	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
+
+	/* create /proc/irq/<irq>/node_affinity */
+	proc_create_data("node_affinity", 0600, desc->dir,
+	                 &irq_node_affinity_proc_fops, (void *)(long)irq);
 #endif
 
 	proc_create_data("spurious", 0444, desc->dir,


^ permalink raw reply related	[flat|nested] 68+ messages in thread
* Ixgbe question
@ 2008-03-10 21:27 Ben Greear
  2008-03-11  1:01 ` Brandeburg, Jesse
  0 siblings, 1 reply; 68+ messages in thread
From: Ben Greear @ 2008-03-10 21:27 UTC (permalink / raw)
  To: NetDev

I have a pair of IXGBE NICs in a system, and notice a strange
case where the NIC is not always 'UP' after my programs finish
trying to configure it.  I haven't noticed this with any other
NICs, but I also just moved to 2.6.23.17 from 23.9 and 23.14.

I see this in the logs:

ADDRCONF(NETDEV_UP): eth0: link is not ready

It would seem to me that we should be able to set the admin
state to UP, even if the link is not up??

Kernel is 2.6.23.17 plus my hacks.   Ixgbe driver is version 1.3.7.8-lro.

Hardware is quad-core Intel, 2 build-in e1000, 2-port ixgbe (CX4) chipset NIC
on a pcie riser.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 68+ messages in thread

end of thread, other threads:[~2009-11-30 20:20 UTC | newest]

Thread overview: 68+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-23  6:46 [PATCH] irq: Add node_affinity CPU masks for smarter irqbalance hints Peter P Waskiewicz Jr
2009-11-23  7:32 ` Yong Zhang
2009-11-23  7:32   ` Yong Zhang
2009-11-23  9:36   ` Peter P Waskiewicz Jr
2009-11-23 10:21     ` ixgbe question Eric Dumazet
2009-11-23 10:30       ` Badalian Vyacheslav
2009-11-23 10:34       ` Waskiewicz Jr, Peter P
2009-11-23 10:37         ` Eric Dumazet
2009-11-23 14:05           ` Eric Dumazet
2009-11-23 21:26           ` David Miller
2009-11-23 14:10       ` Jesper Dangaard Brouer
2009-11-23 14:38         ` Eric Dumazet
2009-11-23 18:30           ` robert
2009-11-23 16:59             ` Eric Dumazet
2009-11-23 20:54               ` robert
2009-11-23 21:28                 ` David Miller
2009-11-23 22:14                   ` Robert Olsson
2009-11-23 23:28               ` Waskiewicz Jr, Peter P
2009-11-23 23:44                 ` David Miller
2009-11-24  7:46                 ` Eric Dumazet
2009-11-24  8:46                   ` Badalian Vyacheslav
2009-11-24  9:07                   ` Peter P Waskiewicz Jr
2009-11-24  9:55                     ` Eric Dumazet
2009-11-24 10:06                       ` Peter P Waskiewicz Jr
2009-11-24 11:37                         ` [PATCH net-next-2.6] ixgbe: Fix TX stats accounting Eric Dumazet
2009-11-24 13:23                           ` Eric Dumazet
2009-11-25  7:38                             ` Jeff Kirsher
2009-11-25  9:31                               ` Eric Dumazet
2009-11-25  9:38                                 ` Jeff Kirsher
2009-11-24 13:14                         ` ixgbe question John Fastabend
2009-11-29  8:18                           ` David Miller
2009-11-30 13:02                             ` Eric Dumazet
2009-11-30 20:20                               ` John Fastabend
2009-11-26 14:10                       ` Badalian Vyacheslav
2009-11-23 17:05     ` [PATCH] irq: Add node_affinity CPU masks for smarter irqbalance hints Peter Zijlstra
2009-11-23 23:32       ` Waskiewicz Jr, Peter P
2009-11-24  8:38         ` Peter Zijlstra
2009-11-24  8:59           ` Peter P Waskiewicz Jr
2009-11-24  9:08             ` Peter Zijlstra
2009-11-24  9:15               ` Peter P Waskiewicz Jr
2009-11-24 14:43               ` Arjan van de Ven
2009-11-24  9:15             ` Peter Zijlstra
2009-11-24 10:07             ` Thomas Gleixner
2009-11-24 17:55               ` Peter P Waskiewicz Jr
2009-11-25 11:18               ` Peter Zijlstra
2009-11-24  6:07       ` Arjan van de Ven
2009-11-24  8:39         ` Peter Zijlstra
2009-11-24 14:42           ` Arjan van de Ven
2009-11-24 17:39           ` David Miller
2009-11-24 17:56             ` Peter P Waskiewicz Jr
2009-11-24 18:26               ` Eric Dumazet
2009-11-24 18:33                 ` Peter P Waskiewicz Jr
2009-11-24 19:01                   ` Eric Dumazet
2009-11-24 19:53                     ` Peter P Waskiewicz Jr
2009-11-24 18:54                 ` David Miller
2009-11-24 18:58                   ` Eric Dumazet
2009-11-24 20:35                     ` Andi Kleen
2009-11-24 20:46                       ` Eric Dumazet
2009-11-25 10:30                         ` Eric Dumazet
2009-11-25 10:37                           ` Andi Kleen
2009-11-25 11:35                             ` Eric Dumazet
2009-11-25 11:50                               ` Andi Kleen
2009-11-26 11:43                                 ` Eric Dumazet
2009-11-24  5:17     ` Yong Zhang
2009-11-24  5:17       ` Yong Zhang
2009-11-24  8:39       ` Peter P Waskiewicz Jr
  -- strict thread matches above, loose matches on Subject: below --
2008-03-10 21:27 Ixgbe question Ben Greear
2008-03-11  1:01 ` Brandeburg, Jesse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.