From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
To: linux-kernel@vger.kernel.org, arjan@linux.jf.intel.com
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH] irq: Add node_affinity CPU masks for smarter irqbalance hints
Date: Sun, 22 Nov 2009 23:12:50 -0800
Message-ID: <20091123071250.7626.17713.stgit@ppwaskie-hc2.jf.intel.com>

This patch adds a new CPU mask for SMP systems to the irq_desc
struct.  It also exposes an API for underlying device drivers to
assist irqbalance in making smarter decisions when balancing,
especially in a NUMA environment.  For example, an ethernet driver
using MSI-X may wish to limit the CPUs an interrupt can be balanced
across so that it stays on a single NUMA node.  Current irqbalance
operation can move the interrupt off the node, resulting in cross-node
memory accesses and lock contention.

The API consists of get/set functions within the kernel, along with a
/proc/irq/<irq>/node_affinity entry for each interrupt.
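
As an illustration of the driver-side API, a driver could publish its
hint like this (a hypothetical sketch, not part of this patch;
foo_adapter, its fields, and foo_set_irq_hints are made-up names):

	/* Hypothetical driver-side sketch (not part of this patch):
	 * hint that every MSI-X vector should be balanced within the
	 * CPUs of the device's home NUMA node.
	 */
	#include <linux/interrupt.h>
	#include <linux/device.h>
	#include <linux/topology.h>

	static void foo_set_irq_hints(struct foo_adapter *adapter)
	{
		int node = dev_to_node(&adapter->pdev->dev);
		const struct cpumask *mask = cpumask_of_node(node);
		int i;

		for (i = 0; i < adapter->num_msix_vectors; i++)
			irq_set_node_affinity(adapter->msix_entries[i].vector,
					      mask);
	}

irqbalance can then read the hint from /proc and keep those vectors
node-local.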

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---

 include/linux/interrupt.h |    8 ++++++
 include/linux/irq.h       |    2 ++
 kernel/irq/manage.c       |   32 +++++++++++++++++++++++++
 kernel/irq/proc.c         |   57 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00..9fd08aa 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -208,6 +208,8 @@ extern cpumask_var_t irq_default_affinity;
 extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
+extern int irq_set_node_affinity(unsigned int irq,
+                                 const struct cpumask *cpumask);
 
 #else /* CONFIG_SMP */
 
@@ -223,6 +225,12 @@ static inline int irq_can_set_affinity(unsigned int irq)
 
 static inline int irq_select_affinity(unsigned int irq)  { return 0; }
 
+static inline int irq_set_node_affinity(unsigned int irq,
+                                        const struct cpumask *m)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index ae9653d..26d7d07 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -166,6 +166,7 @@ struct irq_2_iommu;
  * @lock:		locking for SMP
  * @affinity:		IRQ affinity on SMP
  * @node:		node index useful for balancing
+ * @node_affinity:	irq mask hints for irqbalance
  * @pending_mask:	pending rebalanced interrupts
  * @threads_active:	number of irqaction threads currently running
  * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
@@ -196,6 +197,7 @@ struct irq_desc {
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
 	unsigned int		node;
+	cpumask_var_t		node_affinity;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7305b29..9e80783 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,38 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	return 0;
 }
 
+/**
+ *	irq_set_node_affinity - Set the CPU mask hint for balancing this interrupt
+ *	@irq:		Interrupt to modify
+ *	@cpumask:	CPU mask to assign to the interrupt
+ *
+ */
+int irq_set_node_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	cpumask_copy(desc->node_affinity, cpumask);
+	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(irq_set_node_affinity);
+
+/**
+ *	irq_get_node_affinity - Get the CPU mask hint for balancing this interrupt
+ *	@irq:		Interrupt to query
+ *
+ */
+struct cpumask *irq_get_node_affinity(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc->node_affinity;
+}
+EXPORT_SYMBOL(irq_get_node_affinity);
+
 #ifndef CONFIG_AUTO_IRQ_AFFINITY
 /*
  * Generic version of the affinity autoselector.
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145..192e3fb 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -31,6 +31,16 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int irq_node_affinity_proc_show(struct seq_file *m, void *v)
+{
+	struct irq_desc *desc = irq_to_desc((long)m->private);
+	const struct cpumask *mask = desc->node_affinity;
+
+	seq_cpumask(m, mask);
+	seq_putc(m, '\n');
+	return 0;
+}
+
 #ifndef is_affinity_mask_valid
 #define is_affinity_mask_valid(val) 1
 #endif
@@ -78,11 +88,46 @@ free_cpumask:
 	return err;
 }
 
+static ssize_t irq_node_affinity_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
+	cpumask_var_t new_value;
+	int err;
+
+	if (no_irq_affinity || irq_balancing_disabled(irq))
+		return -EIO;
+
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpumask_parse_user(buffer, count, new_value);
+	if (err)
+		goto free_cpumask;
+
+	if (!is_affinity_mask_valid(new_value)) {
+		err = -EINVAL;
+		goto free_cpumask;
+	}
+
+	irq_set_node_affinity(irq, new_value);
+	err = count;
+
+free_cpumask:
+	free_cpumask_var(new_value);
+	return err;
+}
+
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_node_affinity_proc_open(struct inode *inode, struct file *f)
+{
+	return single_open(f, irq_node_affinity_proc_show, PDE(inode)->data);
+}
+
 static const struct file_operations irq_affinity_proc_fops = {
 	.open		= irq_affinity_proc_open,
 	.read		= seq_read,
@@ -91,6 +136,14 @@ static const struct file_operations irq_affinity_proc_fops = {
 	.write		= irq_affinity_proc_write,
 };
 
+static const struct file_operations irq_node_affinity_proc_fops = {
+	.open		= irq_node_affinity_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= irq_node_affinity_proc_write,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -230,6 +283,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 	/* create /proc/irq/<irq>/smp_affinity */
 	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
+
+	/* create /proc/irq/<irq>/node_affinity */
+	proc_create_data("node_affinity", 0600, desc->dir,
+	                 &irq_node_affinity_proc_fops, (void *)(long)irq);
 #endif
 
 	proc_create_data("spurious", 0444, desc->dir,

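For completeness, here is a sketch of how a user-space balancer could
consume the hint (hypothetical code, not part of this patch; the file
contains a hex mask in the format written by seq_cpumask(), and IRQ 42
is an arbitrary example):

	/* Hypothetical user-space sketch: read the node_affinity hint
	 * for IRQ 42.  seq_cpumask() prints a hex mask such as
	 * "00000000,00000055".
	 */
	#include <stdio.h>

	int main(void)
	{
		char mask[256];
		FILE *f = fopen("/proc/irq/42/node_affinity", "r");

		if (!f)
			return 1;
		if (fgets(mask, sizeof(mask), f))
			printf("node_affinity hint for irq 42: %s", mask);
		fclose(f);
		return 0;
	}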

Thread overview: 37+ messages
2009-11-23  7:12 Peter P Waskiewicz Jr [this message]
  -- strict thread matches above, loose matches on Subject: below --
2009-11-23  6:46 [PATCH] irq: Add node_affinity CPU masks for smarter irqbalance hints Peter P Waskiewicz Jr
2009-11-23  7:32 ` Yong Zhang
2009-11-23  9:36   ` Peter P Waskiewicz Jr
2009-11-23 17:05     ` Peter Zijlstra
2009-11-23 23:32       ` Waskiewicz Jr, Peter P
2009-11-24  8:38         ` Peter Zijlstra
2009-11-24  8:59           ` Peter P Waskiewicz Jr
2009-11-24  9:08             ` Peter Zijlstra
2009-11-24  9:15               ` Peter P Waskiewicz Jr
2009-11-24 14:43               ` Arjan van de Ven
2009-11-24  9:15             ` Peter Zijlstra
2009-11-24 10:07             ` Thomas Gleixner
2009-11-24 17:55               ` Peter P Waskiewicz Jr
2009-11-25 11:18               ` Peter Zijlstra
2009-11-24  6:07       ` Arjan van de Ven
2009-11-24  8:39         ` Peter Zijlstra
2009-11-24 14:42           ` Arjan van de Ven
2009-11-24 17:39           ` David Miller
2009-11-24 17:56             ` Peter P Waskiewicz Jr
2009-11-24 18:26               ` Eric Dumazet
2009-11-24 18:33                 ` Peter P Waskiewicz Jr
2009-11-24 19:01                   ` Eric Dumazet
2009-11-24 19:53                     ` Peter P Waskiewicz Jr
2009-11-24 18:54                 ` David Miller
2009-11-24 18:58                   ` Eric Dumazet
2009-11-24 20:35                     ` Andi Kleen
2009-11-24 20:46                       ` Eric Dumazet
2009-11-25 10:30                         ` Eric Dumazet
2009-11-25 10:37                           ` Andi Kleen
2009-11-25 11:35                             ` Eric Dumazet
2009-11-25 11:50                               ` Andi Kleen
2009-11-26 11:43                                 ` Eric Dumazet
2009-11-24  5:17     ` Yong Zhang
2009-11-24  8:39       ` Peter P Waskiewicz Jr
