Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Thomas Gleixner <tglx@linutronix.de>, Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>,
	linux-nvme@lists.infradead.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 4/8] genirq/affinity: assign vectors to all present CPUs
Date: Sat,  3 Jun 2017 16:03:59 +0200
Message-ID: <20170603140403.27379-5-hch@lst.de> (raw)
In-Reply-To: <20170603140403.27379-1-hch@lst.de>

Currently we only assign spread vectors to online CPUs, which ties the
IRQ mapping to the currently online devices and doesn't deal nicely with
the fact that CPUs could come and go rapidly due to e.g. power management.

Instead assign vectors to all present CPUs to avoid this churn.

For this we have to build a map of all possible CPUs for a given node, as
the architectures only provide a map of all online CPUs.  We do this
dynamically on each call for the vector assignments, which is a bit
suboptimal and could be optimized in the future by providing a mapping
from the arch code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/irq/affinity.c | 71 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 57 insertions(+), 14 deletions(-)

diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 3cec0042fad2..337e6ffba93f 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -63,13 +63,54 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
 	}
 }
 
-static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
+static cpumask_var_t *alloc_node_to_present_cpumask(void)
+{
+	int node;
+	cpumask_var_t *masks;
+
+	masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
+	if (!masks)
+		return NULL;
+
+	for (node = 0; node < nr_node_ids; node++) {
+		if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
+			goto out_unwind;
+	}
+
+	return masks;
+
+out_unwind:
+	while (--node >= 0)
+		free_cpumask_var(masks[node]);
+	kfree(masks);
+	return NULL;
+}
+
+static void free_node_to_present_cpumask(cpumask_var_t *masks)
+{
+	int node;
+
+	for (node = 0; node < nr_node_ids; node++)
+		free_cpumask_var(masks[node]);
+	kfree(masks);
+}
+
+static void build_node_to_present_cpumask(cpumask_var_t *masks)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu)
+		cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
+}
+
+static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask,
+				const struct cpumask *mask, nodemask_t *nodemsk)
 {
 	int n, nodes = 0;
 
 	/* Calculate the number of nodes in the supplied affinity mask */
-	for_each_online_node(n) {
-		if (cpumask_intersects(mask, cpumask_of_node(n))) {
+	for_each_node(n) {
+		if (cpumask_intersects(mask, node_to_present_cpumask[n])) {
 			node_set(n, *nodemsk);
 			nodes++;
 		}
@@ -92,7 +133,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	int last_affv = affv + affd->pre_vectors;
 	nodemask_t nodemsk = NODE_MASK_NONE;
 	struct cpumask *masks;
-	cpumask_var_t nmsk;
+	cpumask_var_t nmsk, *node_to_present_cpumask;
 
 	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 		return NULL;
@@ -101,13 +142,19 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	if (!masks)
 		goto out;
 
+	node_to_present_cpumask = alloc_node_to_present_cpumask();
+	if (!node_to_present_cpumask)
+		goto out;
+
 	/* Fill out vectors at the beginning that don't need affinity */
 	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
 		cpumask_copy(masks + curvec, irq_default_affinity);
 
 	/* Stabilize the cpumasks */
 	get_online_cpus();
-	nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk);
+	build_node_to_present_cpumask(node_to_present_cpumask);
+	nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask,
+			&nodemsk);
 
 	/*
 	 * If the number of nodes in the mask is greater than or equal the
@@ -115,7 +162,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	 */
 	if (affv <= nodes) {
 		for_each_node_mask(n, nodemsk) {
-			cpumask_copy(masks + curvec, cpumask_of_node(n));
+			cpumask_copy(masks + curvec,
+				     node_to_present_cpumask[n]);
 			if (++curvec == last_affv)
 				break;
 		}
@@ -129,7 +177,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 		vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
 
 		/* Get the cpus on this node which are in the mask */
-		cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
+		cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]);
 
 		/* Calculate the number of cpus per vector */
 		ncpus = cpumask_weight(nmsk);
@@ -161,6 +209,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	/* Fill out vectors at the end that don't need affinity */
 	for (; curvec < nvecs; curvec++)
 		cpumask_copy(masks + curvec, irq_default_affinity);
+	free_node_to_present_cpumask(node_to_present_cpumask);
 out:
 	free_cpumask_var(nmsk);
 	return masks;
@@ -175,12 +224,6 @@ int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
 {
 	int resv = affd->pre_vectors + affd->post_vectors;
 	int vecs = maxvec - resv;
-	int cpus;
-
-	/* Stabilize the cpumasks */
-	get_online_cpus();
-	cpus = cpumask_weight(cpu_online_mask);
-	put_online_cpus();
 
-	return min(cpus, vecs) + resv;
+	return min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv;
 }
-- 
2.11.0

  parent reply index

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-03 14:03 spread MSI(-X) vectors to all possible CPUs V2 Christoph Hellwig
2017-06-03 14:03 ` [PATCH 1/8] genirq: allow assigning affinity to present but not online CPUs Christoph Hellwig
2017-06-04 15:14   ` Sagi Grimberg
2017-06-17 23:21   ` Thomas Gleixner
2017-06-03 14:03 ` [PATCH 2/8] genirq: move pending helpers to internal.h Christoph Hellwig
2017-06-04 15:15   ` Sagi Grimberg
2017-06-03 14:03 ` [PATCH 3/8] genirq/affinity: factor out a irq_affinity_set helper Christoph Hellwig
2017-06-04 15:15   ` Sagi Grimberg
2017-06-16 10:23   ` Thomas Gleixner
2017-06-16 11:08   ` Thomas Gleixner
2017-06-16 12:00     ` Thomas Gleixner
2017-06-17 23:14   ` Thomas Gleixner
2017-06-03 14:03 ` Christoph Hellwig [this message]
2017-06-04 15:17   ` [PATCH 4/8] genirq/affinity: assign vectors to all present CPUs Sagi Grimberg
2017-06-03 14:04 ` [PATCH 5/8] genirq/affinity: update CPU affinity for CPU hotplug events Christoph Hellwig
2017-06-16 10:26   ` Thomas Gleixner
2017-06-16 10:29   ` Thomas Gleixner
2017-06-03 14:04 ` [PATCH 6/8] blk-mq: include all present CPUs in the default queue mapping Christoph Hellwig
2017-06-04 15:11   ` Sagi Grimberg
2017-06-03 14:04 ` [PATCH 7/8] blk-mq: create hctx for each present CPU Christoph Hellwig
2017-06-04 15:11   ` Sagi Grimberg
2017-06-07  9:10   ` Ming Lei
2017-06-07 19:06     ` Christoph Hellwig
2017-06-08  2:28       ` Ming Lei
2017-06-07 22:04   ` Omar Sandoval
2017-06-08  6:58     ` Christoph Hellwig
2017-06-03 14:04 ` [PATCH 8/8] nvme: allocate queues for all possible CPUs Christoph Hellwig
2017-06-04 15:13   ` Sagi Grimberg
2017-06-16  6:48 ` spread MSI(-X) vectors to all possible CPUs V2 Christoph Hellwig
2017-06-16  7:28   ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170603140403.27379-5-hch@lst.de \
    --to=hch@lst.de \
    --cc=axboe@kernel.dk \
    --cc=keith.busch@intel.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git