linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
To: <linux-kernel@vger.kernel.org>
Cc: yuqi jin <jinyuqi@huawei.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Rapoport <rppt@linux.ibm.com>,
	Paul Burton <paul.burton@mips.com>,
	Michal Hocko <mhocko@suse.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	Shaokun Zhang <zhangshaokun@hisilicon.com>
Subject: [PATCH] lib: optimize cpumask_local_spread()
Date: Thu, 31 Oct 2019 14:03:33 +0800	[thread overview]
Message-ID: <1572501813-2125-1-git-send-email-zhangshaokun@hisilicon.com> (raw)

From: yuqi jin <jinyuqi@huawei.com>

In the multi-processor and NUMA system, A device may have many numa
nodes belonging to multiple cpus. When we get a local numa, it is better
to find the node closest to the local numa node to return instead of
going to the online cpu immediately.

For example, In Huawei Kunpeng 920 system, there are 4 NUMA node(0 -3)
in the 2-socket system(0 - 1). If the I/O device is in socket1
and the local NUMA node is 2, we shall choose the non-local node3 in
the same socket when cpu core in NUMA node2 is less that I/O requirements.
If we directly pick one cpu core from all online ones, it may be in
the another socket and it is not friendly for performance.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: yuqi jin <jinyuqi@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
---
Changes from RFC:
     Address Michal Hocko's comment: Use GFP_ATOMIC instead of GFP_KERNEL

 lib/cpumask.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 11 deletions(-)

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 0cb672eb107c..c92177b0e095 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -192,6 +192,33 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 }
 #endif
 
+static void calc_node_distance(int *node_dist, int node)
+{
+	int i;
+
+	for (i = 0; i < nr_node_ids; i++)
+		node_dist[i] = node_distance(node, i);
+}
+
+static int find_nearest_node(int *node_dist, bool *used_flag)
+{
+	int i, min_dist = node_dist[0], node_id = -1;
+
+	for (i = 0; i < nr_node_ids; i++)
+		if (used_flag[i] == 0) {
+			min_dist = node_dist[i];
+			node_id = i;
+			break;
+		}
+	for (i = 0; i < nr_node_ids; i++)
+		if (node_dist[i] < min_dist && used_flag[i] == 0) {
+			min_dist = node_dist[i];
+			node_id = i;
+		}
+
+	return node_id;
+}
+
 /**
  * cpumask_local_spread - select the i'th cpu with local numa cpu's first
  * @i: index number
@@ -205,7 +232,8 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
  */
 unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-	int cpu;
+	int cpu, j, id, *node_dist;
+	bool *used_flag;
 
 	/* Wrap: we always want a cpu. */
 	i %= num_online_cpus();
@@ -215,19 +243,45 @@ unsigned int cpumask_local_spread(unsigned int i, int node)
 			if (i-- == 0)
 				return cpu;
 	} else {
-		/* NUMA first. */
-		for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
-			if (i-- == 0)
-				return cpu;
+		node_dist = kmalloc_array(nr_node_ids, sizeof(int), GFP_ATOMIC);
+		if (!node_dist)
+			for_each_cpu(cpu, cpu_online_mask)
+				if (i-- == 0)
+					return cpu;
 
-		for_each_cpu(cpu, cpu_online_mask) {
-			/* Skip NUMA nodes, done above. */
-			if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
-				continue;
+		used_flag = kmalloc_array(nr_node_ids, sizeof(bool), GFP_ATOMIC);
+		if (!used_flag)
+			for_each_cpu(cpu, cpu_online_mask)
+				if (i-- == 0) {
+					kfree(node_dist);
+					return cpu;
+				}
+		memset(used_flag, 0, nr_node_ids * sizeof(bool));
 
-			if (i-- == 0)
-				return cpu;
+		calc_node_distance(node_dist, node);
+		for (j = 0; j < nr_node_ids; j++) {
+			id = find_nearest_node(node_dist, used_flag);
+			if (id < 0)
+				break;
+			for_each_cpu_and(cpu,
+				cpumask_of_node(id), cpu_online_mask)
+				if (i-- == 0) {
+					kfree(node_dist);
+					kfree(used_flag);
+					return cpu;
+				}
+			used_flag[id] = 1;
 		}
+
+		for_each_cpu(cpu, cpu_online_mask)
+			if (i-- == 0) {
+				kfree(node_dist);
+				kfree(used_flag);
+				return cpu;
+			}
+
+		kfree(node_dist);
+		kfree(used_flag);
 	}
 	BUG();
 }
-- 
2.7.4


             reply	other threads:[~2019-10-31  6:03 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-31  6:03 Shaokun Zhang [this message]
2019-10-31  7:39 ` [PATCH] lib: optimize cpumask_local_spread() Michal Hocko
2019-11-04  3:37   ` Shaokun Zhang
2019-11-01  0:37 ` Andrew Morton
2019-11-04  6:02   ` Shaokun Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1572501813-2125-1-git-send-email-zhangshaokun@hisilicon.com \
    --to=zhangshaokun@hisilicon.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=jinyuqi@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=mpe@ellerman.id.au \
    --cc=paul.burton@mips.com \
    --cc=rppt@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).