All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] RDMA/srp: Fix support for unpopulated NUMA nodes
@ 2020-11-25 18:26 Nicolas Morey-Chaisemartin
  2020-11-30  4:07 ` Bart Van Assche
  0 siblings, 1 reply; 3+ messages in thread
From: Nicolas Morey-Chaisemartin @ 2020-11-25 18:26 UTC (permalink / raw)
  To: linux-rdma, Bart Van Assche

The current code computes a number of channels per SRP target and spread them
equally across all online NUMA nodes.
Each channel is then assigned a CPU within this node.

However some NUMA nodes may not have any online CPU at the time.
This causes the SRP connection to fail as some channels do not get connected.

This patch solves the issue by using the number of populated NUMA nodes
instead of the number of online ones. It also ensures that there cannot be
more channels than online CPUs.

Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
  drivers/infiniband/ulp/srp/ib_srp.c | 48 ++++++++++++++++++++++-------
  1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d8fcd21ab472..e0c4e1ac05d2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3613,6 +3613,28 @@ static int srp_parse_options(struct net *net, const char *buf,
  	return ret;
  }
  
+static int srp_get_num_populated_nodes(void)
+{
+	int num_populated_nodes = 0;
+	int node, cpu;
+
+	for_each_online_node(node) {
+		bool populated = false;
+		for_each_online_cpu(cpu) {
+			if (cpu_to_node(cpu) == node){
+				populated = true;
+				break;
+			}
+		}
+		if (populated) {
+			num_populated_nodes++;
+			break;
+		}
+	}
+
+	return num_populated_nodes;
+}
+
  static ssize_t srp_create_target(struct device *dev,
  				 struct device_attribute *attr,
  				 const char *buf, size_t count)
@@ -3624,7 +3646,7 @@ static ssize_t srp_create_target(struct device *dev,
  	struct srp_rdma_ch *ch;
  	struct srp_device *srp_dev = host->srp_dev;
  	struct ib_device *ibdev = srp_dev->dev;
-	int ret, node_idx, node, cpu, i;
+	int ret, node_idx, node, cpu, i, num_populated_nodes;
  	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
  	bool multich = false;
  	uint32_t max_iu_len;
@@ -3749,13 +3771,16 @@ static ssize_t srp_create_target(struct device *dev,
  		goto out;
  
  	ret = -ENOMEM;
+
+	num_populated_nodes = srp_get_num_populated_nodes();
+
  	if (target->ch_count == 0)
  		target->ch_count =
-			max_t(unsigned int, num_online_nodes(),
-			      min(ch_count ?:
-					  min(4 * num_online_nodes(),
-					      ibdev->num_comp_vectors),
-				  num_online_cpus()));
+			min(ch_count ?:
+				min(4 * num_populated_nodes,
+					ibdev->num_comp_vectors),
+				num_online_cpus());
+
  	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
  			     GFP_KERNEL);
  	if (!target->ch)
@@ -3764,13 +3789,13 @@ static ssize_t srp_create_target(struct device *dev,
  	node_idx = 0;
  	for_each_online_node(node) {
  		const int ch_start = (node_idx * target->ch_count /
-				      num_online_nodes());
+				      num_populated_nodes);
  		const int ch_end = ((node_idx + 1) * target->ch_count /
-				    num_online_nodes());
+				    num_populated_nodes);
  		const int cv_start = node_idx * ibdev->num_comp_vectors /
-				     num_online_nodes();
+				     num_populated_nodes;
  		const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
-				   num_online_nodes();
+				   num_populated_nodes;
  		int cpu_idx = 0;
  
  		for_each_online_cpu(cpu) {
@@ -3823,7 +3848,8 @@ static ssize_t srp_create_target(struct device *dev,
  			multich = true;
  			cpu_idx++;
  		}
-		node_idx++;
+		if(cpu_idx)
+			node_idx++;
  	}
  
  connected:
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-11-30  6:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-25 18:26 [PATCH] RDMA/srp: Fix support for unpopulated NUMA nodes Nicolas Morey-Chaisemartin
2020-11-30  4:07 ` Bart Van Assche
2020-11-30  6:56   ` Nicolas Morey-Chaisemartin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.