stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>,
	Bart Van Assche <bvanassche@acm.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Yi Zhang <yi.zhang@redhat.com>
Subject: [PATCH 5.11 26/31] RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
Date: Fri, 19 Mar 2021 13:19:20 +0100	[thread overview]
Message-ID: <20210319121748.047018321@linuxfoundation.org> (raw)
In-Reply-To: <20210319121747.203523570@linuxfoundation.org>

From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>

commit 2b5715fc17386a6223490d5b8f08d031999b0c0b upstream.

The current code computes a number of channels per SRP target and spreads
them equally across all online NUMA nodes.  Each channel is then assigned
a CPU within this node.

When nodes are unbalanced, or even unpopulated, some channels are never
assigned a CPU and therefore never get connected.  This causes the SRP
connection to fail.

This patch solves the issue by rewriting channel computation and
allocation:

- Drop channel to node/CPU association as it had no real effect on
  locality but added unnecessary complexity.

- Tweak the number of channels allocated to reduce CPU contention when
  possible:
  - Up to one channel per CPU (instead of up to 4 per node)
  - At least 4 channels per node, unless ch_count module parameter is
    used.

Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c |  116 ++++++++++++++----------------------
 1 file changed, 48 insertions(+), 68 deletions(-)

--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct
 	struct srp_rdma_ch *ch;
 	struct srp_device *srp_dev = host->srp_dev;
 	struct ib_device *ibdev = srp_dev->dev;
-	int ret, node_idx, node, cpu, i;
+	int ret, i, ch_idx;
 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
 	bool multich = false;
 	uint32_t max_iu_len;
@@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct
 		goto out;
 
 	ret = -ENOMEM;
-	if (target->ch_count == 0)
+	if (target->ch_count == 0) {
 		target->ch_count =
-			max_t(unsigned int, num_online_nodes(),
-			      min(ch_count ?:
-					  min(4 * num_online_nodes(),
-					      ibdev->num_comp_vectors),
-				  num_online_cpus()));
+			min(ch_count ?:
+				max(4 * num_online_nodes(),
+				    ibdev->num_comp_vectors),
+				num_online_cpus());
+	}
+
 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
 			     GFP_KERNEL);
 	if (!target->ch)
 		goto out;
 
-	node_idx = 0;
-	for_each_online_node(node) {
-		const int ch_start = (node_idx * target->ch_count /
-				      num_online_nodes());
-		const int ch_end = ((node_idx + 1) * target->ch_count /
-				    num_online_nodes());
-		const int cv_start = node_idx * ibdev->num_comp_vectors /
-				     num_online_nodes();
-		const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
-				   num_online_nodes();
-		int cpu_idx = 0;
-
-		for_each_online_cpu(cpu) {
-			if (cpu_to_node(cpu) != node)
-				continue;
-			if (ch_start + cpu_idx >= ch_end)
-				continue;
-			ch = &target->ch[ch_start + cpu_idx];
-			ch->target = target;
-			ch->comp_vector = cv_start == cv_end ? cv_start :
-				cv_start + cpu_idx % (cv_end - cv_start);
-			spin_lock_init(&ch->lock);
-			INIT_LIST_HEAD(&ch->free_tx);
-			ret = srp_new_cm_id(ch);
-			if (ret)
-				goto err_disconnect;
-
-			ret = srp_create_ch_ib(ch);
-			if (ret)
-				goto err_disconnect;
-
-			ret = srp_alloc_req_data(ch);
-			if (ret)
-				goto err_disconnect;
-
-			ret = srp_connect_ch(ch, max_iu_len, multich);
-			if (ret) {
-				char dst[64];
-
-				if (target->using_rdma_cm)
-					snprintf(dst, sizeof(dst), "%pIS",
-						 &target->rdma_cm.dst);
-				else
-					snprintf(dst, sizeof(dst), "%pI6",
-						 target->ib_cm.orig_dgid.raw);
-				shost_printk(KERN_ERR, target->scsi_host,
-					     PFX "Connection %d/%d to %s failed\n",
-					     ch_start + cpu_idx,
-					     target->ch_count, dst);
-				if (node_idx == 0 && cpu_idx == 0) {
-					goto free_ch;
-				} else {
-					srp_free_ch_ib(target, ch);
-					srp_free_req_data(target, ch);
-					target->ch_count = ch - target->ch;
-					goto connected;
-				}
+	for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
+		ch = &target->ch[ch_idx];
+		ch->target = target;
+		ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
+		spin_lock_init(&ch->lock);
+		INIT_LIST_HEAD(&ch->free_tx);
+		ret = srp_new_cm_id(ch);
+		if (ret)
+			goto err_disconnect;
+
+		ret = srp_create_ch_ib(ch);
+		if (ret)
+			goto err_disconnect;
+
+		ret = srp_alloc_req_data(ch);
+		if (ret)
+			goto err_disconnect;
+
+		ret = srp_connect_ch(ch, max_iu_len, multich);
+		if (ret) {
+			char dst[64];
+
+			if (target->using_rdma_cm)
+				snprintf(dst, sizeof(dst), "%pIS",
+					&target->rdma_cm.dst);
+			else
+				snprintf(dst, sizeof(dst), "%pI6",
+					target->ib_cm.orig_dgid.raw);
+			shost_printk(KERN_ERR, target->scsi_host,
+				PFX "Connection %d/%d to %s failed\n",
+				ch_idx,
+				target->ch_count, dst);
+			if (ch_idx == 0) {
+				goto free_ch;
+			} else {
+				srp_free_ch_ib(target, ch);
+				srp_free_req_data(target, ch);
+				target->ch_count = ch - target->ch;
+				goto connected;
 			}
-
-			multich = true;
-			cpu_idx++;
 		}
-		node_idx++;
+		multich = true;
 	}
 
 connected:



  parent reply	other threads:[~2021-03-19 12:21 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-19 12:18 [PATCH 5.11 00/31] 5.11.8-rc1 review Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 01/31] io_uring: dont attempt IO reissue from the ring exit path Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 02/31] KVM: x86/mmu: Expand on the comment in kvm_vcpu_ad_need_write_protect() Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 03/31] KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 04/31] mptcp: send ack for every add_addr Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 05/31] mptcp: pm: add lockdep assertions Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 06/31] mptcp: dispose initial struct socket when its subflow is closed Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 07/31] io_uring: refactor scheduling in io_cqring_wait Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 08/31] io_uring: refactor io_cqring_wait Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 09/31] io_uring: dont keep looping for more events if we cant flush overflow Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 10/31] io_uring: simplify do_read return parsing Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 11/31] io_uring: clear IOCB_WAITQ for non -EIOCBQUEUED return Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 12/31] gpiolib: Read "gpio-line-names" from a firmware node Greg Kroah-Hartman
2021-03-19 12:27   ` Marek Vasut
2021-03-19 12:36     ` Greg Kroah-Hartman
2021-03-19 12:45       ` Marek Vasut
2021-03-19 12:19 ` [PATCH 5.11 13/31] net: bonding: fix error return code of bond_neigh_init() Greg Kroah-Hartman
2021-03-19 14:12   ` Jiri Kosina
2021-03-19 14:24     ` Jiri Kosina
2021-03-19 14:29       ` Greg Kroah-Hartman
2021-03-19 14:25     ` Greg Kroah-Hartman
2021-03-19 15:14       ` Jiri Kosina
2021-03-19 12:19 ` [PATCH 5.11 14/31] regulator: pca9450: Add SD_VSEL GPIO for LDO5 Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 15/31] regulator: pca9450: Enable system reset on WDOG_B assertion Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 16/31] regulator: pca9450: Clear PRESET_EN bit to fix BUCK1/2/3 voltage setting Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 17/31] gfs2: Add common helper for holding and releasing the freeze glock Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 18/31] gfs2: move freeze glock outside the make_fs_rw and _ro functions Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 19/31] gfs2: bypass signal_our_withdraw if no journal Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 20/31] bpf: Prohibit alu ops for pointer types not defining ptr_limit Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 21/31] bpf: Fix off-by-one for area size in creating mask to left Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 22/31] bpf: Simplify alu_limit masking for pointer arithmetic Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 23/31] bpf: Add sanity check for upper ptr_limit Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 24/31] bpf, selftests: Fix up some test_verifier cases for unprivileged Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 25/31] arm64: Unconditionally set virtual cpu id registers Greg Kroah-Hartman
2021-03-19 12:19 ` Greg Kroah-Hartman [this message]
2021-03-19 12:19 ` [PATCH 5.11 27/31] fuse: fix live lock in fuse_iget() Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 28/31] Revert "nfsd4: remove check_conflicting_opens warning" Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 29/31] Revert "nfsd4: a clients own opens neednt prevent delegations" Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 30/31] net: dsa: b53: Support setting learning on port Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 31/31] crypto: x86/aes-ni-xts - use direct calls to and 4-way stride Greg Kroah-Hartman
2021-03-19 19:38 ` [PATCH 5.11 00/31] 5.11.8-rc1 review Naresh Kamboju
2021-03-20  9:52   ` Greg Kroah-Hartman
2021-03-19 21:23 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210319121748.047018321@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=bvanassche@acm.org \
    --cc=jgg@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nmoreychaisemartin@suse.com \
    --cc=stable@vger.kernel.org \
    --cc=yi.zhang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).