All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Mark Zhang <markzhang@nvidia.com>, linux-rdma@vger.kernel.org
Subject: [PATCH rdma-next 1/2] IB/core: Query IBoE link speed with a new driver API
Date: Mon, 10 Apr 2023 16:12:06 +0300	[thread overview]
Message-ID: <67b6ea0621b22b77db4cd637a4f9b48a2f447898.1681132096.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1681132096.git.leon@kernel.org>

From: Mark Zhang <markzhang@nvidia.com>

Currently the ethtool API is used to get the IBoE link speed, which must
be protected with the rtnl lock. This becomes a bottleneck when trying
to set up many rdma-cm connections at the same time, especially with
multiple processes.

To avoid this, a new driver API is introduced to query the IBoE link
speed. It is used whenever the driver implements it; otherwise the code
falls back to the existing ethtool path.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cma.c    |  6 ++++--
 drivers/infiniband/core/device.c |  1 +
 include/rdma/ib_addr.h           | 31 ++++++++++++++++++++-----------
 include/rdma/ib_verbs.h          |  3 +++
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9c7d26a7d243..ff706d2e39c6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3296,7 +3296,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	route->path_rec->traffic_class = tos;
 	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
 	route->path_rec->rate_selector = IB_SA_EQ;
-	route->path_rec->rate = iboe_get_rate(ndev);
+	route->path_rec->rate = iboe_get_rate(ndev, id_priv->id.device,
+					      id_priv->id.port_num);
 	dev_put(ndev);
 	route->path_rec->packet_life_time_selector = IB_SA_EQ;
 	/* In case ACK timeout is set, use this value to calculate
@@ -4962,7 +4963,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	if (!ndev)
 		return -ENODEV;
 
-	ib.rec.rate = iboe_get_rate(ndev);
+	ib.rec.rate = iboe_get_rate(ndev, id_priv->id.device,
+				    id_priv->id.port_num);
 	ib.rec.hop_limit = 1;
 	ib.rec.mtu = iboe_get_mtu(ndev->mtu);
 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a666847bd714..ba06a08c6497 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2693,6 +2693,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, query_ah);
 	SET_DEVICE_OP(dev_ops, query_device);
 	SET_DEVICE_OP(dev_ops, query_gid);
+	SET_DEVICE_OP(dev_ops, query_iboe_speed);
 	SET_DEVICE_OP(dev_ops, query_pkey);
 	SET_DEVICE_OP(dev_ops, query_port);
 	SET_DEVICE_OP(dev_ops, query_qp);
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index d808dc3d239e..de762210ebd1 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -194,24 +194,33 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
 		return 0;
 }
 
-static inline int iboe_get_rate(struct net_device *dev)
+static inline int iboe_get_rate(struct net_device *ndev,
+				struct ib_device *ibdev, u32 port_num)
 {
 	struct ethtool_link_ksettings cmd;
-	int err;
+	int speed, err;
 
-	rtnl_lock();
-	err = __ethtool_get_link_ksettings(dev, &cmd);
-	rtnl_unlock();
-	if (err)
-		return IB_RATE_PORT_CURRENT;
+	if (ibdev->ops.query_iboe_speed) {
+		err = ibdev->ops.query_iboe_speed(ibdev, port_num, &speed);
+		if (err)
+			return IB_RATE_PORT_CURRENT;
+	} else {
+		rtnl_lock();
+		err = __ethtool_get_link_ksettings(ndev, &cmd);
+		rtnl_unlock();
+		if (err)
+			return IB_RATE_PORT_CURRENT;
+
+		speed = cmd.base.speed;
+	}
 
-	if (cmd.base.speed >= 40000)
+	if (speed >= 40000)
 		return IB_RATE_40_GBPS;
-	else if (cmd.base.speed >= 30000)
+	else if (speed >= 30000)
 		return IB_RATE_30_GBPS;
-	else if (cmd.base.speed >= 20000)
+	else if (speed >= 20000)
 		return IB_RATE_20_GBPS;
-	else if (cmd.base.speed >= 10000)
+	else if (speed >= 10000)
 		return IB_RATE_10_GBPS;
 	else
 		return IB_RATE_PORT_CURRENT;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index cc2ddd4e6c12..b143258b847f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2678,6 +2678,9 @@ struct ib_device_ops {
 	int (*query_ucontext)(struct ib_ucontext *context,
 			      struct uverbs_attr_bundle *attrs);
 
+	/* Query driver for IBoE link speed */
+	int (*query_iboe_speed)(struct ib_device *device, u32 port_num,
+				int *speed);
 	/*
 	 * Provide NUMA node. This API exists for rdmavt/hfi1 only.
 	 * Everyone else relies on Linux memory management model.
-- 
2.39.2


  reply	other threads:[~2023-04-10 13:12 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-10 13:12 [PATCH rdma-next 0/2] Query IBoE speed directly Leon Romanovsky
2023-04-10 13:12 ` Leon Romanovsky [this message]
2023-04-10 23:23   ` [PATCH rdma-next 1/2] IB/core: Query IBoE link speed with a new driver API Jason Gunthorpe
2023-04-10 23:42     ` Mark Zhang
2023-04-11 11:20       ` Jason Gunthorpe
2023-04-10 13:12 ` [PATCH rdma-next 2/2] IB/mlx5: Implement query_iboe_speed " Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=67b6ea0621b22b77db4cd637a4f9b48a2f447898.1681132096.git.leon@kernel.org \
    --to=leon@kernel.org \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=markzhang@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.