linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mark Zhang <markzhang@nvidia.com>
To: <jgg@nvidia.com>, <dledford@redhat.com>, <saeedm@nvidia.com>
Cc: <linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
	<aharonl@nvidia.com>, <netao@nvidia.com>, <leonro@nvidia.com>,
	Mark Zhang <markzhang@nvidia.com>
Subject: [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters
Date: Wed, 18 Aug 2021 14:24:24 +0300	[thread overview]
Message-ID: <20210818112428.209111-7-markzhang@nvidia.com> (raw)
In-Reply-To: <20210818112428.209111-1-markzhang@nvidia.com>

From: Aharon Landau <aharonl@nvidia.com>

Add the ability to add optional counters to, and remove them from, a link
through RDMA netlink. Limit both operations to users with the ADMIN
capability.

Examples:
$ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts
$ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
 drivers/infiniband/core/counters.c | 50 ++++++++++++++++
 drivers/infiniband/core/device.c   |  2 +
 drivers/infiniband/core/nldev.c    | 93 ++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h            |  7 +++
 include/rdma/rdma_counter.h        |  4 ++
 include/rdma/rdma_netlink.h        |  1 +
 include/uapi/rdma/rdma_netlink.h   |  9 +++
 7 files changed, 166 insertions(+)

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index b8b6db98bfdf..fa04178aa0eb 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -106,6 +106,78 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
 	return ret;
 }
 
+/*
+ * Look up an optional counter by name in @opstats.
+ *
+ * Returns the matching entry, or NULL when no counter has that name.
+ */
+static struct rdma_op_counter *get_opcounter(struct rdma_op_stats *opstats,
+					     const char *name)
+{
+	int i;
+
+	for (i = 0; i < opstats->num_opcounters; i++)
+		if (!strcmp(opstats->opcounters[i].name, name))
+			return opstats->opcounters + i;
+
+	return NULL;
+}
+
+/*
+ * Enable (@is_add) or disable (!@is_add) the optional counter called @name
+ * on @port of @dev by calling the driver's add_op_stat()/remove_op_stat().
+ *
+ * The driver call and the update of the cached ->enabled flag are done under
+ * opstats->lock; ->enabled only changes when the driver call succeeds.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the driver lacks either callback or
+ * the port has no optional counter table, -EINVAL when @name is unknown, or
+ * the error returned by the driver.
+ */
+static int rdma_opcounter_set(struct ib_device *dev, u32 port,
+			      const char *name, bool is_add)
+{
+	struct rdma_op_stats *opstats;
+	struct rdma_op_counter *opc;
+	int ret;
+
+	if (!dev->ops.add_op_stat || !dev->ops.remove_op_stat)
+		return -EOPNOTSUPP;
+
+	/* Ports without an optional counter table have nothing to toggle */
+	opstats = dev->port_data[port].port_counter.opstats;
+	if (!opstats)
+		return -EOPNOTSUPP;
+
+	opc = get_opcounter(opstats, name);
+	if (!opc)
+		return -EINVAL;
+
+	mutex_lock(&opstats->lock);
+	if (is_add)
+		ret = dev->ops.add_op_stat(dev, port, opc->type);
+	else
+		ret = dev->ops.remove_op_stat(dev, port, opc->type);
+	if (!ret)
+		opc->enabled = is_add;
+	mutex_unlock(&opstats->lock);
+
+	return ret;
+}
+
+/* Enable the optional counter @name on @port of @dev. */
+int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name)
+{
+	return rdma_opcounter_set(dev, port, name, true);
+}
+
+/* Disable the optional counter @name on @port of @dev. */
+int rdma_opcounter_remove(struct ib_device *dev, u32 port,
+			  const char *name)
+{
+	return rdma_opcounter_set(dev, port, name, false);
+}
+
 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
 					   struct ib_qp *qp,
 					   enum rdma_nl_counter_mode mode)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 23e1ae50b2e4..b9138f20f9a8 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2590,6 +2590,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 		ops->uverbs_no_driver_id_binding;
 
 	SET_DEVICE_OP(dev_ops, add_gid);
+	SET_DEVICE_OP(dev_ops, add_op_stat);
 	SET_DEVICE_OP(dev_ops, advise_mr);
 	SET_DEVICE_OP(dev_ops, alloc_dm);
 	SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
@@ -2701,6 +2702,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, reg_dm_mr);
 	SET_DEVICE_OP(dev_ops, reg_user_mr);
 	SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
+	SET_DEVICE_OP(dev_ops, remove_op_stat);
 	SET_DEVICE_OP(dev_ops, req_notify_cq);
 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
 	SET_DEVICE_OP(dev_ops, resize_cq);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e9b4b2cccaa0..17d55d89f11c 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -154,6 +154,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTERS]       = { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY]  = { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+				  .len = RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE },
+	[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1888,6 +1893,111 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return err;
 }
 
+/*
+ * Common handler for RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER and
+ * RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER.
+ *
+ * The request must carry the device index, the port index and the name of
+ * the optional counter; @cmd_add selects between enabling and disabling it.
+ * On success a reply containing only the netlink header of the same command
+ * is unicast back to the requester.
+ *
+ * Returns the rdma_nl_unicast() result on success, -EINVAL for a malformed
+ * request, unknown device, invalid port or over-long counter name,
+ * -ENOMEM/-EMSGSIZE when the reply cannot be built, or the error returned by
+ * rdma_opcounter_add()/rdma_opcounter_remove().
+ */
+static int nldev_stat_set_op_stat(struct sk_buff *skb,
+				  struct nlmsghdr *nlh,
+				  struct netlink_ext_ack *extack,
+				  bool cmd_add)
+{
+	char opcounter[RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE] = {};
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	struct sk_buff *msg;
+	u32 index, port;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(sock_net(skb->sk), index);
+	if (!device)
+		return -EINVAL;
+
+	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	if (!rdma_is_port_valid(device, port)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* Reject a name that does not fit rather than match on a prefix */
+	if (nla_strscpy(opcounter,
+			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME],
+			sizeof(opcounter)) < 0) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+					 (cmd_add ?
+					  RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER :
+					  RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)),
+			0, 0);
+	/* nlmsg_put() returns NULL when the header does not fit in @msg */
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_msg;
+	}
+
+	if (cmd_add)
+		ret = rdma_opcounter_add(device, port, opcounter);
+	else
+		ret = rdma_opcounter_remove(device, port, opcounter);
+	if (ret)
+		goto err_msg;
+
+	nlmsg_end(msg, nlh);
+	ib_device_put(device);
+	return rdma_nl_unicast(sock_net(skb->sk), msg,
+			       NETLINK_CB(skb).portid);
+
+err_msg:
+	nlmsg_free(msg);
+err:
+	ib_device_put(device);
+	return ret;
+}
+
+/* .doit handler for RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER */
+static int nldev_stat_add_op_stat_doit(struct sk_buff *skb,
+				       struct nlmsghdr *nlh,
+				       struct netlink_ext_ack *extack)
+{
+	return nldev_stat_set_op_stat(skb, nlh, extack, true);
+}
+
+/* .doit handler for RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER */
+static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb,
+					  struct nlmsghdr *nlh,
+					  struct netlink_ext_ack *extack)
+{
+	return nldev_stat_set_op_stat(skb, nlh, extack, false);
+}
+
 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			       struct netlink_ext_ack *extack)
 {
@@ -2342,6 +2427,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.dump = nldev_res_get_mr_raw_dumpit,
 		.flags = RDMA_NL_ADMIN_PERM,
 	},
+	[RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = {
+		.doit = nldev_stat_add_op_stat_doit,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
+	[RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = {
+		.doit = nldev_stat_remove_op_stat_doit,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 40b0f7825975..fa9e668b9b14 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -600,11 +600,14 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 
 /**
  * struct rdma_op_counter
+ * @enabled - Whether this counter is currently enabled (optional counters
+ *    can be enabled and disabled dynamically)
  * @type - The vendor-specific type of the counter
  * @name - The name of the counter
  * @value - The value of the counter
  */
 struct rdma_op_counter {
+	bool enabled;
 	int type;
 	const char *name;
 	u64 value;
@@ -2595,6 +2598,10 @@ struct ib_device_ops {
 	struct rdma_op_stats *(*alloc_op_port_stats)(struct ib_device *device,
 						     u32 port_num);
 
+	int (*add_op_stat)(struct ib_device *device, u32 port,
+			   int optional_stat);
+	int (*remove_op_stat)(struct ib_device *device, u32 port,
+			      int optional_stat);
 	/**
 	 * Allows rdma drivers to add their own restrack attributes.
 	 */
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index 3531c5061718..48086a7248ac 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -63,5 +63,9 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
 int rdma_counter_get_mode(struct ib_device *dev, u32 port,
 			  enum rdma_nl_counter_mode *mode,
 			  enum rdma_nl_counter_mask *mask);
+int rdma_opcounter_add(struct ib_device *dev, u32 port,
+		       const char *name);
+int rdma_opcounter_remove(struct ib_device *dev, u32 port,
+			  const char *name);
 
 #endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 2758d9df71ee..ac47a0cc0508 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -10,6 +10,7 @@ enum {
 	RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
 	RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
 	RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+	RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE = 64,
 };
 
 struct rdma_nl_cbs {
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 75a1ae2311d8..79e6ca87d2e0 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -297,6 +297,10 @@ enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER,
+
+	RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER,
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -549,6 +553,11 @@ enum rdma_nldev_attr {
 
 	RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK,	/* u8 */
 
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTERS,	/* nested table */
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY,	/* nested table */
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME,	/* string */
+	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE,	/* u64 */
+
 	/*
 	 * Always the end
 	 */
-- 
2.26.2


  parent reply	other threads:[~2021-08-18 11:25 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-18 11:24 [PATCH rdma-next 00/10] Optional counter statistics support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 01/10] net/mlx5: Add support in bth_opcode as a match criteria Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 02/10] net/mlx5: Add priorities for counters in RDMA namespaces Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 03/10] RDMA/counters: Support to allocate per-port optional counter statistics Mark Zhang
2021-08-23 19:30   ` Jason Gunthorpe
2021-08-24  6:22     ` Mark Zhang
2021-08-24 13:14       ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 04/10] RDMA/mlx5: Add alloc_op_port_stats() support Mark Zhang
2021-08-23 19:19   ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 05/10] RDMA/mlx5: Add steering support in optional flow counters Mark Zhang
2021-08-18 11:24 ` Mark Zhang [this message]
2021-08-23 19:42   ` [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters Jason Gunthorpe
2021-08-24  2:09     ` Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 07/10] RDMA/mlx5: Add add_op_stat() and remove_op_stat() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 08/10] RDMA/nldev: Add support to get optional counters statistics Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 09/10] RDMA/mlx5: Add get_op_stats() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 10/10] RDMA/nldev: Add support to get current enabled optional counters Mark Zhang
2021-08-23 19:44   ` Jason Gunthorpe
2021-08-24  2:13     ` Mark Zhang
2021-08-24 13:13       ` Jason Gunthorpe
2021-08-23 19:33 ` [PATCH rdma-next 00/10] Optional counter statistics support Jason Gunthorpe
2021-08-24  1:44   ` Mark Zhang
2021-08-24 13:11     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210818112428.209111-7-markzhang@nvidia.com \
    --to=markzhang@nvidia.com \
    --cc=aharonl@nvidia.com \
    --cc=dledford@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netao@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).