From: <dlinkin@nvidia.com>
To: <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>, <kuba@kernel.org>, <jiri@nvidia.com>,
<stephen@networkplumber.org>, <dsahern@gmail.com>,
<vladbu@nvidia.com>, <parav@nvidia.com>, <huyn@nvidia.com>,
Dmytro Linkin <dlinkin@nvidia.com>
Subject: [PATCH RESEND net-next v3 06/18] devlink: Introduce rate object
Date: Wed, 2 Jun 2021 15:17:19 +0300 [thread overview]
Message-ID: <1622636251-29892-7-git-send-email-dlinkin@nvidia.com> (raw)
In-Reply-To: <1622636251-29892-1-git-send-email-dlinkin@nvidia.com>
From: Dmytro Linkin <dlinkin@nvidia.com>
Allow registering a rate object for devlink ports with the dedicated
devlink_rate_leaf_{create|destroy}() API. Implement a new netlink
DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info.
Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for
notifications when creating/deleting a leaf rate object.
Rate API is intended to be used for rate limiting of individual
devlink ports (leafs) and their aggregates (nodes).
Example:
$ devlink port show
pci/0000:03:00.0/0
pci/0000:03:00.0/1
$ devlink port function rate show
pci/0000:03:00.0/0: type leaf
pci/0000:03:00.0/1: type leaf
Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
Notes:
v1->v2:
- s/func/function/ in commit message
- fixes kernel-doc for devlink_rate_leaf_{create|destroy}()
include/net/devlink.h | 14 +++
include/uapi/linux/devlink.h | 11 +++
net/core/devlink.c | 229 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 253 insertions(+), 1 deletion(-)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7c984ca..2f5954d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -34,6 +34,7 @@ struct devlink_dev_stats {
struct devlink {
struct list_head list;
struct list_head port_list;
+ struct list_head rate_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
struct list_head resource_list;
@@ -133,6 +134,15 @@ struct devlink_port_attrs {
};
};
+struct devlink_rate { /* Rate object registered on a devlink instance */
+ struct list_head list; /* Entry in devlink->rate_list */
+ enum devlink_rate_type type; /* Kind of rate object, e.g. DEVLINK_RATE_TYPE_LEAF */
+ struct devlink *devlink; /* devlink instance this object is registered on */
+ void *priv; /* Driver private data supplied at creation */
+
+ struct devlink_port *devlink_port; /* Port a leaf rate object is attached to */
+};
+
struct devlink_port {
struct list_head list;
struct list_head param_list;
@@ -152,6 +162,8 @@ struct devlink_port {
struct delayed_work type_warn_dw;
struct list_head reporter_list;
struct mutex reporters_lock; /* Protects reporter_list */
+
+ struct devlink_rate *devlink_rate;
};
struct devlink_port_new_attrs {
@@ -1512,6 +1524,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
+int devlink_rate_leaf_create(struct devlink_port *port, void *priv);
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index f6008b2..0c27b45 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -126,6 +126,11 @@ enum devlink_command {
DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ DEVLINK_CMD_RATE_GET, /* can dump */
+ DEVLINK_CMD_RATE_SET,
+ DEVLINK_CMD_RATE_NEW,
+ DEVLINK_CMD_RATE_DEL,
+
/* add new commands above here */
__DEVLINK_CMD_MAX,
DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -206,6 +211,10 @@ enum devlink_port_flavour {
*/
};
+enum devlink_rate_type {
+ DEVLINK_RATE_TYPE_LEAF, /* rate object of an individual devlink port */
+};
+
enum devlink_param_cmode {
DEVLINK_PARAM_CMODE_RUNTIME,
DEVLINK_PARAM_CMODE_DRIVERINIT,
@@ -534,6 +543,8 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */
DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */
+
+ DEVLINK_ATTR_RATE_TYPE, /* u16 */
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4eb9695..28b2490 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -190,6 +190,25 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
return devlink_port_get_from_attrs(devlink, info->attrs);
}
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF; /* true only for leaf rate objects */
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{ /* Look up the port named by the request attributes and return its rate object. */
+ struct devlink_rate *devlink_rate;
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+ if (IS_ERR(devlink_port))
+ return ERR_CAST(devlink_port); /* pass the port lookup error through unchanged */
+ devlink_rate = devlink_port->devlink_rate;
+ return devlink_rate ?: ERR_PTR(-ENODEV); /* -ENODEV when the port has no rate object */
+}
+
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -408,12 +427,13 @@ struct devlink_snapshot {
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(3)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
@@ -442,6 +462,15 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = devlink_rate_leaf_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_rate;
}
return 0;
@@ -749,6 +778,39 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_rate *devlink_rate,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack) /* extack is not referenced in this body */
+{ /* Fill @msg with one generic netlink message of type @cmd describing @devlink_rate. */
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE; /* no room for the message header */
+
+ if (devlink_nl_put_handle(msg, devlink)) /* presumably adds the attributes identifying @devlink; helper not shown here */
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+ goto nla_put_failure;
+
+ if (devlink_rate_is_leaf(devlink_rate)) { /* leaf objects also report the index of their port */
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ devlink_rate->devlink_port->index))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr); /* discard the partially built message */
+ return -EMSGSIZE;
+}
+
static bool
devlink_port_fn_state_valid(enum devlink_port_fn_state state)
{
@@ -920,6 +982,99 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+ enum devlink_command cmd)
+{ /* Multicast a rate notification of type @cmd for @devlink_rate; failures are dropped silently. */
+ struct devlink *devlink = devlink_rate->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
+ cmd != DEVLINK_CMD_RATE_DEL); /* only warns; the message is still built and sent */
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return; /* best effort: no notification on allocation failure */
+
+ err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
+ cmd, 0, 0, 0, NULL); /* portid, seq and flags are all 0; no extack */
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); /* return value is ignored */
+}
+
+static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{ /* Dump the rate objects of every devlink instance in the requesting socket's netns. */
+ struct devlink_rate *devlink_rate;
+ struct devlink *devlink;
+ int start = cb->args[0]; /* index to resume from, as saved at the end of this function */
+ int idx = 0; /* running index over all rate objects visited */
+ int err = 0;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue; /* skip instances in other network namespaces */
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
+
+ if (idx < start) { /* entry lies before the saved resume point */
+ idx++;
+ continue;
+ }
+ err = devlink_nl_rate_fill(msg, devlink,
+ devlink_rate,
+ cmd, id,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, NULL);
+ if (err) { /* stop; idx still refers to the entry that failed */
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err; /* 0 when the walk completed, otherwise a non-EMSGSIZE error */
+
+ cb->args[0] = idx; /* -EMSGSIZE: remember where to resume on the next call */
+ return msg->len;
+}
+
+static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{ /* Reply to the requester with a single message describing one rate object. */
+ struct devlink_rate *devlink_rate = info->user_ptr[1]; /* stored by devlink_nl_pre_doit() for DEVLINK_NL_FLAG_NEED_RATE */
+ struct devlink *devlink = devlink_rate->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
+ DEVLINK_CMD_RATE_NEW, /* the reply is sent as a RATE_NEW message */
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg); /* fill failed: release the unsent reply */
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
@@ -7802,6 +7957,7 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
[DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7828,6 +7984,13 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .doit = devlink_nl_cmd_rate_get_doit,
+ .dumpit = devlink_nl_cmd_rate_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+ /* can be retrieved by unprivileged users */
+ },
+ {
.cmd = DEVLINK_CMD_PORT_SPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
@@ -8202,6 +8365,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
__devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
+ INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@@ -8304,6 +8468,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->resource_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
@@ -8620,6 +8785,68 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+/**
+ * devlink_rate_leaf_create - create devlink rate leaf
+ *
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ *
+ * Create a devlink rate object of type leaf on the provided @devlink_port.
+ * Triggers WARN_ON() if @devlink_port already has a devlink rate object.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
+ */
+int
+devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL); /* allocated before taking the lock */
+ if (!devlink_rate)
+ return -ENOMEM;
+
+ mutex_lock(&devlink->lock);
+ WARN_ON(devlink_port->devlink_rate); /* NOTE(review): only warns; the existing pointer is overwritten below */
+ devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+ devlink_rate->devlink = devlink;
+ devlink_rate->devlink_port = devlink_port;
+ devlink_rate->priv = priv;
+ list_add_tail(&devlink_rate->list, &devlink->rate_list);
+ devlink_port->devlink_rate = devlink_rate;
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW); /* sent once the object is linked to list and port */
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
+
+/**
+ * devlink_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+ struct devlink_rate *devlink_rate = devlink_port->devlink_rate; /* NOTE(review): read before devlink->lock is taken */
+ struct devlink *devlink = devlink_port->devlink;
+
+ if (!devlink_rate)
+ return; /* port has no rate object: nothing to do, lock is not taken */
+
+ mutex_lock(&devlink->lock);
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); /* notify before the object is unlinked */
+ list_del(&devlink_rate->list);
+ devlink_port->devlink_rate = NULL;
+ mutex_unlock(&devlink->lock);
+ kfree(devlink_rate); /* freed after unlinking and dropping the lock */
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
+
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{
--
1.8.3.1
next prev parent reply other threads:[~2021-06-02 12:18 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-02 12:17 [PATCH RESEND net-next v3 00/18] devlink: rate objects API dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 01/18] netdevsim: Add max_vfs to bus_dev dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 02/18] netdevsim: Disable VFs on nsim_dev_reload_destroy() call dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 03/18] netdevsim: Implement port types and indexing dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 04/18] netdevsim: Implement VFs dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 05/18] netdevsim: Implement legacy/switchdev mode for VFs dlinkin
2021-06-02 12:17 ` dlinkin [this message]
2021-06-02 12:17 ` [PATCH RESEND net-next v3 07/18] netdevsim: Register devlink rate leaf objects per VF dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 08/18] selftest: netdevsim: Add devlink rate test dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 09/18] devlink: Allow setting tx rate for devlink rate leaf objects dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 10/18] netdevsim: Implement devlink rate leafs tx rate support dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 11/18] selftest: netdevsim: Add devlink port shared/max tx rate test dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 12/18] devlink: Introduce rate nodes dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 13/18] netdevsim: Implement support for devlink " dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 14/18] selftest: netdevsim: Add devlink rate nodes test dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 15/18] devlink: Allow setting parent node of rate objects dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 16/18] netdevsim: " dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 17/18] selftest: netdevsim: Add devlink rate grouping test dlinkin
2021-06-02 12:17 ` [PATCH RESEND net-next v3 18/18] Documentation: devlink rate objects dlinkin
2021-06-02 12:31 ` [PATCH RESEND iproute2 net-next 0/4] devlink rate support Dmytro Linkin
2021-06-02 12:31 ` [PATCH RESEND iproute2 net-next 1/4] uapi: update devlink kernel header Dmytro Linkin
2021-06-02 12:31 ` [PATCH RESEND iproute2 net-next 2/4] devlink: Add helper function to validate object handler Dmytro Linkin
2021-06-02 12:31 ` [PATCH RESEND iproute2 net-next 3/4] devlink: Add port func rate support Dmytro Linkin
2021-06-02 12:31 ` [PATCH RESEND iproute2 net-next 4/4] devlink: Add ISO/IEC switch Dmytro Linkin
2021-06-02 16:58 ` [PATCH RESEND net-next v3 00/18] devlink: rate objects API Jakub Kicinski
2021-06-03 8:53 ` Dmytro Linkin
2021-06-04 1:59 ` Yunsheng Lin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1622636251-29892-7-git-send-email-dlinkin@nvidia.com \
--to=dlinkin@nvidia.com \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=huyn@nvidia.com \
--cc=jiri@nvidia.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=parav@nvidia.com \
--cc=stephen@networkplumber.org \
--cc=vladbu@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).