netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ariel Levkovich <lariel@mellanox.com>
To: "netdev@vger.kernel.org" <netdev@vger.kernel.org>
Cc: Saeed Mahameed <saeedm@mellanox.com>,
	"sd@queasysnail.net" <sd@queasysnail.net>,
	"sbrivio@redhat.com" <sbrivio@redhat.com>,
	"nikolay@cumulusnetworks.com" <nikolay@cumulusnetworks.com>,
	Jiri Pirko <jiri@mellanox.com>,
	"dsahern@gmail.com" <dsahern@gmail.com>,
	"stephen@networkplumber.org" <stephen@networkplumber.org>,
	Ariel Levkovich <lariel@mellanox.com>
Subject: [PATCH net-next v2 1/3] net: Support querying specific VF properties
Date: Thu, 31 Oct 2019 19:47:34 +0000	[thread overview]
Message-ID: <1572551213-9022-2-git-send-email-lariel@mellanox.com> (raw)
In-Reply-To: <1572551213-9022-1-git-send-email-lariel@mellanox.com>

Querying the link with its VFs information involves putting a
vfinfo struct per VF in the netlink message under the
IFLA_VFINFO_LIST attribute.

Since the attribute's length is limited by its definition to u16,
this introduces a problem when we want to add new fields to the
vfinfo attribute.
As the vfinfo attribute grows, an environment with a large number
of VFs may overflow the IFLA_VFINFO_LIST attribute length.

To avoid that, this patch introduces a single VF query.
With a single VF query, the kernel may include extended VF
information in the vfinfo attribute, including fields that take up
a significant amount of memory.
This information is omitted from the VF list query, which prevents
the attribute length from overflowing.

The admin will be able to query the link and get extended VF info
using the ip tool with the following command:
ip link show dev <ifname> vf <vf_num>

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
---
 include/uapi/linux/if_link.h   |  1 +
 include/uapi/linux/rtnetlink.h |  3 ++-
 net/core/rtnetlink.c           | 53 +++++++++++++++++++++++++++++++++---------
 3 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8aec876..797e214 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -169,6 +169,7 @@ enum {
 	IFLA_MAX_MTU,
 	IFLA_PROP_LIST,
 	IFLA_ALT_IFNAME, /* Alternative ifname */
+	IFLA_VF_NUM, /* Get extended information for specific VF */
 	__IFLA_MAX
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 1418a83..8825ede 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -759,7 +759,8 @@ enum {
 #define RTEXT_FILTER_VF		(1 << 0)
 #define RTEXT_FILTER_BRVLAN	(1 << 1)
 #define RTEXT_FILTER_BRVLAN_COMPRESSED	(1 << 2)
-#define	RTEXT_FILTER_SKIP_STATS	(1 << 3)
+#define RTEXT_FILTER_SKIP_STATS	(1 << 3)
+#define RTEXT_FILTER_VF_EXT	(1 << 4)
 
 /* End of information exported to user level */
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 49fa910..4dd5939 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -906,9 +906,14 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 static inline int rtnl_vfinfo_size(const struct net_device *dev,
 				   u32 ext_filter_mask)
 {
-	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) {
+	if (dev->dev.parent &&
+	    (ext_filter_mask & (RTEXT_FILTER_VF | RTEXT_FILTER_VF_EXT))) {
 		int num_vfs = dev_num_vf(dev->dev.parent);
 		size_t size = nla_total_size(0);
+
+		if (num_vfs && (ext_filter_mask & RTEXT_FILTER_VF_EXT))
+			num_vfs = 1;
+
 		size += num_vfs *
 			(nla_total_size(0) +
 			 nla_total_size(sizeof(struct ifla_vf_mac)) +
@@ -1022,7 +1027,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_LINK_NETNSID */
 	       + nla_total_size(4) /* IFLA_GROUP */
 	       + nla_total_size(ext_filter_mask
-			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+				& (RTEXT_FILTER_VF | RTEXT_FILTER_VF_EXT) ?
+				4 : 0) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
@@ -1203,7 +1209,8 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
 static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 					       struct net_device *dev,
 					       int vfs_num,
-					       struct nlattr *vfinfo)
+					       struct nlattr *vfinfo,
+					       bool vf_ext)
 {
 	struct ifla_vf_rss_query_en vf_rss_query_en;
 	struct nlattr *vf, *vfstats, *vfvlanlist;
@@ -1332,15 +1339,25 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 
 static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
 					   struct net_device *dev,
-					   u32 ext_filter_mask)
+					   u32 ext_filter_mask,
+					   int vf)
 {
+	bool vf_ext = (ext_filter_mask & RTEXT_FILTER_VF_EXT) && (vf >= 0);
 	struct nlattr *vfinfo;
 	int i, num_vfs;
 
-	if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+	if (!dev->dev.parent ||
+	    ((ext_filter_mask & (RTEXT_FILTER_VF | RTEXT_FILTER_VF_EXT)) == 0))
 		return 0;
 
 	num_vfs = dev_num_vf(dev->dev.parent);
+	if (vf_ext && num_vfs) {
+		if (vf > num_vfs)
+			return 0;
+
+		num_vfs = 1;
+	}
+
 	if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
 		return -EMSGSIZE;
 
@@ -1352,7 +1369,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
 		return -EMSGSIZE;
 
 	for (i = 0; i < num_vfs; i++) {
-		if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+		if (rtnl_fill_vfinfo(skb, dev, vf_ext ? vf : i, vfinfo, vf_ext))
 			return -EMSGSIZE;
 	}
 
@@ -1639,7 +1656,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask,
 			    u32 event, int *new_nsid, int new_ifindex,
-			    int tgt_netnsid)
+			    int tgt_netnsid, int vf)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
@@ -1717,7 +1734,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
 
-	if (rtnl_fill_vf(skb, dev, ext_filter_mask))
+	if (rtnl_fill_vf(skb, dev, ext_filter_mask, vf))
 		goto nla_put_failure;
 
 	if (rtnl_port_fill(skb, dev, ext_filter_mask))
@@ -1806,6 +1823,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	[IFLA_PROP_LIST]	= { .type = NLA_NESTED },
 	[IFLA_ALT_IFNAME]	= { .type = NLA_STRING,
 				    .len = ALTIFNAMSIZ - 1 },
+	[IFLA_VF_NUM]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2057,7 +2075,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 					       NETLINK_CB(cb->skb).portid,
 					       nlh->nlmsg_seq, 0, flags,
 					       ext_filter_mask, 0, NULL, 0,
-					       netnsid);
+					       netnsid, -1);
 
 			if (err < 0) {
 				if (likely(skb->len))
@@ -3365,6 +3383,7 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb,
 		case IFLA_ALT_IFNAME:
 		case IFLA_EXT_MASK:
 		case IFLA_TARGET_NETNSID:
+		case IFLA_VF_NUM:
 			break;
 		default:
 			NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request");
@@ -3385,6 +3404,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
 	int netnsid = -1;
+	int vf = -1;
 	int err;
 	u32 ext_filter_mask = 0;
 
@@ -3407,6 +3427,17 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 
 	err = -EINVAL;
+	if ((ext_filter_mask & RTEXT_FILTER_VF) &&
+	    (ext_filter_mask & RTEXT_FILTER_VF_EXT))
+		goto out;
+
+	if (ext_filter_mask & RTEXT_FILTER_VF_EXT) {
+		if (tb[IFLA_VF_NUM])
+			vf = nla_get_u32(tb[IFLA_VF_NUM]);
+		else
+			goto out;
+	}
+
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
 		dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
@@ -3428,7 +3459,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	err = rtnl_fill_ifinfo(nskb, dev, net,
 			       RTM_NEWLINK, NETLINK_CB(skb).portid,
 			       nlh->nlmsg_seq, 0, 0, ext_filter_mask,
-			       0, NULL, 0, netnsid);
+			       0, NULL, 0, netnsid, vf);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
@@ -3634,7 +3665,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 
 	err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
 			       type, 0, 0, change, 0, 0, event,
-			       new_nsid, new_ifindex, -1);
+			       new_nsid, new_ifindex, -1, -1);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
-- 
1.8.3.1


  reply	other threads:[~2019-10-31 19:47 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-31 19:47 [PATCH net-next v2 0/3] VGT+ support Ariel Levkovich
2019-10-31 19:47 ` Ariel Levkovich [this message]
2019-10-31 19:47 ` [PATCH net-next v2 2/3] net: Add SRIOV " Ariel Levkovich
2019-10-31 19:47 ` [PATCH net-next v2 3/3] net/mlx5: " Ariel Levkovich
2019-10-31 20:31 ` [PATCH net-next v2 0/3] " David Miller
2019-10-31 22:20   ` Ariel Levkovich
2019-10-31 22:58     ` David Miller
2019-11-01 14:55       ` Ariel Levkovich
2019-11-01  0:23 ` Jakub Kicinski
     [not found]   ` <8d7db56c-376a-d809-4a65-bfc2baf3254f@mellanox.com>
2019-11-01 21:28     ` Saeed Mahameed
2019-11-02  0:21       ` Jakub Kicinski
2019-11-05  1:38         ` Saeed Mahameed
2019-11-05  1:47           ` David Ahern
2019-11-05  2:35             ` Jakub Kicinski
2019-11-05 20:10               ` Saeed Mahameed
2019-11-05 21:55                 ` Jakub Kicinski
2019-11-05 22:52                   ` Saeed Mahameed
2019-11-05 23:10                     ` Jakub Kicinski
2019-11-05 23:48                       ` Saeed Mahameed
2019-11-06  1:38                         ` Jakub Kicinski
2019-11-06 22:21                           ` Saeed Mahameed
2019-11-07 10:24                             ` Jiri Pirko
2019-11-13 22:55                           ` Keller, Jacob E
2019-11-14  2:25                             ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1572551213-9022-2-git-send-email-lariel@mellanox.com \
    --to=lariel@mellanox.com \
    --cc=dsahern@gmail.com \
    --cc=jiri@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=nikolay@cumulusnetworks.com \
    --cc=saeedm@mellanox.com \
    --cc=sbrivio@redhat.com \
    --cc=sd@queasysnail.net \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).