netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops
@ 2024-02-27  9:24 Eric Dumazet
  2024-02-27  9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27  9:24 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

This series removes RTNL use for RTM_GETNETCONF operations on AF_INET.

- Annotate data-races to avoid possible KCSAN splats.

- "ip -4 netconf show dev XXX" can be implemented without RTNL [1]

- "ip -4 netconf" dumps can be implemented using RCU instead of RTNL [1]

[1] This only refers to RTM_GETNETCONF operation, "ip" command
    also uses RTM_GETLINK dumps which are using RTNL at this moment.

Eric Dumazet (3):
  inet: annotate devconf data-races
  inet: do not use RTNL in inet_netconf_get_devconf()
  inet: use xa_array iterator to implement inet_netconf_dump_devconf()

 include/linux/inetdevice.h |  14 ++--
 net/ipv4/devinet.c         | 147 +++++++++++++++++--------------------
 net/ipv4/igmp.c            |   4 +-
 net/ipv4/proc.c            |   2 +-
 net/ipv4/route.c           |   4 +-
 5 files changed, 81 insertions(+), 90 deletions(-)

-- 
2.44.0.rc1.240.g4c46232300-goog


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH net-next 1/3] inet: annotate devconf data-races
  2024-02-27  9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
@ 2024-02-27  9:24 ` Eric Dumazet
  2024-02-27 12:59   ` Jiri Pirko
  2024-02-27  9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27  9:24 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Add READ_ONCE() in ipv4_devconf_get() and corresponding
WRITE_ONCE() in ipv4_devconf_set()

Add IPV4_DEVCONF_RO() and IPV4_DEVCONF_ALL_RO() macros,
and use them when reading devconf fields.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/inetdevice.h | 14 ++++++++------
 net/ipv4/devinet.c         | 21 +++++++++++----------
 net/ipv4/igmp.c            |  4 ++--
 net/ipv4/proc.c            |  2 +-
 net/ipv4/route.c           |  4 ++--
 5 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ddb27fc0ee8c8862d62f8c6243c4239ea53374f2..cb5280e6cc219106bcc55972dac850edf34988ff 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -53,13 +53,15 @@ struct in_device {
 };
 
 #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1])
+#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr))
 #define IPV4_DEVCONF_ALL(net, attr) \
 	IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr)
+#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr))
 
-static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+static inline int ipv4_devconf_get(const struct in_device *in_dev, int index)
 {
 	index--;
-	return in_dev->cnf.data[index];
+	return READ_ONCE(in_dev->cnf.data[index]);
 }
 
 static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
@@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
 {
 	index--;
 	set_bit(index, in_dev->cnf.state);
-	in_dev->cnf.data[index] = val;
+	WRITE_ONCE(in_dev->cnf.data[index], val);
 }
 
 static inline void ipv4_devconf_setall(struct in_device *in_dev)
@@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 	ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val))
 
 #define IN_DEV_ANDCONF(in_dev, attr) \
-	(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \
+	(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \
 	 IN_DEV_CONF_GET((in_dev), attr))
 
 #define IN_DEV_NET_ORCONF(in_dev, net, attr) \
-	(IPV4_DEVCONF_ALL(net, attr) || \
+	(IPV4_DEVCONF_ALL_RO(net, attr) || \
 	 IN_DEV_CONF_GET((in_dev), attr))
 
 #define IN_DEV_ORCONF(in_dev, attr) \
 	IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr)
 
 #define IN_DEV_MAXCONF(in_dev, attr) \
-	(max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
+	(max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \
 	     IN_DEV_CONF_GET((in_dev), attr)))
 
 #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bc74f131fe4dfad327e71c1a8f0a4b66cdc526e5..ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1982,7 +1982,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
 		return -EMSGSIZE;
 
 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
-		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
 
 	return 0;
 }
@@ -2068,9 +2068,9 @@ static int inet_netconf_msgsize_devconf(int type)
 }
 
 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
-				     struct ipv4_devconf *devconf, u32 portid,
-				     u32 seq, int event, unsigned int flags,
-				     int type)
+				     const struct ipv4_devconf *devconf,
+				     u32 portid, u32 seq, int event,
+				     unsigned int flags, int type)
 {
 	struct nlmsghdr  *nlh;
 	struct netconfmsg *ncm;
@@ -2095,27 +2095,28 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 
 	if ((all || type == NETCONFA_FORWARDING) &&
 	    nla_put_s32(skb, NETCONFA_FORWARDING,
-			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
+			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
 		goto nla_put_failure;
 	if ((all || type == NETCONFA_RP_FILTER) &&
 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
-			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
+			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
 		goto nla_put_failure;
 	if ((all || type == NETCONFA_MC_FORWARDING) &&
 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
-			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
+			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
 		goto nla_put_failure;
 	if ((all || type == NETCONFA_BC_FORWARDING) &&
 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
-			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
 		goto nla_put_failure;
 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
-			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
+			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
 		goto nla_put_failure;
 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
-			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+			IPV4_DEVCONF_RO(*devconf,
+					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
 		goto nla_put_failure;
 
 out:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index efeeca2b13285a3149645ab945b0364391f6721b..717e97a389a8aee75f51a5a5d859cb94a5d868eb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -120,12 +120,12 @@
  */
 
 #define IGMP_V1_SEEN(in_dev) \
-	(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
+	(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
 	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
 	 ((in_dev)->mr_v1_seen && \
 	  time_before(jiffies, (in_dev)->mr_v1_seen)))
 #define IGMP_V2_SEEN(in_dev) \
-	(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
+	(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
 	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
 	 ((in_dev)->mr_v2_seen && \
 	  time_before(jiffies, (in_dev)->mr_v2_seen)))
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 5f4654ebff487494b6afbaa69174df4c004395dc..914bc9c35cc702395aee257fb010034294e501de 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -395,7 +395,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-		   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
+		   IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2,
 		   READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
 
 	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b512288d6fcc6bc0ce586bc5dc501b979ac3b9c1..c8f76f56dc1653371ca39663f29cc798b062e60d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2313,7 +2313,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (IN_DEV_BFORWARD(in_dev))
 			goto make_route;
 		/* not do cache if bc_forwarding is enabled */
-		if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+		if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING))
 			do_cache = false;
 		goto brd_input;
 	}
@@ -2993,7 +2993,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 #ifdef CONFIG_IP_MROUTE
 			if (ipv4_is_multicast(dst) &&
 			    !ipv4_is_local_multicast(dst) &&
-			    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+			    IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) {
 				int err = ipmr_get_route(net, skb,
 							 fl4->saddr, fl4->daddr,
 							 r, portid);
-- 
2.44.0.rc1.240.g4c46232300-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
  2024-02-27  9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
  2024-02-27  9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
@ 2024-02-27  9:24 ` Eric Dumazet
  2024-02-27 12:59   ` Jiri Pirko
  2024-02-27  9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
  2024-02-29  3:50 ` [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops patchwork-bot+netdevbpf
  3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27  9:24 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

"ip -4 netconf show dev XXXX" no longer acquires RTNL.

Return -ENODEV instead of -EINVAL if no netdev or idev can be found.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/devinet.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
 				    struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(in_skb->sk);
-	struct nlattr *tb[NETCONFA_MAX+1];
+	struct nlattr *tb[NETCONFA_MAX + 1];
+	const struct ipv4_devconf *devconf;
+	struct in_device *in_dev = NULL;
+	struct net_device *dev = NULL;
 	struct sk_buff *skb;
-	struct ipv4_devconf *devconf;
-	struct in_device *in_dev;
-	struct net_device *dev;
 	int ifindex;
 	int err;
 
 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
 	if (err)
-		goto errout;
+		return err;
 
-	err = -EINVAL;
 	if (!tb[NETCONFA_IFINDEX])
-		goto errout;
+		return -EINVAL;
 
 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
 	switch (ifindex) {
@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
 		devconf = net->ipv4.devconf_dflt;
 		break;
 	default:
-		dev = __dev_get_by_index(net, ifindex);
-		if (!dev)
-			goto errout;
-		in_dev = __in_dev_get_rtnl(dev);
+		err = -ENODEV;
+		dev = dev_get_by_index(net, ifindex);
+		if (dev)
+			in_dev = in_dev_get(dev);
 		if (!in_dev)
 			goto errout;
 		devconf = &in_dev->cnf;
@@ -2257,6 +2256,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
 	}
 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout:
+	if (in_dev)
+		in_dev_put(in_dev);
+	dev_put(dev);
 	return err;
 }
 
@@ -2826,5 +2828,6 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
-		      inet_netconf_dump_devconf, 0);
+		      inet_netconf_dump_devconf,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 }
-- 
2.44.0.rc1.240.g4c46232300-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf()
  2024-02-27  9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
  2024-02-27  9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
  2024-02-27  9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
@ 2024-02-27  9:24 ` Eric Dumazet
  2024-02-27 13:07   ` Jiri Pirko
  2024-02-29  3:50 ` [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops patchwork-bot+netdevbpf
  3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27  9:24 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

1) inet_netconf_dump_devconf() can run under RCU protection
   instead of RTNL.

2) properly return 0 at the end of a dump, avoiding an
   an extra recvmsg() system call.

3) Do not use inet_base_seq() anymore, for_each_netdev_dump()
   has nice properties. Restarting a GETDEVCONF dump if a device has
   been added/removed or if net->ipv4.dev_addr_genid has changed is moot.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/devinet.c | 101 +++++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 58 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f045a34e90b974b17512a30c3b719bdfc3cba153..af741af61830aeb695e7e75608515547dade8f39 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2267,11 +2267,13 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
 {
 	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
-	int h, s_h;
-	int idx, s_idx;
+	struct {
+		unsigned long ifindex;
+		unsigned int all_default;
+	} *ctx = (void *)cb->ctx;
+	const struct in_device *in_dev;
 	struct net_device *dev;
-	struct in_device *in_dev;
-	struct hlist_head *head;
+	int err = 0;
 
 	if (cb->strict_check) {
 		struct netlink_ext_ack *extack = cb->extack;
@@ -2288,64 +2290,47 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
 		}
 	}
 
-	s_h = cb->args[0];
-	s_idx = idx = cb->args[1];
-
-	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
-		idx = 0;
-		head = &net->dev_index_head[h];
-		rcu_read_lock();
-		cb->seq = inet_base_seq(net);
-		hlist_for_each_entry_rcu(dev, head, index_hlist) {
-			if (idx < s_idx)
-				goto cont;
-			in_dev = __in_dev_get_rcu(dev);
-			if (!in_dev)
-				goto cont;
-
-			if (inet_netconf_fill_devconf(skb, dev->ifindex,
-						      &in_dev->cnf,
-						      NETLINK_CB(cb->skb).portid,
-						      nlh->nlmsg_seq,
-						      RTM_NEWNETCONF,
-						      NLM_F_MULTI,
-						      NETCONFA_ALL) < 0) {
-				rcu_read_unlock();
-				goto done;
-			}
-			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
-			idx++;
-		}
-		rcu_read_unlock();
+	rcu_read_lock();
+	for_each_netdev_dump(net, dev, ctx->ifindex) {
+		in_dev = __in_dev_get_rcu(dev);
+		if (!in_dev)
+			continue;
+		err = inet_netconf_fill_devconf(skb, dev->ifindex,
+						&in_dev->cnf,
+						NETLINK_CB(cb->skb).portid,
+						nlh->nlmsg_seq,
+						RTM_NEWNETCONF, NLM_F_MULTI,
+						NETCONFA_ALL);
+		if (err < 0)
+			goto done;
 	}
-	if (h == NETDEV_HASHENTRIES) {
-		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
-					      net->ipv4.devconf_all,
-					      NETLINK_CB(cb->skb).portid,
-					      nlh->nlmsg_seq,
-					      RTM_NEWNETCONF, NLM_F_MULTI,
-					      NETCONFA_ALL) < 0)
+	if (ctx->all_default == 0) {
+		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+						net->ipv4.devconf_all,
+						NETLINK_CB(cb->skb).portid,
+						nlh->nlmsg_seq,
+						RTM_NEWNETCONF, NLM_F_MULTI,
+						NETCONFA_ALL);
+		if (err < 0)
 			goto done;
-		else
-			h++;
-	}
-	if (h == NETDEV_HASHENTRIES + 1) {
-		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
-					      net->ipv4.devconf_dflt,
-					      NETLINK_CB(cb->skb).portid,
-					      nlh->nlmsg_seq,
-					      RTM_NEWNETCONF, NLM_F_MULTI,
-					      NETCONFA_ALL) < 0)
+		ctx->all_default++;
+	}
+	if (ctx->all_default == 1) {
+		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+						net->ipv4.devconf_dflt,
+						NETLINK_CB(cb->skb).portid,
+						nlh->nlmsg_seq,
+						RTM_NEWNETCONF, NLM_F_MULTI,
+						NETCONFA_ALL);
+		if (err < 0)
 			goto done;
-		else
-			h++;
+		ctx->all_default++;
 	}
 done:
-	cb->args[0] = h;
-	cb->args[1] = idx;
-
-	return skb->len;
+	if (err < 0 && likely(skb->len))
+		err = skb->len;
+	rcu_read_unlock();
+	return err;
 }
 
 #ifdef CONFIG_SYSCTL
@@ -2829,5 +2814,5 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
 		      inet_netconf_dump_devconf,
-		      RTNL_FLAG_DOIT_UNLOCKED);
+		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
 }
-- 
2.44.0.rc1.240.g4c46232300-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
  2024-02-27  9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
@ 2024-02-27 12:59   ` Jiri Pirko
  2024-02-27 13:09     ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 12:59 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
	Jiri Pirko, netdev, eric.dumazet

Tue, Feb 27, 2024 at 10:24:10AM CET, edumazet@google.com wrote:
>"ip -4 netconf show dev XXXX" no longer acquires RTNL.

I was under impression that you refer to the current code, confused me a
bit :/


>
>Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>
>---
> net/ipv4/devinet.c | 27 +++++++++++++++------------
> 1 file changed, 15 insertions(+), 12 deletions(-)
>
>diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
>--- a/net/ipv4/devinet.c
>+++ b/net/ipv4/devinet.c
>@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> 				    struct netlink_ext_ack *extack)
> {
> 	struct net *net = sock_net(in_skb->sk);
>-	struct nlattr *tb[NETCONFA_MAX+1];
>+	struct nlattr *tb[NETCONFA_MAX + 1];
>+	const struct ipv4_devconf *devconf;
>+	struct in_device *in_dev = NULL;
>+	struct net_device *dev = NULL;
> 	struct sk_buff *skb;
>-	struct ipv4_devconf *devconf;
>-	struct in_device *in_dev;
>-	struct net_device *dev;
> 	int ifindex;
> 	int err;
> 
> 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
> 	if (err)
>-		goto errout;
>+		return err;
> 
>-	err = -EINVAL;
> 	if (!tb[NETCONFA_IFINDEX])
>-		goto errout;
>+		return -EINVAL;
> 
> 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
> 	switch (ifindex) {
>@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> 		devconf = net->ipv4.devconf_dflt;
> 		break;
> 	default:
>-		dev = __dev_get_by_index(net, ifindex);
>-		if (!dev)
>-			goto errout;
>-		in_dev = __in_dev_get_rtnl(dev);
>+		err = -ENODEV;
>+		dev = dev_get_by_index(net, ifindex);

Comment says:
/* Deprecated for new users, call netdev_get_by_index() instead */
struct net_device *dev_get_by_index(struct net *net, int ifindex)

Perhaps better to use:
netdev_get_by_index() and netdev_put()?


>+		if (dev)
>+			in_dev = in_dev_get(dev);

The original flow:
		err = -ENODEV;
		dev = dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = in_dev_get(dev);
 		if (!in_dev)
 			goto errout;
Reads a bit nicer to me. Not sure why you changed it. Yeah, it's a nit.



> 		if (!in_dev)
> 			goto errout;
> 		devconf = &in_dev->cnf;
>@@ -2257,6 +2256,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> 	}
> 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
> errout:
>+	if (in_dev)
>+		in_dev_put(in_dev);
>+	dev_put(dev);
> 	return err;
> }
> 
>@@ -2826,5 +2828,6 @@ void __init devinet_init(void)
> 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
> 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
> 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
>-		      inet_netconf_dump_devconf, 0);
>+		      inet_netconf_dump_devconf,
>+		      RTNL_FLAG_DOIT_UNLOCKED);
> }
>-- 
>2.44.0.rc1.240.g4c46232300-goog
>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 1/3] inet: annotate devconf data-races
  2024-02-27  9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
@ 2024-02-27 12:59   ` Jiri Pirko
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 12:59 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
	Jiri Pirko, netdev, eric.dumazet

Tue, Feb 27, 2024 at 10:24:09AM CET, edumazet@google.com wrote:
>Add READ_ONCE() in ipv4_devconf_get() and corresponding
>WRITE_ONCE() in ipv4_devconf_set()
>
>Add IPV4_DEVCONF_RO() and IPV4_DEVCONF_ALL_RO() macros,
>and use them when reading devconf fields.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Jiri Pirko <jiri@nvidia.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf()
  2024-02-27  9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
@ 2024-02-27 13:07   ` Jiri Pirko
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 13:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
	Jiri Pirko, netdev, eric.dumazet

Tue, Feb 27, 2024 at 10:24:11AM CET, edumazet@google.com wrote:
>1) inet_netconf_dump_devconf() can run under RCU protection
>   instead of RTNL.
>
>2) properly return 0 at the end of a dump, avoiding an
>   an extra recvmsg() system call.
>
>3) Do not use inet_base_seq() anymore, for_each_netdev_dump()
>   has nice properties. Restarting a GETDEVCONF dump if a device has
>   been added/removed or if net->ipv4.dev_addr_genid has changed is moot.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Jiri Pirko <jiri@nvidia.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
  2024-02-27 12:59   ` Jiri Pirko
@ 2024-02-27 13:09     ` Eric Dumazet
  2024-02-27 15:11       ` Jiri Pirko
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27 13:09 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
	Jiri Pirko, netdev, eric.dumazet

On Tue, Feb 27, 2024 at 1:59 PM Jiri Pirko <jiri@resnulli.us> wrote:
>
> Tue, Feb 27, 2024 at 10:24:10AM CET, edumazet@google.com wrote:
> >"ip -4 netconf show dev XXXX" no longer acquires RTNL.
>
> I was under impression that you refer to the current code, confused me a
> bit :/
>
>
> >
> >Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
> >
> >Signed-off-by: Eric Dumazet <edumazet@google.com>
> >---
> > net/ipv4/devinet.c | 27 +++++++++++++++------------
> > 1 file changed, 15 insertions(+), 12 deletions(-)
> >
> >diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> >index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
> >--- a/net/ipv4/devinet.c
> >+++ b/net/ipv4/devinet.c
> >@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> >                                   struct netlink_ext_ack *extack)
> > {
> >       struct net *net = sock_net(in_skb->sk);
> >-      struct nlattr *tb[NETCONFA_MAX+1];
> >+      struct nlattr *tb[NETCONFA_MAX + 1];
> >+      const struct ipv4_devconf *devconf;
> >+      struct in_device *in_dev = NULL;
> >+      struct net_device *dev = NULL;
> >       struct sk_buff *skb;
> >-      struct ipv4_devconf *devconf;
> >-      struct in_device *in_dev;
> >-      struct net_device *dev;
> >       int ifindex;
> >       int err;
> >
> >       err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
> >       if (err)
> >-              goto errout;
> >+              return err;
> >
> >-      err = -EINVAL;
> >       if (!tb[NETCONFA_IFINDEX])
> >-              goto errout;
> >+              return -EINVAL;
> >
> >       ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
> >       switch (ifindex) {
> >@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> >               devconf = net->ipv4.devconf_dflt;
> >               break;
> >       default:
> >-              dev = __dev_get_by_index(net, ifindex);
> >-              if (!dev)
> >-                      goto errout;
> >-              in_dev = __in_dev_get_rtnl(dev);
> >+              err = -ENODEV;
> >+              dev = dev_get_by_index(net, ifindex);
>
> Comment says:
> /* Deprecated for new users, call netdev_get_by_index() instead */
> struct net_device *dev_get_by_index(struct net *net, int ifindex)

Only for long-standing allocations, where we are not sure if a leak
could happen or not.
We do not bother allocating a tracker otherwise.
Look at inet6_netconf_get_devconf() :
We left there dev_get_by_index() and dev_put().

I think I am aware of the tracking facility, I implemented it...


>
> Perhaps better to use:
> netdev_get_by_index() and netdev_put()?
>
>
> >+              if (dev)
> >+                      in_dev = in_dev_get(dev);
>
> The original flow:
>                 err = -ENODEV;
>                 dev = dev_get_by_index(net, ifindex);
>                 if (!dev)
>                         goto errout;
>                 in_dev = in_dev_get(dev);
>                 if (!in_dev)
>                         goto errout;

A single goto looks nicer to me.

> Reads a bit nicer to me. Not sure why you changed it. Yeah, it's a nit.
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
  2024-02-27 13:09     ` Eric Dumazet
@ 2024-02-27 15:11       ` Jiri Pirko
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 15:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
	Jiri Pirko, netdev, eric.dumazet

Tue, Feb 27, 2024 at 02:09:49PM CET, edumazet@google.com wrote:
>On Tue, Feb 27, 2024 at 1:59 PM Jiri Pirko <jiri@resnulli.us> wrote:
>>
>> Tue, Feb 27, 2024 at 10:24:10AM CET, edumazet@google.com wrote:
>> >"ip -4 netconf show dev XXXX" no longer acquires RTNL.
>>
>> I was under impression that you refer to the current code, confused me a
>> bit :/
>>
>>
>> >
>> >Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
>> >
>> >Signed-off-by: Eric Dumazet <edumazet@google.com>
>> >---
>> > net/ipv4/devinet.c | 27 +++++++++++++++------------
>> > 1 file changed, 15 insertions(+), 12 deletions(-)
>> >
>> >diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>> >index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
>> >--- a/net/ipv4/devinet.c
>> >+++ b/net/ipv4/devinet.c
>> >@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
>> >                                   struct netlink_ext_ack *extack)
>> > {
>> >       struct net *net = sock_net(in_skb->sk);
>> >-      struct nlattr *tb[NETCONFA_MAX+1];
>> >+      struct nlattr *tb[NETCONFA_MAX + 1];
>> >+      const struct ipv4_devconf *devconf;
>> >+      struct in_device *in_dev = NULL;
>> >+      struct net_device *dev = NULL;
>> >       struct sk_buff *skb;
>> >-      struct ipv4_devconf *devconf;
>> >-      struct in_device *in_dev;
>> >-      struct net_device *dev;
>> >       int ifindex;
>> >       int err;
>> >
>> >       err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
>> >       if (err)
>> >-              goto errout;
>> >+              return err;
>> >
>> >-      err = -EINVAL;
>> >       if (!tb[NETCONFA_IFINDEX])
>> >-              goto errout;
>> >+              return -EINVAL;
>> >
>> >       ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
>> >       switch (ifindex) {
>> >@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
>> >               devconf = net->ipv4.devconf_dflt;
>> >               break;
>> >       default:
>> >-              dev = __dev_get_by_index(net, ifindex);
>> >-              if (!dev)
>> >-                      goto errout;
>> >-              in_dev = __in_dev_get_rtnl(dev);
>> >+              err = -ENODEV;
>> >+              dev = dev_get_by_index(net, ifindex);
>>
>> Comment says:
>> /* Deprecated for new users, call netdev_get_by_index() instead */
>> struct net_device *dev_get_by_index(struct net *net, int ifindex)
>
>Only for long-standing allocations, where we are not sure if a leak
>could happen or not.
>We do not bother allocating a tracker otherwise.

Makes sense. Would it make sense to fix the "deprecated" comment then to
also reflect this usecase?


>Look at inet6_netconf_get_devconf() :
>We left there dev_get_by_index() and dev_put().
>
>I think I am aware of the tracking facility, I implemented it...

Yeah, I was just refering to the comment.


>
>
>>
>> Perhaps better to use:
>> netdev_get_by_index() and netdev_put()?
>>
>>
>> >+              if (dev)
>> >+                      in_dev = in_dev_get(dev);
>>
>> The original flow:
>>                 err = -ENODEV;
>>                 dev = dev_get_by_index(net, ifindex);
>>                 if (!dev)
>>                         goto errout;
>>                 in_dev = in_dev_get(dev);
>>                 if (!in_dev)
>>                         goto errout;
>
>A single goto looks nicer to me.

:)

Reviewed-by: Jiri Pirko <jiri@nvidia.com>


>
>> Reads a bit nicer to me. Not sure why you changed it. Yeah, it's a nit.
>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops
  2024-02-27  9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
                   ` (2 preceding siblings ...)
  2024-02-27  9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
@ 2024-02-29  3:50 ` patchwork-bot+netdevbpf
  3 siblings, 0 replies; 10+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-02-29  3:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, kuba, pabeni, dsahern, jiri, netdev, eric.dumazet

Hello:

This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 27 Feb 2024 09:24:08 +0000 you wrote:
> This series removes RTNL use for RTM_GETNETCONF operations on AF_INET.
> 
> - Annotate data-races to avoid possible KCSAN splats.
> 
> - "ip -4 netconf show dev XXX" can be implemented without RTNL [1]
> 
> - "ip -4 netconf" dumps can be implemented using RCU instead of RTNL [1]
> 
> [...]

Here is the summary with links:
  - [net-next,1/3] inet: annotate devconf data-races
    https://git.kernel.org/netdev/net-next/c/0598f8f3bb77
  - [net-next,2/3] inet: do not use RTNL in inet_netconf_get_devconf()
    https://git.kernel.org/netdev/net-next/c/bbcf91053bb6
  - [net-next,3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf()
    https://git.kernel.org/netdev/net-next/c/167487070d64

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-02-29  3:50 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-27  9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
2024-02-27  9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
2024-02-27 12:59   ` Jiri Pirko
2024-02-27  9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
2024-02-27 12:59   ` Jiri Pirko
2024-02-27 13:09     ` Eric Dumazet
2024-02-27 15:11       ` Jiri Pirko
2024-02-27  9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
2024-02-27 13:07   ` Jiri Pirko
2024-02-29  3:50 ` [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).