* [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops
@ 2024-02-27 9:24 Eric Dumazet
2024-02-27 9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27 9:24 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet
This series removes RTNL use for RTM_GETNETCONF operations on AF_INET.
- Annotate data-races to avoid possible KCSAN splats.
- "ip -4 netconf show dev XXX" can be implemented without RTNL [1]
- "ip -4 netconf" dumps can be implemented using RCU instead of RTNL [1]
[1] This only refers to RTM_GETNETCONF operation, "ip" command
also uses RTM_GETLINK dumps which are using RTNL at this moment.
Eric Dumazet (3):
inet: annotate devconf data-races
inet: do not use RTNL in inet_netconf_get_devconf()
inet: use xa_array iterator to implement inet_netconf_dump_devconf()
include/linux/inetdevice.h | 14 ++--
net/ipv4/devinet.c | 147 +++++++++++++++++--------------------
net/ipv4/igmp.c | 4 +-
net/ipv4/proc.c | 2 +-
net/ipv4/route.c | 4 +-
5 files changed, 81 insertions(+), 90 deletions(-)
--
2.44.0.rc1.240.g4c46232300-goog
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next 1/3] inet: annotate devconf data-races
2024-02-27 9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
@ 2024-02-27 9:24 ` Eric Dumazet
2024-02-27 12:59 ` Jiri Pirko
2024-02-27 9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27 9:24 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet
Add READ_ONCE() in ipv4_devconf_get() and corresponding
WRITE_ONCE() in ipv4_devconf_set()
Add IPV4_DEVCONF_RO() and IPV4_DEVCONF_ALL_RO() macros,
and use them when reading devconf fields.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/inetdevice.h | 14 ++++++++------
net/ipv4/devinet.c | 21 +++++++++++----------
net/ipv4/igmp.c | 4 ++--
net/ipv4/proc.c | 2 +-
net/ipv4/route.c | 4 ++--
5 files changed, 24 insertions(+), 21 deletions(-)
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ddb27fc0ee8c8862d62f8c6243c4239ea53374f2..cb5280e6cc219106bcc55972dac850edf34988ff 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -53,13 +53,15 @@ struct in_device {
};
#define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1])
+#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr))
#define IPV4_DEVCONF_ALL(net, attr) \
IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr)
+#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr))
-static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+static inline int ipv4_devconf_get(const struct in_device *in_dev, int index)
{
index--;
- return in_dev->cnf.data[index];
+ return READ_ONCE(in_dev->cnf.data[index]);
}
static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
@@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
{
index--;
set_bit(index, in_dev->cnf.state);
- in_dev->cnf.data[index] = val;
+ WRITE_ONCE(in_dev->cnf.data[index], val);
}
static inline void ipv4_devconf_setall(struct in_device *in_dev)
@@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val))
#define IN_DEV_ANDCONF(in_dev, attr) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \
IN_DEV_CONF_GET((in_dev), attr))
#define IN_DEV_NET_ORCONF(in_dev, net, attr) \
- (IPV4_DEVCONF_ALL(net, attr) || \
+ (IPV4_DEVCONF_ALL_RO(net, attr) || \
IN_DEV_CONF_GET((in_dev), attr))
#define IN_DEV_ORCONF(in_dev, attr) \
IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr)
#define IN_DEV_MAXCONF(in_dev, attr) \
- (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
+ (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \
IN_DEV_CONF_GET((in_dev), attr)))
#define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bc74f131fe4dfad327e71c1a8f0a4b66cdc526e5..ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1982,7 +1982,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
return -EMSGSIZE;
for (i = 0; i < IPV4_DEVCONF_MAX; i++)
- ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+ ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
return 0;
}
@@ -2068,9 +2068,9 @@ static int inet_netconf_msgsize_devconf(int type)
}
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
- struct ipv4_devconf *devconf, u32 portid,
- u32 seq, int event, unsigned int flags,
- int type)
+ const struct ipv4_devconf *devconf,
+ u32 portid, u32 seq, int event,
+ unsigned int flags, int type)
{
struct nlmsghdr *nlh;
struct netconfmsg *ncm;
@@ -2095,27 +2095,28 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING,
- IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_RP_FILTER) &&
nla_put_s32(skb, NETCONFA_RP_FILTER,
- IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
+ IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_BC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_BC_FORWARDING,
- IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+ IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_PROXY_NEIGH) &&
nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
- IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
+ IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
- IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+ IPV4_DEVCONF_RO(*devconf,
+ IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
goto nla_put_failure;
out:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index efeeca2b13285a3149645ab945b0364391f6721b..717e97a389a8aee75f51a5a5d859cb94a5d868eb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -120,12 +120,12 @@
*/
#define IGMP_V1_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
((in_dev)->mr_v1_seen && \
time_before(jiffies, (in_dev)->mr_v1_seen)))
#define IGMP_V2_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
((in_dev)->mr_v2_seen && \
time_before(jiffies, (in_dev)->mr_v2_seen)))
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 5f4654ebff487494b6afbaa69174df4c004395dc..914bc9c35cc702395aee257fb010034294e501de 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -395,7 +395,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
- IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
+ IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2,
READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b512288d6fcc6bc0ce586bc5dc501b979ac3b9c1..c8f76f56dc1653371ca39663f29cc798b062e60d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2313,7 +2313,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (IN_DEV_BFORWARD(in_dev))
goto make_route;
/* not do cache if bc_forwarding is enabled */
- if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+ if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING))
do_cache = false;
goto brd_input;
}
@@ -2993,7 +2993,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
#ifdef CONFIG_IP_MROUTE
if (ipv4_is_multicast(dst) &&
!ipv4_is_local_multicast(dst) &&
- IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+ IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
r, portid);
--
2.44.0.rc1.240.g4c46232300-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
2024-02-27 9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
2024-02-27 9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
@ 2024-02-27 9:24 ` Eric Dumazet
2024-02-27 12:59 ` Jiri Pirko
2024-02-27 9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
2024-02-29 3:50 ` [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops patchwork-bot+netdevbpf
3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27 9:24 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet
"ip -4 netconf show dev XXXX" no longer acquires RTNL.
Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/devinet.c | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
- struct nlattr *tb[NETCONFA_MAX+1];
+ struct nlattr *tb[NETCONFA_MAX + 1];
+ const struct ipv4_devconf *devconf;
+ struct in_device *in_dev = NULL;
+ struct net_device *dev = NULL;
struct sk_buff *skb;
- struct ipv4_devconf *devconf;
- struct in_device *in_dev;
- struct net_device *dev;
int ifindex;
int err;
err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
if (err)
- goto errout;
+ return err;
- err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
- goto errout;
+ return -EINVAL;
ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
switch (ifindex) {
@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
devconf = net->ipv4.devconf_dflt;
break;
default:
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
- goto errout;
- in_dev = __in_dev_get_rtnl(dev);
+ err = -ENODEV;
+ dev = dev_get_by_index(net, ifindex);
+ if (dev)
+ in_dev = in_dev_get(dev);
if (!in_dev)
goto errout;
devconf = &in_dev->cnf;
@@ -2257,6 +2256,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
+ if (in_dev)
+ in_dev_put(in_dev);
+ dev_put(dev);
return err;
}
@@ -2826,5 +2828,6 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf, 0);
+ inet_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED);
}
--
2.44.0.rc1.240.g4c46232300-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf()
2024-02-27 9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
2024-02-27 9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
2024-02-27 9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
@ 2024-02-27 9:24 ` Eric Dumazet
2024-02-27 13:07 ` Jiri Pirko
2024-02-29 3:50 ` [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops patchwork-bot+netdevbpf
3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27 9:24 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: David Ahern, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet
1) inet_netconf_dump_devconf() can run under RCU protection
instead of RTNL.
2) properly return 0 at the end of a dump, avoiding an
an extra recvmsg() system call.
3) Do not use inet_base_seq() anymore, for_each_netdev_dump()
has nice properties. Restarting a GETDEVCONF dump if a device has
been added/removed or if net->ipv4.dev_addr_genid has changed is moot.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/devinet.c | 101 +++++++++++++++++++--------------------------
1 file changed, 43 insertions(+), 58 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f045a34e90b974b17512a30c3b719bdfc3cba153..af741af61830aeb695e7e75608515547dade8f39 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2267,11 +2267,13 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx, s_idx;
+ struct {
+ unsigned long ifindex;
+ unsigned int all_default;
+ } *ctx = (void *)cb->ctx;
+ const struct in_device *in_dev;
struct net_device *dev;
- struct in_device *in_dev;
- struct hlist_head *head;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -2288,64 +2290,47 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = inet_base_seq(net);
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- in_dev = __in_dev_get_rcu(dev);
- if (!in_dev)
- goto cont;
-
- if (inet_netconf_fill_devconf(skb, dev->ifindex,
- &in_dev->cnf,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ continue;
+ err = inet_netconf_fill_devconf(skb, dev->ifindex,
+ &in_dev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ goto done;
}
- if (h == NETDEV_HASHENTRIES) {
- if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
- net->ipv4.devconf_all,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ if (ctx->all_default == 0) {
+ err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv4.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
- }
- if (h == NETDEV_HASHENTRIES + 1) {
- if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
- net->ipv4.devconf_dflt,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ ctx->all_default++;
+ }
+ if (ctx->all_default == 1) {
+ err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv4.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
+ ctx->all_default++;
}
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-
- return skb->len;
+ if (err < 0 && likely(skb->len))
+ err = skb->len;
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_SYSCTL
@@ -2829,5 +2814,5 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
inet_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED);
+ RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
}
--
2.44.0.rc1.240.g4c46232300-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
2024-02-27 9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
@ 2024-02-27 12:59 ` Jiri Pirko
2024-02-27 13:09 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 12:59 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
Jiri Pirko, netdev, eric.dumazet
Tue, Feb 27, 2024 at 10:24:10AM CET, edumazet@google.com wrote:
>"ip -4 netconf show dev XXXX" no longer acquires RTNL.
I was under impression that you refer to the current code, confused me a
bit :/
>
>Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>
>---
> net/ipv4/devinet.c | 27 +++++++++++++++------------
> 1 file changed, 15 insertions(+), 12 deletions(-)
>
>diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
>--- a/net/ipv4/devinet.c
>+++ b/net/ipv4/devinet.c
>@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> struct netlink_ext_ack *extack)
> {
> struct net *net = sock_net(in_skb->sk);
>- struct nlattr *tb[NETCONFA_MAX+1];
>+ struct nlattr *tb[NETCONFA_MAX + 1];
>+ const struct ipv4_devconf *devconf;
>+ struct in_device *in_dev = NULL;
>+ struct net_device *dev = NULL;
> struct sk_buff *skb;
>- struct ipv4_devconf *devconf;
>- struct in_device *in_dev;
>- struct net_device *dev;
> int ifindex;
> int err;
>
> err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
> if (err)
>- goto errout;
>+ return err;
>
>- err = -EINVAL;
> if (!tb[NETCONFA_IFINDEX])
>- goto errout;
>+ return -EINVAL;
>
> ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
> switch (ifindex) {
>@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> devconf = net->ipv4.devconf_dflt;
> break;
> default:
>- dev = __dev_get_by_index(net, ifindex);
>- if (!dev)
>- goto errout;
>- in_dev = __in_dev_get_rtnl(dev);
>+ err = -ENODEV;
>+ dev = dev_get_by_index(net, ifindex);
Comment says:
/* Deprecated for new users, call netdev_get_by_index() instead */
struct net_device *dev_get_by_index(struct net *net, int ifindex)
Perhaps better to use:
netdev_get_by_index() and netdev_put()?
>+ if (dev)
>+ in_dev = in_dev_get(dev);
The original flow:
err = -ENODEV;
dev = dev_get_by_index(net, ifindex);
if (!dev)
goto errout;
in_dev = in_dev_get(dev);
if (!in_dev)
goto errout;
Reads a bit nicer to me. Not sure why you changed it. Yeah, it's a nit.
> if (!in_dev)
> goto errout;
> devconf = &in_dev->cnf;
>@@ -2257,6 +2256,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> }
> err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
> errout:
>+ if (in_dev)
>+ in_dev_put(in_dev);
>+ dev_put(dev);
> return err;
> }
>
>@@ -2826,5 +2828,6 @@ void __init devinet_init(void)
> rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
> rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
> rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
>- inet_netconf_dump_devconf, 0);
>+ inet_netconf_dump_devconf,
>+ RTNL_FLAG_DOIT_UNLOCKED);
> }
>--
>2.44.0.rc1.240.g4c46232300-goog
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 1/3] inet: annotate devconf data-races
2024-02-27 9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
@ 2024-02-27 12:59 ` Jiri Pirko
0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 12:59 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
Jiri Pirko, netdev, eric.dumazet
Tue, Feb 27, 2024 at 10:24:09AM CET, edumazet@google.com wrote:
>Add READ_ONCE() in ipv4_devconf_get() and corresponding
>WRITE_ONCE() in ipv4_devconf_set()
>
>Add IPV4_DEVCONF_RO() and IPV4_DEVCONF_ALL_RO() macros,
>and use them when reading devconf fields.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf()
2024-02-27 9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
@ 2024-02-27 13:07 ` Jiri Pirko
0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 13:07 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
Jiri Pirko, netdev, eric.dumazet
Tue, Feb 27, 2024 at 10:24:11AM CET, edumazet@google.com wrote:
>1) inet_netconf_dump_devconf() can run under RCU protection
> instead of RTNL.
>
>2) properly return 0 at the end of a dump, avoiding an
> an extra recvmsg() system call.
>
>3) Do not use inet_base_seq() anymore, for_each_netdev_dump()
> has nice properties. Restarting a GETDEVCONF dump if a device has
> been added/removed or if net->ipv4.dev_addr_genid has changed is moot.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
2024-02-27 12:59 ` Jiri Pirko
@ 2024-02-27 13:09 ` Eric Dumazet
2024-02-27 15:11 ` Jiri Pirko
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-02-27 13:09 UTC (permalink / raw)
To: Jiri Pirko
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
Jiri Pirko, netdev, eric.dumazet
On Tue, Feb 27, 2024 at 1:59 PM Jiri Pirko <jiri@resnulli.us> wrote:
>
> Tue, Feb 27, 2024 at 10:24:10AM CET, edumazet@google.com wrote:
> >"ip -4 netconf show dev XXXX" no longer acquires RTNL.
>
> I was under impression that you refer to the current code, confused me a
> bit :/
>
>
> >
> >Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
> >
> >Signed-off-by: Eric Dumazet <edumazet@google.com>
> >---
> > net/ipv4/devinet.c | 27 +++++++++++++++------------
> > 1 file changed, 15 insertions(+), 12 deletions(-)
> >
> >diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> >index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
> >--- a/net/ipv4/devinet.c
> >+++ b/net/ipv4/devinet.c
> >@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> > struct netlink_ext_ack *extack)
> > {
> > struct net *net = sock_net(in_skb->sk);
> >- struct nlattr *tb[NETCONFA_MAX+1];
> >+ struct nlattr *tb[NETCONFA_MAX + 1];
> >+ const struct ipv4_devconf *devconf;
> >+ struct in_device *in_dev = NULL;
> >+ struct net_device *dev = NULL;
> > struct sk_buff *skb;
> >- struct ipv4_devconf *devconf;
> >- struct in_device *in_dev;
> >- struct net_device *dev;
> > int ifindex;
> > int err;
> >
> > err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
> > if (err)
> >- goto errout;
> >+ return err;
> >
> >- err = -EINVAL;
> > if (!tb[NETCONFA_IFINDEX])
> >- goto errout;
> >+ return -EINVAL;
> >
> > ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
> > switch (ifindex) {
> >@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
> > devconf = net->ipv4.devconf_dflt;
> > break;
> > default:
> >- dev = __dev_get_by_index(net, ifindex);
> >- if (!dev)
> >- goto errout;
> >- in_dev = __in_dev_get_rtnl(dev);
> >+ err = -ENODEV;
> >+ dev = dev_get_by_index(net, ifindex);
>
> Comment says:
> /* Deprecated for new users, call netdev_get_by_index() instead */
> struct net_device *dev_get_by_index(struct net *net, int ifindex)
Only for long-standing allocations, where we are not sure if a leak
could happen or not.
We do not bother allocating a tracker otherwise.
Look at inet6_netconf_get_devconf() :
We left there dev_get_by_index() and dev_put().
I think I am aware of the tracking facility, I implemented it...
>
> Perhaps better to use:
> netdev_get_by_index() and netdev_put()?
>
>
> >+ if (dev)
> >+ in_dev = in_dev_get(dev);
>
> The original flow:
> err = -ENODEV;
> dev = dev_get_by_index(net, ifindex);
> if (!dev)
> goto errout;
> in_dev = in_dev_get(dev);
> if (!in_dev)
> goto errout;
A single goto looks nicer to me.
> Reads a bit nicer to me. Not sure why you changed it. Yeah, it's a nit.
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf()
2024-02-27 13:09 ` Eric Dumazet
@ 2024-02-27 15:11 ` Jiri Pirko
0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2024-02-27 15:11 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, David Ahern,
Jiri Pirko, netdev, eric.dumazet
Tue, Feb 27, 2024 at 02:09:49PM CET, edumazet@google.com wrote:
>On Tue, Feb 27, 2024 at 1:59 PM Jiri Pirko <jiri@resnulli.us> wrote:
>>
>> Tue, Feb 27, 2024 at 10:24:10AM CET, edumazet@google.com wrote:
>> >"ip -4 netconf show dev XXXX" no longer acquires RTNL.
>>
>> I was under impression that you refer to the current code, confused me a
>> bit :/
>>
>>
>> >
>> >Return -ENODEV instead of -EINVAL if no netdev or idev can be found.
>> >
>> >Signed-off-by: Eric Dumazet <edumazet@google.com>
>> >---
>> > net/ipv4/devinet.c | 27 +++++++++++++++------------
>> > 1 file changed, 15 insertions(+), 12 deletions(-)
>> >
>> >diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>> >index ca75d0fff1d1ebd8c199fb74a6f0e2f51160635c..f045a34e90b974b17512a30c3b719bdfc3cba153 100644
>> >--- a/net/ipv4/devinet.c
>> >+++ b/net/ipv4/devinet.c
>> >@@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
>> > struct netlink_ext_ack *extack)
>> > {
>> > struct net *net = sock_net(in_skb->sk);
>> >- struct nlattr *tb[NETCONFA_MAX+1];
>> >+ struct nlattr *tb[NETCONFA_MAX + 1];
>> >+ const struct ipv4_devconf *devconf;
>> >+ struct in_device *in_dev = NULL;
>> >+ struct net_device *dev = NULL;
>> > struct sk_buff *skb;
>> >- struct ipv4_devconf *devconf;
>> >- struct in_device *in_dev;
>> >- struct net_device *dev;
>> > int ifindex;
>> > int err;
>> >
>> > err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
>> > if (err)
>> >- goto errout;
>> >+ return err;
>> >
>> >- err = -EINVAL;
>> > if (!tb[NETCONFA_IFINDEX])
>> >- goto errout;
>> >+ return -EINVAL;
>> >
>> > ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
>> > switch (ifindex) {
>> >@@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
>> > devconf = net->ipv4.devconf_dflt;
>> > break;
>> > default:
>> >- dev = __dev_get_by_index(net, ifindex);
>> >- if (!dev)
>> >- goto errout;
>> >- in_dev = __in_dev_get_rtnl(dev);
>> >+ err = -ENODEV;
>> >+ dev = dev_get_by_index(net, ifindex);
>>
>> Comment says:
>> /* Deprecated for new users, call netdev_get_by_index() instead */
>> struct net_device *dev_get_by_index(struct net *net, int ifindex)
>
>Only for long-standing allocations, where we are not sure if a leak
>could happen or not.
>We do not bother allocating a tracker otherwise.
Makes sense. Would it make sense to fix the "deprecated" comment then to
also reflect this usecase?
>Look at inet6_netconf_get_devconf() :
>We left there dev_get_by_index() and dev_put().
>
>I think I am aware of the tracking facility, I implemented it...
Yeah, I was just refering to the comment.
>
>
>>
>> Perhaps better to use:
>> netdev_get_by_index() and netdev_put()?
>>
>>
>> >+ if (dev)
>> >+ in_dev = in_dev_get(dev);
>>
>> The original flow:
>> err = -ENODEV;
>> dev = dev_get_by_index(net, ifindex);
>> if (!dev)
>> goto errout;
>> in_dev = in_dev_get(dev);
>> if (!in_dev)
>> goto errout;
>
>A single goto looks nicer to me.
:)
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
>
>> Reads a bit nicer to me. Not sure why you changed it. Yeah, it's a nit.
>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops
2024-02-27 9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
` (2 preceding siblings ...)
2024-02-27 9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
@ 2024-02-29 3:50 ` patchwork-bot+netdevbpf
3 siblings, 0 replies; 10+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-02-29 3:50 UTC (permalink / raw)
To: Eric Dumazet; +Cc: davem, kuba, pabeni, dsahern, jiri, netdev, eric.dumazet
Hello:
This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:
On Tue, 27 Feb 2024 09:24:08 +0000 you wrote:
> This series removes RTNL use for RTM_GETNETCONF operations on AF_INET.
>
> - Annotate data-races to avoid possible KCSAN splats.
>
> - "ip -4 netconf show dev XXX" can be implemented without RTNL [1]
>
> - "ip -4 netconf" dumps can be implemented using RCU instead of RTNL [1]
>
> [...]
Here is the summary with links:
- [net-next,1/3] inet: annotate devconf data-races
https://git.kernel.org/netdev/net-next/c/0598f8f3bb77
- [net-next,2/3] inet: do not use RTNL in inet_netconf_get_devconf()
https://git.kernel.org/netdev/net-next/c/bbcf91053bb6
- [net-next,3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf()
https://git.kernel.org/netdev/net-next/c/167487070d64
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2024-02-29 3:50 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-27 9:24 [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops Eric Dumazet
2024-02-27 9:24 ` [PATCH net-next 1/3] inet: annotate devconf data-races Eric Dumazet
2024-02-27 12:59 ` Jiri Pirko
2024-02-27 9:24 ` [PATCH net-next 2/3] inet: do not use RTNL in inet_netconf_get_devconf() Eric Dumazet
2024-02-27 12:59 ` Jiri Pirko
2024-02-27 13:09 ` Eric Dumazet
2024-02-27 15:11 ` Jiri Pirko
2024-02-27 9:24 ` [PATCH net-next 3/3] inet: use xa_array iterator to implement inet_netconf_dump_devconf() Eric Dumazet
2024-02-27 13:07 ` Jiri Pirko
2024-02-29 3:50 ` [PATCH net-next 0/3] inet: implement lockless RTM_GETNETCONF ops patchwork-bot+netdevbpf
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).