All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/4] ipv6: lockless inet6_dump_addr()
@ 2024-03-06 15:51 Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless Eric Dumazet
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Eric Dumazet @ 2024-03-06 15:51 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, netdev, eric.dumazet, Eric Dumazet

This series removes RTNL locking to dump ipv6 addresses.

Eric Dumazet (4):
  ipv6: make inet6_fill_ifaddr() lockless
  ipv6: make in6_dump_addrs() lockless
  ipv6: use xa_array iterator to implement inet6_dump_addr()
  ipv6: remove RTNL protection from inet6_dump_addr()

 net/ipv6/addrconf.c | 168 ++++++++++++++++++++------------------------
 1 file changed, 78 insertions(+), 90 deletions(-)

-- 
2.44.0.278.ge034bb2e1d-goog


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless
  2024-03-06 15:51 [PATCH net-next 0/4] ipv6: lockless inet6_dump_addr() Eric Dumazet
@ 2024-03-06 15:51 ` Eric Dumazet
  2024-03-06 23:38   ` David Ahern
  2024-03-06 15:51 ` [PATCH net-next 2/4] ipv6: make in6_dump_addrs() lockless Eric Dumazet
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2024-03-06 15:51 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, netdev, eric.dumazet, Eric Dumazet

Make inet6_fill_ifaddr() lockless, and add approriate annotations
on ifa->tstamp, ifa->valid_lft, ifa->preferred_lft, ifa->ifa_proto
and ifa->rt_priority.

Also constify 2nd argument of inet6_fill_ifaddr(), inet6_fill_ifmcaddr()
and inet6_fill_ifacaddr().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 66 +++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2f84e6ecf19f48602cadb47bc378c9b5a1cdbf65..480a1f9492b590bb13008cede5ea7dc9c422af67 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2730,7 +2730,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		if (update_lft) {
 			ifp->valid_lft = valid_lft;
 			ifp->prefered_lft = prefered_lft;
-			ifp->tstamp = now;
+			WRITE_ONCE(ifp->tstamp, now);
 			flags = ifp->flags;
 			ifp->flags &= ~IFA_F_DEPRECATED;
 			spin_unlock_bh(&ifp->lock);
@@ -4898,13 +4898,13 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
 			IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
 			IFA_F_NOPREFIXROUTE);
 	ifp->flags |= cfg->ifa_flags;
-	ifp->tstamp = jiffies;
-	ifp->valid_lft = cfg->valid_lft;
-	ifp->prefered_lft = cfg->preferred_lft;
-	ifp->ifa_proto = cfg->ifa_proto;
+	WRITE_ONCE(ifp->tstamp, jiffies);
+	WRITE_ONCE(ifp->valid_lft, cfg->valid_lft);
+	WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft);
+	WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto);
 
 	if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
-		ifp->rt_priority = cfg->rt_priority;
+		WRITE_ONCE(ifp->rt_priority, cfg->rt_priority);
 
 	if (new_peer)
 		ifp->peer_addr = *cfg->peer_pfx;
@@ -5125,17 +5125,21 @@ struct inet6_fill_args {
 	enum addr_type_t type;
 };
 
-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+static int inet6_fill_ifaddr(struct sk_buff *skb,
+			     const struct inet6_ifaddr *ifa,
 			     struct inet6_fill_args *args)
 {
-	struct nlmsghdr  *nlh;
+	struct nlmsghdr *nlh;
 	u32 preferred, valid;
+	u32 flags, priority;
+	u8 proto;
 
 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
 			sizeof(struct ifaddrmsg), args->flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
+	flags = READ_ONCE(ifa->flags);
 	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
 		      ifa->idev->dev->ifindex);
 
@@ -5143,13 +5147,14 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
 		goto error;
 
-	spin_lock_bh(&ifa->lock);
-	if (!((ifa->flags&IFA_F_PERMANENT) &&
-	      (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
-		preferred = ifa->prefered_lft;
-		valid = ifa->valid_lft;
+	preferred = READ_ONCE(ifa->prefered_lft);
+	valid = READ_ONCE(ifa->valid_lft);
+
+	if (!((flags & IFA_F_PERMANENT) &&
+	      (preferred == INFINITY_LIFE_TIME))) {
 		if (preferred != INFINITY_LIFE_TIME) {
-			long tval = (jiffies - ifa->tstamp)/HZ;
+			long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ;
+
 			if (preferred > tval)
 				preferred -= tval;
 			else
@@ -5165,28 +5170,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 		preferred = INFINITY_LIFE_TIME;
 		valid = INFINITY_LIFE_TIME;
 	}
-	spin_unlock_bh(&ifa->lock);
 
 	if (!ipv6_addr_any(&ifa->peer_addr)) {
 		if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
 		    nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0)
 			goto error;
-	} else
+	} else {
 		if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
 			goto error;
+	}
 
-	if (ifa->rt_priority &&
-	    nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority))
+	priority = READ_ONCE(ifa->rt_priority);
+	if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority))
 		goto error;
 
-	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+	if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp),
+			  preferred, valid) < 0)
 		goto error;
 
-	if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+	if (nla_put_u32(skb, IFA_FLAGS, flags) < 0)
 		goto error;
 
-	if (ifa->ifa_proto &&
-	    nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto))
+	proto = READ_ONCE(ifa->ifa_proto);
+	if (proto && nla_put_u8(skb, IFA_PROTO, proto))
 		goto error;
 
 	nlmsg_end(skb, nlh);
@@ -5197,12 +5203,13 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 	return -EMSGSIZE;
 }
 
-static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+static int inet6_fill_ifmcaddr(struct sk_buff *skb,
+			       const struct ifmcaddr6 *ifmca,
 			       struct inet6_fill_args *args)
 {
-	struct nlmsghdr  *nlh;
-	u8 scope = RT_SCOPE_UNIVERSE;
 	int ifindex = ifmca->idev->dev->ifindex;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	struct nlmsghdr *nlh;
 
 	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
@@ -5220,7 +5227,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
-	    put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+	    put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp),
 			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
 		nlmsg_cancel(skb, nlh);
 		return -EMSGSIZE;
@@ -5230,13 +5237,14 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 	return 0;
 }
 
-static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+static int inet6_fill_ifacaddr(struct sk_buff *skb,
+			       const struct ifacaddr6 *ifaca,
 			       struct inet6_fill_args *args)
 {
 	struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
 	int ifindex = dev ? dev->ifindex : 1;
-	struct nlmsghdr  *nlh;
 	u8 scope = RT_SCOPE_UNIVERSE;
+	struct nlmsghdr *nlh;
 
 	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
@@ -5254,7 +5262,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
-	    put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+	    put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp),
 			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
 		nlmsg_cancel(skb, nlh);
 		return -EMSGSIZE;
-- 
2.44.0.278.ge034bb2e1d-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next 2/4] ipv6: make in6_dump_addrs() lockless
  2024-03-06 15:51 [PATCH net-next 0/4] ipv6: lockless inet6_dump_addr() Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless Eric Dumazet
@ 2024-03-06 15:51 ` Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 3/4] ipv6: use xa_array iterator to implement inet6_dump_addr() Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 4/4] ipv6: remove RTNL protection from inet6_dump_addr() Eric Dumazet
  3 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2024-03-06 15:51 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, netdev, eric.dumazet, Eric Dumazet

in6_dump_addrs() is called with RCU protection.

There is no need holding idev->lock to iterate through unicast addresses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 480a1f9492b590bb13008cede5ea7dc9c422af67..f063a718893d989bc3362416a6b49ed14bb4c4ea 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5273,23 +5273,22 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb,
 }
 
 /* called with rcu_read_lock() */
-static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
+static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
 			  struct netlink_callback *cb, int s_ip_idx,
 			  struct inet6_fill_args *fillargs)
 {
-	struct ifmcaddr6 *ifmca;
-	struct ifacaddr6 *ifaca;
+	const struct ifmcaddr6 *ifmca;
+	const struct ifacaddr6 *ifaca;
 	int ip_idx = 0;
 	int err = 1;
 
-	read_lock_bh(&idev->lock);
 	switch (fillargs->type) {
 	case UNICAST_ADDR: {
-		struct inet6_ifaddr *ifa;
+		const struct inet6_ifaddr *ifa;
 		fillargs->event = RTM_NEWADDR;
 
 		/* unicast address incl. temp addr */
-		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
 			if (ip_idx < s_ip_idx)
 				goto next;
 			err = inet6_fill_ifaddr(skb, ifa, fillargs);
@@ -5302,7 +5301,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 		break;
 	}
 	case MULTICAST_ADDR:
-		read_unlock_bh(&idev->lock);
 		fillargs->event = RTM_GETMULTICAST;
 
 		/* multicast address */
@@ -5315,7 +5313,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 			if (err < 0)
 				break;
 		}
-		read_lock_bh(&idev->lock);
 		break;
 	case ANYCAST_ADDR:
 		fillargs->event = RTM_GETANYCAST;
@@ -5332,7 +5329,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 	default:
 		break;
 	}
-	read_unlock_bh(&idev->lock);
 	cb->args[2] = ip_idx;
 	return err;
 }
-- 
2.44.0.278.ge034bb2e1d-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next 3/4] ipv6: use xa_array iterator to implement inet6_dump_addr()
  2024-03-06 15:51 [PATCH net-next 0/4] ipv6: lockless inet6_dump_addr() Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 2/4] ipv6: make in6_dump_addrs() lockless Eric Dumazet
@ 2024-03-06 15:51 ` Eric Dumazet
  2024-03-06 15:51 ` [PATCH net-next 4/4] ipv6: remove RTNL protection from inet6_dump_addr() Eric Dumazet
  3 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2024-03-06 15:51 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, netdev, eric.dumazet, Eric Dumazet

inet6_dump_addr() can use the new xa_array iterator
for better scalability.

Make it ready for RCU-only protection.
RTNL use is removed in the following patch.

Also properly return 0 at the end of a dump to avoid
and extra recvmsg() to get NLMSG_DONE.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 79 +++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 49 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f063a718893d989bc3362416a6b49ed14bb4c4ea..82ba44a23bd7434e93e8a847f38cc72d8ce228a8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -717,7 +717,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 static u32 inet6_base_seq(const struct net *net)
 {
 	u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
-		  net->dev_base_seq;
+		  READ_ONCE(net->dev_base_seq);
 
 	/* Must not return 0 (see nl_dump_check_consistent()).
 	 * Chose a value far away from 0.
@@ -5274,13 +5274,13 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb,
 
 /* called with rcu_read_lock() */
 static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
-			  struct netlink_callback *cb, int s_ip_idx,
+			  struct netlink_callback *cb, int *s_ip_idx,
 			  struct inet6_fill_args *fillargs)
 {
 	const struct ifmcaddr6 *ifmca;
 	const struct ifacaddr6 *ifaca;
 	int ip_idx = 0;
-	int err = 1;
+	int err = 0;
 
 	switch (fillargs->type) {
 	case UNICAST_ADDR: {
@@ -5289,7 +5289,7 @@ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
 
 		/* unicast address incl. temp addr */
 		list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
-			if (ip_idx < s_ip_idx)
+			if (ip_idx < *s_ip_idx)
 				goto next;
 			err = inet6_fill_ifaddr(skb, ifa, fillargs);
 			if (err < 0)
@@ -5307,7 +5307,7 @@ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
 		for (ifmca = rcu_dereference(idev->mc_list);
 		     ifmca;
 		     ifmca = rcu_dereference(ifmca->next), ip_idx++) {
-			if (ip_idx < s_ip_idx)
+			if (ip_idx < *s_ip_idx)
 				continue;
 			err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
 			if (err < 0)
@@ -5319,7 +5319,7 @@ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
 		/* anycast address */
 		for (ifaca = rcu_dereference(idev->ac_list); ifaca;
 		     ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) {
-			if (ip_idx < s_ip_idx)
+			if (ip_idx < *s_ip_idx)
 				continue;
 			err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
 			if (err < 0)
@@ -5329,7 +5329,7 @@ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
 	default:
 		break;
 	}
-	cb->args[2] = ip_idx;
+	*s_ip_idx = err ? ip_idx : 0;
 	return err;
 }
 
@@ -5392,6 +5392,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
 static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			   enum addr_type_t type)
 {
+	struct net *tgt_net = sock_net(skb->sk);
 	const struct nlmsghdr *nlh = cb->nlh;
 	struct inet6_fill_args fillargs = {
 		.portid = NETLINK_CB(cb->skb).portid,
@@ -5400,72 +5401,52 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 		.netnsid = -1,
 		.type = type,
 	};
-	struct net *tgt_net = sock_net(skb->sk);
-	int idx, s_idx, s_ip_idx;
-	int h, s_h;
+	struct {
+		unsigned long ifindex;
+		int ip_idx;
+	} *ctx = (void *)cb->ctx;
 	struct net_device *dev;
 	struct inet6_dev *idev;
-	struct hlist_head *head;
 	int err = 0;
 
-	s_h = cb->args[0];
-	s_idx = idx = cb->args[1];
-	s_ip_idx = cb->args[2];
-
 	rcu_read_lock();
 	if (cb->strict_check) {
 		err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
 						  skb->sk, cb);
 		if (err < 0)
-			goto put_tgt_net;
+			goto done;
 
 		err = 0;
 		if (fillargs.ifindex) {
-			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
-			if (!dev) {
-				err = -ENODEV;
-				goto put_tgt_net;
-			}
+			err = -ENODEV;
+			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
+			if (!dev)
+				goto done;
 			idev = __in6_dev_get(dev);
-			if (idev) {
-				err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
+			if (idev)
+				err = in6_dump_addrs(idev, skb, cb,
+						     &ctx->ip_idx,
 						     &fillargs);
-				if (err > 0)
-					err = 0;
-			}
-			goto put_tgt_net;
+			goto done;
 		}
 	}
 
 	cb->seq = inet6_base_seq(tgt_net);
-	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
-		idx = 0;
-		head = &tgt_net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, head, index_hlist) {
-			if (idx < s_idx)
-				goto cont;
-			if (h > s_h || idx > s_idx)
-				s_ip_idx = 0;
-			idev = __in6_dev_get(dev);
-			if (!idev)
-				goto cont;
-
-			if (in6_dump_addrs(idev, skb, cb, s_ip_idx,
-					   &fillargs) < 0)
-				goto done;
-cont:
-			idx++;
-		}
+	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+		idev = __in6_dev_get(dev);
+		if (!idev)
+			continue;
+		err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx,
+				     &fillargs);
+		if (err < 0)
+			goto done;
 	}
 done:
-	cb->args[0] = h;
-	cb->args[1] = idx;
-put_tgt_net:
 	rcu_read_unlock();
 	if (fillargs.netnsid >= 0)
 		put_net(tgt_net);
 
-	return skb->len ? : err;
+	return err;
 }
 
 static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
-- 
2.44.0.278.ge034bb2e1d-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next 4/4] ipv6: remove RTNL protection from inet6_dump_addr()
  2024-03-06 15:51 [PATCH net-next 0/4] ipv6: lockless inet6_dump_addr() Eric Dumazet
                   ` (2 preceding siblings ...)
  2024-03-06 15:51 ` [PATCH net-next 3/4] ipv6: use xa_array iterator to implement inet6_dump_addr() Eric Dumazet
@ 2024-03-06 15:51 ` Eric Dumazet
  3 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2024-03-06 15:51 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: David Ahern, netdev, eric.dumazet, Eric Dumazet

We can now remove RTNL acquisition while running
inet6_dump_addr(), inet6_dump_ifmcaddr()
and inet6_dump_ifacaddr().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 82ba44a23bd7434e93e8a847f38cc72d8ce228a8..b72bdbb850a86a45b4ba7c83df2772f7214891e2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -7459,15 +7459,18 @@ int __init addrconf_init(void)
 		goto errout;
 	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
 				   inet6_rtm_getaddr, inet6_dump_ifaddr,
-				   RTNL_FLAG_DOIT_UNLOCKED);
+				   RTNL_FLAG_DOIT_UNLOCKED |
+				   RTNL_FLAG_DUMP_UNLOCKED);
 	if (err < 0)
 		goto errout;
 	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
-				   NULL, inet6_dump_ifmcaddr, 0);
+				   NULL, inet6_dump_ifmcaddr,
+				   RTNL_FLAG_DUMP_UNLOCKED);
 	if (err < 0)
 		goto errout;
 	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
-				   NULL, inet6_dump_ifacaddr, 0);
+				   NULL, inet6_dump_ifacaddr,
+				   RTNL_FLAG_DUMP_UNLOCKED);
 	if (err < 0)
 		goto errout;
 	err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
-- 
2.44.0.278.ge034bb2e1d-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless
  2024-03-06 15:51 ` [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless Eric Dumazet
@ 2024-03-06 23:38   ` David Ahern
  2024-03-07  6:24     ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: David Ahern @ 2024-03-06 23:38 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, eric.dumazet

On 3/6/24 8:51 AM, Eric Dumazet wrote:
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 2f84e6ecf19f48602cadb47bc378c9b5a1cdbf65..480a1f9492b590bb13008cede5ea7dc9c422af67 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -2730,7 +2730,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
>  		if (update_lft) {
>  			ifp->valid_lft = valid_lft;
>  			ifp->prefered_lft = prefered_lft;
> -			ifp->tstamp = now;
> +			WRITE_ONCE(ifp->tstamp, now);

There are a lot of instances of ifp->tstamp not annotated with READ_ONCE
or WRITE_ONCE. Most of them before this function seem to be updated or
read under rtnl. What's the general mode of operation for this patch?
e.g., there are tstamp references just above this one in this function
not modified. Commit message does not describe why some are updated and
others not.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless
  2024-03-06 23:38   ` David Ahern
@ 2024-03-07  6:24     ` Eric Dumazet
  2024-03-08 15:26       ` David Ahern
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2024-03-07  6:24 UTC (permalink / raw)
  To: David Ahern
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev, eric.dumazet

On Thu, Mar 7, 2024 at 12:38 AM David Ahern <dsahern@kernel.org> wrote:
>
> On 3/6/24 8:51 AM, Eric Dumazet wrote:
> > diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> > index 2f84e6ecf19f48602cadb47bc378c9b5a1cdbf65..480a1f9492b590bb13008cede5ea7dc9c422af67 100644
> > --- a/net/ipv6/addrconf.c
> > +++ b/net/ipv6/addrconf.c
> > @@ -2730,7 +2730,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
> >               if (update_lft) {
> >                       ifp->valid_lft = valid_lft;
> >                       ifp->prefered_lft = prefered_lft;
> > -                     ifp->tstamp = now;
> > +                     WRITE_ONCE(ifp->tstamp, now);
>
> There are a lot of instances of ifp->tstamp not annotated with READ_ONCE
> or WRITE_ONCE. Most of them before this function seem to be updated or
> read under rtnl. What's the general mode of operation for this patch?
> e.g., there are tstamp references just above this one in this function
> not modified. Commit message does not describe why some are updated and
> others not.


Writes on objects that are not yet visible to other threads/cpu do not
need a WRITE_ONCE()

ipv6_add_addr() allocates a fresh object, so

ifa->cstamp = ifa->tstamp = jiffies;  // do not need any WRITE_ONCE()


Reads while we own ifa->lock do no need a READ_ONCE()

So check_cleanup_prefix_route() :

  if (time_before(*expires, ifa->tstamp + lifetime * HZ))  // no need
       *expires = ifa->tstamp + lifetime * HZ;   // no need

In ipv6_create_tempaddr()

  age = (now - ifp->tstamp) / HZ; // no need because we hold ifp->lock;

In ipv6_create_tempaddr()

  age = (now - ifp->tstamp) / HZ; // no need, we hold ifp->lock;

  tmp_tstamp = ifp->tstamp; // no need, we hold ifp->lock;

 addrconf_prefix_rcv_add_addr()
  The reads are done under ifp->lock
  The write uses WRITE_ONCE()

I did a full audit and I think I did not miss any READ_ONCE()/WRITE_ONCE()

Of course, this is extra precaution anyway, the race has no impact
other than KCSAN and/or would require a dumb compiler in the first
place.

If I had to explain this in the changelog, I guess I would not do all
these changes, this would be too time consuming.

Thanks !

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless
  2024-03-07  6:24     ` Eric Dumazet
@ 2024-03-08 15:26       ` David Ahern
  2024-03-08 19:31         ` Jakub Kicinski
  0 siblings, 1 reply; 9+ messages in thread
From: David Ahern @ 2024-03-08 15:26 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev, eric.dumazet

On 3/6/24 11:24 PM, Eric Dumazet wrote:
> I did a full audit and I think I did not miss any READ_ONCE()/WRITE_ONCE()
> 
> Of course, this is extra precaution anyway, the race has no impact
> other than KCSAN and/or would require a dumb compiler in the first
> place.

Then I guess I do not need to waste my time on a detailed review:

Acked-by: David Ahern <dsahern@kernel.org>

> 
> If I had to explain this in the changelog, I guess I would not do all
> these changes, this would be too time consuming.

The request was something simple as the following in the changelog:

"New objects not in any list or table do not need the annotations, nor
do updates done while holding a lock."

Reminder for current reviewer and in the future of the intent behind the
change.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless
  2024-03-08 15:26       ` David Ahern
@ 2024-03-08 19:31         ` Jakub Kicinski
  0 siblings, 0 replies; 9+ messages in thread
From: Jakub Kicinski @ 2024-03-08 19:31 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, David S . Miller, Paolo Abeni, netdev, eric.dumazet

On Fri, 8 Mar 2024 08:26:56 -0700 David Ahern wrote:
> > If I had to explain this in the changelog, I guess I would not do all
> > these changes, this would be too time consuming.  
> 
> The request was something simple as the following in the changelog:
> 
> "New objects not in any list or table do not need the annotations, nor
> do updates done while holding a lock."

I was gonna add this when apply but looks like DaveM's already merged
this :S

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-03-08 19:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-06 15:51 [PATCH net-next 0/4] ipv6: lockless inet6_dump_addr() Eric Dumazet
2024-03-06 15:51 ` [PATCH net-next 1/4] ipv6: make inet6_fill_ifaddr() lockless Eric Dumazet
2024-03-06 23:38   ` David Ahern
2024-03-07  6:24     ` Eric Dumazet
2024-03-08 15:26       ` David Ahern
2024-03-08 19:31         ` Jakub Kicinski
2024-03-06 15:51 ` [PATCH net-next 2/4] ipv6: make in6_dump_addrs() lockless Eric Dumazet
2024-03-06 15:51 ` [PATCH net-next 3/4] ipv6: use xa_array iterator to implement inet6_dump_addr() Eric Dumazet
2024-03-06 15:51 ` [PATCH net-next 4/4] ipv6: remove RTNL protection from inet6_dump_addr() Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.