All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andy Roulin <aroulin@nvidia.com>
To: fruggeri@arista.com
Cc: netdev@vger.kernel.org
Subject: Re: neighbour netlink notifications delivered in wrong order
Date: Mon, 6 Jun 2022 19:07:04 -0700	[thread overview]
Message-ID: <ed6768c1-80b8-aee2-e545-b51661d49336@nvidia.com> (raw)
In-Reply-To: <20220606230107.D70B55EC0B30@us226.sjc.aristanetworks.com>

Below is the patch I have been using and it has worked for me. I didn't 
get a chance yet to test all cases or with net-next but I am planning to 
send upstream.

----

neigh_update sends a rtnl notification if an update, e.g.,
nud_state change, was done but there is no guarantee of
ordering of the rtnl notifications. Consider the following
scenario:

userspace thread                   kernel thread
================                   =============
neigh_update
   write_lock_bh(n->lock)
   n->nud_state = STALE
   write_unlock_bh(n->lock)
   neigh_notify
     neigh_fill_info
       read_lock_bh(n->lock)
       ndm->nud_state = STALE
       read_unlock_bh(n->lock)
     -------------------------->
			          neigh:update
				  write_lock_bh(n->lock)
				  n->nud_state = REACHABLE
				  write_unlock_bh(n->lock)
			          neigh_notify
			            neigh_fill_info
                                       read_lock_bh(n->lock)
                                       ndm->nud_state = REACHABLE
                                       read_unlock_bh(n->lock)
			            rtnl_nofify
				  RTNL REACHABLE sent
		        <--------
    rtnl_notify
    RTNL STALE sent

In this scenario, the kernel neigh is updated first to STALE and
then REACHABLE but the netlink notifications are sent out of order,
first REACHABLE and then STALE.

To fix this ordering, use read_lock_bh(n->lock) for both reading the
neigh state (neigh_fill_info) __and__ sending the netlink notification
(rtnl_notify).

Signed-off-by: Andy Roulin <aroulin@nvidia.com>
---
  net/core/neighbour.c | 25 ++++++++++++++++---------
  1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 54625287ee5b..a91dfcbfc01c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2531,23 +2531,19 @@ static int neigh_fill_info(struct sk_buff *skb, 
struct neighbour *neigh,
  	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
  		goto nla_put_failure;

-	read_lock_bh(&neigh->lock);
  	ndm->ndm_state	 = neigh->nud_state;
  	if (neigh->nud_state & NUD_VALID) {
  		char haddr[MAX_ADDR_LEN];

  		neigh_ha_snapshot(haddr, neigh, neigh->dev);
-		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
-			read_unlock_bh(&neigh->lock);
+		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
  			goto nla_put_failure;
-		}
  	}

  	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
  	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
  	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
  	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
-	read_unlock_bh(&neigh->lock);

  	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
  	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
@@ -2674,10 +2670,15 @@ static int neigh_dump_table(struct neigh_table 
*tbl, struct sk_buff *skb,
  			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
  			    neigh_master_filtered(n->dev, filter->master_idx))
  				goto next;
-			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
-					    cb->nlh->nlmsg_seq,
-					    RTM_NEWNEIGH,
-					    flags) < 0) {
+
+			read_lock_bh(&n->lock);
+			rc = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+					     cb->nlh->nlmsg_seq,
+					     RTM_NEWNEIGH,
+					     flags);
+			read_unlock_bh(&n->lock);
+
+			if (rc < 0) {
  				rc = -1;
  				goto out;
  			}
@@ -2926,7 +2927,10 @@ static int neigh_get_reply(struct net *net, 
struct neighbour *neigh,
  	if (!skb)
  		return -ENOBUFS;

+	read_lock_bh(&neigh->lock);
  	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+	read_unlock_bh(&neigh->lock);
+
  	if (err) {
  		kfree_skb(skb);
  		goto errout;
@@ -3460,14 +3464,17 @@ static void __neigh_notify(struct neighbour *n, 
int type, int flags,
  	if (skb == NULL)
  		goto errout;

+	read_lock_bh(&n->lock);
  	err = neigh_fill_info(skb, n, pid, 0, type, flags);
  	if (err < 0) {
  		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
  		WARN_ON(err == -EMSGSIZE);
+		read_unlock_bh(&n->lock);
  		kfree_skb(skb);
  		goto errout;
  	}
  	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	read_unlock_bh(&n->lock);
  	return;
  errout:
  	if (err < 0)
-- 
2.20.1


  reply	other threads:[~2022-06-07  2:07 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-06 23:01 neighbour netlink notifications delivered in wrong order Francesco Ruggeri
2022-06-07  2:07 ` Andy Roulin [this message]
2022-06-07  3:19   ` Stephen Hemminger
2022-06-07 16:29     ` Francesco Ruggeri
2022-06-07 17:32       ` Stephen Hemminger
2022-06-07 20:03         ` Francesco Ruggeri
2022-06-08  3:49           ` Andy Roulin
2022-06-09 16:40             ` Francesco Ruggeri
2022-06-10 16:18               ` Francesco Ruggeri
2022-06-16 18:33                 ` Andy Roulin
2023-04-11 19:49                   ` Kevin Mitchell
2023-04-12  0:41             ` Stephen Hemminger
2023-04-12  1:22               ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ed6768c1-80b8-aee2-e545-b51661d49336@nvidia.com \
    --to=aroulin@nvidia.com \
    --cc=fruggeri@arista.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.