All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/4] Managed Neighbor Entries
@ 2021-10-11 12:12 Daniel Borkmann
  2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-11 12:12 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf, Daniel Borkmann

This series adds a couple of fixes related to NTF_EXT_LEARNED and NTF_USE
neighbor flags, extends the UAPI with a new NDA_FLAGS_EXT netlink attribute
in order to be able to add new neighbor flags from user space given all
current struct ndmsg / ndm_flags bits are used up. Finally, the core of this
series adds a new NTF_EXT_MANAGED flag to neighbors, which allows user space
control planes to add 'managed' neighbor entries. Meaning, user space may
either transition existing entries or can push down new L3 entries without
lladdr into the kernel where the latter will periodically try to keep such
NTF_EXT_MANAGED entries in reachable state. The main use case for this series
is XDP / tc BPF load-balancers which make use of the bpf_fib_lookup() helper
for backends. For more details, please see the individual patches. Thanks!

Daniel Borkmann (3):
  net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE
  net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE
  net, neigh: Add NTF_MANAGED flag for managed neighbor entries

Roopa Prabhu (1):
  net, neigh: Extend neigh->flags to 32 bit to allow for extensions

 include/net/neighbour.h        |  34 ++++--
 include/uapi/linux/neighbour.h |  35 ++++--
 net/core/neighbour.c           | 196 +++++++++++++++++++++++----------
 3 files changed, 187 insertions(+), 78 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE
  2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
@ 2021-10-11 12:12 ` Daniel Borkmann
  2021-10-12 14:23   ` David Ahern
  2021-10-11 12:12 ` [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Daniel Borkmann
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-11 12:12 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf, Daniel Borkmann

The NTF_EXT_LEARNED neigh flag is usually propagated back to user space
upon dump of the neighbor table. However, when used in combination with
NTF_USE flag this is not the case despite exempting the entry from the
garbage collector. This results in inconsistent state since entries are
typically marked in neigh->flags with NTF_EXT_LEARNED, but here they are
not. Fix it by propagating the creation flag to ___neigh_create().

Before fix:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE
  [...]

After fix:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE
  [...]

Fixes: 9ce33e46531d ("neighbour: support for NTF_EXT_LEARNED flag")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
---
 net/core/neighbour.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2d5bc3a75fae..8457d5f97022 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -379,7 +379,7 @@ EXPORT_SYMBOL(neigh_ifdown);
 
 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 				     struct net_device *dev,
-				     bool exempt_from_gc)
+				     u8 flags, bool exempt_from_gc)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
@@ -412,6 +412,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
+	n->flags	  = flags;
 	seqlock_init(&n->hh.hh_lock);
 	n->parms	  = neigh_parms_clone(&tbl->parms);
 	timer_setup(&n->timer, neigh_timer_handler, 0);
@@ -575,19 +576,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 }
 EXPORT_SYMBOL(neigh_lookup_nodev);
 
-static struct neighbour *___neigh_create(struct neigh_table *tbl,
-					 const void *pkey,
-					 struct net_device *dev,
-					 bool exempt_from_gc, bool want_ref)
+static struct neighbour *
+___neigh_create(struct neigh_table *tbl, const void *pkey,
+		struct net_device *dev, u8 flags,
+		bool exempt_from_gc, bool want_ref)
 {
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
-	u32 hash_val;
-	unsigned int key_len = tbl->key_len;
-	int error;
+	u32 hash_val, key_len = tbl->key_len;
+	struct neighbour *n1, *rc, *n;
 	struct neigh_hash_table *nht;
+	int error;
 
+	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
 	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
-
 	if (!n) {
 		rc = ERR_PTR(-ENOBUFS);
 		goto out;
@@ -674,7 +674,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 				 struct net_device *dev, bool want_ref)
 {
-	return ___neigh_create(tbl, pkey, dev, false, want_ref);
+	return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
 }
 EXPORT_SYMBOL(__neigh_create);
 
@@ -1942,7 +1942,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
 				 ndm->ndm_flags & NTF_EXT_LEARNED;
-		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
+		neigh = ___neigh_create(tbl, dst, dev,
+					ndm->ndm_flags & NTF_EXT_LEARNED,
+					exempt_from_gc, true);
 		if (IS_ERR(neigh)) {
 			err = PTR_ERR(neigh);
 			goto out;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE
  2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
  2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
@ 2021-10-11 12:12 ` Daniel Borkmann
  2021-10-12 14:25   ` David Ahern
  2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
  2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
  3 siblings, 1 reply; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-11 12:12 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf, Daniel Borkmann

Currently, it is not possible to migrate a neighbor entry between NUD_PERMANENT
state and NTF_USE flag with a dynamic NUD state from a user space control plane.
Similarly, it is not possible to add/remove NTF_EXT_LEARNED flag from an existing
neighbor entry in combination with NTF_USE flag.

This is because the NTF_USE path calls directly into neigh_event_send() without
any of the metadata updates that happen in __neigh_update(). Thus, to enable
this use case, extend __neigh_update() with a NEIGH_UPDATE_F_USE flag where we
break the NUD_PERMANENT state in particular so that a later neigh_event_send()
is able to re-resolve the neighbor entry.

Before fix, NUD_PERMANENT -> NUD_* & NTF_USE:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT
  [...]
  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT
  [...]

As can be seen, despite the admin-triggered replace, the entry remains in the
NUD_PERMANENT state.

After fix, NUD_PERMANENT -> NUD_* & NTF_USE:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT
  [...]
  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE
  [...]
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn STALE
  [...]
  # ./ip/ip n replace 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a PERMANENT
  [...]

After the fix, the admin-triggered replace switches to a dynamic state from
the NTF_USE flag which triggered a new neighbor resolution. Likewise, we can
transition back from there, if needed, into NUD_PERMANENT.

Similar before/after behavior can be observed for below transitions:

Before fix, NTF_USE -> NTF_USE | NTF_EXT_LEARNED -> NTF_USE:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE
  [...]
  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE
  [...]

After fix, NTF_USE -> NTF_USE | NTF_EXT_LEARNED -> NTF_USE:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE
  [...]
  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE
  [...]
  # ./ip/ip n replace 192.168.178.30 dev enp5s0 use
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE
  [..]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
---
 include/net/neighbour.h |  1 +
 net/core/neighbour.c    | 22 +++++++++++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 22ced1381ede..eb2a7c03a5b0 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -253,6 +253,7 @@ static inline void *neighbour_priv(const struct neighbour *n)
 #define NEIGH_UPDATE_F_OVERRIDE			0x00000001
 #define NEIGH_UPDATE_F_WEAK_OVERRIDE		0x00000002
 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER	0x00000004
+#define NEIGH_UPDATE_F_USE			0x10000000
 #define NEIGH_UPDATE_F_EXT_LEARNED		0x20000000
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8457d5f97022..3e58037a8ae6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1217,7 +1217,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
 				lladdr instead of overriding it
 				if it is different.
 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
-
+	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
 				NTF_ROUTER flag.
 	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
@@ -1255,6 +1255,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
 		goto out;
 
 	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
+	if (flags & NEIGH_UPDATE_F_USE) {
+		new = old & ~NUD_PERMANENT;
+		neigh->nud_state = new;
+		err = 0;
+		goto out;
+	}
 
 	if (!(new & NUD_VALID)) {
 		neigh_del_timer(neigh);
@@ -1963,22 +1969,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (protocol)
 		neigh->protocol = protocol;
-
 	if (ndm->ndm_flags & NTF_EXT_LEARNED)
 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
-
 	if (ndm->ndm_flags & NTF_ROUTER)
 		flags |= NEIGH_UPDATE_F_ISROUTER;
+	if (ndm->ndm_flags & NTF_USE)
+		flags |= NEIGH_UPDATE_F_USE;
 
-	if (ndm->ndm_flags & NTF_USE) {
+	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+			     NETLINK_CB(skb).portid, extack);
+	if (!err && ndm->ndm_flags & NTF_USE) {
 		neigh_event_send(neigh, NULL);
 		err = 0;
-	} else
-		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
-				     NETLINK_CB(skb).portid, extack);
-
+	}
 	neigh_release(neigh);
-
 out:
 	return err;
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions
  2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
  2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
  2021-10-11 12:12 ` [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Daniel Borkmann
@ 2021-10-11 12:12 ` Daniel Borkmann
  2021-10-12 14:31   ` David Ahern
  2021-10-12 21:46   ` Jakub Kicinski
  2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
  3 siblings, 2 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-11 12:12 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf, Daniel Borkmann

From: Roopa Prabhu <roopa@nvidia.com>

Currently, all bits in struct ndmsg's ndm_flags are used up with the most
recent addition of 435f2e7cc0b7 ("net: bridge: add support for sticky fdb
entries"). This makes it impossible to extend the neighboring subsystem
with new NTF_* flags:

  struct ndmsg {
    __u8   ndm_family;
    __u8   ndm_pad1;
    __u16  ndm_pad2;
    __s32  ndm_ifindex;
    __u16  ndm_state;
    __u8   ndm_flags;
    __u8   ndm_type;
  };

There are ndm_pad{1,2} fields which are not used. However, due to an
oversight in the original design, the kernel does not enforce them to be zero
upon new neighbor entry addition, and given they've been around forever, it is
not possible to reuse them today due to risk of breakage. One option to
overcome this limitation is to add a new NDA_FLAGS_EXT attribute for
extended flags.

In struct neighbour, there is a 3 byte hole between protocol and ha_lock,
which allows neigh->flags to be extended from 8 to 32 bits while still
being on the same cacheline as before. This also allows for all future
NTF_* flags being in neigh->flags rather than yet another flags field.
Unknown flags in NDA_FLAGS_EXT will be rejected by the kernel.

Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Roopa Prabhu <roopa@nvidia.com>
---
 include/net/neighbour.h        | 14 +++++----
 include/uapi/linux/neighbour.h |  1 +
 net/core/neighbour.c           | 55 ++++++++++++++++++++++++----------
 3 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index eb2a7c03a5b0..26d4ada0aea9 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -144,11 +144,11 @@ struct neighbour {
 	struct timer_list	timer;
 	unsigned long		used;
 	atomic_t		probes;
-	__u8			flags;
-	__u8			nud_state;
-	__u8			type;
-	__u8			dead;
+	u8			nud_state;
+	u8			type;
+	u8			dead;
 	u8			protocol;
+	u32			flags;
 	seqlock_t		ha_lock;
 	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8);
 	struct hh_cache		hh;
@@ -172,7 +172,7 @@ struct pneigh_entry {
 	struct pneigh_entry	*next;
 	possible_net_t		net;
 	struct net_device	*dev;
-	u8			flags;
+	u32			flags;
 	u8			protocol;
 	u8			key[];
 };
@@ -258,6 +258,10 @@ static inline void *neighbour_priv(const struct neighbour *n)
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
 
+/* In-kernel representation for NDA_FLAGS_EXT flags: */
+#define NTF_OLD_MASK		0xff
+#define NTF_EXT_SHIFT		8
+
 extern const struct nla_policy nda_policy[];
 
 static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey)
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 00a60695fa53..a80cca141855 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -31,6 +31,7 @@ enum {
 	NDA_PROTOCOL,  /* Originator of entry */
 	NDA_NH_ID,
 	NDA_FDB_EXT_ATTRS,
+	NDA_FLAGS_EXT,
 	__NDA_MAX
 };
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3e58037a8ae6..5245e888c981 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -159,7 +159,7 @@ static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
 				     int *notify)
 {
 	bool rc = false;
-	u8 ndm_flags;
+	u32 ndm_flags;
 
 	if (!(flags & NEIGH_UPDATE_F_ADMIN))
 		return rc;
@@ -379,7 +379,7 @@ EXPORT_SYMBOL(neigh_ifdown);
 
 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 				     struct net_device *dev,
-				     u8 flags, bool exempt_from_gc)
+				     u32 flags, bool exempt_from_gc)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(neigh_lookup_nodev);
 
 static struct neighbour *
 ___neigh_create(struct neigh_table *tbl, const void *pkey,
-		struct net_device *dev, u8 flags,
+		struct net_device *dev, u32 flags,
 		bool exempt_from_gc, bool want_ref)
 {
 	u32 hash_val, key_len = tbl->key_len;
@@ -1789,6 +1789,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
 	[NDA_MASTER]		= { .type = NLA_U32 },
 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
 	[NDA_NH_ID]		= { .type = NLA_U32 },
+	[NDA_FLAGS_EXT]		= { .type = NLA_U32 },
 	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
 };
 
@@ -1861,7 +1862,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		     struct netlink_ext_ack *extack)
 {
 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
-		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
 	struct net *net = sock_net(skb->sk);
 	struct ndmsg *ndm;
 	struct nlattr *tb[NDA_MAX+1];
@@ -1870,6 +1871,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct neighbour *neigh;
 	void *dst, *lladdr;
 	u8 protocol = 0;
+	u32 ndm_flags;
 	int err;
 
 	ASSERT_RTNL();
@@ -1885,6 +1887,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	ndm = nlmsg_data(nlh);
+	ndm_flags = ndm->ndm_flags;
+	if (tb[NDA_FLAGS_EXT]) {
+		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
+
+		if (ext & ~0) {
+			NL_SET_ERR_MSG(extack, "Invalid extended flags");
+			goto out;
+		}
+		ndm_flags |= (ext << NTF_EXT_SHIFT);
+	}
 	if (ndm->ndm_ifindex) {
 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 		if (dev == NULL) {
@@ -1912,14 +1924,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (tb[NDA_PROTOCOL])
 		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
-
-	if (ndm->ndm_flags & NTF_PROXY) {
+	if (ndm_flags & NTF_PROXY) {
 		struct pneigh_entry *pn;
 
 		err = -ENOBUFS;
 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
 		if (pn) {
-			pn->flags = ndm->ndm_flags;
+			pn->flags = ndm_flags;
 			if (protocol)
 				pn->protocol = protocol;
 			err = 0;
@@ -1947,9 +1958,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		}
 
 		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
-				 ndm->ndm_flags & NTF_EXT_LEARNED;
+				 ndm_flags & NTF_EXT_LEARNED;
 		neigh = ___neigh_create(tbl, dst, dev,
-					ndm->ndm_flags & NTF_EXT_LEARNED,
+					ndm_flags & NTF_EXT_LEARNED,
 					exempt_from_gc, true);
 		if (IS_ERR(neigh)) {
 			err = PTR_ERR(neigh);
@@ -1969,16 +1980,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (protocol)
 		neigh->protocol = protocol;
-	if (ndm->ndm_flags & NTF_EXT_LEARNED)
+	if (ndm_flags & NTF_EXT_LEARNED)
 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
-	if (ndm->ndm_flags & NTF_ROUTER)
+	if (ndm_flags & NTF_ROUTER)
 		flags |= NEIGH_UPDATE_F_ISROUTER;
-	if (ndm->ndm_flags & NTF_USE)
+	if (ndm_flags & NTF_USE)
 		flags |= NEIGH_UPDATE_F_USE;
 
 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
 			     NETLINK_CB(skb).portid, extack);
-	if (!err && ndm->ndm_flags & NTF_USE) {
+	if (!err && ndm_flags & NTF_USE) {
 		neigh_event_send(neigh, NULL);
 		err = 0;
 	}
@@ -2433,6 +2444,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 			   u32 pid, u32 seq, int type, unsigned int flags)
 {
+	u32 neigh_flags, neigh_flags_ext;
 	unsigned long now = jiffies;
 	struct nda_cacheinfo ci;
 	struct nlmsghdr *nlh;
@@ -2442,11 +2454,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
+	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
+	neigh_flags     = neigh->flags & NTF_OLD_MASK;
+
 	ndm = nlmsg_data(nlh);
 	ndm->ndm_family	 = neigh->ops->family;
 	ndm->ndm_pad1    = 0;
 	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags	 = neigh->flags;
+	ndm->ndm_flags	 = neigh_flags;
 	ndm->ndm_type	 = neigh->type;
 	ndm->ndm_ifindex = neigh->dev->ifindex;
 
@@ -2477,6 +2492,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 
 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
 		goto nla_put_failure;
+	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -2490,6 +2507,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
 			    u32 pid, u32 seq, int type, unsigned int flags,
 			    struct neigh_table *tbl)
 {
+	u32 neigh_flags, neigh_flags_ext;
 	struct nlmsghdr *nlh;
 	struct ndmsg *ndm;
 
@@ -2497,11 +2515,14 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
+	neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
+	neigh_flags     = pn->flags & NTF_OLD_MASK;
+
 	ndm = nlmsg_data(nlh);
 	ndm->ndm_family	 = tbl->family;
 	ndm->ndm_pad1    = 0;
 	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
+	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
 	ndm->ndm_type	 = RTN_UNICAST;
 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
 	ndm->ndm_state	 = NUD_NONE;
@@ -2511,6 +2532,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
 
 	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
 		goto nla_put_failure;
+	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -2826,6 +2849,7 @@ static inline size_t neigh_nlmsg_size(void)
 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
 	       + nla_total_size(sizeof(struct nda_cacheinfo))
 	       + nla_total_size(4)  /* NDA_PROBES */
+	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
 	       + nla_total_size(1); /* NDA_PROTOCOL */
 }
 
@@ -2854,6 +2878,7 @@ static inline size_t pneigh_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
 	       + nla_total_size(1); /* NDA_PROTOCOL */
 }
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
                   ` (2 preceding siblings ...)
  2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
@ 2021-10-11 12:12 ` Daniel Borkmann
  2021-10-12 14:51   ` David Ahern
                     ` (2 more replies)
  3 siblings, 3 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-11 12:12 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf, Daniel Borkmann

Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
flag. The flag then indicates to the kernel that the neighbor entry should be
periodically probed in order to keep the entry in NUD_REACHABLE state if possible.

The use case for this is targeting XDP or tc BPF load-balancers which use
the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
for their backends. Given they cannot be resolved in fast-path, a control
plane inserts the L3 (without L2) entries manually into the neighbor table
and lets the kernel do the neighbor resolution either on the gateway or on
the backend directly in case the latter resides in the same L2. This avoids
having to deal with L2 in the control plane and having to rebuild what the
kernel already does best anyway.

NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
eviction. The kernel then adds NTF_MANAGED flagged entries to a list kept per
neighbor table, which is processed from the system work queue to periodically
call neigh_event_send() for performing the resolution. The implementation allows
migration from/to NTF_MANAGED neighbor entries, so that already existing
entries can be converted by the control plane if needed. Potentially, we could
make the interval for periodically calling neigh_event_send() configurable;
right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router:
Periodically update the kernel's neigh table"). In future, the latter could
possibly reuse the NTF_MANAGED neighbors as well.

Example:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://linuxplumbersconf.org/event/11/contributions/953/
---
 include/net/neighbour.h        |  21 ++++--
 include/uapi/linux/neighbour.h |  34 ++++++----
 net/core/neighbour.c           | 113 ++++++++++++++++++++++++---------
 3 files changed, 120 insertions(+), 48 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 26d4ada0aea9..e8e48be66755 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -155,6 +155,7 @@ struct neighbour {
 	int			(*output)(struct neighbour *, struct sk_buff *);
 	const struct neigh_ops	*ops;
 	struct list_head	gc_list;
+	struct list_head	managed_list;
 	struct rcu_head		rcu;
 	struct net_device	*dev;
 	u8			primary_key[0];
@@ -216,11 +217,13 @@ struct neigh_table {
 	int			gc_thresh3;
 	unsigned long		last_flush;
 	struct delayed_work	gc_work;
+	struct delayed_work	managed_work;
 	struct timer_list 	proxy_timer;
 	struct sk_buff_head	proxy_queue;
 	atomic_t		entries;
 	atomic_t		gc_entries;
 	struct list_head	gc_list;
+	struct list_head	managed_list;
 	rwlock_t		lock;
 	unsigned long		last_rand;
 	struct neigh_statistics	__percpu *stats;
@@ -250,17 +253,21 @@ static inline void *neighbour_priv(const struct neighbour *n)
 }
 
 /* flags for neigh_update() */
-#define NEIGH_UPDATE_F_OVERRIDE			0x00000001
-#define NEIGH_UPDATE_F_WEAK_OVERRIDE		0x00000002
-#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER	0x00000004
-#define NEIGH_UPDATE_F_USE			0x10000000
-#define NEIGH_UPDATE_F_EXT_LEARNED		0x20000000
-#define NEIGH_UPDATE_F_ISROUTER			0x40000000
-#define NEIGH_UPDATE_F_ADMIN			0x80000000
+#define NEIGH_UPDATE_F_OVERRIDE			BIT(0)
+#define NEIGH_UPDATE_F_WEAK_OVERRIDE		BIT(1)
+#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER	BIT(2)
+#define NEIGH_UPDATE_F_USE			BIT(3)
+#define NEIGH_UPDATE_F_MANAGED			BIT(4)
+#define NEIGH_UPDATE_F_EXT_LEARNED		BIT(5)
+#define NEIGH_UPDATE_F_ISROUTER			BIT(6)
+#define NEIGH_UPDATE_F_ADMIN			BIT(7)
 
 /* In-kernel representation for NDA_FLAGS_EXT flags: */
 #define NTF_OLD_MASK		0xff
 #define NTF_EXT_SHIFT		8
+#define NTF_EXT_MASK		(NTF_EXT_MANAGED)
+
+#define NTF_MANAGED		(NTF_EXT_MANAGED << NTF_EXT_SHIFT)
 
 extern const struct nla_policy nda_policy[];
 
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index a80cca141855..db05fb55055e 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -41,14 +41,16 @@ enum {
  *	Neighbor Cache Entry Flags
  */
 
-#define NTF_USE		0x01
-#define NTF_SELF	0x02
-#define NTF_MASTER	0x04
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_EXT_LEARNED	0x10
-#define NTF_OFFLOADED   0x20
-#define NTF_STICKY	0x40
-#define NTF_ROUTER	0x80
+#define NTF_USE		(1 << 0)
+#define NTF_SELF	(1 << 1)
+#define NTF_MASTER	(1 << 2)
+#define NTF_PROXY	(1 << 3)	/* == ATF_PUBL */
+#define NTF_EXT_LEARNED	(1 << 4)
+#define NTF_OFFLOADED   (1 << 5)
+#define NTF_STICKY	(1 << 6)
+#define NTF_ROUTER	(1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED	(1 << 0)
 
 /*
  *	Neighbor Cache Entry States.
@@ -66,12 +68,22 @@ enum {
 #define NUD_PERMANENT	0x80
 #define NUD_NONE	0x00
 
-/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
- * and make no address resolution or NUD.
- * NUD_PERMANENT also cannot be deleted by garbage collectors.
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
  * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
  * states don't make sense and thus are ignored. Such entries don't age and
  * can roam.
+ *
+ * NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
  */
 
 struct nda_cacheinfo {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5245e888c981..eae73efa9245 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
 		list_del_init(&n->gc_list);
 		atomic_dec(&n->tbl->gc_entries);
 	}
+	if (!list_empty(&n->managed_list))
+		list_del_init(&n->managed_list);
 }
 
 static void neigh_update_gc_list(struct neighbour *n)
@@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
 
 	write_lock_bh(&n->tbl->lock);
 	write_lock(&n->lock);
-
 	if (n->dead)
 		goto out;
 
@@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
 		atomic_inc(&n->tbl->gc_entries);
 	}
+out:
+	write_unlock(&n->lock);
+	write_unlock_bh(&n->tbl->lock);
+}
+
+static void neigh_update_managed_list(struct neighbour *n)
+{
+	bool on_managed_list, add_to_managed;
+
+	write_lock_bh(&n->tbl->lock);
+	write_lock(&n->lock);
+	if (n->dead)
+		goto out;
+
+	add_to_managed = n->flags & NTF_MANAGED;
+	on_managed_list = !list_empty(&n->managed_list);
 
+	if (!add_to_managed && on_managed_list)
+		list_del_init(&n->managed_list);
+	else if (add_to_managed && !on_managed_list)
+		list_add_tail(&n->managed_list, &n->tbl->managed_list);
 out:
 	write_unlock(&n->lock);
 	write_unlock_bh(&n->tbl->lock);
 }
 
-static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
-				     int *notify)
+static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
+			       bool *gc_update, bool *managed_update)
 {
-	bool rc = false;
-	u32 ndm_flags;
+	u32 ndm_flags, old_flags = neigh->flags;
 
 	if (!(flags & NEIGH_UPDATE_F_ADMIN))
-		return rc;
+		return;
+
+	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
 
-	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
-	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
 		if (ndm_flags & NTF_EXT_LEARNED)
 			neigh->flags |= NTF_EXT_LEARNED;
 		else
 			neigh->flags &= ~NTF_EXT_LEARNED;
-		rc = true;
 		*notify = 1;
+		*gc_update = true;
+	}
+	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
+		if (ndm_flags & NTF_MANAGED)
+			neigh->flags |= NTF_MANAGED;
+		else
+			neigh->flags &= ~NTF_MANAGED;
+		*notify = 1;
+		*managed_update = true;
 	}
-
-	return rc;
 }
 
 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
@@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 	refcount_set(&n->refcnt, 1);
 	n->dead		  = 1;
 	INIT_LIST_HEAD(&n->gc_list);
+	INIT_LIST_HEAD(&n->managed_list);
 
 	atomic_inc(&tbl->entries);
 out:
@@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
 	n->dead = 0;
 	if (!exempt_from_gc)
 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
-
+	if (n->flags & NTF_MANAGED)
+		list_add_tail(&n->managed_list, &n->tbl->managed_list);
 	if (want_ref)
 		neigh_hold(n);
 	rcu_assign_pointer(n->next,
@@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
 	}
 }
 
-
-
 /* Generic update routine.
    -- lladdr is new lladdr or NULL, if it is not supplied.
    -- new    is new state.
@@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
 				if it is different.
 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
 	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
+	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
 				NTF_ROUTER flag.
 	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
@@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
 
    Caller MUST hold reference count on the entry.
  */
-
 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
 			  u8 new, u32 flags, u32 nlmsg_pid,
 			  struct netlink_ext_ack *extack)
 {
-	bool ext_learn_change = false;
-	u8 old;
-	int err;
-	int notify = 0;
-	struct net_device *dev;
+	bool gc_update = false, managed_update = false;
 	int update_isrouter = 0;
+	struct net_device *dev;
+	int err, notify = 0;
+	u8 old;
 
 	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
 
@@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
 	    (old & (NUD_NOARP | NUD_PERMANENT)))
 		goto out;
 
-	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
-	if (flags & NEIGH_UPDATE_F_USE) {
+	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
+	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
 		new = old & ~NUD_PERMANENT;
 		neigh->nud_state = new;
 		err = 0;
@@ -1405,15 +1432,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
 	if (update_isrouter)
 		neigh_update_is_router(neigh, flags, &notify);
 	write_unlock_bh(&neigh->lock);
-
-	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+	if (((new ^ old) & NUD_PERMANENT) || gc_update)
 		neigh_update_gc_list(neigh);
-
+	if (managed_update)
+		neigh_update_managed_list(neigh);
 	if (notify)
 		neigh_update_notify(neigh, nlmsg_pid);
-
 	trace_neigh_update_done(neigh, err);
-
 	return err;
 }
 
@@ -1539,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(neigh_direct_output);
 
+static void neigh_managed_work(struct work_struct *work)
+{
+	struct neigh_table *tbl = container_of(work, struct neigh_table,
+					       managed_work.work);
+	struct neighbour *neigh;
+
+	write_lock_bh(&tbl->lock);
+	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
+		neigh_event_send(neigh, NULL);
+	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
+			   NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+	write_unlock_bh(&tbl->lock);
+}
+
 static void neigh_proxy_process(struct timer_list *t)
 {
 	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
@@ -1685,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
 
 	INIT_LIST_HEAD(&tbl->parms_list);
 	INIT_LIST_HEAD(&tbl->gc_list);
+	INIT_LIST_HEAD(&tbl->managed_list);
+
 	list_add(&tbl->parms.list, &tbl->parms_list);
 	write_pnet(&tbl->parms.net, &init_net);
 	refcount_set(&tbl->parms.refcnt, 1);
@@ -1716,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
 
 	rwlock_init(&tbl->lock);
+
 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
 			tbl->parms.reachable_time);
+	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
+	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
+
 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
 	skb_queue_head_init_class(&tbl->proxy_queue,
 			&neigh_table_proxy_queue_class);
@@ -1891,7 +1936,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tb[NDA_FLAGS_EXT]) {
 		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
 
-		if (ext & ~0) {
+		if (ext & ~NTF_EXT_MASK) {
 			NL_SET_ERR_MSG(extack, "Invalid extended flags");
 			goto out;
 		}
@@ -1927,6 +1972,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (ndm_flags & NTF_PROXY) {
 		struct pneigh_entry *pn;
 
+		if (ndm_flags & NTF_MANAGED) {
+			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
+			goto out;
+		}
+
 		err = -ENOBUFS;
 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
 		if (pn) {
@@ -1960,7 +2010,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
 				 ndm_flags & NTF_EXT_LEARNED;
 		neigh = ___neigh_create(tbl, dst, dev,
-					ndm_flags & NTF_EXT_LEARNED,
+					ndm_flags &
+					(NTF_EXT_LEARNED | NTF_MANAGED),
 					exempt_from_gc, true);
 		if (IS_ERR(neigh)) {
 			err = PTR_ERR(neigh);
@@ -1984,12 +2035,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
 	if (ndm_flags & NTF_ROUTER)
 		flags |= NEIGH_UPDATE_F_ISROUTER;
+	if (ndm_flags & NTF_MANAGED)
+		flags |= NEIGH_UPDATE_F_MANAGED;
 	if (ndm_flags & NTF_USE)
 		flags |= NEIGH_UPDATE_F_USE;
 
 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
 			     NETLINK_CB(skb).portid, extack);
-	if (!err && ndm_flags & NTF_USE) {
+	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
 		neigh_event_send(neigh, NULL);
 		err = 0;
 	}
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE
  2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
@ 2021-10-12 14:23   ` David Ahern
  0 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2021-10-12 14:23 UTC (permalink / raw)
  To: Daniel Borkmann, davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/11/21 6:12 AM, Daniel Borkmann wrote:
> The NTF_EXT_LEARNED neigh flag is usually propagated back to user space
> upon dump of the neighbor table. However, when used in combination with
> NTF_USE flag this is not the case despite exempting the entry from the
> garbage collector. This results in inconsistent state since entries are
> typically marked in neigh->flags with NTF_EXT_LEARNED, but here they are
> not. Fix it by propagating the creation flag to ___neigh_create().
> 
> Before fix:
> 
>   # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
>   # ./ip/ip n
>   192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a REACHABLE
>   [...]
> 
> After fix:
> 
>   # ./ip/ip n replace 192.168.178.30 dev enp5s0 use extern_learn
>   # ./ip/ip n
>   192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a extern_learn REACHABLE
>   [...]
> 
> Fixes: 9ce33e46531d ("neighbour: support for NTF_EXT_LEARNED flag")
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Roopa Prabhu <roopa@nvidia.com>
> ---
>  net/core/neighbour.c | 26 ++++++++++++++------------
>  1 file changed, 14 insertions(+), 12 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE
  2021-10-11 12:12 ` [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Daniel Borkmann
@ 2021-10-12 14:25   ` David Ahern
  0 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2021-10-12 14:25 UTC (permalink / raw)
  To: Daniel Borkmann, davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/11/21 6:12 AM, Daniel Borkmann wrote:
> Currently, it is not possible to migrate a neighbor entry between NUD_PERMANENT
> state and NTF_USE flag with a dynamic NUD state from a user space control plane.
> Similarly, it is not possible to add/remove NTF_EXT_LEARNED flag from an existing
> neighbor entry in combination with NTF_USE flag.
> 
> This is due to the latter directly calling into neigh_event_send() without any
> meta data updates as happening in __neigh_update(). Thus, to enable this use
> case, extend the latter with a NEIGH_UPDATE_F_USE flag where we break the
> NUD_PERMANENT state in particular so that a latter neigh_event_send() is able
> to re-resolve a neighbor entry.
> 

...

> 
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Roopa Prabhu <roopa@nvidia.com>
> ---
>  include/net/neighbour.h |  1 +
>  net/core/neighbour.c    | 22 +++++++++++++---------
>  2 files changed, 14 insertions(+), 9 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions
  2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
@ 2021-10-12 14:31   ` David Ahern
  2021-10-12 14:46     ` Daniel Borkmann
  2021-10-12 21:46   ` Jakub Kicinski
  1 sibling, 1 reply; 20+ messages in thread
From: David Ahern @ 2021-10-12 14:31 UTC (permalink / raw)
  To: Daniel Borkmann, davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/11/21 6:12 AM, Daniel Borkmann wrote:
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index eb2a7c03a5b0..26d4ada0aea9 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -144,11 +144,11 @@ struct neighbour {
>  	struct timer_list	timer;
>  	unsigned long		used;
>  	atomic_t		probes;
> -	__u8			flags;
> -	__u8			nud_state;
> -	__u8			type;
> -	__u8			dead;
> +	u8			nud_state;
> +	u8			type;
> +	u8			dead;
>  	u8			protocol;
> +	u32			flags;
>  	seqlock_t		ha_lock;
>  	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8);
>  	struct hh_cache		hh;
> @@ -172,7 +172,7 @@ struct pneigh_entry {
>  	struct pneigh_entry	*next;
>  	possible_net_t		net;
>  	struct net_device	*dev;
> -	u8			flags;
> +	u32			flags;
>  	u8			protocol;
>  	u8			key[];
>  };
> @@ -258,6 +258,10 @@ static inline void *neighbour_priv(const struct neighbour *n)
>  #define NEIGH_UPDATE_F_ISROUTER			0x40000000
>  #define NEIGH_UPDATE_F_ADMIN			0x80000000
>  
> +/* In-kernel representation for NDA_FLAGS_EXT flags: */
> +#define NTF_OLD_MASK		0xff
> +#define NTF_EXT_SHIFT		8

so only 24 EXT flags can be added. That should be documented; far off
today, but that's an easy overflow to miss.

Reviewed-by: David Ahern <dsahern@kernel.org>



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions
  2021-10-12 14:31   ` David Ahern
@ 2021-10-12 14:46     ` Daniel Borkmann
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-12 14:46 UTC (permalink / raw)
  To: David Ahern, davem, kuba; +Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/12/21 4:31 PM, David Ahern wrote:
> On 10/11/21 6:12 AM, Daniel Borkmann wrote:
>> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
>> index eb2a7c03a5b0..26d4ada0aea9 100644
>> --- a/include/net/neighbour.h
>> +++ b/include/net/neighbour.h
>> @@ -144,11 +144,11 @@ struct neighbour {
>>   	struct timer_list	timer;
>>   	unsigned long		used;
>>   	atomic_t		probes;
>> -	__u8			flags;
>> -	__u8			nud_state;
>> -	__u8			type;
>> -	__u8			dead;
>> +	u8			nud_state;
>> +	u8			type;
>> +	u8			dead;
>>   	u8			protocol;
>> +	u32			flags;
>>   	seqlock_t		ha_lock;
>>   	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8);
>>   	struct hh_cache		hh;
>> @@ -172,7 +172,7 @@ struct pneigh_entry {
>>   	struct pneigh_entry	*next;
>>   	possible_net_t		net;
>>   	struct net_device	*dev;
>> -	u8			flags;
>> +	u32			flags;
>>   	u8			protocol;
>>   	u8			key[];
>>   };
>> @@ -258,6 +258,10 @@ static inline void *neighbour_priv(const struct neighbour *n)
>>   #define NEIGH_UPDATE_F_ISROUTER			0x40000000
>>   #define NEIGH_UPDATE_F_ADMIN			0x80000000
>>   
>> +/* In-kernel representation for NDA_FLAGS_EXT flags: */
>> +#define NTF_OLD_MASK		0xff
>> +#define NTF_EXT_SHIFT		8
> 
> so only 24 EXT flags can be added. That should be documented; far off
> today, but that's an easy overflow to miss.

Agree, far off today, but this is only kernel internal, so there's always the
option to extend it iff really needed e.g. with u64 as neigh->flags. I'll add
a comment.

> Reviewed-by: David Ahern <dsahern@kernel.org>

Thanks!

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
@ 2021-10-12 14:51   ` David Ahern
  2021-10-12 15:05     ` Daniel Borkmann
  2021-10-12 16:31   ` Ido Schimmel
  2022-01-31 20:43   ` Eric Dumazet
  2 siblings, 1 reply; 20+ messages in thread
From: David Ahern @ 2021-10-12 14:51 UTC (permalink / raw)
  To: Daniel Borkmann, davem, kuba, Ido Schimmel
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/11/21 6:12 AM, Daniel Borkmann wrote:
> @@ -66,12 +68,22 @@ enum {
>  #define NUD_PERMANENT	0x80
>  #define NUD_NONE	0x00
>  
> -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
> - * and make no address resolution or NUD.
> - * NUD_PERMANENT also cannot be deleted by garbage collectors.
> +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
> + * address resolution or NUD.
> + *
> + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
> + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
> + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
> + * flagged entries explicitly are (which is also consistent with the routing
> + * subsystem).
> + *
>   * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
>   * states don't make sense and thus are ignored. Such entries don't age and
>   * can roam.
> + *
> + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
> + * of a user space control plane, and automatically refreshed so that (if
> + * possible) they remain in NUD_REACHABLE state.

switchdev use cases need this capability as well to offload routes.
Similar functionality exists in mlxsw to resolve gateways. It would be
good for this design to cover both needs - and that may be as simple as
mlxsw setting the MANAGED flag on the entry to let the neigh subsystem
take over.

>   */
>  
>  struct nda_cacheinfo {
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 5245e888c981..eae73efa9245 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
>  		list_del_init(&n->gc_list);
>  		atomic_dec(&n->tbl->gc_entries);
>  	}
> +	if (!list_empty(&n->managed_list))
> +		list_del_init(&n->managed_list);
>  }
>  
>  static void neigh_update_gc_list(struct neighbour *n)
> @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
>  
>  	write_lock_bh(&n->tbl->lock);
>  	write_lock(&n->lock);
> -

I like the extra newline - it makes locks stand out.


>  	if (n->dead)
>  		goto out;
>  
> @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
>  		list_add_tail(&n->gc_list, &n->tbl->gc_list);
>  		atomic_inc(&n->tbl->gc_entries);
>  	}
> +out:
> +	write_unlock(&n->lock);
> +	write_unlock_bh(&n->tbl->lock);
> +}
> +
> +static void neigh_update_managed_list(struct neighbour *n)
> +{
> +	bool on_managed_list, add_to_managed;
> +
> +	write_lock_bh(&n->tbl->lock);
> +	write_lock(&n->lock);
> +	if (n->dead)
> +		goto out;
> +
> +	add_to_managed = n->flags & NTF_MANAGED;
> +	on_managed_list = !list_empty(&n->managed_list);
>  
> +	if (!add_to_managed && on_managed_list)
> +		list_del_init(&n->managed_list);
> +	else if (add_to_managed && !on_managed_list)
> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>  out:
>  	write_unlock(&n->lock);
>  	write_unlock_bh(&n->tbl->lock);
>  }
>  
> -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
> -				     int *notify)
> +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
> +			       bool *gc_update, bool *managed_update)
>  {
> -	bool rc = false;
> -	u32 ndm_flags;
> +	u32 ndm_flags, old_flags = neigh->flags;
>  
>  	if (!(flags & NEIGH_UPDATE_F_ADMIN))
> -		return rc;
> +		return;
> +
> +	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
> +	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
>  
> -	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
> -	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
> +	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>  		if (ndm_flags & NTF_EXT_LEARNED)
>  			neigh->flags |= NTF_EXT_LEARNED;
>  		else
>  			neigh->flags &= ~NTF_EXT_LEARNED;
> -		rc = true;
>  		*notify = 1;
> +		*gc_update = true;
> +	}
> +	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
> +		if (ndm_flags & NTF_MANAGED)
> +			neigh->flags |= NTF_MANAGED;
> +		else
> +			neigh->flags &= ~NTF_MANAGED;
> +		*notify = 1;
> +		*managed_update = true;
>  	}
> -
> -	return rc;
>  }
>  
>  static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
> @@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
>  	refcount_set(&n->refcnt, 1);
>  	n->dead		  = 1;
>  	INIT_LIST_HEAD(&n->gc_list);
> +	INIT_LIST_HEAD(&n->managed_list);
>  
>  	atomic_inc(&tbl->entries);
>  out:
> @@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>  	n->dead = 0;
>  	if (!exempt_from_gc)
>  		list_add_tail(&n->gc_list, &n->tbl->gc_list);
> -
> +	if (n->flags & NTF_MANAGED)
> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>  	if (want_ref)
>  		neigh_hold(n);
>  	rcu_assign_pointer(n->next,
> @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
>  	}
>  }
>  
> -
> -
>  /* Generic update routine.
>     -- lladdr is new lladdr or NULL, if it is not supplied.
>     -- new    is new state.
> @@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
>  				if it is different.
>  	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
>  	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
> +	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
>  	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
>  				NTF_ROUTER flag.
>  	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
> @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
>  
>     Caller MUST hold reference count on the entry.
>   */
> -
>  static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>  			  u8 new, u32 flags, u32 nlmsg_pid,
>  			  struct netlink_ext_ack *extack)
>  {
> -	bool ext_learn_change = false;
> -	u8 old;
> -	int err;
> -	int notify = 0;
> -	struct net_device *dev;
> +	bool gc_update = false, managed_update = false;
>  	int update_isrouter = 0;
> +	struct net_device *dev;
> +	int err, notify = 0;
> +	u8 old;
>  
>  	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
>  
> @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>  	    (old & (NUD_NOARP | NUD_PERMANENT)))
>  		goto out;
>  
> -	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
> -	if (flags & NEIGH_UPDATE_F_USE) {
> +	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
> +	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
>  		new = old & ~NUD_PERMANENT;

so a neighbor entry can not be both managed and permanent, but you don't
check for the combination in neigh_add and error out with a message to
the user.



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-12 14:51   ` David Ahern
@ 2021-10-12 15:05     ` Daniel Borkmann
  2021-10-12 15:26       ` Daniel Borkmann
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-12 15:05 UTC (permalink / raw)
  To: David Ahern, davem, kuba, Ido Schimmel
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/12/21 4:51 PM, David Ahern wrote:
> On 10/11/21 6:12 AM, Daniel Borkmann wrote:
>> @@ -66,12 +68,22 @@ enum {
>>   #define NUD_PERMANENT	0x80
>>   #define NUD_NONE	0x00
>>   
>> -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
>> - * and make no address resolution or NUD.
>> - * NUD_PERMANENT also cannot be deleted by garbage collectors.
>> +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
>> + * address resolution or NUD.
>> + *
>> + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
>> + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
>> + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
>> + * flagged entries explicitly are (which is also consistent with the routing
>> + * subsystem).
>> + *
>>    * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
>>    * states don't make sense and thus are ignored. Such entries don't age and
>>    * can roam.
>> + *
>> + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
>> + * of a user space control plane, and automatically refreshed so that (if
>> + * possible) they remain in NUD_REACHABLE state.
> 
> switchdev use cases need this capability as well to offload routes.
> Similar functionality exists in mlxsw to resolve gateways. It would be
> good for this design to cover both needs - and that may be as simple as
> mlxsw setting the MANAGED flag on the entry to let the neigh subsystem
> takeover.

Ack, that would definitely be nice to reuse it there as well.

>>    */
>>   
>>   struct nda_cacheinfo {
>> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
>> index 5245e888c981..eae73efa9245 100644
>> --- a/net/core/neighbour.c
>> +++ b/net/core/neighbour.c
>> @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
>>   		list_del_init(&n->gc_list);
>>   		atomic_dec(&n->tbl->gc_entries);
>>   	}
>> +	if (!list_empty(&n->managed_list))
>> +		list_del_init(&n->managed_list);
>>   }
>>   
>>   static void neigh_update_gc_list(struct neighbour *n)
>> @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
>>   
>>   	write_lock_bh(&n->tbl->lock);
>>   	write_lock(&n->lock);
>> -
> 
> I like the extra newline - it makes locks stand out.

Ok, will drop, and add one to neigh_update_managed_list(), too.

>>   	if (n->dead)
>>   		goto out;
>>   
>> @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
>>   		list_add_tail(&n->gc_list, &n->tbl->gc_list);
>>   		atomic_inc(&n->tbl->gc_entries);
>>   	}
>> +out:
>> +	write_unlock(&n->lock);
>> +	write_unlock_bh(&n->tbl->lock);
>> +}
>> +
>> +static void neigh_update_managed_list(struct neighbour *n)
>> +{
>> +	bool on_managed_list, add_to_managed;
>> +
>> +	write_lock_bh(&n->tbl->lock);
>> +	write_lock(&n->lock);
>> +	if (n->dead)
>> +		goto out;
>> +
>> +	add_to_managed = n->flags & NTF_MANAGED;
>> +	on_managed_list = !list_empty(&n->managed_list);
>>   
>> +	if (!add_to_managed && on_managed_list)
>> +		list_del_init(&n->managed_list);
>> +	else if (add_to_managed && !on_managed_list)
>> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>>   out:
>>   	write_unlock(&n->lock);
>>   	write_unlock_bh(&n->tbl->lock);
>>   }
>>   
>> -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
>> -				     int *notify)
>> +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
>> +			       bool *gc_update, bool *managed_update)
>>   {
>> -	bool rc = false;
>> -	u32 ndm_flags;
>> +	u32 ndm_flags, old_flags = neigh->flags;
>>   
>>   	if (!(flags & NEIGH_UPDATE_F_ADMIN))
>> -		return rc;
>> +		return;
>> +
>> +	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
>> +	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
>>   
>> -	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
>> -	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>> +	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>>   		if (ndm_flags & NTF_EXT_LEARNED)
>>   			neigh->flags |= NTF_EXT_LEARNED;
>>   		else
>>   			neigh->flags &= ~NTF_EXT_LEARNED;
>> -		rc = true;
>>   		*notify = 1;
>> +		*gc_update = true;
>> +	}
>> +	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
>> +		if (ndm_flags & NTF_MANAGED)
>> +			neigh->flags |= NTF_MANAGED;
>> +		else
>> +			neigh->flags &= ~NTF_MANAGED;
>> +		*notify = 1;
>> +		*managed_update = true;
>>   	}
>> -
>> -	return rc;
>>   }
>>   
>>   static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
>> @@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
>>   	refcount_set(&n->refcnt, 1);
>>   	n->dead		  = 1;
>>   	INIT_LIST_HEAD(&n->gc_list);
>> +	INIT_LIST_HEAD(&n->managed_list);
>>   
>>   	atomic_inc(&tbl->entries);
>>   out:
>> @@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>>   	n->dead = 0;
>>   	if (!exempt_from_gc)
>>   		list_add_tail(&n->gc_list, &n->tbl->gc_list);
>> -
>> +	if (n->flags & NTF_MANAGED)
>> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>>   	if (want_ref)
>>   		neigh_hold(n);
>>   	rcu_assign_pointer(n->next,
>> @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
>>   	}
>>   }
>>   
>> -
>> -
>>   /* Generic update routine.
>>      -- lladdr is new lladdr or NULL, if it is not supplied.
>>      -- new    is new state.
>> @@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
>>   				if it is different.
>>   	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
>>   	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
>> +	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
>>   	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
>>   				NTF_ROUTER flag.
>>   	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
>> @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
>>   
>>      Caller MUST hold reference count on the entry.
>>    */
>> -
>>   static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>>   			  u8 new, u32 flags, u32 nlmsg_pid,
>>   			  struct netlink_ext_ack *extack)
>>   {
>> -	bool ext_learn_change = false;
>> -	u8 old;
>> -	int err;
>> -	int notify = 0;
>> -	struct net_device *dev;
>> +	bool gc_update = false, managed_update = false;
>>   	int update_isrouter = 0;
>> +	struct net_device *dev;
>> +	int err, notify = 0;
>> +	u8 old;
>>   
>>   	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
>>   
>> @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>>   	    (old & (NUD_NOARP | NUD_PERMANENT)))
>>   		goto out;
>>   
>> -	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
>> -	if (flags & NEIGH_UPDATE_F_USE) {
>> +	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
>> +	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
>>   		new = old & ~NUD_PERMANENT;
> 
> so a neighbor entry can not be both managed and permanent, but you don't
> check for the combination in neigh_add and error out with a message to
> the user.

Good point, I'll error out if both NUD_PERMANENT and NTF_MANAGED are set in neigh_add().

Thanks for the review!
Daniel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-12 15:05     ` Daniel Borkmann
@ 2021-10-12 15:26       ` Daniel Borkmann
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-12 15:26 UTC (permalink / raw)
  To: David Ahern, davem, kuba, Ido Schimmel
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/12/21 5:05 PM, Daniel Borkmann wrote:
> On 10/12/21 4:51 PM, David Ahern wrote:
>> On 10/11/21 6:12 AM, Daniel Borkmann wrote:
[...]
>>> @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>>>           (old & (NUD_NOARP | NUD_PERMANENT)))
>>>           goto out;
>>> -    ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
>>> -    if (flags & NEIGH_UPDATE_F_USE) {
>>> +    neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
>>> +    if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
>>>           new = old & ~NUD_PERMANENT;
>>
>> so a neighbor entry can not be both managed and permanent, but you don't
>> check for the combination in neigh_add and error out with a message to
>> the user.
> 
> Good point, I'll error out if both NUD_PERMANENT and NTF_MANAGED is set in neigh_add().
> 
> Thanks for the review!

Ah, I missed that this was already applied, will send a relative diff in that case.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
  2021-10-12 14:51   ` David Ahern
@ 2021-10-12 16:31   ` Ido Schimmel
  2021-10-13  9:26     ` Daniel Borkmann
  2022-01-31 20:43   ` Eric Dumazet
  2 siblings, 1 reply; 20+ messages in thread
From: Ido Schimmel @ 2021-10-12 16:31 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: davem, kuba, roopa, dsahern, m, john.fastabend, netdev, bpf

On Mon, Oct 11, 2021 at 02:12:38PM +0200, Daniel Borkmann wrote:
> Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
> flag. The flag then indicates to the kernel that the neighbor entry should be
> periodically probed for keeping the entry in NUD_REACHABLE state iff possible.

Nice idea

> 
> The use case for this is targeting XDP or tc BPF load-balancers which use
> the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
> for their backends. Given they cannot be resolved in fast-path,

Out of curiosity, can you explain why that is? Is it because XDP is fast
path only? At least that's what I understand from commit 87f5fc7e48dd ("bpf:
Provide helper to do forwarding lookups in kernel FIB table"), and it is
similar to L3 offload.

> a control plane inserts the L3 (without L2) entries manually into the
> neighbor table and lets the kernel do the neighbor resolution either
> on the gateway or on the backend directly in case the latter resides
> in the same L2. This avoids to deal with L2 in the control plane and
> to rebuild what the kernel already does best anyway.

Are you using 'fib_multipath_use_neigh' sysctl to avoid going through
failed nexthops? Looking at how the bpf_fib_lookup() helper is
implemented, seems that you can benefit from it in XDP

> 
> NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
> eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor
> table which gets triggered by the system work queue to periodically call
> neigh_event_send() for performing the resolution. The implementation allows
> migration from/to NTF_MANAGED neighbor entries, so that already existing
> entries can be converted by the control plane if needed. Potentially, we could
> make the interval for periodically calling neigh_event_send() configurable;
> right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
> has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router:
> Periodically update the kernel's neigh table"). In future, the latter could
> possibly reuse the NTF_MANAGED neighbors as well.

Yes, mlxsw can set this flag on neighbours used for its nexthops. Looks
like the use cases are similar: Avoid going to slow path, either from
XDP or HW.

In our HW the nexthop table is squashed together with the neighbour
table, so that it provides {netdev, MAC} and not {netdev, IP} with which
the kernel performs another lookup in its neighbour table. We want to
avoid situations where we perform multipathing between valid and failed
nexthop (basically, fib_multipath_use_neigh=1), so we only program valid
nexthop. But it means that nothing will trigger the resolution of the
failed nexthops, thus the need to probe the neighbours.

> 
> Example:
> 
>   # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
>   # ./ip/ip n
>   192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
>   [...]
> 
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Roopa Prabhu <roopa@nvidia.com>
> Link: https://linuxplumbersconf.org/event/11/contributions/953/

I was going to ask why not just default the kernel to resolve GW IPs (it
knows them when the nexthops are configured), but then I saw slide 34. I
guess that's what you meant by "... or on the backend directly in case
the latter resides in the same L2"?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions
  2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
  2021-10-12 14:31   ` David Ahern
@ 2021-10-12 21:46   ` Jakub Kicinski
  1 sibling, 0 replies; 20+ messages in thread
From: Jakub Kicinski @ 2021-10-12 21:46 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: davem, roopa, dsahern, m, john.fastabend, netdev, bpf

On Mon, 11 Oct 2021 14:12:37 +0200 Daniel Borkmann wrote:
> +		if (ext & ~0) {
> +			NL_SET_ERR_MSG(extack, "Invalid extended flags");
> +			goto out;
> +		}

Could you also follow up and use NLA_POLICY_MASK() instead of
validating in the code? It's probably less important for non-genl
but still a good best practice.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-12 16:31   ` Ido Schimmel
@ 2021-10-13  9:26     ` Daniel Borkmann
  2021-10-13  9:59       ` Ido Schimmel
  2021-10-13 14:10       ` David Ahern
  0 siblings, 2 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-13  9:26 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: davem, kuba, roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/12/21 6:31 PM, Ido Schimmel wrote:
> On Mon, Oct 11, 2021 at 02:12:38PM +0200, Daniel Borkmann wrote:
>> Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
>> flag. The flag then indicates to the kernel that the neighbor entry should be
>> periodically probed for keeping the entry in NUD_REACHABLE state iff possible.
> 
> Nice idea
> 
>> The use case for this is targeting XDP or tc BPF load-balancers which use
>> the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
>> for their backends. Given they cannot be resolved in fast-path,
> 
> Out of curiosity, can you explain why that is? Because XDP is only fast
> path? At least that's what I understand from commit 87f5fc7e48dd ("bpf:
> Provide helper to do forwarding lookups in kernel FIB table") and it is
> similar to L3 offload

Yep, exactly this. The packet reaches the BPF program at XDP (or tc) layer, it
does the usual mangling e.g. to forward to backends in case of LB, and pushes
the packet back out via XDP_{TX,REDIRECT}. (There is no way we could (or should)
do the neighbor resolution slow path from there.)

>> a control plane inserts the L3 (without L2) entries manually into the
>> neighbor table and lets the kernel do the neighbor resolution either
>> on the gateway or on the backend directly in case the latter resides
>> in the same L2. This avoids to deal with L2 in the control plane and
>> to rebuild what the kernel already does best anyway.
> 
> Are you using 'fib_multipath_use_neigh' sysctl to avoid going through
> failed nexthops? Looking at how the bpf_fib_lookup() helper is
> implemented, seems that you can benefit from it in XDP

Thanks for the pointer, we don't use it yet, but that's a great idea!

>> NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
>> eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor
>> table which gets triggered by the system work queue to periodically call
>> neigh_event_send() for performing the resolution. The implementation allows
>> migration from/to NTF_MANAGED neighbor entries, so that already existing
>> entries can be converted by the control plane if needed. Potentially, we could
>> make the interval for periodically calling neigh_event_send() configurable;
>> right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
>> has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router:
>> Periodically update the kernel's neigh table"). In future, the latter could
>> possibly reuse the NTF_MANAGED neighbors as well.
> 
> Yes, mlxsw can set this flag on neighbours used for its nexthops. Looks
> like the use cases are similar: Avoid going to slow path, either from
> XDP or HW.

Yes, correct.

> In our HW the nexthop table is squashed together with the neighbour
> table, so that it provides {netdev, MAC} and not {netdev, IP} with which
> the kernel performs another lookup in its neighbour table. We want to
> avoid situations where we perform multipathing between valid and failed
> nexthop (basically, fib_multipath_use_neigh=1), so we only program valid
> nexthop. But it means that nothing will trigger the resolution of the
> failed nexthops, thus the need to probe the neighbours.

Makes sense. Given you have the setup/HW, if you have a chance to consolidate
the mlxsw logic with the new NTF_MANAGED entries, that would be awesome!

>> Example:
>>
>>    # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
>>    # ./ip/ip n
>>    192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
>>    [...]
>>
>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
>> Acked-by: Roopa Prabhu <roopa@nvidia.com>
>> Link: https://linuxplumbersconf.org/event/11/contributions/953/
> 
> I was going to ask why not just default the kernel to resolve GW IPs (it
> knows them when the nexthops are configured), but then I saw slide 34. I
> guess that's what you meant by "... or on the backend directly in case
> the latter resides in the same L2"?

Yes, that's correct; not all setups have the backends behind a GW.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-13  9:26     ` Daniel Borkmann
@ 2021-10-13  9:59       ` Ido Schimmel
  2021-10-13 11:23         ` Daniel Borkmann
  2021-10-13 14:10       ` David Ahern
  1 sibling, 1 reply; 20+ messages in thread
From: Ido Schimmel @ 2021-10-13  9:59 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: davem, kuba, roopa, dsahern, m, john.fastabend, netdev, bpf

On Wed, Oct 13, 2021 at 11:26:50AM +0200, Daniel Borkmann wrote:
> On 10/12/21 6:31 PM, Ido Schimmel wrote:
> > In our HW the nexthop table is squashed together with the neighbour
> > table, so that it provides {netdev, MAC} and not {netdev, IP} with which
> > the kernel performs another lookup in its neighbour table. We want to
> > avoid situations where we perform multipathing between valid and failed
> > nexthop (basically, fib_multipath_use_neigh=1), so we only program valid
> > nexthop. But it means that nothing will trigger the resolution of the
> > failed nexthops, thus the need to probe the neighbours.
> 
> Makes sense. Given you have the setup/HW, if you have a chance to consolidate
> the mlxsw logic with the new NTF_MANAGED entries, that would be awesome!

Yes, I will take care of that

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-13  9:59       ` Ido Schimmel
@ 2021-10-13 11:23         ` Daniel Borkmann
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-10-13 11:23 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: davem, kuba, roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/13/21 11:59 AM, Ido Schimmel wrote:
> On Wed, Oct 13, 2021 at 11:26:50AM +0200, Daniel Borkmann wrote:
>> On 10/12/21 6:31 PM, Ido Schimmel wrote:
>>> In our HW the nexthop table is squashed together with the neighbour
>>> table, so that it provides {netdev, MAC} and not {netdev, IP} with which
>>> the kernel performs another lookup in its neighbour table. We want to
>>> avoid situations where we perform multipathing between valid and failed
>>> nexthop (basically, fib_multipath_use_neigh=1), so we only program valid
>>> nexthop. But it means that nothing will trigger the resolution of the
>>> failed nexthops, thus the need to probe the neighbours.
>>
>> Makes sense. Given you have the setup/HW, if you have a chance to consolidate
>> the mlxsw logic with the new NTF_MANAGED entries, that would be awesome!
> 
> Yes, I will take care of that

Perfect, thanks a lot!

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-13  9:26     ` Daniel Borkmann
  2021-10-13  9:59       ` Ido Schimmel
@ 2021-10-13 14:10       ` David Ahern
  1 sibling, 0 replies; 20+ messages in thread
From: David Ahern @ 2021-10-13 14:10 UTC (permalink / raw)
  To: Daniel Borkmann, Ido Schimmel
  Cc: davem, kuba, roopa, dsahern, m, john.fastabend, netdev, bpf

On 10/13/21 3:26 AM, Daniel Borkmann wrote:
>>> a control plane inserts the L3 (without L2) entries manually into the
>>> neighbor table and lets the kernel do the neighbor resolution either
>>> on the gateway or on the backend directly in case the latter resides
>>> in the same L2. This avoids to deal with L2 in the control plane and
>>> to rebuild what the kernel already does best anyway.
>>
>> Are you using 'fib_multipath_use_neigh' sysctl to avoid going through
>> failed nexthops? Looking at how the bpf_fib_lookup() helper is
>> implemented, seems that you can benefit from it in XDP
> 
> Thanks for the pointer, we don't use it yet, but that's a great idea!

You should not have to do anything (beyond setting it if you have
control over that level).

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
  2021-10-12 14:51   ` David Ahern
  2021-10-12 16:31   ` Ido Schimmel
@ 2022-01-31 20:43   ` Eric Dumazet
  2022-01-31 21:17     ` Daniel Borkmann
  2 siblings, 1 reply; 20+ messages in thread
From: Eric Dumazet @ 2022-01-31 20:43 UTC (permalink / raw)
  To: Daniel Borkmann, davem, kuba
  Cc: roopa, dsahern, m, john.fastabend, netdev, bpf


On 10/11/21 05:12, Daniel Borkmann wrote:
> Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
> flag. The flag then indicates to the kernel that the neighbor entry should be
> periodically probed for keeping the entry in NUD_REACHABLE state iff possible.
>
> The use case for this is targeting XDP or tc BPF load-balancers which use
> the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
> for their backends. Given they cannot be resolved in fast-path, a control
> plane inserts the L3 (without L2) entries manually into the neighbor table
> and lets the kernel do the neighbor resolution either on the gateway or on
> the backend directly in case the latter resides in the same L2. This avoids
> to deal with L2 in the control plane and to rebuild what the kernel already
> does best anyway.
>
> NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
> eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor
> table which gets triggered by the system work queue to periodically call
> neigh_event_send() for performing the resolution. The implementation allows
> migration from/to NTF_MANAGED neighbor entries, so that already existing
> entries can be converted by the control plane if needed. Potentially, we could
> make the interval for periodically calling neigh_event_send() configurable;
> right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
> has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router:
> Periodically update the kernel's neigh table"). In future, the latter could
> possibly reuse the NTF_MANAGED neighbors as well.
>
> Example:
>
>    # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
>    # ./ip/ip n
>    192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
>    [...]
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Roopa Prabhu <roopa@nvidia.com>
> Link: https://linuxplumbersconf.org/event/11/contributions/953/
> ---
>   include/net/neighbour.h        |  21 ++++--
>   include/uapi/linux/neighbour.h |  34 ++++++----
>   net/core/neighbour.c           | 113 ++++++++++++++++++++++++---------
>   3 files changed, 120 insertions(+), 48 deletions(-)
>
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index 26d4ada0aea9..e8e48be66755 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -155,6 +155,7 @@ struct neighbour {
>   	int			(*output)(struct neighbour *, struct sk_buff *);
>   	const struct neigh_ops	*ops;
>   	struct list_head	gc_list;
> +	struct list_head	managed_list;
>   	struct rcu_head		rcu;
>   	struct net_device	*dev;
>   	u8			primary_key[0];
> @@ -216,11 +217,13 @@ struct neigh_table {
>   	int			gc_thresh3;
>   	unsigned long		last_flush;
>   	struct delayed_work	gc_work;
> +	struct delayed_work	managed_work;
>   	struct timer_list 	proxy_timer;
>   	struct sk_buff_head	proxy_queue;
>   	atomic_t		entries;
>   	atomic_t		gc_entries;
>   	struct list_head	gc_list;
> +	struct list_head	managed_list;
>   	rwlock_t		lock;
>   	unsigned long		last_rand;
>   	struct neigh_statistics	__percpu *stats;
> @@ -250,17 +253,21 @@ static inline void *neighbour_priv(const struct neighbour *n)
>   }
>   
>   /* flags for neigh_update() */
> -#define NEIGH_UPDATE_F_OVERRIDE			0x00000001
> -#define NEIGH_UPDATE_F_WEAK_OVERRIDE		0x00000002
> -#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER	0x00000004
> -#define NEIGH_UPDATE_F_USE			0x10000000
> -#define NEIGH_UPDATE_F_EXT_LEARNED		0x20000000
> -#define NEIGH_UPDATE_F_ISROUTER			0x40000000
> -#define NEIGH_UPDATE_F_ADMIN			0x80000000
> +#define NEIGH_UPDATE_F_OVERRIDE			BIT(0)
> +#define NEIGH_UPDATE_F_WEAK_OVERRIDE		BIT(1)
> +#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER	BIT(2)
> +#define NEIGH_UPDATE_F_USE			BIT(3)
> +#define NEIGH_UPDATE_F_MANAGED			BIT(4)
> +#define NEIGH_UPDATE_F_EXT_LEARNED		BIT(5)
> +#define NEIGH_UPDATE_F_ISROUTER			BIT(6)
> +#define NEIGH_UPDATE_F_ADMIN			BIT(7)
>   
>   /* In-kernel representation for NDA_FLAGS_EXT flags: */
>   #define NTF_OLD_MASK		0xff
>   #define NTF_EXT_SHIFT		8
> +#define NTF_EXT_MASK		(NTF_EXT_MANAGED)
> +
> +#define NTF_MANAGED		(NTF_EXT_MANAGED << NTF_EXT_SHIFT)
>   
>   extern const struct nla_policy nda_policy[];
>   
> diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
> index a80cca141855..db05fb55055e 100644
> --- a/include/uapi/linux/neighbour.h
> +++ b/include/uapi/linux/neighbour.h
> @@ -41,14 +41,16 @@ enum {
>    *	Neighbor Cache Entry Flags
>    */
>   
> -#define NTF_USE		0x01
> -#define NTF_SELF	0x02
> -#define NTF_MASTER	0x04
> -#define NTF_PROXY	0x08	/* == ATF_PUBL */
> -#define NTF_EXT_LEARNED	0x10
> -#define NTF_OFFLOADED   0x20
> -#define NTF_STICKY	0x40
> -#define NTF_ROUTER	0x80
> +#define NTF_USE		(1 << 0)
> +#define NTF_SELF	(1 << 1)
> +#define NTF_MASTER	(1 << 2)
> +#define NTF_PROXY	(1 << 3)	/* == ATF_PUBL */
> +#define NTF_EXT_LEARNED	(1 << 4)
> +#define NTF_OFFLOADED   (1 << 5)
> +#define NTF_STICKY	(1 << 6)
> +#define NTF_ROUTER	(1 << 7)
> +/* Extended flags under NDA_FLAGS_EXT: */
> +#define NTF_EXT_MANAGED	(1 << 0)
>   
>   /*
>    *	Neighbor Cache Entry States.
> @@ -66,12 +68,22 @@ enum {
>   #define NUD_PERMANENT	0x80
>   #define NUD_NONE	0x00
>   
> -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
> - * and make no address resolution or NUD.
> - * NUD_PERMANENT also cannot be deleted by garbage collectors.
> +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
> + * address resolution or NUD.
> + *
> + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
> + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
> + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
> + * flagged entries explicitly are (which is also consistent with the routing
> + * subsystem).
> + *
>    * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
>    * states don't make sense and thus are ignored. Such entries don't age and
>    * can roam.
> + *
> + * NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
> + * of a user space control plane, and automatically refreshed so that (if
> + * possible) they remain in NUD_REACHABLE state.
>    */
>   
>   struct nda_cacheinfo {
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 5245e888c981..eae73efa9245 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
>   		list_del_init(&n->gc_list);
>   		atomic_dec(&n->tbl->gc_entries);
>   	}
> +	if (!list_empty(&n->managed_list))
> +		list_del_init(&n->managed_list);
>   }
>   
>   static void neigh_update_gc_list(struct neighbour *n)
> @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
>   
>   	write_lock_bh(&n->tbl->lock);
>   	write_lock(&n->lock);
> -
>   	if (n->dead)
>   		goto out;
>   
> @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
>   		list_add_tail(&n->gc_list, &n->tbl->gc_list);
>   		atomic_inc(&n->tbl->gc_entries);
>   	}
> +out:
> +	write_unlock(&n->lock);
> +	write_unlock_bh(&n->tbl->lock);
> +}
> +
> +static void neigh_update_managed_list(struct neighbour *n)
> +{
> +	bool on_managed_list, add_to_managed;
> +
> +	write_lock_bh(&n->tbl->lock);
> +	write_lock(&n->lock);
> +	if (n->dead)
> +		goto out;
> +
> +	add_to_managed = n->flags & NTF_MANAGED;
> +	on_managed_list = !list_empty(&n->managed_list);
>   
> +	if (!add_to_managed && on_managed_list)
> +		list_del_init(&n->managed_list);
> +	else if (add_to_managed && !on_managed_list)
> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>   out:
>   	write_unlock(&n->lock);
>   	write_unlock_bh(&n->tbl->lock);
>   }
>   
> -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
> -				     int *notify)
> +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
> +			       bool *gc_update, bool *managed_update)
>   {
> -	bool rc = false;
> -	u32 ndm_flags;
> +	u32 ndm_flags, old_flags = neigh->flags;
>   
>   	if (!(flags & NEIGH_UPDATE_F_ADMIN))
> -		return rc;
> +		return;
> +
> +	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
> +	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
>   
> -	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
> -	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
> +	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>   		if (ndm_flags & NTF_EXT_LEARNED)
>   			neigh->flags |= NTF_EXT_LEARNED;
>   		else
>   			neigh->flags &= ~NTF_EXT_LEARNED;
> -		rc = true;
>   		*notify = 1;
> +		*gc_update = true;
> +	}
> +	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
> +		if (ndm_flags & NTF_MANAGED)
> +			neigh->flags |= NTF_MANAGED;
> +		else
> +			neigh->flags &= ~NTF_MANAGED;
> +		*notify = 1;
> +		*managed_update = true;
>   	}
> -
> -	return rc;
>   }
>   
>   static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
> @@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
>   	refcount_set(&n->refcnt, 1);
>   	n->dead		  = 1;
>   	INIT_LIST_HEAD(&n->gc_list);
> +	INIT_LIST_HEAD(&n->managed_list);
>   
>   	atomic_inc(&tbl->entries);
>   out:
> @@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>   	n->dead = 0;
>   	if (!exempt_from_gc)
>   		list_add_tail(&n->gc_list, &n->tbl->gc_list);
> -
> +	if (n->flags & NTF_MANAGED)
> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>   	if (want_ref)
>   		neigh_hold(n);
>   	rcu_assign_pointer(n->next,
> @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
>   	}
>   }
>   
> -
> -
>   /* Generic update routine.
>      -- lladdr is new lladdr or NULL, if it is not supplied.
>      -- new    is new state.
> @@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
>   				if it is different.
>   	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
>   	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
> +	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
>   	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
>   				NTF_ROUTER flag.
>   	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
> @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
>   
>      Caller MUST hold reference count on the entry.
>    */
> -
>   static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>   			  u8 new, u32 flags, u32 nlmsg_pid,
>   			  struct netlink_ext_ack *extack)
>   {
> -	bool ext_learn_change = false;
> -	u8 old;
> -	int err;
> -	int notify = 0;
> -	struct net_device *dev;
> +	bool gc_update = false, managed_update = false;
>   	int update_isrouter = 0;
> +	struct net_device *dev;
> +	int err, notify = 0;
> +	u8 old;
>   
>   	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
>   
> @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>   	    (old & (NUD_NOARP | NUD_PERMANENT)))
>   		goto out;
>   
> -	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
> -	if (flags & NEIGH_UPDATE_F_USE) {
> +	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
> +	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
>   		new = old & ~NUD_PERMANENT;
>   		neigh->nud_state = new;
>   		err = 0;
> @@ -1405,15 +1432,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>   	if (update_isrouter)
>   		neigh_update_is_router(neigh, flags, &notify);
>   	write_unlock_bh(&neigh->lock);
> -
> -	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
> +	if (((new ^ old) & NUD_PERMANENT) || gc_update)
>   		neigh_update_gc_list(neigh);
> -
> +	if (managed_update)
> +		neigh_update_managed_list(neigh);
>   	if (notify)
>   		neigh_update_notify(neigh, nlmsg_pid);
> -
>   	trace_neigh_update_done(neigh, err);
> -
>   	return err;
>   }
>   
> @@ -1539,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
>   }
>   EXPORT_SYMBOL(neigh_direct_output);
>   
> +static void neigh_managed_work(struct work_struct *work)
> +{
> +	struct neigh_table *tbl = container_of(work, struct neigh_table,
> +					       managed_work.work);
> +	struct neighbour *neigh;
> +
> +	write_lock_bh(&tbl->lock);
> +	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
> +		neigh_event_send(neigh, NULL);

neigh_event_send() may need to take tbl->lock, which would lead to a deadlock?

__raw_write_lock_bh include/linux/rwlock_api_smp.h:202 [inline]
  _raw_write_lock_bh+0x2f/0x40 kernel/locking/spinlock.c:334
  ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652
  ip6_finish_output2+0x1070/0x14f0 net/ipv6/ip6_output.c:123
  __ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
  __ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170
  ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201
  NF_HOOK_COND include/linux/netfilter.h:296 [inline]
  ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224
  dst_output include/net/dst.h:451 [inline]
  NF_HOOK include/linux/netfilter.h:307 [inline]
  ndisc_send_skb+0xa99/0x17f0 net/ipv6/ndisc.c:508
  ndisc_send_ns+0x3a9/0x840 net/ipv6/ndisc.c:650
  ndisc_solicit+0x2cd/0x4f0 net/ipv6/ndisc.c:742
  neigh_probe+0xc2/0x110 net/core/neighbour.c:1040
  __neigh_event_send+0x37d/0x1570 net/core/neighbour.c:1201
  neigh_event_send include/net/neighbour.h:470 [inline]
  neigh_managed_work+0x162/0x250 net/core/neighbour.c:1574
  process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307
  worker_thread+0x657/0x1110 kernel/workqueue.c:2454
  kthread+0x2e9/0x3a0 kernel/kthread.c:377
  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295

> +	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
> +			   NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
> +	write_unlock_bh(&tbl->lock);
> +}
> +
>   static void neigh_proxy_process(struct timer_list *t)
>   {
>   	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
> @@ -1685,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
>   
>   	INIT_LIST_HEAD(&tbl->parms_list);
>   	INIT_LIST_HEAD(&tbl->gc_list);
> +	INIT_LIST_HEAD(&tbl->managed_list);
> +
>   	list_add(&tbl->parms.list, &tbl->parms_list);
>   	write_pnet(&tbl->parms.net, &init_net);
>   	refcount_set(&tbl->parms.refcnt, 1);
> @@ -1716,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
>   		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
>   
>   	rwlock_init(&tbl->lock);
> +
>   	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
>   	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
>   			tbl->parms.reachable_time);
> +	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
> +	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
> +
>   	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
>   	skb_queue_head_init_class(&tbl->proxy_queue,
>   			&neigh_table_proxy_queue_class);
> @@ -1891,7 +1936,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
>   	if (tb[NDA_FLAGS_EXT]) {
>   		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
>   
> -		if (ext & ~0) {
> +		if (ext & ~NTF_EXT_MASK) {
>   			NL_SET_ERR_MSG(extack, "Invalid extended flags");
>   			goto out;
>   		}
> @@ -1927,6 +1972,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
>   	if (ndm_flags & NTF_PROXY) {
>   		struct pneigh_entry *pn;
>   
> +		if (ndm_flags & NTF_MANAGED) {
> +			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
> +			goto out;
> +		}
> +
>   		err = -ENOBUFS;
>   		pn = pneigh_lookup(tbl, net, dst, dev, 1);
>   		if (pn) {
> @@ -1960,7 +2010,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
>   		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
>   				 ndm_flags & NTF_EXT_LEARNED;
>   		neigh = ___neigh_create(tbl, dst, dev,
> -					ndm_flags & NTF_EXT_LEARNED,
> +					ndm_flags &
> +					(NTF_EXT_LEARNED | NTF_MANAGED),
>   					exempt_from_gc, true);
>   		if (IS_ERR(neigh)) {
>   			err = PTR_ERR(neigh);
> @@ -1984,12 +2035,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
>   		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
>   	if (ndm_flags & NTF_ROUTER)
>   		flags |= NEIGH_UPDATE_F_ISROUTER;
> +	if (ndm_flags & NTF_MANAGED)
> +		flags |= NEIGH_UPDATE_F_MANAGED;
>   	if (ndm_flags & NTF_USE)
>   		flags |= NEIGH_UPDATE_F_USE;
>   
>   	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
>   			     NETLINK_CB(skb).portid, extack);
> -	if (!err && ndm_flags & NTF_USE) {
> +	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
>   		neigh_event_send(neigh, NULL);
>   		err = 0;
>   	}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
  2022-01-31 20:43   ` Eric Dumazet
@ 2022-01-31 21:17     ` Daniel Borkmann
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Borkmann @ 2022-01-31 21:17 UTC (permalink / raw)
  To: Eric Dumazet, davem, kuba; +Cc: roopa, dsahern, m, john.fastabend, netdev, bpf

On 1/31/22 9:43 PM, Eric Dumazet wrote:
[...]
>> @@ -1539,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
>>   }
>>   EXPORT_SYMBOL(neigh_direct_output);
>> +static void neigh_managed_work(struct work_struct *work)
>> +{
>> +    struct neigh_table *tbl = container_of(work, struct neigh_table,
>> +                           managed_work.work);
>> +    struct neighbour *neigh;
>> +
>> +    write_lock_bh(&tbl->lock);
>> +    list_for_each_entry(neigh, &tbl->managed_list, managed_list)
>> +        neigh_event_send(neigh, NULL);
> 
> neigh_event_send() may need to take tbl->lock, leading to a deadlock?

Thanks for forwarding the syzbot report! I'll take a look.

> __raw_write_lock_bh include/linux/rwlock_api_smp.h:202 [inline]
>   _raw_write_lock_bh+0x2f/0x40 kernel/locking/spinlock.c:334
>   ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652
>   ip6_finish_output2+0x1070/0x14f0 net/ipv6/ip6_output.c:123
>   __ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
>   __ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170
>   ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201
>   NF_HOOK_COND include/linux/netfilter.h:296 [inline]
>   ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224
>   dst_output include/net/dst.h:451 [inline]
>   NF_HOOK include/linux/netfilter.h:307 [inline]
>   ndisc_send_skb+0xa99/0x17f0 net/ipv6/ndisc.c:508
>   ndisc_send_ns+0x3a9/0x840 net/ipv6/ndisc.c:650
>   ndisc_solicit+0x2cd/0x4f0 net/ipv6/ndisc.c:742
>   neigh_probe+0xc2/0x110 net/core/neighbour.c:1040
>   __neigh_event_send+0x37d/0x1570 net/core/neighbour.c:1201
>   neigh_event_send include/net/neighbour.h:470 [inline]
>   neigh_managed_work+0x162/0x250 net/core/neighbour.c:1574
>   process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307
>   worker_thread+0x657/0x1110 kernel/workqueue.c:2454
>   kthread+0x2e9/0x3a0 kernel/kthread.c:377
>   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
> 
>> +    queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
>> +               NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
>> +    write_unlock_bh(&tbl->lock);
>> +}
>> +
>>   static void neigh_proxy_process(struct timer_list *t)
>>   {
>>       struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
>> @@ -1685,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
>>       INIT_LIST_HEAD(&tbl->parms_list);
>>       INIT_LIST_HEAD(&tbl->gc_list);
>> +    INIT_LIST_HEAD(&tbl->managed_list);
>> +

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2022-01-31 21:18 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
2021-10-12 14:23   ` David Ahern
2021-10-11 12:12 ` [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Daniel Borkmann
2021-10-12 14:25   ` David Ahern
2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
2021-10-12 14:31   ` David Ahern
2021-10-12 14:46     ` Daniel Borkmann
2021-10-12 21:46   ` Jakub Kicinski
2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
2021-10-12 14:51   ` David Ahern
2021-10-12 15:05     ` Daniel Borkmann
2021-10-12 15:26       ` Daniel Borkmann
2021-10-12 16:31   ` Ido Schimmel
2021-10-13  9:26     ` Daniel Borkmann
2021-10-13  9:59       ` Ido Schimmel
2021-10-13 11:23         ` Daniel Borkmann
2021-10-13 14:10       ` David Ahern
2022-01-31 20:43   ` Eric Dumazet
2022-01-31 21:17     ` Daniel Borkmann

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.