bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Ahern <dsahern@gmail.com>
To: Daniel Borkmann <daniel@iogearbox.net>,
	davem@davemloft.net, kuba@kernel.org,
	Ido Schimmel <idosch@idosch.org>
Cc: roopa@nvidia.com, dsahern@kernel.org, m@lambda.lt,
	john.fastabend@gmail.com, netdev@vger.kernel.org,
	bpf@vger.kernel.org
Subject: Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Date: Tue, 12 Oct 2021 08:51:57 -0600	[thread overview]
Message-ID: <05807c5b-59aa-839d-fbb0-b9712857741e@gmail.com> (raw)
In-Reply-To: <20211011121238.25542-5-daniel@iogearbox.net>

On 10/11/21 6:12 AM, Daniel Borkmann wrote:
> @@ -66,12 +68,22 @@ enum {
>  #define NUD_PERMANENT	0x80
>  #define NUD_NONE	0x00
>  
> -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
> - * and make no address resolution or NUD.
> - * NUD_PERMANENT also cannot be deleted by garbage collectors.
> +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
> + * address resolution or NUD.
> + *
> + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
> + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
> + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
> + * flagged entries explicitly are (which is also consistent with the routing
> + * subsystem).
> + *
>   * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
>   * states don't make sense and thus are ignored. Such entries don't age and
>   * can roam.
> + *
> + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
> + * of a user space control plane, and automatically refreshed so that (if
> + * possible) they remain in NUD_REACHABLE state.

switchdev use cases need this capability as well to offload routes.
Similar functionality exists in mlxsw to resolve gateways. It would be
good for this design to cover both needs - and that may be as simple as
mlxsw setting the MANAGED flag on the entry to let the neigh subsystem
takeover.

>   */
>  
>  struct nda_cacheinfo {
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 5245e888c981..eae73efa9245 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
>  		list_del_init(&n->gc_list);
>  		atomic_dec(&n->tbl->gc_entries);
>  	}
> +	if (!list_empty(&n->managed_list))
> +		list_del_init(&n->managed_list);
>  }
>  
>  static void neigh_update_gc_list(struct neighbour *n)
> @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
>  
>  	write_lock_bh(&n->tbl->lock);
>  	write_lock(&n->lock);
> -

I like the extra newline - it makes locks stand out.


>  	if (n->dead)
>  		goto out;
>  
> @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
>  		list_add_tail(&n->gc_list, &n->tbl->gc_list);
>  		atomic_inc(&n->tbl->gc_entries);
>  	}
> +out:
> +	write_unlock(&n->lock);
> +	write_unlock_bh(&n->tbl->lock);
> +}
> +
> +static void neigh_update_managed_list(struct neighbour *n)
> +{
> +	bool on_managed_list, add_to_managed;
> +
> +	write_lock_bh(&n->tbl->lock);
> +	write_lock(&n->lock);
> +	if (n->dead)
> +		goto out;
> +
> +	add_to_managed = n->flags & NTF_MANAGED;
> +	on_managed_list = !list_empty(&n->managed_list);
>  
> +	if (!add_to_managed && on_managed_list)
> +		list_del_init(&n->managed_list);
> +	else if (add_to_managed && !on_managed_list)
> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>  out:
>  	write_unlock(&n->lock);
>  	write_unlock_bh(&n->tbl->lock);
>  }
>  
> -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
> -				     int *notify)
> +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
> +			       bool *gc_update, bool *managed_update)
>  {
> -	bool rc = false;
> -	u32 ndm_flags;
> +	u32 ndm_flags, old_flags = neigh->flags;
>  
>  	if (!(flags & NEIGH_UPDATE_F_ADMIN))
> -		return rc;
> +		return;
> +
> +	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
> +	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
>  
> -	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
> -	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
> +	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>  		if (ndm_flags & NTF_EXT_LEARNED)
>  			neigh->flags |= NTF_EXT_LEARNED;
>  		else
>  			neigh->flags &= ~NTF_EXT_LEARNED;
> -		rc = true;
>  		*notify = 1;
> +		*gc_update = true;
> +	}
> +	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
> +		if (ndm_flags & NTF_MANAGED)
> +			neigh->flags |= NTF_MANAGED;
> +		else
> +			neigh->flags &= ~NTF_MANAGED;
> +		*notify = 1;
> +		*managed_update = true;
>  	}
> -
> -	return rc;
>  }
>  
>  static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
> @@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
>  	refcount_set(&n->refcnt, 1);
>  	n->dead		  = 1;
>  	INIT_LIST_HEAD(&n->gc_list);
> +	INIT_LIST_HEAD(&n->managed_list);
>  
>  	atomic_inc(&tbl->entries);
>  out:
> @@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>  	n->dead = 0;
>  	if (!exempt_from_gc)
>  		list_add_tail(&n->gc_list, &n->tbl->gc_list);
> -
> +	if (n->flags & NTF_MANAGED)
> +		list_add_tail(&n->managed_list, &n->tbl->managed_list);
>  	if (want_ref)
>  		neigh_hold(n);
>  	rcu_assign_pointer(n->next,
> @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
>  	}
>  }
>  
> -
> -
>  /* Generic update routine.
>     -- lladdr is new lladdr or NULL, if it is not supplied.
>     -- new    is new state.
> @@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
>  				if it is different.
>  	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
>  	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
> +	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
>  	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
>  				NTF_ROUTER flag.
>  	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
> @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
>  
>     Caller MUST hold reference count on the entry.
>   */
> -
>  static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>  			  u8 new, u32 flags, u32 nlmsg_pid,
>  			  struct netlink_ext_ack *extack)
>  {
> -	bool ext_learn_change = false;
> -	u8 old;
> -	int err;
> -	int notify = 0;
> -	struct net_device *dev;
> +	bool gc_update = false, managed_update = false;
>  	int update_isrouter = 0;
> +	struct net_device *dev;
> +	int err, notify = 0;
> +	u8 old;
>  
>  	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
>  
> @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>  	    (old & (NUD_NOARP | NUD_PERMANENT)))
>  		goto out;
>  
> -	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
> -	if (flags & NEIGH_UPDATE_F_USE) {
> +	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
> +	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
>  		new = old & ~NUD_PERMANENT;

so a neighbor entry can not be both managed and permanent, but you don't
check for the combination in neigh_add and error out with a message to
the user.



  reply	other threads:[~2021-10-12 14:52 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
2021-10-12 14:23   ` David Ahern
2021-10-11 12:12 ` [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Daniel Borkmann
2021-10-12 14:25   ` David Ahern
2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
2021-10-12 14:31   ` David Ahern
2021-10-12 14:46     ` Daniel Borkmann
2021-10-12 21:46   ` Jakub Kicinski
2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
2021-10-12 14:51   ` David Ahern [this message]
2021-10-12 15:05     ` Daniel Borkmann
2021-10-12 15:26       ` Daniel Borkmann
2021-10-12 16:31   ` Ido Schimmel
2021-10-13  9:26     ` Daniel Borkmann
2021-10-13  9:59       ` Ido Schimmel
2021-10-13 11:23         ` Daniel Borkmann
2021-10-13 14:10       ` David Ahern
2022-01-31 20:43   ` Eric Dumazet
2022-01-31 21:17     ` Daniel Borkmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=05807c5b-59aa-839d-fbb0-b9712857741e@gmail.com \
    --to=dsahern@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=idosch@idosch.org \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=m@lambda.lt \
    --cc=netdev@vger.kernel.org \
    --cc=roopa@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).