From: Daniel Borkmann <daniel@iogearbox.net>
To: David Ahern <dsahern@gmail.com>,
davem@davemloft.net, kuba@kernel.org,
Ido Schimmel <idosch@idosch.org>
Cc: roopa@nvidia.com, dsahern@kernel.org, m@lambda.lt,
john.fastabend@gmail.com, netdev@vger.kernel.org,
bpf@vger.kernel.org
Subject: Re: [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Date: Tue, 12 Oct 2021 17:05:46 +0200 [thread overview]
Message-ID: <bf31a3fe-c12d-fd75-c2eb-9685cc8528f2@iogearbox.net> (raw)
In-Reply-To: <05807c5b-59aa-839d-fbb0-b9712857741e@gmail.com>
On 10/12/21 4:51 PM, David Ahern wrote:
> On 10/11/21 6:12 AM, Daniel Borkmann wrote:
>> @@ -66,12 +68,22 @@ enum {
>> #define NUD_PERMANENT 0x80
>> #define NUD_NONE 0x00
>>
>> -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
>> - * and make no address resolution or NUD.
>> - * NUD_PERMANENT also cannot be deleted by garbage collectors.
>> +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
>> + * address resolution or NUD.
>> + *
>> + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
>> + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
>> + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
>> + * flagged entries explicitly are (which is also consistent with the routing
>> + * subsystem).
>> + *
>> * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
>> * states don't make sense and thus are ignored. Such entries don't age and
>> * can roam.
>> + *
>> + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
>> + * of a user space control plane, and automatically refreshed so that (if
>> + * possible) they remain in NUD_REACHABLE state.
>
> switchdev use cases need this capability as well to offload routes.
> Similar functionality exists in mlxsw to resolve gateways. It would be
> good for this design to cover both needs - and that may be as simple as
> mlxsw setting the MANAGED flag on the entry to let the neigh subsystem
> takeover.
Ack, that would definitely be nice to reuse it there as well.
>> */
>>
>> struct nda_cacheinfo {
>> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
>> index 5245e888c981..eae73efa9245 100644
>> --- a/net/core/neighbour.c
>> +++ b/net/core/neighbour.c
>> @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
>> list_del_init(&n->gc_list);
>> atomic_dec(&n->tbl->gc_entries);
>> }
>> + if (!list_empty(&n->managed_list))
>> + list_del_init(&n->managed_list);
>> }
>>
>> static void neigh_update_gc_list(struct neighbour *n)
>> @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
>>
>> write_lock_bh(&n->tbl->lock);
>> write_lock(&n->lock);
>> -
>
> I like the extra newline - it makes locks stand out.
Ok, will drop, and add one to neigh_update_managed_list(), too.
>> if (n->dead)
>> goto out;
>>
>> @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
>> list_add_tail(&n->gc_list, &n->tbl->gc_list);
>> atomic_inc(&n->tbl->gc_entries);
>> }
>> +out:
>> + write_unlock(&n->lock);
>> + write_unlock_bh(&n->tbl->lock);
>> +}
>> +
>> +static void neigh_update_managed_list(struct neighbour *n)
>> +{
>> + bool on_managed_list, add_to_managed;
>> +
>> + write_lock_bh(&n->tbl->lock);
>> + write_lock(&n->lock);
>> + if (n->dead)
>> + goto out;
>> +
>> + add_to_managed = n->flags & NTF_MANAGED;
>> + on_managed_list = !list_empty(&n->managed_list);
>>
>> + if (!add_to_managed && on_managed_list)
>> + list_del_init(&n->managed_list);
>> + else if (add_to_managed && !on_managed_list)
>> + list_add_tail(&n->managed_list, &n->tbl->managed_list);
>> out:
>> write_unlock(&n->lock);
>> write_unlock_bh(&n->tbl->lock);
>> }
>>
>> -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
>> - int *notify)
>> +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
>> + bool *gc_update, bool *managed_update)
>> {
>> - bool rc = false;
>> - u32 ndm_flags;
>> + u32 ndm_flags, old_flags = neigh->flags;
>>
>> if (!(flags & NEIGH_UPDATE_F_ADMIN))
>> - return rc;
>> + return;
>> +
>> + ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
>> + ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
>>
>> - ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
>> - if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>> + if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
>> if (ndm_flags & NTF_EXT_LEARNED)
>> neigh->flags |= NTF_EXT_LEARNED;
>> else
>> neigh->flags &= ~NTF_EXT_LEARNED;
>> - rc = true;
>> *notify = 1;
>> + *gc_update = true;
>> + }
>> + if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
>> + if (ndm_flags & NTF_MANAGED)
>> + neigh->flags |= NTF_MANAGED;
>> + else
>> + neigh->flags &= ~NTF_MANAGED;
>> + *notify = 1;
>> + *managed_update = true;
>> }
>> -
>> - return rc;
>> }
>>
>> static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
>> @@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
>> refcount_set(&n->refcnt, 1);
>> n->dead = 1;
>> INIT_LIST_HEAD(&n->gc_list);
>> + INIT_LIST_HEAD(&n->managed_list);
>>
>> atomic_inc(&tbl->entries);
>> out:
>> @@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>> n->dead = 0;
>> if (!exempt_from_gc)
>> list_add_tail(&n->gc_list, &n->tbl->gc_list);
>> -
>> + if (n->flags & NTF_MANAGED)
>> + list_add_tail(&n->managed_list, &n->tbl->managed_list);
>> if (want_ref)
>> neigh_hold(n);
>> rcu_assign_pointer(n->next,
>> @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
>> }
>> }
>>
>> -
>> -
>> /* Generic update routine.
>> -- lladdr is new lladdr or NULL, if it is not supplied.
>> -- new is new state.
>> @@ -1218,6 +1246,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
>> if it is different.
>> NEIGH_UPDATE_F_ADMIN means that the change is administrative.
>> NEIGH_UPDATE_F_USE means that the entry is user triggered.
>> + NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
>> NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
>> NTF_ROUTER flag.
>> NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
>> @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
>>
>> Caller MUST hold reference count on the entry.
>> */
>> -
>> static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>> u8 new, u32 flags, u32 nlmsg_pid,
>> struct netlink_ext_ack *extack)
>> {
>> - bool ext_learn_change = false;
>> - u8 old;
>> - int err;
>> - int notify = 0;
>> - struct net_device *dev;
>> + bool gc_update = false, managed_update = false;
>> int update_isrouter = 0;
>> + struct net_device *dev;
>> + int err, notify = 0;
>> + u8 old;
>>
>> trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
>>
>> @@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
>> (old & (NUD_NOARP | NUD_PERMANENT)))
>> goto out;
>>
>> - ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify);
>> - if (flags & NEIGH_UPDATE_F_USE) {
>> + neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update);
>> + if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
>> new = old & ~NUD_PERMANENT;
>
> so a neighbor entry can not be both managed and permanent, but you don't
> check for the combination in neigh_add and error out with a message to
> the user.
Good point, I'll error out if both NUD_PERMANENT and NTF_MANAGED is set in neigh_add().
Thanks for the review!
Daniel
next prev parent reply other threads:[~2021-10-12 15:05 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-11 12:12 [PATCH net-next 0/4] Managed Neighbor Entries Daniel Borkmann
2021-10-11 12:12 ` [PATCH net-next 1/4] net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE Daniel Borkmann
2021-10-12 14:23 ` David Ahern
2021-10-11 12:12 ` [PATCH net-next 2/4] net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE Daniel Borkmann
2021-10-12 14:25 ` David Ahern
2021-10-11 12:12 ` [PATCH net-next 3/4] net, neigh: Extend neigh->flags to 32 bit to allow for extensions Daniel Borkmann
2021-10-12 14:31 ` David Ahern
2021-10-12 14:46 ` Daniel Borkmann
2021-10-12 21:46 ` Jakub Kicinski
2021-10-11 12:12 ` [PATCH net-next 4/4] net, neigh: Add NTF_MANAGED flag for managed neighbor entries Daniel Borkmann
2021-10-12 14:51 ` David Ahern
2021-10-12 15:05 ` Daniel Borkmann [this message]
2021-10-12 15:26 ` Daniel Borkmann
2021-10-12 16:31 ` Ido Schimmel
2021-10-13 9:26 ` Daniel Borkmann
2021-10-13 9:59 ` Ido Schimmel
2021-10-13 11:23 ` Daniel Borkmann
2021-10-13 14:10 ` David Ahern
2022-01-31 20:43 ` Eric Dumazet
2022-01-31 21:17 ` Daniel Borkmann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bf31a3fe-c12d-fd75-c2eb-9685cc8528f2@iogearbox.net \
--to=daniel@iogearbox.net \
--cc=bpf@vger.kernel.org \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=dsahern@kernel.org \
--cc=idosch@idosch.org \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=m@lambda.lt \
--cc=netdev@vger.kernel.org \
--cc=roopa@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).