All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] net: flush neighbor entries when carrier is off
@ 2017-11-06  6:57 David Ahern
  2017-11-06  6:57 ` [PATCH net-next 1/3] net: neigh: Add helper to flush entries on carrier down David Ahern
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: David Ahern @ 2017-11-06  6:57 UTC (permalink / raw)
  To: netdev; +Cc: jhs, David Ahern

Commit a6db4494d218c ("net: ipv4: Consider failed nexthops in multipath
routes") added support for checking neighbor state when selecting a path
for multipath route lookups. It works but incurs a delay waiting for
the neighbor entry to timeout. Improve the path selection by flushing
non-permanent neighbor entries when carrier is off.

David Ahern (3):
  net: neigh: Add helper to flush entries on carrier down
  net: ipv4: flush neighbor entries when carrier is off
  net: ipv6: flush neighbor entries when carrier is off

 include/net/neighbour.h |  1 +
 net/core/neighbour.c    | 26 ++++++++++++++++++++++----
 net/ipv4/fib_frontend.c |  7 +++++--
 net/ipv6/addrconf.c     |  3 +++
 4 files changed, 31 insertions(+), 6 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net-next 1/3] net: neigh: Add helper to flush entries on carrier down
  2017-11-06  6:57 [PATCH net-next 0/3] net: flush neighbor entries when carrier is off David Ahern
@ 2017-11-06  6:57 ` David Ahern
  2017-11-06  6:57 ` [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off David Ahern
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: David Ahern @ 2017-11-06  6:57 UTC (permalink / raw)
  To: netdev; +Cc: jhs, David Ahern

Add neigh_carrier_down helper to flush non-permanent entries on carrier
down.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/neighbour.h |  1 +
 net/core/neighbour.c    | 26 ++++++++++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2492000e1035..b2eff6ffbfdc 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -320,6 +320,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags,
 void __neigh_set_probe_once(struct neighbour *neigh);
 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl);
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
+int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev);
 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb);
 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6ea3a1a7f36a..a7012bca1ce9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -229,7 +229,8 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
 	}
 }
 
-static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
+			    bool skip_perm)
 {
 	int i;
 	struct neigh_hash_table *nht;
@@ -247,6 +248,10 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 				np = &n->next;
 				continue;
 			}
+			if (skip_perm && n->nud_state & NUD_PERMANENT) {
+				np = &n->next;
+				continue;
+			}
 			rcu_assign_pointer(*np,
 				   rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
@@ -282,20 +287,33 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
 {
 	write_lock_bh(&tbl->lock);
-	neigh_flush_dev(tbl, dev);
+	neigh_flush_dev(tbl, dev, false);
 	write_unlock_bh(&tbl->lock);
 }
 EXPORT_SYMBOL(neigh_changeaddr);
 
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static void __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+			   bool skip_perm)
 {
 	write_lock_bh(&tbl->lock);
-	neigh_flush_dev(tbl, dev);
+	neigh_flush_dev(tbl, dev, skip_perm);
 	pneigh_ifdown(tbl, dev);
 	write_unlock_bh(&tbl->lock);
 
 	del_timer_sync(&tbl->proxy_timer);
 	pneigh_queue_purge(&tbl->proxy_queue);
+}
+
+int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
+{
+	__neigh_ifdown(tbl, dev, true);
+	return 0;
+}
+EXPORT_SYMBOL(neigh_carrier_down);
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	__neigh_ifdown(tbl, dev, false);
 	return 0;
 }
 EXPORT_SYMBOL(neigh_ifdown);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off
  2017-11-06  6:57 [PATCH net-next 0/3] net: flush neighbor entries when carrier is off David Ahern
  2017-11-06  6:57 ` [PATCH net-next 1/3] net: neigh: Add helper to flush entries on carrier down David Ahern
@ 2017-11-06  6:57 ` David Ahern
  2017-11-06 10:47   ` Ido Schimmel
  2017-11-06  6:57 ` [PATCH net-next 3/3] net: ipv6: " David Ahern
       [not found] ` <CAKYOsXz3RFqbzENy1rz8kjsr3aF0u2kxBHrSQ3uNfFfQTWbJRQ@mail.gmail.com>
  3 siblings, 1 reply; 8+ messages in thread
From: David Ahern @ 2017-11-06  6:57 UTC (permalink / raw)
  To: netdev; +Cc: jhs, David Ahern, Satish Ashok

Commit a6db4494d218c ("net: ipv4: Consider failed nexthops in multipath
routes") added support for checking neighbor state when selecting a path
for multipath route lookups. It works but incurs a delay waiting for
the neighbor entry to timeout. Improve the path selection by flushing
non-permanent neighbor entries when carrier is off.

Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv4/fib_frontend.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f02819134ba2..aa8fea74858f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1226,10 +1226,13 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		break;
 	case NETDEV_CHANGE:
 		flags = dev_get_flags(dev);
-		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+		if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
 			fib_sync_up(dev, RTNH_F_LINKDOWN);
-		else
+		} else {
 			fib_sync_down_dev(dev, event, false);
+			if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+				neigh_carrier_down(&arp_tbl, dev);
+		}
 		/* fall through */
 	case NETDEV_CHANGEMTU:
 		rt_cache_flush(net);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 3/3] net: ipv6: flush neighbor entries when carrier is off
  2017-11-06  6:57 [PATCH net-next 0/3] net: flush neighbor entries when carrier is off David Ahern
  2017-11-06  6:57 ` [PATCH net-next 1/3] net: neigh: Add helper to flush entries on carrier down David Ahern
  2017-11-06  6:57 ` [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off David Ahern
@ 2017-11-06  6:57 ` David Ahern
       [not found] ` <CAKYOsXz3RFqbzENy1rz8kjsr3aF0u2kxBHrSQ3uNfFfQTWbJRQ@mail.gmail.com>
  3 siblings, 0 replies; 8+ messages in thread
From: David Ahern @ 2017-11-06  6:57 UTC (permalink / raw)
  To: netdev; +Cc: jhs, David Ahern, Satish Ashok

Similar to IPv4, flush non-permanent neighbor entries on carrier down
to improve path selection for multipath routes.

Signed-off-by: Satish Ashok <sashok@cmulusnetworks.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/addrconf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5a8a10229a07..85bddff5eac6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3432,6 +3432,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				run_pending = 1;
 			}
 		} else if (event == NETDEV_CHANGE) {
+			if (idev && idev->cnf.ignore_routes_with_linkdown)
+				neigh_carrier_down(&nd_tbl, dev);
+
 			if (!addrconf_link_ready(dev)) {
 				/* device is still not ready. */
 				break;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off
  2017-11-06  6:57 ` [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off David Ahern
@ 2017-11-06 10:47   ` Ido Schimmel
  2017-11-06 14:49     ` David Ahern
  0 siblings, 1 reply; 8+ messages in thread
From: Ido Schimmel @ 2017-11-06 10:47 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, jhs, Satish Ashok

Hi David,

On Sun, Nov 05, 2017 at 10:57:52PM -0800, David Ahern wrote:
> Commit a6db4494d218c ("net: ipv4: Consider failed nexthops in multipath
> routes") added support for checking neighbor state when selecting a path
> for multipath route lookups. It works but incurs a delay waiting for
> the neighbor entry to timeout. Improve the path selection by flushing
> non-permanent neighbor entries when carrier is off.
> 
> Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  net/ipv4/fib_frontend.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index f02819134ba2..aa8fea74858f 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -1226,10 +1226,13 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>  		break;
>  	case NETDEV_CHANGE:
>  		flags = dev_get_flags(dev);
> -		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
> +		if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
>  			fib_sync_up(dev, RTNH_F_LINKDOWN);
> -		else
> +		} else {
>  			fib_sync_down_dev(dev, event, false);
> +			if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))

Can you please explain why the flushing is conditioned on this sysctl
being enabled? If carrier is down and the sysctl is enabled, then the
nexthop will not be used anyway.

Thanks

> +				neigh_carrier_down(&arp_tbl, dev);
> +		}
>  		/* fall through */
>  	case NETDEV_CHANGEMTU:
>  		rt_cache_flush(net);
> -- 
> 2.1.4
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off
  2017-11-06 10:47   ` Ido Schimmel
@ 2017-11-06 14:49     ` David Ahern
  2017-11-07  5:29       ` David Ahern
  0 siblings, 1 reply; 8+ messages in thread
From: David Ahern @ 2017-11-06 14:49 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, jhs, Satish Ashok

On 11/6/17 7:47 PM, Ido Schimmel wrote:
> Hi David,
> 
> On Sun, Nov 05, 2017 at 10:57:52PM -0800, David Ahern wrote:
>> Commit a6db4494d218c ("net: ipv4: Consider failed nexthops in multipath
>> routes") added support for checking neighbor state when selecting a path
>> for multipath route lookups. It works but incurs a delay waiting for
>> the neighbor entry to timeout. Improve the path selection by flushing
>> non-permanent neighbor entries when carrier is off.
>>
>> Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>> ---
>>  net/ipv4/fib_frontend.c | 7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
>> index f02819134ba2..aa8fea74858f 100644
>> --- a/net/ipv4/fib_frontend.c
>> +++ b/net/ipv4/fib_frontend.c
>> @@ -1226,10 +1226,13 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>>  		break;
>>  	case NETDEV_CHANGE:
>>  		flags = dev_get_flags(dev);
>> -		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
>> +		if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
>>  			fib_sync_up(dev, RTNH_F_LINKDOWN);
>> -		else
>> +		} else {
>>  			fib_sync_down_dev(dev, event, false);
>> +			if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
> 
> Can you please explain why the flushing is conditioned on this sysctl
> being enabled? If carrier is down and the sysctl is enabled, then the
> nexthop will not be used anyway.

I pulled the patch from our queue in response to a problem Jamal reported.

For the output path, the result set by fib_table_lookup is ignored and
fib_select_path is invoked which for multipath can pick a nexthop with
carrier down. Perhaps that is the better change ... to check dead and
linkdown flags in fib_select_multipath. I'll try it out tomorrow.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off
  2017-11-06 14:49     ` David Ahern
@ 2017-11-07  5:29       ` David Ahern
  0 siblings, 0 replies; 8+ messages in thread
From: David Ahern @ 2017-11-07  5:29 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, jhs, Satish Ashok

On 11/6/17 11:49 PM, David Ahern wrote:
>>> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
>>> index f02819134ba2..aa8fea74858f 100644
>>> --- a/net/ipv4/fib_frontend.c
>>> +++ b/net/ipv4/fib_frontend.c
>>> @@ -1226,10 +1226,13 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>>>  		break;
>>>  	case NETDEV_CHANGE:
>>>  		flags = dev_get_flags(dev);
>>> -		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
>>> +		if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
>>>  			fib_sync_up(dev, RTNH_F_LINKDOWN);
>>> -		else
>>> +		} else {
>>>  			fib_sync_down_dev(dev, event, false);
>>> +			if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
>>
>> Can you please explain why the flushing is conditioned on this sysctl
>> being enabled? If carrier is down and the sysctl is enabled, then the
>> nexthop will not be used anyway.

you are right, of course. ;-) With the sysctl enabled nexthops with
linkdown have upper_bound set to -1 and will not get selected in
fib_select_multipath.

Jamal: the existing code with the sysctl's enabled should fix your use case.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 0/3] net: flush neighbor entries when carrier is off
       [not found] ` <CAKYOsXz3RFqbzENy1rz8kjsr3aF0u2kxBHrSQ3uNfFfQTWbJRQ@mail.gmail.com>
@ 2017-11-08  3:39   ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2017-11-08  3:39 UTC (permalink / raw)
  To: dsahern; +Cc: netdev

From: David Ahern <dsahern@gmail.com>
Date: Wed, 8 Nov 2017 12:35:16 +0900

> On Mon, Nov 6, 2017 at 3:57 PM, David Ahern <dsahern@gmail.com> wrote:
> 
>> Commit a6db4494d218c ("net: ipv4: Consider failed nexthops in multipath
>> routes") added support for checking neighbor state when selecting a path
>> for multipath route lookups. It works but incurs a delay waiting for
>> the neighbor entry to timeout. Improve the path selection by flushing
>> non-permanent neighbor entries when carrier is off.
>>
>> David Ahern (3):
>>   net: neigh: Add helper to flush entries on carrier down
>>   net: ipv4: flush neighbor entries when carrier is off
>>   net: ipv6: flush neighbor entries when carrier is off
>>
>>  include/net/neighbour.h |  1 +
>>  net/core/neighbour.c    | 26 ++++++++++++++++++++++----
>>  net/ipv4/fib_frontend.c |  7 +++++--
>>  net/ipv6/addrconf.c     |  3 +++
>>  4 files changed, 31 insertions(+), 6 deletions(-)
>>
>>
> Dave: please drop this set. It's not needed for the use case mentioned
> above, so at a minimum the commit message needs to be updated.

Yep, sure, no problem.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-11-08  3:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-06  6:57 [PATCH net-next 0/3] net: flush neighbor entries when carrier is off David Ahern
2017-11-06  6:57 ` [PATCH net-next 1/3] net: neigh: Add helper to flush entries on carrier down David Ahern
2017-11-06  6:57 ` [PATCH net-next 2/3] net: ipv4: flush neighbor entries when carrier is off David Ahern
2017-11-06 10:47   ` Ido Schimmel
2017-11-06 14:49     ` David Ahern
2017-11-07  5:29       ` David Ahern
2017-11-06  6:57 ` [PATCH net-next 3/3] net: ipv6: " David Ahern
     [not found] ` <CAKYOsXz3RFqbzENy1rz8kjsr3aF0u2kxBHrSQ3uNfFfQTWbJRQ@mail.gmail.com>
2017-11-08  3:39   ` [PATCH net-next 0/3] net: " David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.