All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] neighbour: allow referenced neighbours to be removed
@ 2021-03-17 18:53 Thadeu Lima de Souza Cascardo
  2021-03-17 18:53 ` [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed Thadeu Lima de Souza Cascardo
  2021-03-17 23:42 ` [PATCH 1/2] neighbour: allow referenced neighbours to be removed David Ahern
  0 siblings, 2 replies; 8+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2021-03-17 18:53 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, dsahern, Thadeu Lima de Souza Cascardo

During forced garbage collection, neighbours with more than one reference are
not removed. It's possible to DoS the neighbour table by using ARP spoofing
in such a way that there is always a timer pending for all neighbours,
preventing any of them from being removed. That will cause any new
neighbour creation to fail.

Use the same code as used by neigh_flush_dev, which deletes the timer and
cleans the queue when there are still references left.

With the same ARP spoofing technique, it was still possible to reach a valid
destination when this fix was applied, with no more table overflows.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
---
 net/core/neighbour.c | 117 +++++++++++++++++++------------------------
 1 file changed, 51 insertions(+), 66 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e2982b3970b8..bbc89c7ffdfd 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -173,25 +173,48 @@ static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
 	return rc;
 }
 
+static int neigh_del_timer(struct neighbour *n)
+{
+	if ((n->nud_state & NUD_IN_TIMER) &&
+	    del_timer(&n->timer)) {
+		neigh_release(n);
+		return 1;
+	}
+	return 0;
+}
+
 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
 		      struct neigh_table *tbl)
 {
-	bool retval = false;
-
+	rcu_assign_pointer(*np,
+		   rcu_dereference_protected(n->next,
+				lockdep_is_held(&tbl->lock)));
 	write_lock(&n->lock);
-	if (refcount_read(&n->refcnt) == 1) {
-		struct neighbour *neigh;
-
-		neigh = rcu_dereference_protected(n->next,
-						  lockdep_is_held(&tbl->lock));
-		rcu_assign_pointer(*np, neigh);
-		neigh_mark_dead(n);
-		retval = true;
+	neigh_del_timer(n);
+	neigh_mark_dead(n);
+	if (refcount_read(&n->refcnt) != 1) {
+		/* The most unpleasant situation.
+		   We must destroy neighbour entry,
+		   but someone still uses it.
+
+		   The destroy will be delayed until
+		   the last user releases us, but
+		   we must kill timers etc. and move
+		   it to safe state.
+		 */
+		__skb_queue_purge(&n->arp_queue);
+		n->arp_queue_len_bytes = 0;
+		n->output = neigh_blackhole;
+		if (n->nud_state & NUD_VALID)
+			n->nud_state = NUD_NOARP;
+		else
+			n->nud_state = NUD_NONE;
+		neigh_dbg(2, "neigh %p is stray\n", n);
 	}
 	write_unlock(&n->lock);
-	if (retval)
-		neigh_cleanup_and_release(n);
-	return retval;
+	neigh_cleanup_and_release(n);
+
+	return true;
 }
 
 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
@@ -229,22 +252,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 	write_lock_bh(&tbl->lock);
 
 	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
-		if (refcount_read(&n->refcnt) == 1) {
-			bool remove = false;
-
-			write_lock(&n->lock);
-			if ((n->nud_state == NUD_FAILED) ||
-			    (tbl->is_multicast &&
-			     tbl->is_multicast(n->primary_key)) ||
-			    time_after(tref, n->updated))
-				remove = true;
-			write_unlock(&n->lock);
-
-			if (remove && neigh_remove_one(n, tbl))
-				shrunk++;
-			if (shrunk >= max_clean)
-				break;
-		}
+		bool remove = false;
+
+		write_lock(&n->lock);
+		if ((n->nud_state == NUD_FAILED) ||
+		    (tbl->is_multicast &&
+		     tbl->is_multicast(n->primary_key)) ||
+		    time_after(tref, n->updated))
+			remove = true;
+		write_unlock(&n->lock);
+
+		if (remove && neigh_remove_one(n, tbl))
+			shrunk++;
+		if (shrunk >= max_clean)
+			break;
 	}
 
 	tbl->last_flush = jiffies;
@@ -264,16 +285,6 @@ static void neigh_add_timer(struct neighbour *n, unsigned long when)
 	}
 }
 
-static int neigh_del_timer(struct neighbour *n)
-{
-	if ((n->nud_state & NUD_IN_TIMER) &&
-	    del_timer(&n->timer)) {
-		neigh_release(n);
-		return 1;
-	}
-	return 0;
-}
-
 static void pneigh_queue_purge(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
@@ -307,33 +318,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 				np = &n->next;
 				continue;
 			}
-			rcu_assign_pointer(*np,
-				   rcu_dereference_protected(n->next,
-						lockdep_is_held(&tbl->lock)));
-			write_lock(&n->lock);
-			neigh_del_timer(n);
-			neigh_mark_dead(n);
-			if (refcount_read(&n->refcnt) != 1) {
-				/* The most unpleasant situation.
-				   We must destroy neighbour entry,
-				   but someone still uses it.
-
-				   The destroy will be delayed until
-				   the last user releases us, but
-				   we must kill timers etc. and move
-				   it to safe state.
-				 */
-				__skb_queue_purge(&n->arp_queue);
-				n->arp_queue_len_bytes = 0;
-				n->output = neigh_blackhole;
-				if (n->nud_state & NUD_VALID)
-					n->nud_state = NUD_NOARP;
-				else
-					n->nud_state = NUD_NONE;
-				neigh_dbg(2, "neigh %p is stray\n", n);
-			}
-			write_unlock(&n->lock);
-			neigh_cleanup_and_release(n);
+			neigh_del(n, np, tbl);
 		}
 	}
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed
  2021-03-17 18:53 [PATCH 1/2] neighbour: allow referenced neighbours to be removed Thadeu Lima de Souza Cascardo
@ 2021-03-17 18:53 ` Thadeu Lima de Souza Cascardo
  2021-04-19 16:44   ` Kasper Dupont
  2021-03-17 23:42 ` [PATCH 1/2] neighbour: allow referenced neighbours to be removed David Ahern
  1 sibling, 1 reply; 8+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2021-03-17 18:53 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, dsahern, Thadeu Lima de Souza Cascardo, Kasper Dupont

IFF_POINTOPOINT interfaces use NUD_NOARP entries for IPv6. It's possible to
fill up the neighbour table with enough entries that it will overflow for
valid connections after that.

This behaviour is more prevalent after commit 58956317c8de ("neighbor:
Improve garbage collection") is applied, as it prevents removal of
entries that are not NUD_FAILED, unless they are more than 5s old.

Fixes: 58956317c8de ("neighbor: Improve garbage collection")
Reported-by: Kasper Dupont <kasperd@gjkwv.06.feb.2021.kasperd.net>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
---
 net/core/neighbour.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bbc89c7ffdfd..be5ca411b149 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -256,6 +256,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
 		write_lock(&n->lock);
 		if ((n->nud_state == NUD_FAILED) ||
+		    (n->nud_state == NUD_NOARP) ||
 		    (tbl->is_multicast &&
 		     tbl->is_multicast(n->primary_key)) ||
 		    time_after(tref, n->updated))
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] neighbour: allow referenced neighbours to be removed
  2021-03-17 18:53 [PATCH 1/2] neighbour: allow referenced neighbours to be removed Thadeu Lima de Souza Cascardo
  2021-03-17 18:53 ` [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed Thadeu Lima de Souza Cascardo
@ 2021-03-17 23:42 ` David Ahern
  2021-03-22 21:33   ` Thadeu Lima de Souza Cascardo
  1 sibling, 1 reply; 8+ messages in thread
From: David Ahern @ 2021-03-17 23:42 UTC (permalink / raw)
  To: Thadeu Lima de Souza Cascardo, netdev; +Cc: davem, kuba, dsahern

On 3/17/21 12:53 PM, Thadeu Lima de Souza Cascardo wrote:
> During forced garbage collection, neighbours with more than a reference are
> not removed. It's possible to DoS the neighbour table by using ARP spoofing
> in such a way that there is always a timer pending for all neighbours,
> preventing any of them from being removed. That will cause any new
> neighbour creation to fail.
> 
> Use the same code as used by neigh_flush_dev, which deletes the timer and
> cleans the queue when there are still references left.
> 
> With the same ARP spoofing technique, it was still possible to reach a valid
> destination when this fix was applied, with no more table overflows.

And how fast are neighbor entries removed with this patch? The current
code gives a neighbor entry a minimum lifetime to allow it to exist long
enough to be confirmed. Removing the minimum lifetime means neighbor
entries are constantly churning which is just as bad as the arp spoofing
problem.

> 
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> ---
>  net/core/neighbour.c | 117 +++++++++++++++++++------------------------
>  1 file changed, 51 insertions(+), 66 deletions(-)
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index e2982b3970b8..bbc89c7ffdfd 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -173,25 +173,48 @@ static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
>  	return rc;
>  }
>  
> +static int neigh_del_timer(struct neighbour *n)
> +{
> +	if ((n->nud_state & NUD_IN_TIMER) &&
> +	    del_timer(&n->timer)) {
> +		neigh_release(n);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
>  static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
>  		      struct neigh_table *tbl)
>  {
> -	bool retval = false;
> -
> +	rcu_assign_pointer(*np,
> +		   rcu_dereference_protected(n->next,
> +				lockdep_is_held(&tbl->lock)));
>  	write_lock(&n->lock);
> -	if (refcount_read(&n->refcnt) == 1) {
> -		struct neighbour *neigh;
> -
> -		neigh = rcu_dereference_protected(n->next,
> -						  lockdep_is_held(&tbl->lock));
> -		rcu_assign_pointer(*np, neigh);
> -		neigh_mark_dead(n);
> -		retval = true;
> +	neigh_del_timer(n);
> +	neigh_mark_dead(n);
> +	if (refcount_read(&n->refcnt) != 1) {
> +		/* The most unpleasant situation.
> +		   We must destroy neighbour entry,
> +		   but someone still uses it.
> +
> +		   The destroy will be delayed until
> +		   the last user releases us, but
> +		   we must kill timers etc. and move
> +		   it to safe state.
> +		 */
> +		__skb_queue_purge(&n->arp_queue);
> +		n->arp_queue_len_bytes = 0;
> +		n->output = neigh_blackhole;
> +		if (n->nud_state & NUD_VALID)
> +			n->nud_state = NUD_NOARP;
> +		else
> +			n->nud_state = NUD_NONE;
> +		neigh_dbg(2, "neigh %p is stray\n", n);
>  	}
>  	write_unlock(&n->lock);
> -	if (retval)
> -		neigh_cleanup_and_release(n);
> -	return retval;
> +	neigh_cleanup_and_release(n);
> +
> +	return true;
>  }
>  
>  bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
> @@ -229,22 +252,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
>  	write_lock_bh(&tbl->lock);
>  
>  	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
> -		if (refcount_read(&n->refcnt) == 1) {
> -			bool remove = false;
> -
> -			write_lock(&n->lock);
> -			if ((n->nud_state == NUD_FAILED) ||
> -			    (tbl->is_multicast &&
> -			     tbl->is_multicast(n->primary_key)) ||
> -			    time_after(tref, n->updated))
> -				remove = true;
> -			write_unlock(&n->lock);
> -
> -			if (remove && neigh_remove_one(n, tbl))
> -				shrunk++;
> -			if (shrunk >= max_clean)
> -				break;
> -		}
> +		bool remove = false;
> +
> +		write_lock(&n->lock);
> +		if ((n->nud_state == NUD_FAILED) ||
> +		    (tbl->is_multicast &&
> +		     tbl->is_multicast(n->primary_key)) ||
> +		    time_after(tref, n->updated))
> +			remove = true;
> +		write_unlock(&n->lock);
> +
> +		if (remove && neigh_remove_one(n, tbl))
> +			shrunk++;
> +		if (shrunk >= max_clean)
> +			break;
>  	}
>  
>  	tbl->last_flush = jiffies;
> @@ -264,16 +285,6 @@ static void neigh_add_timer(struct neighbour *n, unsigned long when)
>  	}
>  }
>  
> -static int neigh_del_timer(struct neighbour *n)
> -{
> -	if ((n->nud_state & NUD_IN_TIMER) &&
> -	    del_timer(&n->timer)) {
> -		neigh_release(n);
> -		return 1;
> -	}
> -	return 0;
> -}
> -
>  static void pneigh_queue_purge(struct sk_buff_head *list)
>  {
>  	struct sk_buff *skb;
> @@ -307,33 +318,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
>  				np = &n->next;
>  				continue;
>  			}
> -			rcu_assign_pointer(*np,
> -				   rcu_dereference_protected(n->next,
> -						lockdep_is_held(&tbl->lock)));
> -			write_lock(&n->lock);
> -			neigh_del_timer(n);
> -			neigh_mark_dead(n);
> -			if (refcount_read(&n->refcnt) != 1) {
> -				/* The most unpleasant situation.
> -				   We must destroy neighbour entry,
> -				   but someone still uses it.
> -
> -				   The destroy will be delayed until
> -				   the last user releases us, but
> -				   we must kill timers etc. and move
> -				   it to safe state.
> -				 */
> -				__skb_queue_purge(&n->arp_queue);
> -				n->arp_queue_len_bytes = 0;
> -				n->output = neigh_blackhole;
> -				if (n->nud_state & NUD_VALID)
> -					n->nud_state = NUD_NOARP;
> -				else
> -					n->nud_state = NUD_NONE;
> -				neigh_dbg(2, "neigh %p is stray\n", n);
> -			}
> -			write_unlock(&n->lock);
> -			neigh_cleanup_and_release(n);
> +			neigh_del(n, np, tbl);
>  		}
>  	}
>  }
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] neighbour: allow referenced neighbours to be removed
  2021-03-17 23:42 ` [PATCH 1/2] neighbour: allow referenced neighbours to be removed David Ahern
@ 2021-03-22 21:33   ` Thadeu Lima de Souza Cascardo
  0 siblings, 0 replies; 8+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2021-03-22 21:33 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, kuba, dsahern

On Wed, Mar 17, 2021 at 05:42:00PM -0600, David Ahern wrote:
> On 3/17/21 12:53 PM, Thadeu Lima de Souza Cascardo wrote:
> > During forced garbage collection, neighbours with more than a reference are
> > not removed. It's possible to DoS the neighbour table by using ARP spoofing
> > in such a way that there is always a timer pending for all neighbours,
> > preventing any of them from being removed. That will cause any new
> > neighbour creation to fail.
> > 
> > Use the same code as used by neigh_flush_dev, which deletes the timer and
> > cleans the queue when there are still references left.
> > 
> > With the same ARP spoofing technique, it was still possible to reach a valid
> > destination when this fix was applied, with no more table overflows.
> 
> And how fast are neighbor entries removed with this patch? The current
> code gives a neighbor entry a minimum lifetime to allow it to exist long
> enough to be confirmed. Removing the minimum lifetime means neighbor
> entries are constantly churning which is just as bad as the arp spoofing
> problem.
> 

The patch should not change the rules for removing entries, so they are removed
only after 5 seconds. When trying to reach the other endpoint of a veth device,
it usually takes between 0 and 6 failures (with a 1s interval) before succeeding,
and after that it keeps succeeding.

I will be honest and say that I have not yet been able to find out the exact
order of events with respect to neighbor state and lifetime updates, but the code
still only removes entries if they have been updated more than 5s before "now".

The change here is that entries are removed even if there is a reference to it
because of the timer. The timer is then removed and the entry can be removed.

I still see lots of table overflows with the tests I was able to run lately, so
I am not sure what changed since I first tested this patch. However,
when this patch is not applied, I cannot reach the other veth endpoint at all.
And, eventually, I lose remote access to the machine. With the patch applied,
the system is at least accessible, though it may "lag" sometimes, indicating
that access has been lost temporarily, but restored eventually.

Cascardo.

> > 
> > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> > ---
> >  net/core/neighbour.c | 117 +++++++++++++++++++------------------------
> >  1 file changed, 51 insertions(+), 66 deletions(-)
> > 
> > diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> > index e2982b3970b8..bbc89c7ffdfd 100644
> > --- a/net/core/neighbour.c
> > +++ b/net/core/neighbour.c
> > @@ -173,25 +173,48 @@ static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
> >  	return rc;
> >  }
> >  
> > +static int neigh_del_timer(struct neighbour *n)
> > +{
> > +	if ((n->nud_state & NUD_IN_TIMER) &&
> > +	    del_timer(&n->timer)) {
> > +		neigh_release(n);
> > +		return 1;
> > +	}
> > +	return 0;
> > +}
> > +
> >  static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
> >  		      struct neigh_table *tbl)
> >  {
> > -	bool retval = false;
> > -
> > +	rcu_assign_pointer(*np,
> > +		   rcu_dereference_protected(n->next,
> > +				lockdep_is_held(&tbl->lock)));
> >  	write_lock(&n->lock);
> > -	if (refcount_read(&n->refcnt) == 1) {
> > -		struct neighbour *neigh;
> > -
> > -		neigh = rcu_dereference_protected(n->next,
> > -						  lockdep_is_held(&tbl->lock));
> > -		rcu_assign_pointer(*np, neigh);
> > -		neigh_mark_dead(n);
> > -		retval = true;
> > +	neigh_del_timer(n);
> > +	neigh_mark_dead(n);
> > +	if (refcount_read(&n->refcnt) != 1) {
> > +		/* The most unpleasant situation.
> > +		   We must destroy neighbour entry,
> > +		   but someone still uses it.
> > +
> > +		   The destroy will be delayed until
> > +		   the last user releases us, but
> > +		   we must kill timers etc. and move
> > +		   it to safe state.
> > +		 */
> > +		__skb_queue_purge(&n->arp_queue);
> > +		n->arp_queue_len_bytes = 0;
> > +		n->output = neigh_blackhole;
> > +		if (n->nud_state & NUD_VALID)
> > +			n->nud_state = NUD_NOARP;
> > +		else
> > +			n->nud_state = NUD_NONE;
> > +		neigh_dbg(2, "neigh %p is stray\n", n);
> >  	}
> >  	write_unlock(&n->lock);
> > -	if (retval)
> > -		neigh_cleanup_and_release(n);
> > -	return retval;
> > +	neigh_cleanup_and_release(n);
> > +
> > +	return true;
> >  }
> >  
> >  bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
> > @@ -229,22 +252,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
> >  	write_lock_bh(&tbl->lock);
> >  
> >  	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
> > -		if (refcount_read(&n->refcnt) == 1) {
> > -			bool remove = false;
> > -
> > -			write_lock(&n->lock);
> > -			if ((n->nud_state == NUD_FAILED) ||
> > -			    (tbl->is_multicast &&
> > -			     tbl->is_multicast(n->primary_key)) ||
> > -			    time_after(tref, n->updated))
> > -				remove = true;
> > -			write_unlock(&n->lock);
> > -
> > -			if (remove && neigh_remove_one(n, tbl))
> > -				shrunk++;
> > -			if (shrunk >= max_clean)
> > -				break;
> > -		}
> > +		bool remove = false;
> > +
> > +		write_lock(&n->lock);
> > +		if ((n->nud_state == NUD_FAILED) ||
> > +		    (tbl->is_multicast &&
> > +		     tbl->is_multicast(n->primary_key)) ||
> > +		    time_after(tref, n->updated))
> > +			remove = true;
> > +		write_unlock(&n->lock);
> > +
> > +		if (remove && neigh_remove_one(n, tbl))
> > +			shrunk++;
> > +		if (shrunk >= max_clean)
> > +			break;
> >  	}
> >  
> >  	tbl->last_flush = jiffies;
> > @@ -264,16 +285,6 @@ static void neigh_add_timer(struct neighbour *n, unsigned long when)
> >  	}
> >  }
> >  
> > -static int neigh_del_timer(struct neighbour *n)
> > -{
> > -	if ((n->nud_state & NUD_IN_TIMER) &&
> > -	    del_timer(&n->timer)) {
> > -		neigh_release(n);
> > -		return 1;
> > -	}
> > -	return 0;
> > -}
> > -
> >  static void pneigh_queue_purge(struct sk_buff_head *list)
> >  {
> >  	struct sk_buff *skb;
> > @@ -307,33 +318,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
> >  				np = &n->next;
> >  				continue;
> >  			}
> > -			rcu_assign_pointer(*np,
> > -				   rcu_dereference_protected(n->next,
> > -						lockdep_is_held(&tbl->lock)));
> > -			write_lock(&n->lock);
> > -			neigh_del_timer(n);
> > -			neigh_mark_dead(n);
> > -			if (refcount_read(&n->refcnt) != 1) {
> > -				/* The most unpleasant situation.
> > -				   We must destroy neighbour entry,
> > -				   but someone still uses it.
> > -
> > -				   The destroy will be delayed until
> > -				   the last user releases us, but
> > -				   we must kill timers etc. and move
> > -				   it to safe state.
> > -				 */
> > -				__skb_queue_purge(&n->arp_queue);
> > -				n->arp_queue_len_bytes = 0;
> > -				n->output = neigh_blackhole;
> > -				if (n->nud_state & NUD_VALID)
> > -					n->nud_state = NUD_NOARP;
> > -				else
> > -					n->nud_state = NUD_NONE;
> > -				neigh_dbg(2, "neigh %p is stray\n", n);
> > -			}
> > -			write_unlock(&n->lock);
> > -			neigh_cleanup_and_release(n);
> > +			neigh_del(n, np, tbl);
> >  		}
> >  	}
> >  }
> > 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed
  2021-03-17 18:53 ` [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed Thadeu Lima de Souza Cascardo
@ 2021-04-19 16:44   ` Kasper Dupont
  2021-04-19 17:10     ` David Ahern
  0 siblings, 1 reply; 8+ messages in thread
From: Kasper Dupont @ 2021-04-19 16:44 UTC (permalink / raw)
  To: netdev; +Cc: Thadeu Lima de Souza Cascardo, davem, kuba, dsahern, Kasper Dupont

On 17/03/21 15.53, Thadeu Lima de Souza Cascardo wrote:
> IFF_POINTOPOINT interfaces use NUD_NOARP entries for IPv6. It's possible to
> fill up the neighbour table with enough entries that it will overflow for
> valid connections after that.
> 
> This behaviour is more prevalent after commit 58956317c8de ("neighbor:
> Improve garbage collection") is applied, as it prevents removal from
> entries that are not NUD_FAILED, unless they are more than 5s old.
> 
> Fixes: 58956317c8de (neighbor: Improve garbage collection)
> Reported-by: Kasper Dupont <kasperd@gjkwv.06.feb.2021.kasperd.net>
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> ---
>  net/core/neighbour.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index bbc89c7ffdfd..be5ca411b149 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -256,6 +256,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
>  
>  		write_lock(&n->lock);
>  		if ((n->nud_state == NUD_FAILED) ||
> +		    (n->nud_state == NUD_NOARP) ||
>  		    (tbl->is_multicast &&
>  		     tbl->is_multicast(n->primary_key)) ||
>  		    time_after(tref, n->updated))
> -- 
> 2.27.0
> 

Is there any update regarding this change?

I noticed this regression when it was used in a DoS attack on one of
my servers which I had upgraded from Ubuntu 18.04 to 20.04.

I have verified that Ubuntu 18.04 is not subject to this attack and
Ubuntu 20.04 is vulnerable. I have also verified that the one-line
change which Cascardo has provided fixes the vulnerability on Ubuntu
20.04.

Kind regards
Kasper

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed
  2021-04-19 16:44   ` Kasper Dupont
@ 2021-04-19 17:10     ` David Ahern
  2021-04-19 17:52       ` Kasper Dupont
  0 siblings, 1 reply; 8+ messages in thread
From: David Ahern @ 2021-04-19 17:10 UTC (permalink / raw)
  To: Kasper Dupont, netdev
  Cc: Thadeu Lima de Souza Cascardo, davem, kuba, dsahern, Kasper Dupont

On 4/19/21 9:44 AM, Kasper Dupont wrote:
> On 17/03/21 15.53, Thadeu Lima de Souza Cascardo wrote:
>> IFF_POINTOPOINT interfaces use NUD_NOARP entries for IPv6. It's possible to
>> fill up the neighbour table with enough entries that it will overflow for
>> valid connections after that.
>>
>> This behaviour is more prevalent after commit 58956317c8de ("neighbor:
>> Improve garbage collection") is applied, as it prevents removal from
>> entries that are not NUD_FAILED, unless they are more than 5s old.
>>
>> Fixes: 58956317c8de (neighbor: Improve garbage collection)
>> Reported-by: Kasper Dupont <kasperd@gjkwv.06.feb.2021.kasperd.net>
>> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
>> ---
>>  net/core/neighbour.c | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
>> index bbc89c7ffdfd..be5ca411b149 100644
>> --- a/net/core/neighbour.c
>> +++ b/net/core/neighbour.c
>> @@ -256,6 +256,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
>>  
>>  		write_lock(&n->lock);
>>  		if ((n->nud_state == NUD_FAILED) ||
>> +		    (n->nud_state == NUD_NOARP) ||
>>  		    (tbl->is_multicast &&
>>  		     tbl->is_multicast(n->primary_key)) ||
>>  		    time_after(tref, n->updated))
>> -- 
>> 2.27.0
>>
> 
> Is there any update regarding this change?
> 
> I noticed this regression when it was used in a DoS attack on one of
> my servers which I had upgraded from Ubuntu 18.04 to 20.04.
> 
> I have verified that Ubuntu 18.04 is not subject to this attack and
> Ubuntu 20.04 is vulnerable. I have also verified that the one-line
> change which Cascardo has provided fixes the vulnerability on Ubuntu
> 20.04.
> 

Did your testing include both patches or just this one?



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed
  2021-04-19 17:10     ` David Ahern
@ 2021-04-19 17:52       ` Kasper Dupont
  2021-04-20  4:27         ` David Ahern
  0 siblings, 1 reply; 8+ messages in thread
From: Kasper Dupont @ 2021-04-19 17:52 UTC (permalink / raw)
  To: David Ahern
  Cc: Kasper Dupont, netdev, Thadeu Lima de Souza Cascardo, davem,
	kuba, dsahern, Kasper Dupont

On 19/04/21 10.10, David Ahern wrote:
> On 4/19/21 9:44 AM, Kasper Dupont wrote:
> > On 17/03/21 15.53, Thadeu Lima de Souza Cascardo wrote:
> >> IFF_POINTOPOINT interfaces use NUD_NOARP entries for IPv6. It's possible to
> >> fill up the neighbour table with enough entries that it will overflow for
> >> valid connections after that.
> >>
> >> This behaviour is more prevalent after commit 58956317c8de ("neighbor:
> >> Improve garbage collection") is applied, as it prevents removal from
> >> entries that are not NUD_FAILED, unless they are more than 5s old.
> >>
> >> Fixes: 58956317c8de (neighbor: Improve garbage collection)
> >> Reported-by: Kasper Dupont <kasperd@gjkwv.06.feb.2021.kasperd.net>
> >> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> >> ---
> >>  net/core/neighbour.c | 1 +
> >>  1 file changed, 1 insertion(+)
> >>
> >> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> >> index bbc89c7ffdfd..be5ca411b149 100644
> >> --- a/net/core/neighbour.c
> >> +++ b/net/core/neighbour.c
> >> @@ -256,6 +256,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
> >>  
> >>  		write_lock(&n->lock);
> >>  		if ((n->nud_state == NUD_FAILED) ||
> >> +		    (n->nud_state == NUD_NOARP) ||
> >>  		    (tbl->is_multicast &&
> >>  		     tbl->is_multicast(n->primary_key)) ||
> >>  		    time_after(tref, n->updated))
> >> -- 
> >> 2.27.0
> >>
> > 
> > Is there any update regarding this change?
> > 
> > I noticed this regression when it was used in a DoS attack on one of
> > my servers which I had upgraded from Ubuntu 18.04 to 20.04.
> > 
> > I have verified that Ubuntu 18.04 is not subject to this attack and
> > Ubuntu 20.04 is vulnerable. I have also verified that the one-line
> > change which Cascardo has provided fixes the vulnerability on Ubuntu
> > 20.04.
> > 
> 
> your testing included both patches or just this one?

I applied only this one line change on top of the kernel in Ubuntu
20.04. The behavior I observed was that without the patch the kernel
was vulnerable and with that patch I was unable to reproduce the
problem.

The other longer patch is for a different issue which Cascardo
discovered while working on the one I had reported. I don't have an
environment set up where I can reproduce the issue addressed by that
larger patch.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed
  2021-04-19 17:52       ` Kasper Dupont
@ 2021-04-20  4:27         ` David Ahern
  0 siblings, 0 replies; 8+ messages in thread
From: David Ahern @ 2021-04-20  4:27 UTC (permalink / raw)
  To: Kasper Dupont
  Cc: netdev, Thadeu Lima de Souza Cascardo, davem, kuba, dsahern,
	Kasper Dupont

On 4/19/21 10:52 AM, Kasper Dupont wrote:
> On 19/04/21 10.10, David Ahern wrote:
>> On 4/19/21 9:44 AM, Kasper Dupont wrote:
>>>
>>> Is there any update regarding this change?
>>>
>>> I noticed this regression when it was used in a DoS attack on one of
>>> my servers which I had upgraded from Ubuntu 18.04 to 20.04.
>>>
>>> I have verified that Ubuntu 18.04 is not subject to this attack and
>>> Ubuntu 20.04 is vulnerable. I have also verified that the one-line
>>> change which Cascardo has provided fixes the vulnerability on Ubuntu
>>> 20.04.
>>>
>>
>> your testing included both patches or just this one?
> 
> I applied only this one line change on top of the kernel in Ubuntu
> 20.04. The behavior I observed was that without the patch the kernel
> was vulnerable and with that patch I was unable to reproduce the
> problem.

This patch should be re-submitted standalone for -net

> 
> The other longer patch is for a different issue which Cascardo
> discovered while working on the one I had reported. I don't have an
> environment set up where I can reproduce the issue addressed by that
> larger patch.
> 

The first patch is the one I have concerns about.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-04-20  4:27 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-17 18:53 [PATCH 1/2] neighbour: allow referenced neighbours to be removed Thadeu Lima de Souza Cascardo
2021-03-17 18:53 ` [PATCH 2/2] neighbour: allow NUD_NOARP entries to be forced GCed Thadeu Lima de Souza Cascardo
2021-04-19 16:44   ` Kasper Dupont
2021-04-19 17:10     ` David Ahern
2021-04-19 17:52       ` Kasper Dupont
2021-04-20  4:27         ` David Ahern
2021-03-17 23:42 ` [PATCH 1/2] neighbour: allow referenced neighbours to be removed David Ahern
2021-03-22 21:33   ` Thadeu Lima de Souza Cascardo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.