netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] ipvs: avoid expiring many connections from timer
@ 2020-06-20 10:03 Julian Anastasov
  2020-06-30 15:19 ` Simon Horman
  2020-07-01  8:17 ` Pablo Neira Ayuso
  0 siblings, 2 replies; 5+ messages in thread
From: Julian Anastasov @ 2020-06-20 10:03 UTC (permalink / raw)
  To: Simon Horman; +Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Andrew Sy Kim

Add new functions ip_vs_conn_del() and ip_vs_conn_del_put()
to release many IPVS connections in process context.
They are suitable for connections found in the table
when we do not want to overload the timers.

Currently, the change is useful for the dropentry delayed
work, but it will also be used in a following patch
when flushing connections to failed destinations.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
 net/netfilter/ipvs/ip_vs_conn.c | 53 +++++++++++++++++++++++----------
 net/netfilter/ipvs/ip_vs_ctl.c  |  6 ++--
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 02f2f636798d..b3921ae92740 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -807,6 +807,31 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
 	kmem_cache_free(ip_vs_conn_cachep, cp);
 }
 
+/* Try to delete connection while not holding reference */
+static void ip_vs_conn_del(struct ip_vs_conn *cp)
+{
+	if (del_timer(&cp->timer)) {
+		/* Drop cp->control chain too */
+		if (cp->control)
+			cp->timeout = 0;
+		ip_vs_conn_expire(&cp->timer);
+	}
+}
+
+/* Try to delete connection while holding reference */
+static void ip_vs_conn_del_put(struct ip_vs_conn *cp)
+{
+	if (del_timer(&cp->timer)) {
+		/* Drop cp->control chain too */
+		if (cp->control)
+			cp->timeout = 0;
+		__ip_vs_conn_put(cp);
+		ip_vs_conn_expire(&cp->timer);
+	} else {
+		__ip_vs_conn_put(cp);
+	}
+}
+
 static void ip_vs_conn_expire(struct timer_list *t)
 {
 	struct ip_vs_conn *cp = from_timer(cp, t, timer);
@@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t)
 
 		/* does anybody control me? */
 		if (ct) {
+			bool has_ref = !cp->timeout && __ip_vs_conn_get(ct);
+
 			ip_vs_control_del(cp);
 			/* Drop CTL or non-assured TPL if not used anymore */
-			if (!cp->timeout && !atomic_read(&ct->n_control) &&
+			if (has_ref && !atomic_read(&ct->n_control) &&
 			    (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
 			     !(ct->state & IP_VS_CTPL_S_ASSURED))) {
 				IP_VS_DBG(4, "drop controlling connection\n");
-				ct->timeout = 0;
-				ip_vs_conn_expire_now(ct);
+				ip_vs_conn_del_put(ct);
+			} else if (has_ref) {
+				__ip_vs_conn_put(ct);
 			}
 		}
 
@@ -1317,8 +1345,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
 
 drop:
 			IP_VS_DBG(4, "drop connection\n");
-			cp->timeout = 0;
-			ip_vs_conn_expire_now(cp);
+			ip_vs_conn_del(cp);
 		}
 		cond_resched_rcu();
 	}
@@ -1341,19 +1368,15 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
 		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (cp->ipvs != ipvs)
 				continue;
-			/* As timers are expired in LIFO order, restart
-			 * the timer of controlling connection first, so
-			 * that it is expired after us.
-			 */
+			if (atomic_read(&cp->n_control))
+				continue;
 			cp_c = cp->control;
-			/* cp->control is valid only with reference to cp */
-			if (cp_c && __ip_vs_conn_get(cp)) {
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_del(cp);
+			if (cp_c && !atomic_read(&cp_c->n_control)) {
 				IP_VS_DBG(4, "del controlling connection\n");
-				ip_vs_conn_expire_now(cp_c);
-				__ip_vs_conn_put(cp);
+				ip_vs_conn_del(cp_c);
 			}
-			IP_VS_DBG(4, "del connection\n");
-			ip_vs_conn_expire_now(cp);
 		}
 		cond_resched_rcu();
 	}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 412656c34f20..1a231f518e3f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -224,7 +224,8 @@ static void defense_work_handler(struct work_struct *work)
 	update_defense_level(ipvs);
 	if (atomic_read(&ipvs->dropentry))
 		ip_vs_random_dropentry(ipvs);
-	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+	queue_delayed_work(system_long_wq, &ipvs->defense_work,
+			   DEFENSE_TIMER_PERIOD);
 }
 #endif
 
@@ -4063,7 +4064,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
 	ipvs->sysctl_tbl = tbl;
 	/* Schedule defense work */
 	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
-	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+	queue_delayed_work(system_long_wq, &ipvs->defense_work,
+			   DEFENSE_TIMER_PERIOD);
 
 	return 0;
 }
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next] ipvs: avoid expiring many connections from timer
  2020-06-20 10:03 [PATCH net-next] ipvs: avoid expiring many connections from timer Julian Anastasov
@ 2020-06-30 15:19 ` Simon Horman
  2020-06-30 16:10   ` Julian Anastasov
  2020-07-01  8:17 ` Pablo Neira Ayuso
  1 sibling, 1 reply; 5+ messages in thread
From: Simon Horman @ 2020-06-30 15:19 UTC (permalink / raw)
  To: Julian Anastasov
  Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Andrew Sy Kim

Hi Julian,

sorry for not noticing this earlier.

On Sat, Jun 20, 2020 at 01:03:55PM +0300, Julian Anastasov wrote:
> Add new functions ip_vs_conn_del() and ip_vs_conn_del_put()
> to release many IPVS connections in process context.
> They are suitable for connections found in table
> when we do not want to overload the timers.
> 
> Currently, the change is useful for the dropentry delayed
> work but it will be used also in following patch
> when flushing connections to failed destinations.
> 
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  net/netfilter/ipvs/ip_vs_conn.c | 53 +++++++++++++++++++++++----------
>  net/netfilter/ipvs/ip_vs_ctl.c  |  6 ++--
>  2 files changed, 42 insertions(+), 17 deletions(-)
> 
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index 02f2f636798d..b3921ae92740 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -807,6 +807,31 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
>  	kmem_cache_free(ip_vs_conn_cachep, cp);
>  }
>  
> +/* Try to delete connection while not holding reference */
> +static void ip_vs_conn_del(struct ip_vs_conn *cp)
> +{
> +	if (del_timer(&cp->timer)) {
> +		/* Drop cp->control chain too */
> +		if (cp->control)
> +			cp->timeout = 0;
> +		ip_vs_conn_expire(&cp->timer);
> +	}
> +}
> +
> +/* Try to delete connection while holding reference */
> +static void ip_vs_conn_del_put(struct ip_vs_conn *cp)
> +{
> +	if (del_timer(&cp->timer)) {
> +		/* Drop cp->control chain too */
> +		if (cp->control)
> +			cp->timeout = 0;
> +		__ip_vs_conn_put(cp);
> +		ip_vs_conn_expire(&cp->timer);
> +	} else {
> +		__ip_vs_conn_put(cp);
> +	}
> +}
> +
>  static void ip_vs_conn_expire(struct timer_list *t)
>  {
>  	struct ip_vs_conn *cp = from_timer(cp, t, timer);
> @@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t)
>  
>  		/* does anybody control me? */
>  		if (ct) {
> +			bool has_ref = !cp->timeout && __ip_vs_conn_get(ct);
> +
>  			ip_vs_control_del(cp);
>  			/* Drop CTL or non-assured TPL if not used anymore */
> -			if (!cp->timeout && !atomic_read(&ct->n_control) &&
> +			if (has_ref && !atomic_read(&ct->n_control) &&
>  			    (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
>  			     !(ct->state & IP_VS_CTPL_S_ASSURED))) {
>  				IP_VS_DBG(4, "drop controlling connection\n");
> -				ct->timeout = 0;
> -				ip_vs_conn_expire_now(ct);
> +				ip_vs_conn_del_put(ct);

Previously this code did not put the ct, now it does.
Is that intentional?

> +			} else if (has_ref) {
> +				__ip_vs_conn_put(ct);
>  			}
>  		}
>  
> @@ -1317,8 +1345,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
>  
>  drop:
>  			IP_VS_DBG(4, "drop connection\n");
> -			cp->timeout = 0;
> -			ip_vs_conn_expire_now(cp);
> +			ip_vs_conn_del(cp);
>  		}
>  		cond_resched_rcu();
>  	}
> @@ -1341,19 +1368,15 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
>  		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
>  			if (cp->ipvs != ipvs)
>  				continue;
> -			/* As timers are expired in LIFO order, restart
> -			 * the timer of controlling connection first, so
> -			 * that it is expired after us.
> -			 */
> +			if (atomic_read(&cp->n_control))
> +				continue;
>  			cp_c = cp->control;
> -			/* cp->control is valid only with reference to cp */
> -			if (cp_c && __ip_vs_conn_get(cp)) {
> +			IP_VS_DBG(4, "del connection\n");
> +			ip_vs_conn_del(cp);
> +			if (cp_c && !atomic_read(&cp_c->n_control)) {
>  				IP_VS_DBG(4, "del controlling connection\n");
> -				ip_vs_conn_expire_now(cp_c);
> -				__ip_vs_conn_put(cp);
> +				ip_vs_conn_del(cp_c);

Conversely, previously this code put the ct, now it doesn't.
Is that also intentional?

>  			}
> -			IP_VS_DBG(4, "del connection\n");
> -			ip_vs_conn_expire_now(cp);
>  		}
>  		cond_resched_rcu();
>  	}
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 412656c34f20..1a231f518e3f 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -224,7 +224,8 @@ static void defense_work_handler(struct work_struct *work)
>  	update_defense_level(ipvs);
>  	if (atomic_read(&ipvs->dropentry))
>  		ip_vs_random_dropentry(ipvs);
> -	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
> +	queue_delayed_work(system_long_wq, &ipvs->defense_work,
> +			   DEFENSE_TIMER_PERIOD);
>  }
>  #endif
>  
> @@ -4063,7 +4064,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
>  	ipvs->sysctl_tbl = tbl;
>  	/* Schedule defense work */
>  	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
> -	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
> +	queue_delayed_work(system_long_wq, &ipvs->defense_work,
> +			   DEFENSE_TIMER_PERIOD);
>  
>  	return 0;
>  }
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next] ipvs: avoid expiring many connections from timer
  2020-06-30 15:19 ` Simon Horman
@ 2020-06-30 16:10   ` Julian Anastasov
  2020-07-01  6:54     ` Simon Horman
  0 siblings, 1 reply; 5+ messages in thread
From: Julian Anastasov @ 2020-06-30 16:10 UTC (permalink / raw)
  To: Simon Horman; +Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Andrew Sy Kim


	Hello,

On Tue, 30 Jun 2020, Simon Horman wrote:

> > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> > index 02f2f636798d..b3921ae92740 100644
> > --- a/net/netfilter/ipvs/ip_vs_conn.c
> > +++ b/net/netfilter/ipvs/ip_vs_conn.c

> > @@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t)
> >  
> >  		/* does anybody control me? */
> >  		if (ct) {
> > +			bool has_ref = !cp->timeout && __ip_vs_conn_get(ct);
> > +
> >  			ip_vs_control_del(cp);
> >  			/* Drop CTL or non-assured TPL if not used anymore */
> > -			if (!cp->timeout && !atomic_read(&ct->n_control) &&
> > +			if (has_ref && !atomic_read(&ct->n_control) &&
> >  			    (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
> >  			     !(ct->state & IP_VS_CTPL_S_ASSURED))) {
> >  				IP_VS_DBG(4, "drop controlling connection\n");
> > -				ct->timeout = 0;
> > -				ip_vs_conn_expire_now(ct);
> > +				ip_vs_conn_del_put(ct);
> 
> Previously this code did not put the ct, now it does.
> Is that intentional.

	Yes. As ip_vs_conn_expire() can now be called in both
timer and process context, we need a reference to ct while
calling del_timer() in ip_vs_conn_del_put(). As ct->n_control
is 0 after our ip_vs_control_del(), ct can be expired by
its timer while we are trying to delete it here.

> > @@ -1341,19 +1368,15 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
> >  		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
> >  			if (cp->ipvs != ipvs)
> >  				continue;
> > -			/* As timers are expired in LIFO order, restart
> > -			 * the timer of controlling connection first, so
> > -			 * that it is expired after us.
> > -			 */
> > +			if (atomic_read(&cp->n_control))
> > +				continue;
> >  			cp_c = cp->control;
> > -			/* cp->control is valid only with reference to cp */
> > -			if (cp_c && __ip_vs_conn_get(cp)) {
> > +			IP_VS_DBG(4, "del connection\n");
> > +			ip_vs_conn_del(cp);
> > +			if (cp_c && !atomic_read(&cp_c->n_control)) {
> >  				IP_VS_DBG(4, "del controlling connection\n");
> > -				ip_vs_conn_expire_now(cp_c);
> > -				__ip_vs_conn_put(cp);
> > +				ip_vs_conn_del(cp_c);
> 
> Conversely, previously this code put the ct, now it doesn't.
> Is that also intentional?

	Now we do not get a reference to cp because, in an RCU
section, the cp structure cannot go away. So we have an
implicit reference to cp. The same holds for its cp->control (ct).
The conn structures are freed later in an RCU callback.

	In this case we may run del_timer() after
another CPU, e.g. after ip_vs_conn_expire() was already
called on timer expiry or from ip_vs_conn_del*(). But then
del_timer() will not succeed for us, so we do nothing.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next] ipvs: avoid expiring many connections from timer
  2020-06-30 16:10   ` Julian Anastasov
@ 2020-07-01  6:54     ` Simon Horman
  0 siblings, 0 replies; 5+ messages in thread
From: Simon Horman @ 2020-07-01  6:54 UTC (permalink / raw)
  To: Julian Anastasov, Pablo Neira Ayuso
  Cc: lvs-devel, netfilter-devel, Andrew Sy Kim

Hi,

On Tue, Jun 30, 2020 at 07:10:06PM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Tue, 30 Jun 2020, Simon Horman wrote:
> 
> > > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> > > index 02f2f636798d..b3921ae92740 100644
> > > --- a/net/netfilter/ipvs/ip_vs_conn.c
> > > +++ b/net/netfilter/ipvs/ip_vs_conn.c
> 
> > > @@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t)
> > >  
> > >  		/* does anybody control me? */
> > >  		if (ct) {
> > > +			bool has_ref = !cp->timeout && __ip_vs_conn_get(ct);
> > > +
> > >  			ip_vs_control_del(cp);
> > >  			/* Drop CTL or non-assured TPL if not used anymore */
> > > -			if (!cp->timeout && !atomic_read(&ct->n_control) &&
> > > +			if (has_ref && !atomic_read(&ct->n_control) &&
> > >  			    (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
> > >  			     !(ct->state & IP_VS_CTPL_S_ASSURED))) {
> > >  				IP_VS_DBG(4, "drop controlling connection\n");
> > > -				ct->timeout = 0;
> > > -				ip_vs_conn_expire_now(ct);
> > > +				ip_vs_conn_del_put(ct);
> > 
> > Previously this code did not put the ct, now it does.
> > Is that intentional.
> 
> 	Yes, as ip_vs_conn_expire() now can be called both in
> timer and process context we need a reference for ct while
> calling del_timer() in ip_vs_conn_del_put(). As ct->n_control
> is 0 after our ip_vs_control_del(), ct can be expired by
> timer while we are trying to del it here.
> 
> > > @@ -1341,19 +1368,15 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
> > >  		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
> > >  			if (cp->ipvs != ipvs)
> > >  				continue;
> > > -			/* As timers are expired in LIFO order, restart
> > > -			 * the timer of controlling connection first, so
> > > -			 * that it is expired after us.
> > > -			 */
> > > +			if (atomic_read(&cp->n_control))
> > > +				continue;
> > >  			cp_c = cp->control;
> > > -			/* cp->control is valid only with reference to cp */
> > > -			if (cp_c && __ip_vs_conn_get(cp)) {
> > > +			IP_VS_DBG(4, "del connection\n");
> > > +			ip_vs_conn_del(cp);
> > > +			if (cp_c && !atomic_read(&cp_c->n_control)) {
> > >  				IP_VS_DBG(4, "del controlling connection\n");
> > > -				ip_vs_conn_expire_now(cp_c);
> > > -				__ip_vs_conn_put(cp);
> > > +				ip_vs_conn_del(cp_c);
> > 
> > Conversely, previously this code put the ct, now it doesn't.
> > Is that also intentional?
> 
> 	Now we do not get reference to cp because in RCU
> section the cp structure can not go away. So, we have an
> implicit reference to cp. Same for its cp->control (ct).
> The conn structures are freed later in RCU callback.
> 
> 	In this case we may run del_timer() after
> another CPU, eg. after ip_vs_conn_expire() was already
> called after timer expire or after ip_vs_conn_del*(). But
> for us del_timer will not succeed.

Thanks for the explanation. This now looks good to me.

Reviewed-by: Simon Horman <horms@verge.net.au>

Pablo, could you consider applying this to nf-next?


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next] ipvs: avoid expiring many connections from timer
  2020-06-20 10:03 [PATCH net-next] ipvs: avoid expiring many connections from timer Julian Anastasov
  2020-06-30 15:19 ` Simon Horman
@ 2020-07-01  8:17 ` Pablo Neira Ayuso
  1 sibling, 0 replies; 5+ messages in thread
From: Pablo Neira Ayuso @ 2020-07-01  8:17 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: Simon Horman, lvs-devel, netfilter-devel, Andrew Sy Kim

On Sat, Jun 20, 2020 at 01:03:55PM +0300, Julian Anastasov wrote:
> Add new functions ip_vs_conn_del() and ip_vs_conn_del_put()
> to release many IPVS connections in process context.
> They are suitable for connections found in table
> when we do not want to overload the timers.
> 
> Currently, the change is useful for the dropentry delayed
> work but it will be used also in following patch
> when flushing connections to failed destinations.

Applied, thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-07-01  8:17 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-20 10:03 [PATCH net-next] ipvs: avoid expiring many connections from timer Julian Anastasov
2020-06-30 15:19 ` Simon Horman
2020-06-30 16:10   ` Julian Anastasov
2020-07-01  6:54     ` Simon Horman
2020-07-01  8:17 ` Pablo Neira Ayuso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).