* [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
@ 2019-04-05 17:56 Vlad Buslov
  2019-04-06  5:59 ` Jiri Pirko
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Vlad Buslov @ 2019-04-05 17:56 UTC (permalink / raw)
  To: netdev; +Cc: jhs, xiyou.wangcong, jiri, davem, john.hurley, Vlad Buslov

John reports:

Recent refactoring of fl_change aims to use the classifier spinlock to
avoid the need for rtnl lock. In doing so, the fl_hw_replace_filter()
function was moved to before the lock is taken. This can create problems
for drivers if duplicate filters are created (common in ovs tc offload
due to filters being triggered by user-space matches).

Drivers registered for such filters will now receive multiple copies of
the same rule, each with a different cookie value. This means that the
drivers would need to do a full match field lookup to determine
duplicates, repeating work that will happen in flower __fl_lookup().
Currently, drivers do not expect to receive duplicate filters.

To fix this, verify that a filter with the same key is not present in the
flower classifier hash table and insert the new filter into the flower
hash table before offloading it to hardware. Implement helper function
fl_ht_insert_unique() to atomically verify/insert a filter.

This change makes the filter visible to the fast path at the beginning of
the fl_change() function, which means it can no longer be freed directly
in case of error. Refactor the fl_change() error handling code to
deallocate the filter after an rcu grace period.

Fixes: 620da4860827 ("net: sched: flower: refactor fl_change")
Reported-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/cls_flower.c | 64 +++++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 20 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6050e3caee31..2763176e369c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1459,6 +1459,28 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 	return 0;
 }
 
+static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
+			       struct cls_fl_filter *fold,
+			       bool *in_ht)
+{
+	struct fl_flow_mask *mask = fnew->mask;
+	int err;
+
+	err = rhashtable_insert_fast(&mask->ht,
+				     &fnew->ht_node,
+				     mask->filter_ht_params);
+	if (err) {
+		*in_ht = false;
+		/* It is okay if filter with same key exists when
+		 * overwriting.
+		 */
+		return fold && err == -EEXIST ? 0 : err;
+	}
+
+	*in_ht = true;
+	return 0;
+}
+
 static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca,
@@ -1470,6 +1492,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	struct cls_fl_filter *fnew;
 	struct fl_flow_mask *mask;
 	struct nlattr **tb;
+	bool in_ht;
 	int err;
 
 	if (!tca[TCA_OPTIONS]) {
@@ -1528,10 +1551,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto errout;
 
+	err = fl_ht_insert_unique(fnew, fold, &in_ht);
+	if (err)
+		goto errout_mask;
+
 	if (!tc_skip_hw(fnew->flags)) {
 		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
 		if (err)
-			goto errout_mask;
+			goto errout_ht;
 	}
 
 	if (!tc_in_hw(fnew->flags))
@@ -1557,10 +1584,17 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 
 		fnew->handle = handle;
 
-		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
-					     fnew->mask->filter_ht_params);
-		if (err)
-			goto errout_hw;
+		if (!in_ht) {
+			struct rhashtable_params params =
+				fnew->mask->filter_ht_params;
+
+			err = rhashtable_insert_fast(&fnew->mask->ht,
+						     &fnew->ht_node,
+						     params);
+			if (err)
+				goto errout_hw;
+			in_ht = true;
+		}
 
 		rhashtable_remove_fast(&fold->mask->ht,
 				       &fold->ht_node,
@@ -1582,11 +1616,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		refcount_dec(&fold->refcnt);
 		__fl_put(fold);
 	} else {
-		if (__fl_lookup(fnew->mask, &fnew->mkey)) {
-			err = -EEXIST;
-			goto errout_hw;
-		}
-
 		if (handle) {
 			/* user specifies a handle and it doesn't exist */
 			err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
@@ -1609,12 +1638,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 			goto errout_hw;
 
 		fnew->handle = handle;
-
-		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
-					     fnew->mask->filter_ht_params);
-		if (err)
-			goto errout_idr;
-
 		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
 		spin_unlock(&tp->lock);
 	}
@@ -1625,17 +1648,18 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(mask);
 	return 0;
 
-errout_idr:
-	idr_remove(&head->handle_idr, fnew->handle);
 errout_hw:
 	spin_unlock(&tp->lock);
 	if (!tc_skip_hw(fnew->flags))
 		fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
+errout_ht:
+	if (in_ht)
+		rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
+				       fnew->mask->filter_ht_params);
 errout_mask:
 	fl_mask_put(head, fnew->mask, true);
 errout:
-	tcf_exts_destroy(&fnew->exts);
-	kfree(fnew);
+	tcf_queue_work(&fnew->rwork, fl_destroy_filter_work);
 errout_tb:
 	kfree(tb);
 errout_mask_alloc:
-- 
2.21.0



* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-05 17:56 [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Vlad Buslov
@ 2019-04-06  5:59 ` Jiri Pirko
  2019-04-08  2:34 ` David Miller
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 25+ messages in thread
From: Jiri Pirko @ 2019-04-06  5:59 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, davem, john.hurley

Fri, Apr 05, 2019 at 07:56:26PM CEST, vladbu@mellanox.com wrote:
>[...]

Acked-by: Jiri Pirko <jiri@mellanox.com>


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-05 17:56 [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Vlad Buslov
  2019-04-06  5:59 ` Jiri Pirko
@ 2019-04-08  2:34 ` David Miller
  2019-04-08 22:26 ` Jakub Kicinski
  2019-04-11 11:13 ` [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Ido Schimmel
  3 siblings, 0 replies; 25+ messages in thread
From: David Miller @ 2019-04-08  2:34 UTC (permalink / raw)
  To: vladbu; +Cc: netdev, jhs, xiyou.wangcong, jiri, john.hurley

From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri,  5 Apr 2019 20:56:26 +0300

> [...]

Applied, thanks Vlad.


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-05 17:56 [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Vlad Buslov
  2019-04-06  5:59 ` Jiri Pirko
  2019-04-08  2:34 ` David Miller
@ 2019-04-08 22:26 ` Jakub Kicinski
  2019-04-09  8:23   ` Vlad Buslov
  2019-04-11 11:13 ` [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Ido Schimmel
  3 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-08 22:26 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley

On Fri,  5 Apr 2019 20:56:26 +0300, Vlad Buslov wrote:
> [...]

How is re-offload consistency guaranteed?  IIUC the code is:

 insert into HT
 offload
 insert into IDR

What guarantees re-offload consistency if a new callback is added just
after offload is requested but before the rule ends up in IDR?



* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-08 22:26 ` Jakub Kicinski
@ 2019-04-09  8:23   ` Vlad Buslov
  2019-04-09 17:10     ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-09  8:23 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley


On Tue 09 Apr 2019 at 01:26, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Fri,  5 Apr 2019 20:56:26 +0300, Vlad Buslov wrote:
>> [...]
>
> How is re-offload consistency guaranteed?  IIUC the code is:
>
>  insert into HT
>  offload
>  insert into IDR
>
> What guarantees re-offload consistency if a new callback is added just
> after offload is requested but before the rule ends up in IDR?

Hi Jakub,

At the moment the cls hardware offloads API is always called with the rtnl
lock held, so a rule can't be offloaded while reoffload is in progress.

For my next patch set that unlocks the offloads API, I implemented an
algorithm to track the reoffload count for each tp. It works like this:

1. struct tcf_proto is extended with a reoffload_count counter that is
   incremented each time reoffload is called on a particular tp instance.
   The counter is protected by tp->lock.

2. struct cls_fl_filter is also extended with a reoffload_count counter.
   Its value is set to the current tp->reoffload_count when offloading the
   filter.

3. After offloading the filter, but before inserting it into the idr,
   f->reoffload_count is compared with tp->reoffload_count. If the values
   don't match, the filter is deleted and -EAGAIN is returned. Cls API
   retries filter insertion on -EAGAIN.
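
As a concrete illustration, here is a minimal userspace model of the
three steps above (hypothetical: plain C with a pthread mutex standing
in for tp->lock, and stub comments where the real hardware offload and
idr insertion would go; this sketches the scheme, it is not the actual
patch):

#include <errno.h>
#include <pthread.h>

struct tp {
    pthread_mutex_t lock;          /* models tp->lock */
    unsigned int reoffload_count;  /* bumped by every reoffload */
};

struct filter {
    unsigned int reoffload_count;  /* snapshot taken at offload time */
};

static int hw_offload(struct tp *tp, struct filter *f)
{
    /* step 2: snapshot the counter when offloading the filter */
    pthread_mutex_lock(&tp->lock);
    f->reoffload_count = tp->reoffload_count;
    pthread_mutex_unlock(&tp->lock);
    /* ... issue the actual hardware offload here ... */
    return 0;
}

static int insert_filter(struct tp *tp, struct filter *f)
{
    int err = hw_offload(tp, f);

    if (err)
        return err;

    /* step 3: if reoffload ran between the offload and this point,
     * undo and let the caller retry the whole insertion
     */
    pthread_mutex_lock(&tp->lock);
    if (f->reoffload_count != tp->reoffload_count) {
        pthread_mutex_unlock(&tp->lock);
        /* hw delete of f would go here */
        return -EAGAIN;
    }
    /* ... insert into the idr while the snapshot is still valid ... */
    pthread_mutex_unlock(&tp->lock);
    return 0;
}

The caller (cls API in the description above) would then simply retry
in a loop while insert_filter() keeps returning -EAGAIN.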

Regards,
Vlad


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-09  8:23   ` Vlad Buslov
@ 2019-04-09 17:10     ` Jakub Kicinski
  2019-04-10 14:53       ` Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-09 17:10 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley

On Tue, 9 Apr 2019 08:23:40 +0000, Vlad Buslov wrote:
> On Tue 09 Apr 2019 at 01:26, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > On Fri,  5 Apr 2019 20:56:26 +0300, Vlad Buslov wrote:  
> >> [...]
> >
> > How is re-offload consistency guaranteed?  IIUC the code is:
> >
> >  insert into HT
> >  offload
> >  insert into IDR
> >
> > What guarantees re-offload consistency if a new callback is added just
> > after offload is requested but before the rule ends up in IDR?
> 
> Hi Jakub,
> 
> At the moment the cls hardware offloads API is always called with the rtnl
> lock held, so a rule can't be offloaded while reoffload is in progress.

Does that somehow imply atomicity of offloading vs inserting into IDR?
Doesn't seem so from a cursory look.  Or do you mean rtnl_held is
always true?

> For my next patch set that unlocks the offloads API, I implemented an
> algorithm to track the reoffload count for each tp. It works like this:
> 
> 1. struct tcf_proto is extended with a reoffload_count counter that is
>    incremented each time reoffload is called on a particular tp instance.
>    The counter is protected by tp->lock.
> 
> 2. struct cls_fl_filter is also extended with a reoffload_count counter.
>    Its value is set to the current tp->reoffload_count when offloading the
>    filter.
> 
> 3. After offloading the filter, but before inserting it into the idr,
>    f->reoffload_count is compared with tp->reoffload_count. If the values
>    don't match, the filter is deleted and -EAGAIN is returned. Cls API
>    retries filter insertion on -EAGAIN.

Sounds good for add.  Does this solve delete case as well?

   CPU 0                       CPU 1

__fl_delete
  IDR remove
                           cb unregister
                             hw delete all flows  <- doesn't see the
                                                     remove in progress

  hw delete  <- doesn't see 
                the removed cb


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-09 17:10     ` Jakub Kicinski
@ 2019-04-10 14:53       ` Vlad Buslov
  2019-04-10 15:48         ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-10 14:53 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley


On Tue 09 Apr 2019 at 20:10, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Tue, 9 Apr 2019 08:23:40 +0000, Vlad Buslov wrote:
>> On Tue 09 Apr 2019 at 01:26, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
>> > On Fri,  5 Apr 2019 20:56:26 +0300, Vlad Buslov wrote:
>> >> [...]
>> >
>> > How is re-offload consistency guaranteed?  IIUC the code is:
>> >
>> >  insert into HT
>> >  offload
>> >  insert into IDR
>> >
>> > What guarantees re-offload consistency if a new callback is added just
>> > after offload is requested but before the rule ends up in IDR?
>>
>> Hi Jakub,
>>
>> At the moment the cls hardware offloads API is always called with the rtnl
>> lock held, so a rule can't be offloaded while reoffload is in progress.
>
> Does that somehow imply atomicity of offloading vs inserting into IDR?
> Doesn't seem so from a cursory look.  Or do you mean rtnl_held is
> always true?

Sorry, I forgot that we are discussing a shared block, for which rtnl is
not taken in tc_new_tfilter(). Now I understand the issue and will send
my 'reoffload_count' implementation as a fix.

>
>> [...]
>
> Sounds good for add.  Does this solve delete case as well?
>
>    CPU 0                       CPU 1
>
> __fl_delete
>   IDR remove
>                            cb unregister
>                              hw delete all flows  <- doesn't see the
>                                                      remove in progress
>
>   hw delete  <- doesn't see
>                 the removed cb

Thanks for pointing that out! Looks like I need to move the call to hw
delete in __fl_delete() so that it is executed before idr removal.
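
To make the intended ordering concrete, a tiny sketch (stub functions
instead of the real hw/idr operations, so this is illustrative only):

#include <stdio.h>

struct filter { int id; };

/* stubs standing in for the real hardware and idr operations */
static void hw_destroy_filter(struct filter *f) { printf("hw del %d\n", f->id); }
static void idr_remove_filter(struct filter *f) { printf("idr del %d\n", f->id); }

static void fl_delete_sketch(struct filter *f)
{
    hw_destroy_filter(f);   /* first: remove from hardware */
    idr_remove_filter(f);   /* then: hide from software lookups */
}

int main(void)
{
    struct filter f = { .id = 1 };
    fl_delete_sketch(&f);
    return 0;
}

With this ordering, a concurrent "delete all flows" replay can still
find the filter while its hardware rule exists, so no rule is leaked.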


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-10 14:53       ` Vlad Buslov
@ 2019-04-10 15:48         ` Jakub Kicinski
  2019-04-10 16:02           ` Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-10 15:48 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley

On Wed, 10 Apr 2019 14:53:53 +0000, Vlad Buslov wrote:
> >> [...]
> >
> > Sounds good for add.  Does this solve delete case as well?
> >
> >    CPU 0                       CPU 1
> >
> > __fl_delete
> >   IDR remove
> >                            cb unregister
> >                              hw delete all flows  <- doesn't see the
> >                                                      remove in progress
> >
> >   hw delete  <- doesn't see
> >                 the removed cb  
> 
> Thanks for pointing that out! Looks like I need to move the call to hw
> delete in __fl_delete() so that it is executed before idr removal.

Ack, plus you need to do the same retry mechanism.  Save CB "count"/seq,
hw delete, remove from IDR, if CB "count"/seq changed hw delete again.
Right?
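
In rough code, the retry being described might look like this (a hedged
userspace sketch; cb_seq and the helpers are illustrative names, not an
existing kernel API):

#include <stdatomic.h>

struct block  { atomic_uint cb_seq; };  /* bumped on cb (un)register */
struct filter { int id; };

static void hw_delete(struct block *b, struct filter *f) { /* stub */ }
static void idr_remove_filter(struct filter *f)          { /* stub */ }

static void delete_with_retry(struct block *b, struct filter *f)
{
    unsigned int seq = atomic_load(&b->cb_seq);  /* save CB seq */

    hw_delete(b, f);                     /* hw delete */
    idr_remove_filter(f);                /* remove from IDR */
    if (atomic_load(&b->cb_seq) != seq)  /* CB list changed meanwhile */
        hw_delete(b, f);                 /* hw delete again */
}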


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-10 15:48         ` Jakub Kicinski
@ 2019-04-10 16:02           ` Vlad Buslov
  2019-04-10 16:09             ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-10 16:02 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley


On Wed 10 Apr 2019 at 18:48, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Wed, 10 Apr 2019 14:53:53 +0000, Vlad Buslov wrote:
>> > [...]
>>
>> Thanks for pointing that out! Looks like I need to move the call to hw
>> delete in __fl_delete() so that it is executed before idr removal.
>
> Ack, plus you need to do the same retry mechanism.  Save CB "count"/seq,
> hw delete, remove from IDR, if CB "count"/seq changed hw delete again.
> Right?

Actually, I intended to modify fl_reoffload() to ignore filters with the
'deleted' flag set when adding, but I guess reusing 'reoffload_count' to
retry fl_hw_destroy_filter() would also work.


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-10 16:02           ` Vlad Buslov
@ 2019-04-10 16:09             ` Jakub Kicinski
  2019-04-10 16:26               ` Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-10 16:09 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley

On Wed, 10 Apr 2019 16:02:17 +0000, Vlad Buslov wrote:
> On Wed 10 Apr 2019 at 18:48, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > On Wed, 10 Apr 2019 14:53:53 +0000, Vlad Buslov wrote:  
> >> > [...]
> >>
> >> Thanks for pointing that out! Looks like I need to move the call to hw
> >> delete in __fl_delete() so that it is executed before idr removal.
> >
> > Ack, plus you need to do the same retry mechanism.  Save CB "count"/seq,
> > hw delete, remove from IDR, if CB "count"/seq changed hw delete again.
> > Right?  
> 
> Actually, I intended to modify fl_reoffload() to ignore filters with
> 'deleted' flag set when adding, but I guess reusing 'reoffload_count' to
> retry fl_hw_destroy_filter() would also work.

Yeah, I don't see how you can ignore deleted safely.  Perhaps lack of
coffee :)


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-10 16:09             ` Jakub Kicinski
@ 2019-04-10 16:26               ` Vlad Buslov
  2019-04-10 17:00                 ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-10 16:26 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley


On Wed 10 Apr 2019 at 19:09, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Wed, 10 Apr 2019 16:02:17 +0000, Vlad Buslov wrote:
>> On Wed 10 Apr 2019 at 18:48, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
>> > On Wed, 10 Apr 2019 14:53:53 +0000, Vlad Buslov wrote:
>> >> > [...]
>> >>
>> >> Thanks for pointing that out! Looks like I need to move the call to hw
>> >> delete in __fl_delete() so that it is executed before idr removal.
>> >
>> > Ack, plus you need to do the same retry mechanism.  Save CB "count"/seq,
>> > hw delete, remove from IDR, if CB "count"/seq changed hw delete again.
>> > Right?
>>
>> Actually, I intended to modify fl_reoffload() to ignore filters with the
>> 'deleted' flag set when adding, but I guess reusing 'reoffload_count' to
>> retry fl_hw_destroy_filter() would also work.
>
> Yeah, I don't see how you can ignore deleted safely.  Perhaps lack of
> coffee :)

Well, drivers are supposed to account for double deletion or deletion of
filters that were not successfully offloaded to them. If a filter is not
marked as skip_sw, its creation will succeed even if the hw callbacks have
failed, but __fl_delete() still calls fl_hw_destroy_filter() on such
filters. The main thing is that we must guarantee that the code doesn't
delete a new filter with the same key. However, in the case of the flower
classifier the 'cookie' is a pointer to the filter, and the filter is freed
only when the last reference to it is released, so the code is safe in this
regard.

So I guess there is nothing wrong with reoffload calling cb()
on all classifier filters (including those marked as 'deleted'), as long
as the delete code doesn't miss any of the callbacks afterwards.
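
As a concrete illustration of that driver-side expectation, a minimal
sketch of an idempotent delete keyed by cookie (hypothetical code; a
real driver would use its own flow table, e.g. an rhashtable keyed by
cookie):

#include <stdbool.h>
#include <stddef.h>

#define MAX_FLOWS 16

struct hw_flow { unsigned long cookie; bool in_use; };
static struct hw_flow flows[MAX_FLOWS];

static struct hw_flow *flow_lookup(unsigned long cookie)
{
    for (int i = 0; i < MAX_FLOWS; i++)
        if (flows[i].in_use && flows[i].cookie == cookie)
            return &flows[i];
    return NULL;
}

/* Delete is idempotent: an unknown cookie means the rule was never
 * offloaded (or was already deleted), so just report success.
 */
static int drv_flow_delete(unsigned long cookie)
{
    struct hw_flow *flow = flow_lookup(cookie);

    if (!flow)
        return 0;      /* duplicate/spurious delete: ignore */
    flow->in_use = false;  /* ... remove the rule from hardware here ... */
    return 0;
}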


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-10 16:26               ` Vlad Buslov
@ 2019-04-10 17:00                 ` Jakub Kicinski
  2019-04-16 14:20                   ` [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-10 17:00 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley

On Wed, 10 Apr 2019 16:26:38 +0000, Vlad Buslov wrote:
> >> Actually, I intended to modify fl_reoffload() to ignore filters with the
> >> 'deleted' flag set when adding, but I guess reusing 'reoffload_count' to
> >> retry fl_hw_destroy_filter() would also work.
> >
> > Yeah, I don't see how you can ignore deleted safely.  Perhaps lack of
> > coffee :)  
> 
> Well, drivers are supposed to account for double deletion or deletion of
> filters that were not successfully offloaded to them. If a filter is not
> marked as skip_sw, its creation will succeed even if the hw callbacks have
> failed, but __fl_delete() still calls fl_hw_destroy_filter() on such
> filters. The main thing is that we must guarantee that the code doesn't
> delete a new filter with the same key. However, in the case of the flower
> classifier the 'cookie' is a pointer to the filter, and the filter is freed
> only when the last reference to it is released, so the code is safe in this
> regard.
> 
> So I guess there is nothing wrong with reoffload calling cb()
> on all classifier filters (including those marked as 'deleted'), as long
> as the delete code doesn't miss any of the callbacks afterwards.

Yup.  Plus multiple/spurious deletes are already a fact of life, since
we don't keep track of which callback accepted the filter and which
didn't.


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-05 17:56 [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Vlad Buslov
                   ` (2 preceding siblings ...)
  2019-04-08 22:26 ` Jakub Kicinski
@ 2019-04-11 11:13 ` Ido Schimmel
  2019-04-11 11:28   ` Vlad Buslov
  3 siblings, 1 reply; 25+ messages in thread
From: Ido Schimmel @ 2019-04-11 11:13 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley

On Fri, Apr 05, 2019 at 08:56:26PM +0300, Vlad Buslov wrote:
> [...]

Vlad,

Our regression machines all hit a NULL pointer dereference [1] which I
bisected to this patch. Created this reproducer that you can use:

ip netns add ns1
ip -n ns1 link add dev dummy1 type dummy
tc -n ns1 qdisc add dev dummy1 clsact
tc -n ns1 filter add dev dummy1 ingress pref 1 proto ip \
        flower skip_hw src_ip 192.0.2.1 action drop
ip netns del ns1

Can you please look into this? Thanks

[1]
[    5.332176] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[    5.334372] #PF error: [normal kernel read fault]
[    5.335619] PGD 0 P4D 0
[    5.336360] Oops: 0000 [#1] SMP
[    5.337249] CPU: 0 PID: 7 Comm: kworker/u2:0 Not tainted 5.1.0-rc4-custom-01473-g526bb57a6ad6 #1374
[    5.339232] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014
[    5.341982] Workqueue: netns cleanup_net
[    5.342843] RIP: 0010:__fl_put+0x24/0xb0
[    5.343808] Code: 84 00 00 00 00 00 3e ff 8f f0 03 00 00 0f 88 da 7b 14 00 74 01 c3 80 bf f4 03 00 00 00 0f 84 83 00 00 00 4c 8b 87 c8 01 00 00 <41> 8b 50 04 49 8d 70 04
85 d2 74 60 8d 4a 01 39 ca 7f 52 81 fa fe
[    5.348099] RSP: 0018:ffffabe300663be0 EFLAGS: 00010202
[    5.349223] RAX: ffff9ea4ba1aff00 RBX: ffff9ea4b99af400 RCX: ffffabe300663c67
[    5.350572] RDX: 00000000000004c5 RSI: 0000000000000000 RDI: ffff9ea4b99af400
[    5.351919] RBP: ffff9ea4ba28e900 R08: 0000000000000000 R09: ffffffff9d1b0075
[    5.353272] R10: ffffeb2884e66b80 R11: ffffffff9dc4dcd8 R12: ffff9ea4b99af408
[    5.354635] R13: ffff9ea4b99ae400 R14: ffff9ea4b9a47800 R15: ffff9ea4b99ae000
[    5.355988] FS:  0000000000000000(0000) GS:ffff9ea4bba00000(0000) knlGS:0000000000000000
[    5.357436] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.358530] CR2: 0000000000000004 CR3: 00000001398fa004 CR4: 0000000000160ef0
[    5.359876] Call Trace:
[    5.360360]  __fl_delete+0x223/0x3b0
[    5.361008]  fl_destroy+0xb4/0x130
[    5.361641]  tcf_proto_destroy+0x15/0x40
[    5.362429]  tcf_chain_flush+0x4e/0x60
[    5.363125]  __tcf_block_put+0xb4/0x150
[    5.363805]  clsact_destroy+0x30/0x40
[    5.364507]  qdisc_destroy+0x44/0x110
[    5.365218]  dev_shutdown+0x6e/0xa0
[    5.365821]  rollback_registered_many+0x25d/0x510
[    5.366724]  ? netdev_run_todo+0x221/0x280
[    5.367485]  unregister_netdevice_many+0x15/0xa0
[    5.368355]  default_device_exit_batch+0x13f/0x170
[    5.369268]  ? wait_woken+0x80/0x80
[    5.369910]  cleanup_net+0x19a/0x280
[    5.370558]  process_one_work+0x1f5/0x3f0
[    5.371326]  worker_thread+0x28/0x3c0
[    5.372038]  ? process_one_work+0x3f0/0x3f0
[    5.372755]  kthread+0x10d/0x130
[    5.373358]  ? __kthread_create_on_node+0x180/0x180
[    5.374298]  ret_from_fork+0x35/0x40
[    5.374934] CR2: 0000000000000004
[    5.375454] ---[ end trace c20e7f74127772e5 ]---
[    5.376284] RIP: 0010:__fl_put+0x24/0xb0
[    5.377003] Code: 84 00 00 00 00 00 3e ff 8f f0 03 00 00 0f 88 da 7b 14 00 74 01 c3 80 bf f4 03 00 00 00 0f 84 83 00 00 00 4c 8b 87 c8 01 00 00 <41> 8b 50 04 49 8d 70 04
85 d2 74 60 8d 4a 01 39 ca 7f 52 81 fa fe
[    5.380269] RSP: 0018:ffffabe300663be0 EFLAGS: 00010202
[    5.381237] RAX: ffff9ea4ba1aff00 RBX: ffff9ea4b99af400 RCX: ffffabe300663c67
[    5.382551] RDX: 00000000000004c5 RSI: 0000000000000000 RDI: ffff9ea4b99af400
[    5.383972] RBP: ffff9ea4ba28e900 R08: 0000000000000000 R09: ffffffff9d1b0075
[    5.385314] R10: ffffeb2884e66b80 R11: ffffffff9dc4dcd8 R12: ffff9ea4b99af408
[    5.386616] R13: ffff9ea4b99ae400 R14: ffff9ea4b9a47800 R15: ffff9ea4b99ae000
[    5.387986] FS:  0000000000000000(0000) GS:ffff9ea4bba00000(0000) knlGS:0000000000000000
[    5.389512] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.390546] CR2: 0000000000000004 CR3: 00000001398fa004 CR4: 0000000000160ef0


* Re: [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw
  2019-04-11 11:13 ` [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Ido Schimmel
@ 2019-04-11 11:28   ` Vlad Buslov
  0 siblings, 0 replies; 25+ messages in thread
From: Vlad Buslov @ 2019-04-11 11:28 UTC (permalink / raw)
  To: Ido Schimmel
  Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem, john.hurley


On Thu 11 Apr 2019 at 14:13, Ido Schimmel <idosch@idosch.org> wrote:
> On Fri, Apr 05, 2019 at 08:56:26PM +0300, Vlad Buslov wrote:
>> [...]
>
> Vlad,
>
> Our regression machines all hit a NULL pointer dereference [1] which I
> bisected to this patch. Created this reproducer that you can use:
>
> ip netns add ns1
> ip -n ns1 link add dev dummy1 type dummy
> tc -n ns1 qdisc add dev dummy1 clsact
> tc -n ns1 filter add dev dummy1 ingress pref 1 proto ip \
>         flower skip_hw src_ip 192.0.2.1 action drop
> ip netns del ns1
>
> Can you please look into this? Thanks

Will do.


* [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-10 17:00                 ` Jakub Kicinski
@ 2019-04-16 14:20                   ` Vlad Buslov
  2019-04-16 21:49                     ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-16 14:20 UTC (permalink / raw)
  To: jakub.kicinski; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem, Vlad Buslov

Recent changes that introduced unlocked flower did not properly account for
the case when reoffload is initiated concurrently with filter updates. To
fix the issue, extend flower with a 'hw_filters' list that is used to store
filters that don't have the 'skip_hw' flag set. A filter is added to the
list before it is inserted into hardware and only removed from it after the
last reference to the filter has been released. This ensures that concurrent
reoffload can still access a filter that is being deleted, and removes the
race condition where a driver callback can be removed while the filter is
no longer accessible through the idr but is still present in hardware.

Refactor fl_change() to insert the filter into hw_list before offloading it
to hardware and to properly clean up the filter in case of error with
__fl_put(). This allows concurrent access to the filter from fl_reoffload()
and protects it with reference counting. Refactor fl_reoffload() to iterate
over the hw_filters list instead of the idr. Implement the
fl_get_next_hw_filter() helper function, which iterates over the hw_filters
list with reference counting and skips filters that are being concurrently
deleted.

Fixes: 92149190067d ("net: sched: flower: set unlocked flag for flower proto ops")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/cls_flower.c | 82 +++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 21 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 4b5585358699..e0f921c537ab 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -90,6 +90,7 @@ struct cls_fl_head {
 	struct rhashtable ht;
 	spinlock_t masks_lock; /* Protect masks list */
 	struct list_head masks;
+	struct list_head hw_filters;
 	struct rcu_work rwork;
 	struct idr handle_idr;
 };
@@ -102,6 +103,7 @@ struct cls_fl_filter {
 	struct tcf_result res;
 	struct fl_flow_key key;
 	struct list_head list;
+	struct list_head hw_list;
 	u32 handle;
 	u32 flags;
 	u32 in_hw_count;
@@ -315,6 +317,7 @@ static int fl_init(struct tcf_proto *tp)
 
 	spin_lock_init(&head->masks_lock);
 	INIT_LIST_HEAD_RCU(&head->masks);
+	INIT_LIST_HEAD(&head->hw_filters);
 	rcu_assign_pointer(tp->root, head);
 	idr_init(&head->handle_idr);
 
@@ -485,12 +488,15 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
 	return rcu_dereference_raw(tp->root);
 }
 
-static void __fl_put(struct cls_fl_filter *f)
+static void __fl_put(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
 	if (!refcount_dec_and_test(&f->refcnt))
 		return;
 
-	WARN_ON(!f->deleted);
+	spin_lock(&tp->lock);
+	if (!list_empty(&f->hw_list))
+		list_del(&f->hw_list);
+	spin_unlock(&tp->lock);
 
 	if (tcf_exts_get_net(&f->exts))
 		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
@@ -554,7 +560,7 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 	if (!tc_skip_hw(f->flags))
 		fl_hw_destroy_filter(tp, f, rtnl_held, extack);
 	tcf_unbind_filter(tp, &f->res);
-	__fl_put(f);
+	__fl_put(tp, f);
 
 	return 0;
 }
@@ -595,7 +601,7 @@ static void fl_put(struct tcf_proto *tp, void *arg)
 {
 	struct cls_fl_filter *f = arg;
 
-	__fl_put(f);
+	__fl_put(tp, f);
 }
 
 static void *fl_get(struct tcf_proto *tp, u32 handle)
@@ -1522,6 +1528,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		err = -ENOBUFS;
 		goto errout_tb;
 	}
+	INIT_LIST_HEAD(&fnew->hw_list);
 	refcount_set(&fnew->refcnt, 1);
 
 	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
@@ -1551,6 +1558,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		goto errout_mask;
 
 	if (!tc_skip_hw(fnew->flags)) {
+		spin_lock(&tp->lock);
+		list_add(&fnew->hw_list, &head->hw_filters);
+		spin_unlock(&tp->lock);
+
 		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
 		if (err)
 			goto errout_ht;
@@ -1569,7 +1580,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		goto errout_hw;
 	}
 
-	refcount_inc(&fnew->refcnt);
 	if (fold) {
 		/* Fold filter was deleted concurrently. Retry lookup. */
 		if (fold->deleted) {
@@ -1591,6 +1601,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 			in_ht = true;
 		}
 
+		refcount_inc(&fnew->refcnt);
 		rhashtable_remove_fast(&fold->mask->ht,
 				       &fold->ht_node,
 				       fold->mask->filter_ht_params);
@@ -1608,7 +1619,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		 * after this.
 		 */
 		refcount_dec(&fold->refcnt);
-		__fl_put(fold);
+		__fl_put(tp, fold);
 	} else {
 		if (handle) {
 			/* user specifies a handle and it doesn't exist */
@@ -1631,6 +1642,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		if (err)
 			goto errout_hw;
 
+		refcount_inc(&fnew->refcnt);
 		fnew->handle = handle;
 		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
 		spin_unlock(&tp->lock);
@@ -1642,26 +1654,27 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(mask);
 	return 0;
 
+errout_ht:
+	spin_lock(&tp->lock);
 errout_hw:
+	fnew->deleted = true;
 	spin_unlock(&tp->lock);
 	if (!tc_skip_hw(fnew->flags))
 		fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
-errout_ht:
 	if (in_ht)
 		rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
 				       fnew->mask->filter_ht_params);
 errout_mask:
 	fl_mask_put(head, fnew->mask);
 errout:
-	tcf_exts_get_net(&fnew->exts);
-	tcf_queue_work(&fnew->rwork, fl_destroy_filter_work);
+	__fl_put(tp, fnew);
 errout_tb:
 	kfree(tb);
 errout_mask_alloc:
 	kfree(mask);
 errout_fold:
 	if (fold)
-		__fl_put(fold);
+		__fl_put(tp, fold);
 	return err;
 }
 
@@ -1675,7 +1688,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
 
 	err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack);
 	*last = list_empty(&head->masks);
-	__fl_put(f);
+	__fl_put(tp, f);
 
 	return err;
 }
@@ -1689,33 +1702,61 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 
 	while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
 		if (arg->fn(tp, f, arg) < 0) {
-			__fl_put(f);
+			__fl_put(tp, f);
 			arg->stop = 1;
 			break;
 		}
-		__fl_put(f);
+		__fl_put(tp, f);
 		arg->cookie++;
 		arg->count++;
 	}
 }
 
+static struct cls_fl_filter *
+fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	spin_lock(&tp->lock);
+	if (!f) {
+		if (list_empty(&head->hw_filters)) {
+			spin_unlock(&tp->lock);
+			return NULL;
+		}
+
+		f = list_first_entry(&head->hw_filters, struct cls_fl_filter,
+				     hw_list);
+	} else {
+		f = list_next_entry(f, hw_list);
+	}
+
+	list_for_each_entry_from(f, &head->hw_filters, hw_list) {
+		if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
+			spin_unlock(&tp->lock);
+			return f;
+		}
+	}
+
+	spin_unlock(&tp->lock);
+	return NULL;
+}
+
 static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = tp->chain->block;
-	unsigned long handle = 0;
-	struct cls_fl_filter *f;
+	struct cls_fl_filter *f = NULL;
 	int err;
 
-	while ((f = fl_get_next_filter(tp, &handle))) {
+	while ((f = fl_get_next_hw_filter(tp, f, add))) {
 		if (tc_skip_hw(f->flags))
 			goto next_flow;
 
 		cls_flower.rule =
 			flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 		if (!cls_flower.rule) {
-			__fl_put(f);
+			__fl_put(tp, f);
 			return -ENOMEM;
 		}
 
@@ -1733,7 +1774,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			kfree(cls_flower.rule);
 			if (tc_skip_sw(f->flags)) {
 				NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
-				__fl_put(f);
+				__fl_put(tp, f);
 				return err;
 			}
 			goto next_flow;
@@ -1746,7 +1787,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 
 		if (err) {
 			if (add && tc_skip_sw(f->flags)) {
-				__fl_put(f);
+				__fl_put(tp, f);
 				return err;
 			}
 			goto next_flow;
@@ -1757,8 +1798,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 					  add);
 		spin_unlock(&tp->lock);
 next_flow:
-		handle++;
-		__fl_put(f);
+		__fl_put(tp, f);
 	}
 
 	return 0;
-- 
2.21.0



* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-16 14:20                   ` [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access Vlad Buslov
@ 2019-04-16 21:49                     ` Jakub Kicinski
  2019-04-17  7:29                       ` Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-16 21:49 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem

On Tue, 16 Apr 2019 17:20:47 +0300, Vlad Buslov wrote:
> @@ -1551,6 +1558,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>  		goto errout_mask;
>  
>  	if (!tc_skip_hw(fnew->flags)) {
> +		spin_lock(&tp->lock);
> +		list_add(&fnew->hw_list, &head->hw_filters);
> +		spin_unlock(&tp->lock);
> +
>  		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
>  		if (err)
>  			goto errout_ht;

Duplicated deletes should be fine, but I'm not sure the same is true for
adds.  Won't seeing an add with the same cookie twice confuse drivers?

There's also the minor issue of offloaded count being off in that
case :)
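
(For the add case, the guard being asked about would presumably reduce
to something like the sketch below, with the cookie table standing in
for whatever lookup structure the driver already keeps; illustrative
only:)

#include <errno.h>
#include <stdbool.h>

#define MAX_FLOWS 16

/* cookies are pointers in practice, so 0 never collides with a rule */
static unsigned long cookies[MAX_FLOWS];

static bool flow_known(unsigned long cookie)
{
    for (int i = 0; i < MAX_FLOWS; i++)
        if (cookies[i] == cookie)
            return true;
    return false;
}

static int drv_flow_add(unsigned long cookie)
{
    if (flow_known(cookie))
        return -EEXIST;  /* same cookie seen twice: reject */
    for (int i = 0; i < MAX_FLOWS; i++)
        if (!cookies[i]) {
            cookies[i] = cookie;
            /* ... program the rule into hardware here ... */
            return 0;
        }
    return -ENOSPC;
}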


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-16 21:49                     ` Jakub Kicinski
@ 2019-04-17  7:29                       ` Vlad Buslov
  2019-04-17 16:34                         ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-17  7:29 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem


On Wed 17 Apr 2019 at 00:49, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Tue, 16 Apr 2019 17:20:47 +0300, Vlad Buslov wrote:
>> @@ -1551,6 +1558,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>  		goto errout_mask;
>>
>>  	if (!tc_skip_hw(fnew->flags)) {
>> +		spin_lock(&tp->lock);
>> +		list_add(&fnew->hw_list, &head->hw_filters);
>> +		spin_unlock(&tp->lock);
>> +
>>  		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
>>  		if (err)
>>  			goto errout_ht;
>
> Duplicated deletes should be fine, but I'm not sure the same is true for
> adds.  Won't seeing an add with the same cookie twice confuse drivers?
>
> There's also the minor issue of offloaded count being off in that
> case :)

Hmmm, okay. Rejecting duplicate cookies should be a trivial change to
drivers though. Do you see any faults with this approach in general?


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-17  7:29                       ` Vlad Buslov
@ 2019-04-17 16:34                         ` Jakub Kicinski
  2019-04-17 17:01                           ` Vlad Buslov
  2019-04-18 16:33                           ` Vlad Buslov
  0 siblings, 2 replies; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-17 16:34 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem

On Wed, 17 Apr 2019 07:29:36 +0000, Vlad Buslov wrote:
> On Wed 17 Apr 2019 at 00:49, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > On Tue, 16 Apr 2019 17:20:47 +0300, Vlad Buslov wrote:  
> >> [...]
> >
> > Duplicated deletes should be fine, but I'm not sure the same is true for
> > adds.  Won't seeing an add with the same cookie twice confuse drivers?
> >
> > There's also the minor issue of offloaded count being off in that
> > case :)  
> 
> Hmmm, okay. Rejecting duplicate cookies should be a trivial change to
> drivers though. Do you see any faults with this approach in general?

Trivial or not, it adds up; the stack should make driver authors' job as
easy as possible.  The simplest thing to do would be to add a mutex
around the HW calls.  But that obviously doesn't work for you, cause
you want multiple outstanding requests to the FW for a single tp, right?

How about a RW lock, that would take R on normal add/replace/del paths
and W on replays?  That should scale, no?
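
A minimal sketch of that idea, under stated assumptions: hw_rwsem is a
hypothetical new member, do_hw_replace() stands in for the driver call,
and list manipulation still relies on tp->lock as in the RFC patch:

struct cls_fl_head_sketch {
	struct rw_semaphore hw_rwsem;	/* hypothetical new member */
	struct list_head hw_filters;
};

static int fl_hw_add_sketch(struct tcf_proto *tp,
			    struct cls_fl_head_sketch *head,
			    struct cls_fl_filter *fnew)
{
	int err;

	/* R side: normal add/replace/delete paths may run concurrently */
	down_read(&head->hw_rwsem);
	spin_lock(&tp->lock);
	list_add(&fnew->hw_list, &head->hw_filters);
	spin_unlock(&tp->lock);
	err = do_hw_replace(fnew);	/* stand-in for the driver call */
	up_read(&head->hw_rwsem);
	return err;
}

static int fl_reoffload_sketch(struct cls_fl_head_sketch *head)
{
	struct cls_fl_filter *f;
	int err = 0;

	/* W side: a replay excludes all concurrent filter updates */
	down_write(&head->hw_rwsem);
	list_for_each_entry(f, &head->hw_filters, hw_list) {
		err = do_hw_replace(f);
		if (err)
			break;
	}
	up_write(&head->hw_rwsem);
	return err;
}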


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-17 16:34                         ` Jakub Kicinski
@ 2019-04-17 17:01                           ` Vlad Buslov
  2019-04-18 16:33                           ` Vlad Buslov
  1 sibling, 0 replies; 25+ messages in thread
From: Vlad Buslov @ 2019-04-17 17:01 UTC (permalink / raw)
  To: Jakub Kicinski, jiri; +Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, davem


On Wed 17 Apr 2019 at 19:34, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Wed, 17 Apr 2019 07:29:36 +0000, Vlad Buslov wrote:
>> On Wed 17 Apr 2019 at 00:49, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
>> > On Tue, 16 Apr 2019 17:20:47 +0300, Vlad Buslov wrote:
>> >> @@ -1551,6 +1558,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>> >>  		goto errout_mask;
>> >>
>> >>  	if (!tc_skip_hw(fnew->flags)) {
>> >> +		spin_lock(&tp->lock);
>> >> +		list_add(&fnew->hw_list, &head->hw_filters);
>> >> +		spin_unlock(&tp->lock);
>> >> +
>> >>  		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
>> >>  		if (err)
>> >>  			goto errout_ht;
>> >
>> > Duplicated deletes should be fine, but I'm not sure the same is true for
>> > adds.  Won't seeing an add with the same cookie twice confuse drivers?
>> >
>> > There's also the minor issue of offloaded count being off in that
>> > case :)
>>
>> Hmmm, okay. Rejecting duplicate cookies should be a trivial change to
>> drivers though. Do you see any faults with this approach in general?
>
> Trivial or not, it adds up; the stack should make driver authors' job as
> easy as possible.  The simplest thing to do would be to add a mutex

Agree. However, all driver flower offload implementations already have
all the functionality necessary to look up a flow by cookie, because
they need it to implement flow deletion.
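
For illustration, a hedged sketch of such a driver-side check, assuming
a driver that already keeps its offloaded flows in an rhashtable keyed
by the TC cookie (delete support typically requires exactly that); all
drv_* names are hypothetical:

static int drv_flower_replace(struct drv_priv *priv,
			      struct tc_cls_flower_offload *f)
{
	struct drv_flow *flow;

	/* Reuse the cookie lookup that delete already depends on to
	 * catch a duplicate add, e.g. a replay of a rule this driver
	 * instance has already offloaded.
	 */
	flow = rhashtable_lookup_fast(&priv->flow_ht, &f->cookie,
				      drv_flow_ht_params);
	if (flow)
		return -EEXIST;

	return drv_install_flow(priv, f);	/* stand-in for real setup */
}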

> around the HW calls.  But that obviously doesn't work for you, cause
> you want multiple outstanding requests to the FW for a single tp,
> right?

Right.

>
> How about a RW lock, that would take R on normal add/replace/del paths
> and W on replays?  That should scale, no?

Yes. But I would prefer to avoid adding another sleeping lock on the
rule update path of every filter (including non-offloaded use cases,
where reoffload is not used at all).

Jiri, what approach would you prefer?


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-17 16:34                         ` Jakub Kicinski
  2019-04-17 17:01                           ` Vlad Buslov
@ 2019-04-18 16:33                           ` Vlad Buslov
  2019-04-18 17:46                             ` Jakub Kicinski
  1 sibling, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-18 16:33 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem


On Wed 17 Apr 2019 at 19:34, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Wed, 17 Apr 2019 07:29:36 +0000, Vlad Buslov wrote:
>> On Wed 17 Apr 2019 at 00:49, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
>> > On Tue, 16 Apr 2019 17:20:47 +0300, Vlad Buslov wrote:
>> >> @@ -1551,6 +1558,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>> >>  		goto errout_mask;
>> >>
>> >>  	if (!tc_skip_hw(fnew->flags)) {
>> >> +		spin_lock(&tp->lock);
>> >> +		list_add(&fnew->hw_list, &head->hw_filters);
>> >> +		spin_unlock(&tp->lock);
>> >> +
>> >>  		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
>> >>  		if (err)
>> >>  			goto errout_ht;
>> >
>> > Duplicated deletes should be fine, but I'm not sure the same is true for
>> > adds.  Won't seeing an add with the same cookie twice confuse drivers?
>> >
>> > There's also the minor issue of offloaded count being off in that
>> > case :)
>>
>> Hmmm, okay. Rejecting duplicate cookies should be a trivial change to
>> drivers though. Do you see any faults with this approach in general?
>
> Trivial or not, it adds up; the stack should make driver authors' job as
> easy as possible.  The simplest thing to do would be to add a mutex
> around the HW calls.  But that obviously doesn't work for you, cause
> you want multiple outstanding requests to the FW for a single tp, right?
>
> How about a RW lock, that would take R on normal add/replace/del paths
> and W on replays?  That should scale, no?

I've been thinking some more about possible ways to mitigate the
problem. First of all, I tried to implement a POC of the rwlock in
flower, and it isn't straightforward because of lock ordering. Observe
that fl_reoffload() is always called with rtnl lock taken (I didn't do
any work to unlock bind/unbind), but fl_change() can be called without
rtnl lock and needs to obtain it before offloading rules. This means we
have a deadlock if fl_change() obtains the locks in the order rwlock --->
rtnl_lock while fl_reoffload() obtains them in the order rtnl_lock --->
rwlock.
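
To make the ordering concrete, a schematic of the two paths (the lock
name is hypothetical, these are not actual call sites):

/*
 *   fl_change() (rtnl-unlocked path):
 *	read_lock(&reoffload_rwlock);
 *	rtnl_lock();			<- taken before offloading rules
 *
 *   fl_reoffload() (bind/unbind path, rtnl lock already held):
 *	write_lock(&reoffload_rwlock);	<- waits for all readers
 *
 * One task holds the rwlock for reading and waits for rtnl, while
 * another holds rtnl and waits for the rwlock: a classic ABBA
 * deadlock.
 */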

Considering this, I tried to improve my solution to remove the
possibility of multiple adds of the same filter, and it seems to me that
it would be enough to move hw_filters list management into the flower
offload functions: add the filter to the list while holding rtnl lock in
fl_hw_replace_filter() and remove it from the list while holding rtnl
lock in fl_hw_destroy_filter(). What do you think?
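
A sketch of that variant, assuming rtnl is held around the offload
helpers (as it is today); drv_setup_cls_flower() stands in for the real
hardware call:

static int fl_hw_replace_filter_sketch(struct tcf_proto *tp,
				       struct cls_fl_filter *f,
				       struct netlink_ext_ack *extack)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	int err;

	ASSERT_RTNL();
	/* rtnl serializes all offload requests for this tp, so the
	 * filter is linked at most once and a reoffload replay never
	 * observes a duplicate add.
	 */
	list_add(&f->hw_list, &head->hw_filters);

	err = drv_setup_cls_flower(tp, f, extack);	/* stand-in */
	if (err)
		list_del_init(&f->hw_list);	/* mirrored in fl_hw_destroy_filter() */
	return err;
}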


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-18 16:33                           ` Vlad Buslov
@ 2019-04-18 17:46                             ` Jakub Kicinski
  2019-04-18 17:58                               ` Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-18 17:46 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem

On Thu, 18 Apr 2019 16:33:22 +0000, Vlad Buslov wrote:
> Considering this, I tried to improve my solution to remove the
> possibility of multiple adds of the same filter, and it seems to me that
> it would be enough to move hw_filters list management into the flower
> offload functions: add the filter to the list while holding rtnl lock in
> fl_hw_replace_filter() and remove it from the list while holding rtnl
> lock in fl_hw_destroy_filter(). What do you think?

Sounds good for now, but I presume the plan is to get rid of rtnl
around the driver call.. at which point we would switch to a rwlock? :)


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-18 17:46                             ` Jakub Kicinski
@ 2019-04-18 17:58                               ` Vlad Buslov
  2019-04-18 18:02                                 ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-18 17:58 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem


On Thu 18 Apr 2019 at 20:46, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Thu, 18 Apr 2019 16:33:22 +0000, Vlad Buslov wrote:
>> Considering this, I tried to improve my solution to remove the
>> possibility of multiple adds of the same filter, and it seems to me that
>> it would be enough to move hw_filters list management into the flower
>> offload functions: add the filter to the list while holding rtnl lock in
>> fl_hw_replace_filter() and remove it from the list while holding rtnl
>> lock in fl_hw_destroy_filter(). What do you think?
>
> Sounds good for now, but I presume the plan is to get rid of rtnl
> around the driver call.. at which point we would switch to a rwlock? :)

Yes, but I would like the lock to be in the cls hw offloads API
(tc_setup_cb_replace(), etc.) and not in flower itself. That would also
solve the deadlock issue and make the code reusable for any future
unlocked classifier implementations.


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-18 17:58                               ` Vlad Buslov
@ 2019-04-18 18:02                                 ` Jakub Kicinski
  2019-04-18 18:13                                   ` Vlad Buslov
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-18 18:02 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem

On Thu, 18 Apr 2019 17:58:26 +0000, Vlad Buslov wrote:
> On Thu 18 Apr 2019 at 20:46, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > On Thu, 18 Apr 2019 16:33:22 +0000, Vlad Buslov wrote:  
> >> Considering this, I tried to improve my solution to remove the
> >> possibility of multiple adds of the same filter, and it seems to me that
> >> it would be enough to move hw_filters list management into the flower
> >> offload functions: add the filter to the list while holding rtnl lock in
> >> fl_hw_replace_filter() and remove it from the list while holding rtnl
> >> lock in fl_hw_destroy_filter(). What do you think?
> >
> > Sounds good for now, but I presume the plan is to get rid of rtnl
> > around the driver call.. at which point we would switch to a rwlock? :)  
> 
> Yes, but I would like the lock to be in the cls hw offloads API
> (tc_setup_cb_replace(), etc.) and not in flower itself. That would also
> solve the deadlock issue and make the code reusable for any future
> unlocked classifier implementations.

And then the HW list goes along with it into the common code?
That'd be quite nice.


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-18 18:02                                 ` Jakub Kicinski
@ 2019-04-18 18:13                                   ` Vlad Buslov
  2019-04-18 18:15                                     ` Jakub Kicinski
  0 siblings, 1 reply; 25+ messages in thread
From: Vlad Buslov @ 2019-04-18 18:13 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Vlad Buslov, netdev, jhs, xiyou.wangcong, jiri, davem


On Thu 18 Apr 2019 at 21:02, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> On Thu, 18 Apr 2019 17:58:26 +0000, Vlad Buslov wrote:
>> On Thu 18 Apr 2019 at 20:46, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
>> > On Thu, 18 Apr 2019 16:33:22 +0000, Vlad Buslov wrote:  
>> >> Considering this, I tried to improve my solution to remove the
>> >> possibility of multiple adds of the same filter, and it seems to me that
>> >> it would be enough to move hw_filters list management into the flower
>> >> offload functions: add the filter to the list while holding rtnl lock in
>> >> fl_hw_replace_filter() and remove it from the list while holding rtnl
>> >> lock in fl_hw_destroy_filter(). What do you think?
>> >
>> > Sounds good for now, but I presume the plan is to get rid of rtnl
>> > around the driver call.. at which point we would switch to a rwlock? :)  
>> 
>> Yes, but I would like the lock to be in the cls hw offloads API
>> (tc_setup_cb_replace(), etc.) and not in flower itself. That would also
>> solve the deadlock issue and make the code reusable for any future
>> unlocked classifier implementations.
>
> And then the HW list goes along with it into the common code?
> That'd be quite nice.

The goal is to have a lock in tcf_block and use it to synchronize
cb_list and all related counters (offloadcnt, etc.). Now I also want to
use it to protect the hw_filters list and prevent the double-add issue.
Meanwhile, rtnl lock can do the job.
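
A rough sketch of that direction; cb_lock, the *_sketch types and the
callback signature are assumptions drawn from this discussion, not
existing code:

struct tcf_block_cb_sketch {
	struct list_head list;
	int (*cb)(void *type_data, void *cb_priv);	/* driver callback */
	void *cb_priv;
};

struct tcf_block_sketch {
	struct rw_semaphore cb_lock;	/* assumed new member */
	struct list_head cb_list;	/* offload callbacks */
	atomic_t offloadcnt;		/* atomic: bumped under the read lock */
};

static int tc_setup_cb_replace_sketch(struct tcf_block_sketch *block,
				      void *type_data)
{
	struct tcf_block_cb_sketch *cb;
	int ok_count = 0;

	/* Normal add/replace/delete paths take the lock shared, so
	 * concurrent rule updates keep scaling; a reoffload replay
	 * would take down_write(&block->cb_lock) instead, excluding
	 * them all while it walks the offloaded filters.
	 */
	down_read(&block->cb_lock);
	list_for_each_entry(cb, &block->cb_list, list) {
		if (!cb->cb(type_data, cb->cb_priv)) {	/* 0 == success */
			atomic_inc(&block->offloadcnt);
			ok_count++;
		}
	}
	up_read(&block->cb_lock);
	return ok_count;
}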


* Re: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access
  2019-04-18 18:13                                   ` Vlad Buslov
@ 2019-04-18 18:15                                     ` Jakub Kicinski
  0 siblings, 0 replies; 25+ messages in thread
From: Jakub Kicinski @ 2019-04-18 18:15 UTC (permalink / raw)
  To: Vlad Buslov; +Cc: netdev, jhs, xiyou.wangcong, jiri, davem

On Thu, 18 Apr 2019 18:13:37 +0000, Vlad Buslov wrote:
> On Thu 18 Apr 2019 at 21:02, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > On Thu, 18 Apr 2019 17:58:26 +0000, Vlad Buslov wrote:  
> >> On Thu 18 Apr 2019 at 20:46, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:  
> >> > On Thu, 18 Apr 2019 16:33:22 +0000, Vlad Buslov wrote:    
> >> >> Considering this, I tried to improve my solution to remove the
> >> >> possibility of multiple adds of the same filter, and it seems to me that
> >> >> it would be enough to move hw_filters list management into the flower
> >> >> offload functions: add the filter to the list while holding rtnl lock in
> >> >> fl_hw_replace_filter() and remove it from the list while holding rtnl
> >> >> lock in fl_hw_destroy_filter(). What do you think?
> >> >
> >> > Sounds good for now, but I presume the plan is to get rid of rtnl
> >> > around the driver call.. at which point we would switch to a rwlock? :)    
> >> 
> >> Yes, but I would like the lock to be in the cls hw offloads API
> >> (tc_setup_cb_replace(), etc.) and not in flower itself. That would also
> >> solve the deadlock issue and make the code reusable for any future
> >> unlocked classifier implementations.
> >
> > And then the HW list goes along with it into the common code?
> > That'd be quite nice.  
> 
> The goal is to have a lock in tcf_block and use it to synchronize
> cb_list and all related counters (offloadcnt, etc.). Now I also want to
> use it to protect the hw_filters list and prevent the double-add issue.
> Meanwhile, rtnl lock can do the job.

SGTM 👍


Thread overview: 25+ messages
2019-04-05 17:56 [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Vlad Buslov
2019-04-06  5:59 ` Jiri Pirko
2019-04-08  2:34 ` David Miller
2019-04-08 22:26 ` Jakub Kicinski
2019-04-09  8:23   ` Vlad Buslov
2019-04-09 17:10     ` Jakub Kicinski
2019-04-10 14:53       ` Vlad Buslov
2019-04-10 15:48         ` Jakub Kicinski
2019-04-10 16:02           ` Vlad Buslov
2019-04-10 16:09             ` Jakub Kicinski
2019-04-10 16:26               ` Vlad Buslov
2019-04-10 17:00                 ` Jakub Kicinski
2019-04-16 14:20                   ` [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access Vlad Buslov
2019-04-16 21:49                     ` Jakub Kicinski
2019-04-17  7:29                       ` Vlad Buslov
2019-04-17 16:34                         ` Jakub Kicinski
2019-04-17 17:01                           ` Vlad Buslov
2019-04-18 16:33                           ` Vlad Buslov
2019-04-18 17:46                             ` Jakub Kicinski
2019-04-18 17:58                               ` Vlad Buslov
2019-04-18 18:02                                 ` Jakub Kicinski
2019-04-18 18:13                                   ` Vlad Buslov
2019-04-18 18:15                                     ` Jakub Kicinski
2019-04-11 11:13 ` [PATCH net-next] net: sched: flower: insert filter to ht before offloading it to hw Ido Schimmel
2019-04-11 11:28   ` Vlad Buslov
