[PATCH net] net: sched: flower: don't call synchronize_rcu() on mask creation

* [PATCH net] net: sched: flower: don't call synchronize_rcu() on mask creation
@ 2019-06-13 14:54 Vlad Buslov
  2019-06-13 15:49 ` Jiri Pirko
  2019-06-15  2:30 ` David Miller
  0 siblings, 2 replies; 3+ messages in thread
From: Vlad Buslov @ 2019-06-13 14:54 UTC (permalink / raw)
  To: netdev
  Cc: jhs, xiyou.wangcong, jiri, davem, pablo, alexanderk, pabeni,
	mlxsw, Vlad Buslov, Jiri Pirko

Current flower mask creating code assumes that temporary mask that is used
when inserting new filter is stack allocated. To prevent race condition
with data patch synchronize_rcu() is called every time fl_create_new_mask()
replaces temporary stack allocated mask. As reported by Jiri, this
increases runtime of creating 20000 flower classifiers from 4 seconds to
163 seconds. However, this design is no longer necessary since temporary
mask was converted to be dynamically allocated by commit 2cddd2014782
("net/sched: cls_flower: allocate mask dynamically in fl_change()").

Remove synchronize_rcu() calls from mask creation code. Instead, refactor
fl_change() to always deallocate temporary mask with rcu grace period.

Fixes: 195c234d15c9 ("net: sched: flower: handle concurrent mask insertion")
Reported-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 net/sched/cls_flower.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c388372df0e2..eedd5786c084 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -320,10 +320,13 @@ static int fl_init(struct tcf_proto *tp)
 	return rhashtable_init(&head->ht, &mask_ht_params);
 }
 
-static void fl_mask_free(struct fl_flow_mask *mask)
+static void fl_mask_free(struct fl_flow_mask *mask, bool mask_init_done)
 {
-	WARN_ON(!list_empty(&mask->filters));
-	rhashtable_destroy(&mask->ht);
+	/* temporary masks don't have their filters list and ht initialized */
+	if (mask_init_done) {
+		WARN_ON(!list_empty(&mask->filters));
+		rhashtable_destroy(&mask->ht);
+	}
 	kfree(mask);
 }
 
@@ -332,7 +335,15 @@ static void fl_mask_free_work(struct work_struct *work)
 	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
 						 struct fl_flow_mask, rwork);
 
-	fl_mask_free(mask);
+	fl_mask_free(mask, true);
+}
+
+static void fl_uninit_mask_free_work(struct work_struct *work)
+{
+	struct fl_flow_mask *mask = container_of(to_rcu_work(work),
+						 struct fl_flow_mask, rwork);
+
+	fl_mask_free(mask, false);
 }
 
 static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
@@ -1346,9 +1357,6 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
 	if (err)
 		goto errout_destroy;
 
-	/* Wait until any potential concurrent users of mask are finished */
-	synchronize_rcu();
-
 	spin_lock(&head->masks_lock);
 	list_add_tail_rcu(&newmask->list, &head->masks);
 	spin_unlock(&head->masks_lock);
@@ -1375,11 +1383,7 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
 
 	/* Insert mask as temporary node to prevent concurrent creation of mask
 	 * with same key. Any concurrent lookups with same key will return
-	 * -EAGAIN because mask's refcnt is zero. It is safe to insert
-	 * stack-allocated 'mask' to masks hash table because we call
-	 * synchronize_rcu() before returning from this function (either in case
-	 * of error or after replacing it with heap-allocated mask in
-	 * fl_create_new_mask()).
+	 * -EAGAIN because mask's refcnt is zero.
 	 */
 	fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht,
 						       &mask->ht_node,
@@ -1414,8 +1418,6 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
 errout_cleanup:
 	rhashtable_remove_fast(&head->ht, &mask->ht_node,
 			       mask_ht_params);
-	/* Wait until any potential concurrent users of mask are finished */
-	synchronize_rcu();
 	return ret;
 }
 
@@ -1644,7 +1646,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	*arg = fnew;
 
 	kfree(tb);
-	kfree(mask);
+	tcf_queue_work(&mask->rwork, fl_uninit_mask_free_work);
 	return 0;
 
 errout_ht:
@@ -1664,7 +1666,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 errout_tb:
 	kfree(tb);
 errout_mask_alloc:
-	kfree(mask);
+	tcf_queue_work(&mask->rwork, fl_uninit_mask_free_work);
 errout_fold:
 	if (fold)
 		__fl_put(fold);
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread