All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch net-next] net_sched: convert idrinfo->lock from spinlock to a mutex
@ 2018-10-02 19:50 Cong Wang
  2018-10-03  5:31 ` Ido Schimmel
  2018-10-05  7:38 ` David Miller
  0 siblings, 2 replies; 3+ messages in thread
From: Cong Wang @ 2018-10-02 19:50 UTC (permalink / raw)
  To: netdev; +Cc: jiri, jhs, vladbu, idosch, Cong Wang, Jiri Pirko

In commit ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
we move fl_hw_destroy_tmplt() to a workqueue to avoid blocking
with the spinlock held. Unfortunately, this causes a lot of
troubles here:

1. tcf_chain_destroy() could be called right after we queue the work
   but before the work runs. This is a use-after-free.

2. The chain refcnt is already 0, we can't even just hold it again.
   We can check refcnt==1 but it is ugly.

3. The chain with refcnt 0 is still visible in its block, which means
   it could be still found and used!

4. The block has a refcnt too, we can't hold it without introducing a
   proper API either.

We can make it working but the end result is ugly. Instead of wasting
time on reviewing it, let's just convert the troubling spinlock to
a mutex, which allows us to use non-atomic allocations too.

Fixes: ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
Reported-by: Ido Schimmel <idosch@idosch.org>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/net/act_api.h  |  4 ++--
 net/sched/act_api.c    | 44 ++++++++++++++++++++++----------------------
 net/sched/cls_flower.c | 13 ++-----------
 3 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 1ddff3360592..05c7df41d737 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -13,7 +13,7 @@
 #include <net/netns/generic.h>
 
 struct tcf_idrinfo {
-	spinlock_t	lock;
+	struct mutex	lock;
 	struct idr	action_idr;
 };
 
@@ -117,7 +117,7 @@ int tc_action_net_init(struct tc_action_net *tn,
 	if (!tn->idrinfo)
 		return -ENOMEM;
 	tn->ops = ops;
-	spin_lock_init(&tn->idrinfo->lock);
+	mutex_init(&tn->idrinfo->lock);
 	idr_init(&tn->idrinfo->action_idr);
 	return err;
 }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3c7c23421885..55153da00278 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -104,11 +104,11 @@ static int __tcf_action_put(struct tc_action *p, bool bind)
 {
 	struct tcf_idrinfo *idrinfo = p->idrinfo;
 
-	if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+	if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
 		if (bind)
 			atomic_dec(&p->tcfa_bindcnt);
 		idr_remove(&idrinfo->action_idr, p->tcfa_index);
-		spin_unlock(&idrinfo->lock);
+		mutex_unlock(&idrinfo->lock);
 
 		tcf_action_cleanup(p);
 		return 1;
@@ -200,7 +200,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 	struct tc_action *p;
 	unsigned long id = 1;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 
 	s_i = cb->args[0];
 
@@ -235,7 +235,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 	if (index >= 0)
 		cb->args[0] = index + 1;
 
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 	if (n_i) {
 		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
 			cb->args[1] = n_i;
@@ -277,18 +277,18 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 	if (nla_put_string(skb, TCA_KIND, ops->kind))
 		goto nla_put_failure;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	idr_for_each_entry_ul(idr, p, id) {
 		ret = tcf_idr_release_unsafe(p);
 		if (ret == ACT_P_DELETED) {
 			module_put(ops->owner);
 			n_i++;
 		} else if (ret < 0) {
-			spin_unlock(&idrinfo->lock);
+			mutex_unlock(&idrinfo->lock);
 			goto nla_put_failure;
 		}
 	}
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 
 	if (nla_put_u32(skb, TCA_FCNT, n_i))
 		goto nla_put_failure;
@@ -324,13 +324,13 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 	struct tc_action *p;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
 	if (IS_ERR(p))
 		p = NULL;
 	else if (p)
 		refcount_inc(&p->tcfa_refcnt);
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 
 	if (p) {
 		*a = p;
@@ -345,10 +345,10 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
 	struct tc_action *p;
 	int ret = 0;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
 	if (!p) {
-		spin_unlock(&idrinfo->lock);
+		mutex_unlock(&idrinfo->lock);
 		return -ENOENT;
 	}
 
@@ -358,7 +358,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
 
 			WARN_ON(p != idr_remove(&idrinfo->action_idr,
 						p->tcfa_index));
-			spin_unlock(&idrinfo->lock);
+			mutex_unlock(&idrinfo->lock);
 
 			tcf_action_cleanup(p);
 			module_put(owner);
@@ -369,7 +369,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
 		ret = -EPERM;
 	}
 
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 	return ret;
 }
 
@@ -431,10 +431,10 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 {
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
 	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
 
@@ -444,10 +444,10 @@ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
 {
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
 	WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_cleanup);
 
@@ -465,14 +465,14 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
 	int ret;
 
 again:
-	spin_lock(&idrinfo->lock);
+	mutex_lock(&idrinfo->lock);
 	if (*index) {
 		p = idr_find(&idrinfo->action_idr, *index);
 		if (IS_ERR(p)) {
 			/* This means that another process allocated
 			 * index but did not assign the pointer yet.
 			 */
-			spin_unlock(&idrinfo->lock);
+			mutex_unlock(&idrinfo->lock);
 			goto again;
 		}
 
@@ -485,7 +485,7 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
 		} else {
 			*a = NULL;
 			ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
-					    *index, GFP_ATOMIC);
+					    *index, GFP_KERNEL);
 			if (!ret)
 				idr_replace(&idrinfo->action_idr,
 					    ERR_PTR(-EBUSY), *index);
@@ -494,12 +494,12 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
 		*index = 1;
 		*a = NULL;
 		ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
-				    UINT_MAX, GFP_ATOMIC);
+				    UINT_MAX, GFP_KERNEL);
 		if (!ret)
 			idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
 				    *index);
 	}
-	spin_unlock(&idrinfo->lock);
+	mutex_unlock(&idrinfo->lock);
 	return ret;
 }
 EXPORT_SYMBOL(tcf_idr_check_alloc);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 92dd5071a708..9aada2d0ef06 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -79,7 +79,6 @@ struct fl_flow_tmplt {
 	struct fl_flow_key mask;
 	struct flow_dissector dissector;
 	struct tcf_chain *chain;
-	struct rcu_work rwork;
 };
 
 struct cls_fl_head {
@@ -1438,20 +1437,12 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
 	return ERR_PTR(err);
 }
 
-static void fl_tmplt_destroy_work(struct work_struct *work)
-{
-	struct fl_flow_tmplt *tmplt = container_of(to_rcu_work(work),
-						 struct fl_flow_tmplt, rwork);
-
-	fl_hw_destroy_tmplt(tmplt->chain, tmplt);
-	kfree(tmplt);
-}
-
 static void fl_tmplt_destroy(void *tmplt_priv)
 {
 	struct fl_flow_tmplt *tmplt = tmplt_priv;
 
-	tcf_queue_work(&tmplt->rwork, fl_tmplt_destroy_work);
+	fl_hw_destroy_tmplt(tmplt->chain, tmplt);
+	kfree(tmplt);
 }
 
 static int fl_dump_key_val(struct sk_buff *skb,
-- 
2.14.4

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Patch net-next] net_sched: convert idrinfo->lock from spinlock to a mutex
  2018-10-02 19:50 [Patch net-next] net_sched: convert idrinfo->lock from spinlock to a mutex Cong Wang
@ 2018-10-03  5:31 ` Ido Schimmel
  2018-10-05  7:38 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: Ido Schimmel @ 2018-10-03  5:31 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, jiri, jhs, vladbu, Jiri Pirko

On Tue, Oct 02, 2018 at 12:50:19PM -0700, Cong Wang wrote:
> In commit ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
> we move fl_hw_destroy_tmplt() to a workqueue to avoid blocking
> with the spinlock held. Unfortunately, this causes a lot of
> troubles here:
> 
> 1. tcf_chain_destroy() could be called right after we queue the work
>    but before the work runs. This is a use-after-free.
> 
> 2. The chain refcnt is already 0, we can't even just hold it again.
>    We can check refcnt==1 but it is ugly.
> 
> 3. The chain with refcnt 0 is still visible in its block, which means
>    it could be still found and used!
> 
> 4. The block has a refcnt too, we can't hold it without introducing a
>    proper API either.
> 
> We can make it working but the end result is ugly. Instead of wasting
> time on reviewing it, let's just convert the troubling spinlock to
> a mutex, which allows us to use non-atomic allocations too.
> 
> Fixes: ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
> Reported-by: Ido Schimmel <idosch@idosch.org>
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: Vlad Buslov <vladbu@mellanox.com>
> Cc: Jiri Pirko <jiri@mellanox.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Tested-by: Ido Schimmel <idosch@mellanox.com>

Thanks a lot!

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Patch net-next] net_sched: convert idrinfo->lock from spinlock to a mutex
  2018-10-02 19:50 [Patch net-next] net_sched: convert idrinfo->lock from spinlock to a mutex Cong Wang
  2018-10-03  5:31 ` Ido Schimmel
@ 2018-10-05  7:38 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: David Miller @ 2018-10-05  7:38 UTC (permalink / raw)
  To: xiyou.wangcong; +Cc: netdev, jiri, jhs, vladbu, idosch, jiri

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue,  2 Oct 2018 12:50:19 -0700

> In commit ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
> we move fl_hw_destroy_tmplt() to a workqueue to avoid blocking
> with the spinlock held. Unfortunately, this causes a lot of
> troubles here:
> 
> 1. tcf_chain_destroy() could be called right after we queue the work
>    but before the work runs. This is a use-after-free.
> 
> 2. The chain refcnt is already 0, we can't even just hold it again.
>    We can check refcnt==1 but it is ugly.
> 
> 3. The chain with refcnt 0 is still visible in its block, which means
>    it could be still found and used!
> 
> 4. The block has a refcnt too, we can't hold it without introducing a
>    proper API either.
> 
> We can make it working but the end result is ugly. Instead of wasting
> time on reviewing it, let's just convert the troubling spinlock to
> a mutex, which allows us to use non-atomic allocations too.
> 
> Fixes: ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
> Reported-by: Ido Schimmel <idosch@idosch.org>
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: Vlad Buslov <vladbu@mellanox.com>
> Cc: Jiri Pirko <jiri@mellanox.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Applied.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-10-05 14:35 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-02 19:50 [Patch net-next] net_sched: convert idrinfo->lock from spinlock to a mutex Cong Wang
2018-10-03  5:31 ` Ido Schimmel
2018-10-05  7:38 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.