netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module()
@ 2021-01-17  0:56 Cong Wang
  2021-01-17 15:15 ` Jamal Hadi Salim
  2021-01-19  4:30 ` patchwork-bot+netdevbpf
  0 siblings, 2 replies; 4+ messages in thread
From: Cong Wang @ 2021-01-17  0:56 UTC (permalink / raw)
  To: netdev
  Cc: Cong Wang, syzbot+82752bc5331601cf4899,
	syzbot+b3b63b6bff456bd95294, syzbot+ba67b12b1ca729912834,
	Jamal Hadi Salim, Jiri Pirko

From: Cong Wang <cong.wang@bytedance.com>

tcf_action_init_1() loads tc action modules automatically with
request_module() after parsing the tc action names, and it drops RTNL
lock and re-holds it before and after request_module(). This causes a
lot of troubles, as discovered by syzbot, because we can be in the
middle of batch initializations when we create an array of tc actions.

One of the problem is deadlock:

CPU 0					CPU 1
rtnl_lock();
for (...) {
  tcf_action_init_1();
    -> rtnl_unlock();
    -> request_module();
				rtnl_lock();
				for (...) {
				  tcf_action_init_1();
				    -> tcf_idr_check_alloc();
				   // Insert one action into idr,
				   // but it is not committed until
				   // tcf_idr_insert_many(), then drop
				   // the RTNL lock in the _next_
				   // iteration
				   -> rtnl_unlock();
    -> rtnl_lock();
    -> a_o->init();
      -> tcf_idr_check_alloc();
      // Now waiting for the same index
      // to be committed
				    -> request_module();
				    -> rtnl_lock()
				    // Now waiting for RTNL lock
				}
				rtnl_unlock();
}
rtnl_unlock();

This is not easy to solve, we can move the request_module() before
this loop and pre-load all the modules we need for this netlink
message and then do the rest initializations. So the loop breaks down
to two now:

        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
                struct tc_action_ops *a_o;

                a_o = tc_action_load_ops(name, tb[i]...);
                ops[i - 1] = a_o;
        }

        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
                act = tcf_action_init_1(ops[i - 1]...);
        }

Although this looks serious, it only has been reported by syzbot, so it
seems hard to trigger this by humans. And given the size of this patch,
I'd suggest to make it to net-next and not to backport to stable.

This patch has been tested by syzbot and tested with tdc.py by me.

Fixes: 0fedc63fadf ("net_sched: commit action insertions together")
Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com
Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/net/act_api.h |   5 +-
 net/sched/act_api.c   | 104 +++++++++++++++++++++++++++---------------
 net/sched/cls_api.c   |  11 ++++-
 3 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 55dab604861f..761c0e331915 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -186,10 +186,13 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
 		    struct tc_action *actions[], size_t *attr_size,
 		    bool rtnl_held, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+					 bool rtnl_held,
+					 struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
-				    bool rtnl_held,
+				    struct tc_action_ops *ops, bool rtnl_held,
 				    struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
 		    int ref, bool terse);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2e85b636b27b..4dd235ce9a07 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -928,19 +928,13 @@ static void tcf_idr_insert_many(struct tc_action *actions[])
 	}
 }
 
-struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
-				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind,
-				    bool rtnl_held,
-				    struct netlink_ext_ack *extack)
+struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+					 bool rtnl_held,
+					 struct netlink_ext_ack *extack)
 {
-	struct nla_bitfield32 flags = { 0, 0 };
-	u8 hw_stats = TCA_ACT_HW_STATS_ANY;
-	struct tc_action *a;
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct tc_action_ops *a_o;
-	struct tc_cookie *cookie = NULL;
 	char act_name[IFNAMSIZ];
-	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
 	int err;
 
@@ -948,33 +942,21 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
 						  tcf_action_policy, extack);
 		if (err < 0)
-			goto err_out;
+			return ERR_PTR(err);
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
 		if (!kind) {
 			NL_SET_ERR_MSG(extack, "TC action kind must be specified");
-			goto err_out;
+			return ERR_PTR(err);
 		}
 		if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) {
 			NL_SET_ERR_MSG(extack, "TC action name too long");
-			goto err_out;
-		}
-		if (tb[TCA_ACT_COOKIE]) {
-			cookie = nla_memdup_cookie(tb);
-			if (!cookie) {
-				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
-				err = -ENOMEM;
-				goto err_out;
-			}
+			return ERR_PTR(err);
 		}
-		hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
-		if (tb[TCA_ACT_FLAGS])
-			flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
 	} else {
 		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
 			NL_SET_ERR_MSG(extack, "TC action name too long");
-			err = -EINVAL;
-			goto err_out;
+			return ERR_PTR(-EINVAL);
 		}
 	}
 
@@ -996,24 +978,56 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		 * indicate this using -EAGAIN.
 		 */
 		if (a_o != NULL) {
-			err = -EAGAIN;
-			goto err_mod;
+			module_put(a_o->owner);
+			return ERR_PTR(-EAGAIN);
 		}
 #endif
 		NL_SET_ERR_MSG(extack, "Failed to load TC action module");
-		err = -ENOENT;
-		goto err_free;
+		return ERR_PTR(-ENOENT);
 	}
 
+	return a_o;
+}
+
+struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
+				    struct nlattr *nla, struct nlattr *est,
+				    char *name, int ovr, int bind,
+				    struct tc_action_ops *a_o, bool rtnl_held,
+				    struct netlink_ext_ack *extack)
+{
+	struct nla_bitfield32 flags = { 0, 0 };
+	u8 hw_stats = TCA_ACT_HW_STATS_ANY;
+	struct nlattr *tb[TCA_ACT_MAX + 1];
+	struct tc_cookie *cookie = NULL;
+	struct tc_action *a;
+	int err;
+
 	/* backward compatibility for policer */
-	if (name == NULL)
+	if (name == NULL) {
+		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+						  tcf_action_policy, extack);
+		if (err < 0)
+			return ERR_PTR(err);
+		if (tb[TCA_ACT_COOKIE]) {
+			cookie = nla_memdup_cookie(tb);
+			if (!cookie) {
+				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
+				err = -ENOMEM;
+				goto err_out;
+			}
+		}
+		hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
+		if (tb[TCA_ACT_FLAGS])
+			flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
 				rtnl_held, tp, flags.value, extack);
-	else
+	} else {
 		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
 				tp, flags.value, extack);
+	}
 	if (err < 0)
-		goto err_mod;
+		goto err_out;
 
 	if (!name && tb[TCA_ACT_COOKIE])
 		tcf_set_action_cookie(&a->act_cookie, cookie);
@@ -1030,14 +1044,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 
 	return a;
 
-err_mod:
-	module_put(a_o->owner);
-err_free:
+err_out:
 	if (cookie) {
 		kfree(cookie->data);
 		kfree(cookie);
 	}
-err_out:
 	return ERR_PTR(err);
 }
 
@@ -1048,6 +1059,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct tc_action *actions[], size_t *attr_size,
 		    bool rtnl_held, struct netlink_ext_ack *extack)
 {
+	struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	size_t sz = 0;
@@ -1059,9 +1071,20 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 	if (err < 0)
 		return err;
 
+	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
+		struct tc_action_ops *a_o;
+
+		a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+		if (IS_ERR(a_o)) {
+			err = PTR_ERR(a_o);
+			goto err_mod;
+		}
+		ops[i - 1] = a_o;
+	}
+
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-					rtnl_held, extack);
+					ops[i - 1], rtnl_held, extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -1081,6 +1104,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 
 err:
 	tcf_action_destroy(actions, bind);
+err_mod:
+	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+		if (ops[i])
+			module_put(ops[i]->owner);
+	}
 	return err;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 37b77bd30974..a67c66a512a4 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3043,12 +3043,19 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		size_t attr_size = 0;
 
 		if (exts->police && tb[exts->police]) {
+			struct tc_action_ops *a_o;
+
+			a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+			if (IS_ERR(a_o))
+				return PTR_ERR(a_o);
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, rtnl_held,
+						TCA_ACT_BIND, a_o, rtnl_held,
 						extack);
-			if (IS_ERR(act))
+			if (IS_ERR(act)) {
+				module_put(a_o->owner);
 				return PTR_ERR(act);
+			}
 
 			act->type = exts->type = TCA_OLD_COMPAT;
 			exts->actions[0] = act;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module()
  2021-01-17  0:56 [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module() Cong Wang
@ 2021-01-17 15:15 ` Jamal Hadi Salim
  2021-01-18 14:31   ` Vlad Buslov
  2021-01-19  4:30 ` patchwork-bot+netdevbpf
  1 sibling, 1 reply; 4+ messages in thread
From: Jamal Hadi Salim @ 2021-01-17 15:15 UTC (permalink / raw)
  To: Cong Wang, netdev
  Cc: Cong Wang, syzbot+82752bc5331601cf4899,
	syzbot+b3b63b6bff456bd95294, syzbot+ba67b12b1ca729912834,
	Jiri Pirko, Marcelo Ricardo Leitner, Davide Caratti, Vlad Buslov,
	Briana Oursler

On 2021-01-16 7:56 p.m., Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
> 
> tcf_action_init_1() loads tc action modules automatically with
> request_module() after parsing the tc action names, and it drops RTNL
> lock and re-holds it before and after request_module(). This causes a
> lot of troubles, as discovered by syzbot, because we can be in the
> middle of batch initializations when we create an array of tc actions.
> 
> One of the problem is deadlock:
> 
> CPU 0					CPU 1
> rtnl_lock();
> for (...) {
>    tcf_action_init_1();
>      -> rtnl_unlock();
>      -> request_module();
> 				rtnl_lock();
> 				for (...) {
> 				  tcf_action_init_1();
> 				    -> tcf_idr_check_alloc();
> 				   // Insert one action into idr,
> 				   // but it is not committed until
> 				   // tcf_idr_insert_many(), then drop
> 				   // the RTNL lock in the _next_
> 				   // iteration
> 				   -> rtnl_unlock();
>      -> rtnl_lock();
>      -> a_o->init();
>        -> tcf_idr_check_alloc();
>        // Now waiting for the same index
>        // to be committed
> 				    -> request_module();
> 				    -> rtnl_lock()
> 				    // Now waiting for RTNL lock
> 				}
> 				rtnl_unlock();
> }
> rtnl_unlock();
> 
> This is not easy to solve, we can move the request_module() before
> this loop and pre-load all the modules we need for this netlink
> message and then do the rest initializations. So the loop breaks down
> to two now:
> 
>          for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
>                  struct tc_action_ops *a_o;
> 
>                  a_o = tc_action_load_ops(name, tb[i]...);
>                  ops[i - 1] = a_o;
>          }
> 
>          for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
>                  act = tcf_action_init_1(ops[i - 1]...);
>          }
> 
> Although this looks serious, it only has been reported by syzbot, so it
> seems hard to trigger this by humans. And given the size of this patch,
> I'd suggest to make it to net-next and not to backport to stable.
> 
> This patch has been tested by syzbot and tested with tdc.py by me.
> 

LGTM.
Initially i was worried about performance impact but i found nothing
observable. We need to add a tdc test for batch (I can share how i did
batch testing at next meet).

Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

cheers,
jamal

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module()
  2021-01-17 15:15 ` Jamal Hadi Salim
@ 2021-01-18 14:31   ` Vlad Buslov
  0 siblings, 0 replies; 4+ messages in thread
From: Vlad Buslov @ 2021-01-18 14:31 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Cong Wang, netdev, Cong Wang, syzbot+82752bc5331601cf4899,
	syzbot+b3b63b6bff456bd95294, syzbot+ba67b12b1ca729912834,
	Jiri Pirko, Marcelo Ricardo Leitner, Davide Caratti,
	Briana Oursler

On Sun 17 Jan 2021 at 17:15, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> On 2021-01-16 7:56 p.m., Cong Wang wrote:
>> From: Cong Wang <cong.wang@bytedance.com>
>> tcf_action_init_1() loads tc action modules automatically with
>> request_module() after parsing the tc action names, and it drops RTNL
>> lock and re-holds it before and after request_module(). This causes a
>> lot of troubles, as discovered by syzbot, because we can be in the
>> middle of batch initializations when we create an array of tc actions.
>> One of the problem is deadlock:
>> CPU 0					CPU 1
>> rtnl_lock();
>> for (...) {
>>    tcf_action_init_1();
>>      -> rtnl_unlock();
>>      -> request_module();
>> 				rtnl_lock();
>> 				for (...) {
>> 				  tcf_action_init_1();
>> 				    -> tcf_idr_check_alloc();
>> 				   // Insert one action into idr,
>> 				   // but it is not committed until
>> 				   // tcf_idr_insert_many(), then drop
>> 				   // the RTNL lock in the _next_
>> 				   // iteration
>> 				   -> rtnl_unlock();
>>      -> rtnl_lock();
>>      -> a_o->init();
>>        -> tcf_idr_check_alloc();
>>        // Now waiting for the same index
>>        // to be committed
>> 				    -> request_module();
>> 				    -> rtnl_lock()
>> 				    // Now waiting for RTNL lock
>> 				}
>> 				rtnl_unlock();
>> }
>> rtnl_unlock();
>> This is not easy to solve, we can move the request_module() before
>> this loop and pre-load all the modules we need for this netlink
>> message and then do the rest initializations. So the loop breaks down
>> to two now:
>>          for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
>>                  struct tc_action_ops *a_o;
>>                  a_o = tc_action_load_ops(name, tb[i]...);
>>                  ops[i - 1] = a_o;
>>          }
>>          for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
>>                  act = tcf_action_init_1(ops[i - 1]...);
>>          }
>> Although this looks serious, it only has been reported by syzbot, so it
>> seems hard to trigger this by humans. And given the size of this patch,
>> I'd suggest to make it to net-next and not to backport to stable.
>> This patch has been tested by syzbot and tested with tdc.py by me.
>> 
>
> LGTM.
> Initially i was worried about performance impact but i found nothing
> observable. We need to add a tdc test for batch (I can share how i did
> batch testing at next meet).
>
> Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
>
> cheers,
> jamal

Hi,

Thanks for adding me to the thread!
I ran our performance tests with the patch applied and didn't observe
any regression.

Regards,
Vlad


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module()
  2021-01-17  0:56 [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module() Cong Wang
  2021-01-17 15:15 ` Jamal Hadi Salim
@ 2021-01-19  4:30 ` patchwork-bot+netdevbpf
  1 sibling, 0 replies; 4+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-01-19  4:30 UTC (permalink / raw)
  To: Cong Wang
  Cc: netdev, cong.wang, syzbot+82752bc5331601cf4899,
	syzbot+b3b63b6bff456bd95294, syzbot+ba67b12b1ca729912834, jhs,
	jiri

Hello:

This patch was applied to netdev/net-next.git (refs/heads/master):

On Sat, 16 Jan 2021 16:56:57 -0800 you wrote:
> From: Cong Wang <cong.wang@bytedance.com>
> 
> tcf_action_init_1() loads tc action modules automatically with
> request_module() after parsing the tc action names, and it drops RTNL
> lock and re-holds it before and after request_module(). This causes a
> lot of troubles, as discovered by syzbot, because we can be in the
> middle of batch initializations when we create an array of tc actions.
> 
> [...]

Here is the summary with links:
  - [net-next] net_sched: fix RTNL deadlock again caused by request_module()
    https://git.kernel.org/netdev/net-next/c/d349f9976868

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-01-19  4:36 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-17  0:56 [Patch net-next] net_sched: fix RTNL deadlock again caused by request_module() Cong Wang
2021-01-17 15:15 ` Jamal Hadi Salim
2021-01-18 14:31   ` Vlad Buslov
2021-01-19  4:30 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).