netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: Chris Mi <chrism@mellanox.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Jiri Pirko <jiri@resnulli.us>,
	John Fastabend <john.fastabend@gmail.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Subject: [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter
Date: Thu, 26 Oct 2017 18:24:28 -0700	[thread overview]
Message-ID: <20171027012443.3306-2-xiyou.wangcong@gmail.com> (raw)
In-Reply-To: <20171027012443.3306-1-xiyou.wangcong@gmail.com>

This patch introduces a dedicated workqueue for tc filters
so that each tc filter's RCU callback could defer their
action destroy work to this workqueue. The helper
tcf_queue_work() is introduced for them to use.

Because we hold RTNL lock when calling tcf_block_put(), we
can not simply flush works inside it, therefore we have to
defer it again to this workqueue and make sure all flying RCU
callbacks have already queued their work before this one, in
other words, to ensure this is the last one to execute to
prevent any use-after-free.

On the other hand, this makes tcf_block_put() ugly and
harder to understand. Since David and Eric strongly dislike
adding synchronize_rcu(), this is probably the only
solution that could make everyone happy.

Please also see the code comments below.

Reported-by: Chris Mi <chrism@mellanox.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/net/pkt_cls.h     |  3 +++
 include/net/sch_generic.h |  2 ++
 net/sched/cls_api.c       | 68 +++++++++++++++++++++++++++++++++++------------
 3 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e80edd8879ef..3009547f3c66 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -2,6 +2,7 @@
 #define __NET_PKT_CLS_H
 
 #include <linux/pkt_cls.h>
+#include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
@@ -17,6 +18,8 @@ struct tcf_walker {
 int register_tcf_proto_ops(struct tcf_proto_ops *ops);
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
 
+bool tcf_queue_work(struct work_struct *work);
+
 #ifdef CONFIG_NET_CLS
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 				bool create);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 135f5a2dd931..0dec8a23be57 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -10,6 +10,7 @@
 #include <linux/dynamic_queue_limits.h>
 #include <linux/list.h>
 #include <linux/refcount.h>
+#include <linux/workqueue.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -271,6 +272,7 @@ struct tcf_chain {
 
 struct tcf_block {
 	struct list_head chain_list;
+	struct work_struct work;
 };
 
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..045d13679ad6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(register_tcf_proto_ops);
 
+static struct workqueue_struct *tc_filter_wq;
+
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
 	struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 	 * tcf_proto_ops's destroy() handler.
 	 */
 	rcu_barrier();
+	flush_workqueue(tc_filter_wq);
 
 	write_lock(&cls_mod_lock);
 	list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
+bool tcf_queue_work(struct work_struct *work)
+{
+	return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
 /* Select new prio value from the range, managed by kernel. */
 
 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -266,23 +275,30 @@ int tcf_block_get(struct tcf_block **p_block,
 }
 EXPORT_SYMBOL(tcf_block_get);
 
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
 {
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
 	struct tcf_chain *chain, *tmp;
 
-	if (!block)
-		return;
-
-	/* XXX: Standalone actions are not allowed to jump to any chain, and
-	 * bound actions should be all removed after flushing. However,
-	 * filters are destroyed in RCU callbacks, we have to hold the chains
-	 * first, otherwise we would always race with RCU callbacks on this list
-	 * without proper locking.
-	 */
+	/* At this point, all the chains should have refcnt == 1. */
+	rtnl_lock();
+	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		tcf_chain_put(chain);
+	rtnl_unlock();
+	kfree(block);
+}
 
-	/* Wait for existing RCU callbacks to cool down. */
-	rcu_barrier();
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are destroyed
+ * in RCU callbacks, we have to hold the chains first, otherwise we would
+ * always race with RCU callbacks on this list without proper locking.
+ */
+static void tcf_block_put_deferred(struct work_struct *work)
+{
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
+	struct tcf_chain *chain;
 
+	rtnl_lock();
 	/* Hold a refcnt for all chains, except 0, in case they are gone. */
 	list_for_each_entry(chain, &block->chain_list, list)
 		if (chain->index)
@@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
 	list_for_each_entry(chain, &block->chain_list, list)
 		tcf_chain_flush(chain);
 
-	/* Wait for RCU callbacks to release the reference count. */
+	INIT_WORK(&block->work, tcf_block_put_final);
+	/* Wait for RCU callbacks to release the reference count and make
+	 * sure their works have been queued before this.
+	 */
 	rcu_barrier();
+	tcf_queue_work(&block->work);
+	rtnl_unlock();
+}
 
-	/* At this point, all the chains should have refcnt == 1. */
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
-		tcf_chain_put(chain);
-	kfree(block);
+void tcf_block_put(struct tcf_block *block)
+{
+	if (!block)
+		return;
+
+	INIT_WORK(&block->work, tcf_block_put_deferred);
+	/* Wait for existing RCU callbacks to cool down, make sure their works
+	 * have been queued before this. We can not flush pending works here
+	 * because we are holding the RTNL lock.
+	 */
+	rcu_barrier();
+	tcf_queue_work(&block->work);
 }
 EXPORT_SYMBOL(tcf_block_put);
 
@@ -1030,6 +1060,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
 
 static int __init tc_filter_init(void)
 {
+	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+	if (!tc_filter_wq)
+		return -ENOMEM;
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
-- 
2.13.0

  reply	other threads:[~2017-10-27  1:24 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-27  1:24 [Patch net 00/16] net_sched: fix races with RCU callbacks Cong Wang
2017-10-27  1:24 ` Cong Wang [this message]
2017-10-27  4:05   ` [Patch net 01/16] net_sched: introduce a workqueue for RCU callbacks of tc filter Eric Dumazet
2017-10-27  4:28     ` Cong Wang
2017-10-27  4:39       ` Eric Dumazet
2017-10-27 11:55         ` Paul E. McKenney
2017-10-27 15:43           ` Cong Wang
2017-10-27 15:37         ` Cong Wang
2017-10-27 15:52           ` Eric Dumazet
2017-10-27  1:24 ` [Patch net 02/16] net_sched: use tcf_queue_work() in basic filter Cong Wang
2017-10-27  1:24 ` [Patch net 03/16] net_sched: use tcf_queue_work() in bpf filter Cong Wang
2017-10-27  1:24 ` [Patch net 04/16] net_sched: use tcf_queue_work() in cgroup filter Cong Wang
2017-10-27  1:24 ` [Patch net 05/16] net_sched: use tcf_queue_work() in flow filter Cong Wang
2017-10-27  1:24 ` [Patch net 06/16] net_sched: use tcf_queue_work() in flower filter Cong Wang
2017-10-27  1:24 ` [Patch net 07/16] net_sched: use tcf_queue_work() in fw filter Cong Wang
2017-10-27  1:24 ` [Patch net 08/16] net_sched: use tcf_queue_work() in matchall filter Cong Wang
2017-10-27  1:24 ` [Patch net 09/16] net_sched: use tcf_queue_work() in u32 filter Cong Wang
2017-10-27  1:24 ` [Patch net 10/16] net_sched: use tcf_queue_work() in route filter Cong Wang
2017-10-27  1:24 ` [Patch net 11/16] net_sched: use tcf_queue_work() in rsvp filter Cong Wang
2017-10-27  1:24 ` [Patch net 12/16] net_sched: use tcf_queue_work() in tcindex filter Cong Wang
2017-10-27  1:24 ` [Patch net 13/16] net_sched: add rtnl assertion to tcf_exts_destroy() Cong Wang
2017-10-27  1:24 ` [Patch net 14/16] net_sched: fix call_rcu() race on act_sample module removal Cong Wang
2017-10-27  1:24 ` [Patch net 15/16] selftests: Introduce a new script to generate tc batch file Cong Wang
2017-10-27  1:24 ` [Patch net 16/16] selftests: Introduce a new test case to tc testsuite Cong Wang
2017-10-29 14:41 ` [Patch net 00/16] net_sched: fix races with RCU callbacks David Miller
2017-10-30 22:39 ` Lucas Bates
2017-10-30 23:12   ` Cong Wang
2017-10-31  1:46     ` Lucas Bates
     [not found]     ` <CAMDBHYJTMv4MDQENNKjOXaUBHyXicOnkM_j+i7__3d1CAgdVRg@mail.gmail.com>
2017-10-31  5:44       ` Cong Wang
2017-10-31 11:00         ` Jamal Hadi Salim
2017-10-31 18:55           ` Lucas Bates
2017-10-31 19:13             ` Lucas Bates
2017-10-31 22:09               ` Cong Wang
2017-10-31 23:02                 ` Cong Wang
2017-11-01 16:55                   ` Lucas Bates
2017-11-01 16:59                     ` Cong Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171027012443.3306-2-xiyou.wangcong@gmail.com \
    --to=xiyou.wangcong@gmail.com \
    --cc=chrism@mellanox.com \
    --cc=daniel@iogearbox.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=john.fastabend@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).