Netdev Archive on lore.kernel.org
 help / color / Atom feed
From: Vlad Buslov <vladbu@mellanox.com>
To: netdev@vger.kernel.org
Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us,
	davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net,
	Vlad Buslov <vladbu@mellanox.com>
Subject: [PATCH net-next v4 05/17] net: sched: traverse chains in block with tcf_get_next_chain()
Date: Mon, 11 Feb 2019 10:55:36 +0200
Message-ID: <20190211085548.7190-6-vladbu@mellanox.com> (raw)
In-Reply-To: <20190211085548.7190-1-vladbu@mellanox.com>

All users of block->chain_list rely on rtnl lock and assume that no new
chains are added when traversing the list. Use tcf_get_next_chain() to
traverse chain list without relying on rtnl mutex. This function iterates
over chains by taking reference to current iterator chain only and doesn't
assume external synchronization of chain list.

Don't take reference to all chains in block when flushing and use
tcf_get_next_chain() to safely iterate over chain list instead. Remove
tcf_block_put_all_chains() that is no longer used.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/pkt_cls.h |  2 ++
 net/sched/cls_api.c   | 96 +++++++++++++++++++++++++++++++++++++--------------
 net/sched/sch_api.c   |  4 ++-
 3 files changed, 76 insertions(+), 26 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cb8be396a11f..38bee7dd21d1 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -44,6 +44,8 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
 				       u32 chain_index);
 void tcf_chain_put_by_act(struct tcf_chain *chain);
+struct tcf_chain *tcf_get_next_chain(struct tcf_block *block,
+				     struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 869ae44d7631..8e2ac785f6fd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -883,28 +883,62 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
 	return block;
 }
 
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 {
-	struct tcf_chain *chain;
+	mutex_lock(&block->lock);
+	if (chain)
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+	else
+		chain = list_first_entry_or_null(&block->chain_list,
+						 struct tcf_chain, list);
 
-	/* Hold a refcnt for all chains, so that they don't disappear
-	 * while we are iterating.
-	 */
-	list_for_each_entry(chain, &block->chain_list, list)
+	/* skip all action-only chains */
+	while (chain && tcf_chain_held_by_acts_only(chain))
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+
+	if (chain)
 		tcf_chain_hold(chain);
+	mutex_unlock(&block->lock);
 
-	list_for_each_entry(chain, &block->chain_list, list)
-		tcf_chain_flush(chain);
+	return chain;
 }
 
-static void tcf_block_put_all_chains(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 {
-	struct tcf_chain *chain, *tmp;
+	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
 
-	/* At this point, all the chains should have refcnt >= 1. */
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
-		tcf_chain_put_explicitly_created(chain);
+	if (chain)
 		tcf_chain_put(chain);
+
+	return chain_next;
+}
+EXPORT_SYMBOL(tcf_get_next_chain);
+
+static void tcf_block_flush_all_chains(struct tcf_block *block)
+{
+	struct tcf_chain *chain;
+
+	/* Last reference to block. At this point chains cannot be added or
+	 * removed concurrently.
+	 */
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
+		tcf_chain_put_explicitly_created(chain);
+		tcf_chain_flush(chain);
 	}
 }
 
@@ -923,8 +957,6 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		mutex_unlock(&block->lock);
 		if (tcf_block_shared(block))
 			tcf_block_remove(block, block->net);
-		if (!free_block)
-			tcf_block_flush_all_chains(block);
 
 		if (q)
 			tcf_block_offload_unbind(block, q, ei);
@@ -932,7 +964,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		if (free_block)
 			tcf_block_destroy(block);
 		else
-			tcf_block_put_all_chains(block);
+			tcf_block_flush_all_chains(block);
 	} else if (q) {
 		tcf_block_offload_unbind(block, q, ei);
 	}
@@ -1266,11 +1298,15 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 			    void *cb_priv, bool add, bool offload_in_use,
 			    struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain;
+	struct tcf_chain *chain, *chain_prev;
 	struct tcf_proto *tp;
 	int err;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		for (tp = rtnl_dereference(chain->filter_chain); tp;
 		     tp = rtnl_dereference(tp->next)) {
 			if (tp->ops->reoffload) {
@@ -1289,6 +1325,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 	return 0;
 
 err_playback_remove:
+	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
 				    extack);
 	return err;
@@ -2023,11 +2060,11 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 /* called with RTNL */
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcf_chain *chain, *chain_prev;
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
-	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	long index_start;
 	long index;
@@ -2091,12 +2128,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	index_start = cb->args[0];
 	index = 0;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		if (tca[TCA_CHAIN] &&
 		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
 			continue;
 		if (!tcf_chain_dump(chain, q, parent, skb, cb,
 				    index_start, &index)) {
+			tcf_chain_put(chain);
 			err = -EMSGSIZE;
 			break;
 		}
@@ -2364,11 +2406,11 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
 /* called with RTNL */
 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcf_chain *chain, *chain_prev;
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
-	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	long index_start;
 	long index;
@@ -2432,7 +2474,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 	index_start = cb->args[0];
 	index = 0;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		if ((tca[TCA_CHAIN] &&
 		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
 			continue;
@@ -2440,14 +2486,14 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 			index++;
 			continue;
 		}
-		if (tcf_chain_held_by_acts_only(chain))
-			continue;
 		err = tc_chain_fill_node(chain, net, skb, block,
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					 RTM_NEWCHAIN);
-		if (err <= 0)
+		if (err <= 0) {
+			tcf_chain_put(chain);
 			break;
+		}
 		index++;
 	}
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 03e26e8d0ec9..80058abc729f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1909,7 +1909,9 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
 	block = cops->tcf_block(q, cl, NULL);
 	if (!block)
 		return;
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
 		struct tcf_proto *tp;
 
 		for (tp = rtnl_dereference(chain->filter_chain);
-- 
2.13.6


  parent reply index

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-11  8:55 [PATCH net-next v4 00/17] Refactor classifier API to work with chain/classifiers without rtnl lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 01/17] net: sched: protect block state with mutex Vlad Buslov
2019-02-11 14:15   ` Jiri Pirko
2019-02-11  8:55 ` [PATCH net-next v4 02/17] net: sched: protect chain->explicitly_created with block->lock Vlad Buslov
2019-02-11 14:15   ` Jiri Pirko
2019-02-11  8:55 ` [PATCH net-next v4 03/17] net: sched: refactor tc_ctl_chain() to use block->lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 04/17] net: sched: protect block->chain0 with block->lock Vlad Buslov
2019-02-11  8:55 ` Vlad Buslov [this message]
2019-02-15 22:21   ` [PATCH net-next v4 05/17] net: sched: traverse chains in block with tcf_get_next_chain() Cong Wang
2019-02-18 10:07     ` Vlad Buslov
2019-02-18 18:26       ` Cong Wang
2019-02-19 16:04         ` Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 06/17] net: sched: protect chain template accesses with block lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 07/17] net: sched: protect filter_chain list with filter_chain_lock mutex Vlad Buslov
2019-02-14 18:24   ` Ido Schimmel
2019-02-15 10:02     ` Vlad Buslov
2019-02-15 11:30       ` Ido Schimmel
2019-02-15 12:11         ` [PATCH] net: sched: matchall: verify that filter is not NULL in mall_walk() Vlad Buslov
2019-02-15 13:47           ` Ido Schimmel
2019-02-16  0:24           ` Cong Wang
2019-02-18 12:00             ` Vlad Buslov
2019-02-17 21:27           ` David Miller
2019-02-15 12:15         ` [PATCH net-next v4 07/17] net: sched: protect filter_chain list with filter_chain_lock mutex Vlad Buslov
2019-02-15 15:35         ` Vlad Buslov
2019-02-19  5:26           ` Cong Wang
2019-02-19 12:31             ` Vlad Buslov
2019-02-20 22:43               ` Cong Wang
2019-02-21 15:49                 ` Vlad Buslov
2019-02-19  5:08       ` Cong Wang
2019-02-19 15:20         ` Vlad Buslov
2019-02-20 23:00           ` Cong Wang
2019-02-21 17:11             ` Vlad Buslov
2019-02-15 22:35   ` Cong Wang
2019-02-18 11:06     ` Vlad Buslov
2019-02-18 18:31       ` Cong Wang
2019-02-11  8:55 ` [PATCH net-next v4 08/17] net: sched: introduce reference counting for tcf_proto Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 09/17] net: sched: traverse classifiers in chain with tcf_get_next_proto() Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 10/17] net: sched: refactor tp insert/delete for concurrent execution Vlad Buslov
2019-02-15 23:17   ` Cong Wang
2019-02-18 11:19     ` Vlad Buslov
2019-02-18 19:55       ` Cong Wang
2019-02-19 10:25         ` Vlad Buslov
2019-02-18 19:53   ` Cong Wang
2019-02-11  8:55 ` [PATCH net-next v4 11/17] net: sched: prevent insertion of new classifiers during chain flush Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 12/17] net: sched: track rtnl lock status when validating extensions Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 13/17] net: sched: extend proto ops with 'put' callback Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 14/17] net: sched: extend proto ops to support unlocked classifiers Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 15/17] net: sched: add flags to Qdisc class ops struct Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 16/17] net: sched: refactor tcf_block_find() into standalone functions Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 17/17] net: sched: unlock rules update API Vlad Buslov
2019-02-18 18:56   ` Cong Wang
2019-02-12 18:42 ` [PATCH net-next v4 00/17] Refactor classifier API to work with chain/classifiers without rtnl lock David Miller

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190211085548.7190-6-vladbu@mellanox.com \
    --to=vladbu@mellanox.com \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org netdev@archiver.kernel.org
	public-inbox-index netdev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox