Netdev Archive on lore.kernel.org
 help / color / Atom feed
From: Vlad Buslov <vladbu@mellanox.com>
To: netdev@vger.kernel.org
Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us,
	davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net,
	Vlad Buslov <vladbu@mellanox.com>
Subject: [PATCH net-next v4 10/17] net: sched: refactor tp insert/delete for concurrent execution
Date: Mon, 11 Feb 2019 10:55:41 +0200
Message-ID: <20190211085548.7190-11-vladbu@mellanox.com> (raw)
In-Reply-To: <20190211085548.7190-1-vladbu@mellanox.com>

Implement unique insertion function to atomically attach tcf_proto to chain
after verifying that no other tcf proto with specified priority exists.
Implement delete function that verifies that tp is actually empty before
deleting it. Use these functions to refactor cls API to account for
concurrent tp and rule update instead of relying on rtnl lock. Add new
'deleting' flag to tcf proto. Use it to restart search when iterating over
tp's on chain to prevent accessing potentially inval tp->next pointer.

Extend tcf proto with spinlock that is intended to be used to protect its
data from concurrent modification instead of relying on rtnl mutex. Use it
to protect 'deleting' flag. Add lockdep macros to validate that lock is
held when accessing protected fields.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/sch_generic.h |  18 +++++
 net/sched/cls_api.c       | 177 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 170 insertions(+), 25 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4372c08fc4d9..083e566fc380 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -322,6 +322,11 @@ struct tcf_proto {
 	void			*data;
 	const struct tcf_proto_ops	*ops;
 	struct tcf_chain	*chain;
+	/* Lock protects tcf_proto shared state and can be used by unlocked
+	 * classifiers to protect their private data.
+	 */
+	spinlock_t		lock;
+	bool			deleting;
 	refcount_t		refcnt;
 	struct rcu_head		rcu;
 };
@@ -382,16 +387,29 @@ static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
 {
 	return lockdep_is_held(&chain->filter_chain_lock);
 }
+
+static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
+{
+	return lockdep_is_held(&tp->lock);
+}
 #else
 static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain)
 {
 	return true;
 }
+
+static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
+{
+	return true;
+}
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
 #define tcf_chain_dereference(p, chain)					\
 	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))
 
+#define tcf_proto_dereference(p, tp)					\
+	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))
+
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 {
 	if (*flags & TCA_CLS_FLAGS_IN_HW)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dca8a3bee9c2..c6452e3bfc6a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -180,6 +180,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	spin_lock_init(&tp->lock);
 	refcount_set(&tp->refcnt, 1);
 
 	err = tp->ops->init(tp);
@@ -217,6 +218,49 @@ static void tcf_proto_put(struct tcf_proto *tp,
 		tcf_proto_destroy(tp, extack);
 }
 
+static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
+{
+	return -1;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp)
+{
+	struct tcf_walker walker = { .fn = walker_noop, };
+
+	if (tp->ops->walk) {
+		tp->ops->walk(tp, &walker);
+		return !walker.stop;
+	}
+	return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	if (tcf_proto_is_empty(tp))
+		tp->deleting = true;
+	spin_unlock(&tp->lock);
+	return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	tp->deleting = true;
+	spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+	bool deleting;
+
+	spin_lock(&tp->lock);
+	deleting = tp->deleting;
+	spin_unlock(&tp->lock);
+
+	return deleting;
+}
+
 #define ASSERT_BLOCK_LOCKED(block)					\
 	lockdep_assert_held(&(block)->lock)
 
@@ -983,13 +1027,27 @@ EXPORT_SYMBOL(tcf_get_next_chain);
 static struct tcf_proto *
 __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
 {
+	u32 prio = 0;
+
 	ASSERT_RTNL();
 	mutex_lock(&chain->filter_chain_lock);
 
-	if (!tp)
+	if (!tp) {
 		tp = tcf_chain_dereference(chain->filter_chain, chain);
-	else
+	} else if (tcf_proto_is_deleting(tp)) {
+		/* 'deleting' flag is set and chain->filter_chain_lock was
+		 * unlocked, which means next pointer could be invalid. Restart
+		 * search.
+		 */
+		prio = tp->prio + 1;
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+
+		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+			if (!tp->deleting && tp->prio >= prio)
+				break;
+	} else {
 		tp = tcf_chain_dereference(tp->next, chain);
+	}
 
 	if (tp)
 		tcf_proto_get(tp);
@@ -1569,6 +1627,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 {
 	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
 
+	tcf_proto_mark_delete(tp);
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
@@ -1577,6 +1636,79 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 					   struct tcf_chain_info *chain_info,
 					   u32 protocol, u32 prio,
+					   bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+						    struct tcf_proto *tp_new,
+						    u32 protocol, u32 prio)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	tp = tcf_chain_tp_find(chain, &chain_info,
+			       protocol, prio, false);
+	if (!tp)
+		tcf_chain_tp_insert(chain, &chain_info, tp_new);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	if (tp) {
+		tcf_proto_destroy(tp_new, NULL);
+		tp_new = tp;
+	}
+
+	return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+				      struct tcf_proto *tp,
+				      struct netlink_ext_ack *extack)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp_iter;
+	struct tcf_proto **pprev;
+	struct tcf_proto *next;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	/* Atomically find and remove tp from chain. */
+	for (pprev = &chain->filter_chain;
+	     (tp_iter = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp_iter->next) {
+		if (tp_iter == tp) {
+			chain_info.pprev = pprev;
+			chain_info.next = tp_iter->next;
+			WARN_ON(tp_iter->deleting);
+			break;
+		}
+	}
+	/* Verify that tp still exists and no new filters were inserted
+	 * concurrently.
+	 * Mark tp for deletion if it is empty.
+	 */
+	if (!tp_iter || !tcf_proto_check_delete(tp)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		return;
+	}
+
+	next = tcf_chain_dereference(chain_info.next, chain);
+	if (tp == chain->filter_chain)
+		tcf_chain0_head_change(chain, next);
+	RCU_INIT_POINTER(*chain_info.pprev, next);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	tcf_proto_put(tp, extack);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info,
+					   u32 protocol, u32 prio,
 					   bool prio_allocate)
 {
 	struct tcf_proto **pprev;
@@ -1809,6 +1941,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	}
 
 	if (tp == NULL) {
+		struct tcf_proto *tp_new = NULL;
+
 		/* Proto-tcf does not exist, create new one */
 
 		if (tca[TCA_KIND] == NULL || !protocol) {
@@ -1828,25 +1962,25 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 							       &chain_info));
 
 		mutex_unlock(&chain->filter_chain_lock);
-		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
-				      protocol, prio, chain, extack);
-		if (IS_ERR(tp)) {
-			err = PTR_ERR(tp);
+		tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+					  protocol, prio, chain, extack);
+		if (IS_ERR(tp_new)) {
+			err = PTR_ERR(tp_new);
 			goto errout;
 		}
 
-		mutex_lock(&chain->filter_chain_lock);
-		tcf_chain_tp_insert(chain, &chain_info, tp);
-		mutex_unlock(&chain->filter_chain_lock);
 		tp_created = 1;
-	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
-		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
-		err = -EINVAL;
-		goto errout_locked;
+		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio);
 	} else {
 		mutex_unlock(&chain->filter_chain_lock);
 	}
 
+	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
+		err = -EINVAL;
+		goto errout;
+	}
+
 	fh = tp->ops->get(tp, t->tcm_handle);
 
 	if (!fh) {
@@ -1873,12 +2007,10 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	if (err == 0)
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_NEWTFILTER, false);
-	else if (tp_created)
-		tcf_proto_destroy(tp, NULL);
 
 errout:
-	if (chain)
-		tcf_chain_put(chain);
+	if (err && tp_created)
+		tcf_chain_tp_delete_empty(chain, tp, NULL);
 	if (chain) {
 		if (tp && !IS_ERR(tp))
 			tcf_proto_put(tp, NULL);
@@ -1984,9 +2116,9 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		tcf_chain_tp_remove(chain, &chain_info, tp);
 		mutex_unlock(&chain->filter_chain_lock);
 
+		tcf_proto_put(tp, NULL);
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_DELTFILTER, false);
-		tcf_proto_destroy(tp, extack);
 		err = 0;
 		goto errout;
 	}
@@ -2005,13 +2137,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 					 extack);
 		if (err)
 			goto errout;
-		if (last) {
-			mutex_lock(&chain->filter_chain_lock);
-			tcf_chain_tp_remove(chain, &chain_info, tp);
-			mutex_unlock(&chain->filter_chain_lock);
-
-			tcf_proto_destroy(tp, extack);
-		}
+		if (last)
+			tcf_chain_tp_delete_empty(chain, tp, extack);
 	}
 
 errout:
-- 
2.13.6


  parent reply index

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-11  8:55 [PATCH net-next v4 00/17] Refactor classifier API to work with chain/classifiers without rtnl lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 01/17] net: sched: protect block state with mutex Vlad Buslov
2019-02-11 14:15   ` Jiri Pirko
2019-02-11  8:55 ` [PATCH net-next v4 02/17] net: sched: protect chain->explicitly_created with block->lock Vlad Buslov
2019-02-11 14:15   ` Jiri Pirko
2019-02-11  8:55 ` [PATCH net-next v4 03/17] net: sched: refactor tc_ctl_chain() to use block->lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 04/17] net: sched: protect block->chain0 with block->lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 05/17] net: sched: traverse chains in block with tcf_get_next_chain() Vlad Buslov
2019-02-15 22:21   ` Cong Wang
2019-02-18 10:07     ` Vlad Buslov
2019-02-18 18:26       ` Cong Wang
2019-02-19 16:04         ` Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 06/17] net: sched: protect chain template accesses with block lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 07/17] net: sched: protect filter_chain list with filter_chain_lock mutex Vlad Buslov
2019-02-14 18:24   ` Ido Schimmel
2019-02-15 10:02     ` Vlad Buslov
2019-02-15 11:30       ` Ido Schimmel
2019-02-15 12:11         ` [PATCH] net: sched: matchall: verify that filter is not NULL in mall_walk() Vlad Buslov
2019-02-15 13:47           ` Ido Schimmel
2019-02-16  0:24           ` Cong Wang
2019-02-18 12:00             ` Vlad Buslov
2019-02-17 21:27           ` David Miller
2019-02-15 12:15         ` [PATCH net-next v4 07/17] net: sched: protect filter_chain list with filter_chain_lock mutex Vlad Buslov
2019-02-15 15:35         ` Vlad Buslov
2019-02-19  5:26           ` Cong Wang
2019-02-19 12:31             ` Vlad Buslov
2019-02-20 22:43               ` Cong Wang
2019-02-21 15:49                 ` Vlad Buslov
2019-02-19  5:08       ` Cong Wang
2019-02-19 15:20         ` Vlad Buslov
2019-02-20 23:00           ` Cong Wang
2019-02-21 17:11             ` Vlad Buslov
2019-02-15 22:35   ` Cong Wang
2019-02-18 11:06     ` Vlad Buslov
2019-02-18 18:31       ` Cong Wang
2019-02-11  8:55 ` [PATCH net-next v4 08/17] net: sched: introduce reference counting for tcf_proto Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 09/17] net: sched: traverse classifiers in chain with tcf_get_next_proto() Vlad Buslov
2019-02-11  8:55 ` Vlad Buslov [this message]
2019-02-15 23:17   ` [PATCH net-next v4 10/17] net: sched: refactor tp insert/delete for concurrent execution Cong Wang
2019-02-18 11:19     ` Vlad Buslov
2019-02-18 19:55       ` Cong Wang
2019-02-19 10:25         ` Vlad Buslov
2019-02-18 19:53   ` Cong Wang
2019-02-11  8:55 ` [PATCH net-next v4 11/17] net: sched: prevent insertion of new classifiers during chain flush Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 12/17] net: sched: track rtnl lock status when validating extensions Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 13/17] net: sched: extend proto ops with 'put' callback Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 14/17] net: sched: extend proto ops to support unlocked classifiers Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 15/17] net: sched: add flags to Qdisc class ops struct Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 16/17] net: sched: refactor tcf_block_find() into standalone functions Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 17/17] net: sched: unlock rules update API Vlad Buslov
2019-02-18 18:56   ` Cong Wang
2019-02-12 18:42 ` [PATCH net-next v4 00/17] Refactor classifier API to work with chain/classifiers without rtnl lock David Miller

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190211085548.7190-11-vladbu@mellanox.com \
    --to=vladbu@mellanox.com \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org
	public-inbox-index netdev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git