Netdev Archive on lore.kernel.org
 help / color / Atom feed
From: Vlad Buslov <vladbu@mellanox.com>
To: netdev@vger.kernel.org
Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us,
	davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net,
	Vlad Buslov <vladbu@mellanox.com>
Subject: [PATCH net-next v4 17/17] net: sched: unlock rules update API
Date: Mon, 11 Feb 2019 10:55:48 +0200
Message-ID: <20190211085548.7190-18-vladbu@mellanox.com> (raw)
In-Reply-To: <20190211085548.7190-1-vladbu@mellanox.com>

Register netlink protocol handlers for message types RTM_NEWTFILTER,
RTM_DELTFILTER, RTM_GETTFILTER as unlocked. Set rtnl_held variable that
tracks rtnl mutex state to be false by default.

Introduce tcf_proto_is_unlocked() helper that is used to check
tcf_proto_ops->flag to determine if ops can be called without taking rtnl
lock. Manually lookup Qdisc, class and block in rule update handlers.
Verify that both Qdisc ops and proto ops are unlocked before using any of
their callbacks, and obtain rtnl lock otherwise.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_api.c | 131 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 114 insertions(+), 17 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5f9373ee47ce..266fcb34fefe 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -163,6 +163,23 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 	return TC_H_MAJ(first);
 }
 
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+	const struct tcf_proto_ops *ops;
+	bool ret;
+
+	ops = tcf_proto_lookup_ops(kind, false, NULL);
+	/* On error return false to take rtnl lock. Proto lookup/create
+	 * functions will perform lookup again and properly handle errors.
+	 */
+	if (IS_ERR(ops))
+		return false;
+
+	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+	module_put(ops->owner);
+	return ret;
+}
+
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 					  u32 prio, struct tcf_chain *chain,
 					  bool rtnl_held,
@@ -1312,8 +1329,12 @@ static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
 	if (!IS_ERR_OR_NULL(block))
 		tcf_block_refcnt_put(block, rtnl_held);
 
-	if (q)
-		qdisc_put(q);
+	if (q) {
+		if (rtnl_held)
+			qdisc_put(q);
+		else
+			qdisc_put_unlocked(q);
+	}
 }
 
 struct tcf_block_owner_item {
@@ -1966,7 +1987,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	void *fh;
 	int err;
 	int tp_created;
-	bool rtnl_held = true;
+	bool rtnl_held = false;
 
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -1985,6 +2006,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	parent = t->tcm_parent;
 	tp = NULL;
 	cl = 0;
+	block = NULL;
 
 	if (prio == 0) {
 		/* If no priority is provided by the user,
@@ -2001,8 +2023,27 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
 	/* Find head of filter chain. */
 
-	block = tcf_block_find(net, &q, &parent, &cl,
-			       t->tcm_ifindex, t->tcm_block_index, extack);
+	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+	if (err)
+		return err;
+
+	/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
+	 * block is shared (no qdisc found), qdisc is not unlocked, classifier
+	 * type is not specified, classifier is not unlocked.
+	 */
+	if (rtnl_held ||
+	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+		rtnl_held = true;
+		rtnl_lock();
+	}
+
+	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+	if (err)
+		goto errout;
+
+	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+				 extack);
 	if (IS_ERR(block)) {
 		err = PTR_ERR(block);
 		goto errout;
@@ -2123,9 +2164,18 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 			tcf_chain_put(chain);
 	}
 	tcf_block_release(q, block, rtnl_held);
-	if (err == -EAGAIN)
+
+	if (rtnl_held)
+		rtnl_unlock();
+
+	if (err == -EAGAIN) {
+		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
+		 * of target chain.
+		 */
+		rtnl_held = true;
 		/* Replay the request. */
 		goto replay;
+	}
 	return err;
 
 errout_locked:
@@ -2146,12 +2196,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	struct Qdisc *q = NULL;
 	struct tcf_chain_info chain_info;
 	struct tcf_chain *chain = NULL;
-	struct tcf_block *block;
+	struct tcf_block *block = NULL;
 	struct tcf_proto *tp = NULL;
 	unsigned long cl = 0;
 	void *fh = NULL;
 	int err;
-	bool rtnl_held = true;
+	bool rtnl_held = false;
 
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -2172,8 +2222,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
 	/* Find head of filter chain. */
 
-	block = tcf_block_find(net, &q, &parent, &cl,
-			       t->tcm_ifindex, t->tcm_block_index, extack);
+	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+	if (err)
+		return err;
+
+	/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
+	 * found), qdisc is not unlocked, classifier type is not specified,
+	 * classifier is not unlocked.
+	 */
+	if (!prio ||
+	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+		rtnl_held = true;
+		rtnl_lock();
+	}
+
+	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+	if (err)
+		goto errout;
+
+	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+				 extack);
 	if (IS_ERR(block)) {
 		err = PTR_ERR(block);
 		goto errout;
@@ -2255,6 +2324,10 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		tcf_chain_put(chain);
 	}
 	tcf_block_release(q, block, rtnl_held);
+
+	if (rtnl_held)
+		rtnl_unlock();
+
 	return err;
 
 errout_locked:
@@ -2275,12 +2348,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	struct Qdisc *q = NULL;
 	struct tcf_chain_info chain_info;
 	struct tcf_chain *chain = NULL;
-	struct tcf_block *block;
+	struct tcf_block *block = NULL;
 	struct tcf_proto *tp = NULL;
 	unsigned long cl = 0;
 	void *fh = NULL;
 	int err;
-	bool rtnl_held = true;
+	bool rtnl_held = false;
 
 	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
 	if (err < 0)
@@ -2298,8 +2371,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
 	/* Find head of filter chain. */
 
-	block = tcf_block_find(net, &q, &parent, &cl,
-			       t->tcm_ifindex, t->tcm_block_index, extack);
+	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+	if (err)
+		return err;
+
+	/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
+	 * unlocked, classifier type is not specified, classifier is not
+	 * unlocked.
+	 */
+	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+	    !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+		rtnl_held = true;
+		rtnl_lock();
+	}
+
+	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+	if (err)
+		goto errout;
+
+	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+				 extack);
 	if (IS_ERR(block)) {
 		err = PTR_ERR(block);
 		goto errout;
@@ -2352,6 +2443,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		tcf_chain_put(chain);
 	}
 	tcf_block_release(q, block, rtnl_held);
+
+	if (rtnl_held)
+		rtnl_unlock();
+
 	return err;
 }
 
@@ -3214,10 +3309,12 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_rhash_setup_block_ht;
 
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
-		      tc_dump_tfilter, 0);
+		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
-- 
2.13.6


  parent reply index

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-11  8:55 [PATCH net-next v4 00/17] Refactor classifier API to work with chain/classifiers without rtnl lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 01/17] net: sched: protect block state with mutex Vlad Buslov
2019-02-11 14:15   ` Jiri Pirko
2019-02-11  8:55 ` [PATCH net-next v4 02/17] net: sched: protect chain->explicitly_created with block->lock Vlad Buslov
2019-02-11 14:15   ` Jiri Pirko
2019-02-11  8:55 ` [PATCH net-next v4 03/17] net: sched: refactor tc_ctl_chain() to use block->lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 04/17] net: sched: protect block->chain0 with block->lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 05/17] net: sched: traverse chains in block with tcf_get_next_chain() Vlad Buslov
2019-02-15 22:21   ` Cong Wang
2019-02-18 10:07     ` Vlad Buslov
2019-02-18 18:26       ` Cong Wang
2019-02-19 16:04         ` Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 06/17] net: sched: protect chain template accesses with block lock Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 07/17] net: sched: protect filter_chain list with filter_chain_lock mutex Vlad Buslov
2019-02-14 18:24   ` Ido Schimmel
2019-02-15 10:02     ` Vlad Buslov
2019-02-15 11:30       ` Ido Schimmel
2019-02-15 12:11         ` [PATCH] net: sched: matchall: verify that filter is not NULL in mall_walk() Vlad Buslov
2019-02-15 13:47           ` Ido Schimmel
2019-02-16  0:24           ` Cong Wang
2019-02-18 12:00             ` Vlad Buslov
2019-02-17 21:27           ` David Miller
2019-02-15 12:15         ` [PATCH net-next v4 07/17] net: sched: protect filter_chain list with filter_chain_lock mutex Vlad Buslov
2019-02-15 15:35         ` Vlad Buslov
2019-02-19  5:26           ` Cong Wang
2019-02-19 12:31             ` Vlad Buslov
2019-02-20 22:43               ` Cong Wang
2019-02-21 15:49                 ` Vlad Buslov
2019-02-19  5:08       ` Cong Wang
2019-02-19 15:20         ` Vlad Buslov
2019-02-20 23:00           ` Cong Wang
2019-02-21 17:11             ` Vlad Buslov
2019-02-15 22:35   ` Cong Wang
2019-02-18 11:06     ` Vlad Buslov
2019-02-18 18:31       ` Cong Wang
2019-02-11  8:55 ` [PATCH net-next v4 08/17] net: sched: introduce reference counting for tcf_proto Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 09/17] net: sched: traverse classifiers in chain with tcf_get_next_proto() Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 10/17] net: sched: refactor tp insert/delete for concurrent execution Vlad Buslov
2019-02-15 23:17   ` Cong Wang
2019-02-18 11:19     ` Vlad Buslov
2019-02-18 19:55       ` Cong Wang
2019-02-19 10:25         ` Vlad Buslov
2019-02-18 19:53   ` Cong Wang
2019-02-11  8:55 ` [PATCH net-next v4 11/17] net: sched: prevent insertion of new classifiers during chain flush Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 12/17] net: sched: track rtnl lock status when validating extensions Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 13/17] net: sched: extend proto ops with 'put' callback Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 14/17] net: sched: extend proto ops to support unlocked classifiers Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 15/17] net: sched: add flags to Qdisc class ops struct Vlad Buslov
2019-02-11  8:55 ` [PATCH net-next v4 16/17] net: sched: refactor tcf_block_find() into standalone functions Vlad Buslov
2019-02-11  8:55 ` Vlad Buslov [this message]
2019-02-18 18:56   ` [PATCH net-next v4 17/17] net: sched: unlock rules update API Cong Wang
2019-02-12 18:42 ` [PATCH net-next v4 00/17] Refactor classifier API to work with chain/classifiers without rtnl lock David Miller

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190211085548.7190-18-vladbu@mellanox.com \
    --to=vladbu@mellanox.com \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org netdev@archiver.kernel.org
	public-inbox-index netdev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox