All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v4 0/2] net/sched: transition act_pedit to rcu and percpu stats
@ 2023-01-30 16:02 Pedro Tammela
  2023-01-30 16:02 ` [PATCH net-next v4 1/2] " Pedro Tammela
  2023-01-30 16:02 ` [PATCH net-next v4 2/2] net/sched: simplify tcf_pedit_act Pedro Tammela
  0 siblings, 2 replies; 6+ messages in thread
From: Pedro Tammela @ 2023-01-30 16:02 UTC (permalink / raw)
  To: netdev
  Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni,
	simon.horman, Pedro Tammela

The software pedit action didn't get the same love as some of the
other actions and it's still using spinlocks and shared stats.
Therefore, transition the action to rcu and percpu stats which
improves the action's performance.

We test this change with a very simple packet forwarding setup:

tc filter add dev ens2f0 ingress protocol ip matchall \
   action pedit ex munge eth src set b8:ce:f6:4b:68:35 pipe \
   action pedit ex munge eth dst set ac:1f:6b:e4:ff:93 pipe \
   action mirred egress redirect dev ens2f1
tc filter add dev ens2f1 ingress protocol ip matchall \
   action pedit ex munge eth src set b8:ce:f6:4b:68:34 pipe \
   action pedit ex munge eth dst set ac:1f:6b:e4:ff:92 pipe \
   action mirred egress redirect dev ens2f0

Using TRex with a http-like profile, in our setup with a 25G NIC
and a 26 cores Intel CPU, we observe the following in perf:
   before:
    11.59%  2.30%  [kernel]  [k] tcf_pedit_act
       2.55% tcf_pedit_act
             8.38% _raw_spin_lock
                       6.43% native_queued_spin_lock_slowpath
   after:
    1.46%  1.46%  [kernel]  [k] tcf_pedit_act

v3->v4:
- Address Simon's comments

v2->v3:
- Add missing change in act idr create

v1->v2:
- Fix lock unbalance found by sparse

Pedro Tammela (2):
  net/sched: transition act_pedit to rcu and percpu stats
  net/sched: simplify tcf_pedit_act

 include/net/tc_act/tc_pedit.h |  81 ++++++++--
 net/sched/act_pedit.c         | 273 +++++++++++++++++++---------------
 2 files changed, 217 insertions(+), 137 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH net-next v4 1/2] net/sched: transition act_pedit to rcu and percpu stats
  2023-01-30 16:02 [PATCH net-next v4 0/2] net/sched: transition act_pedit to rcu and percpu stats Pedro Tammela
@ 2023-01-30 16:02 ` Pedro Tammela
  2023-01-31 12:37   ` Simon Horman
  2023-01-30 16:02 ` [PATCH net-next v4 2/2] net/sched: simplify tcf_pedit_act Pedro Tammela
  1 sibling, 1 reply; 6+ messages in thread
From: Pedro Tammela @ 2023-01-30 16:02 UTC (permalink / raw)
  To: netdev
  Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni,
	simon.horman, Pedro Tammela

The software pedit action didn't get the same love as some of the
other actions and it's still using spinlocks and shared stats in the
datapath.
Transition the action to rcu and percpu stats as this improves the
action's performance on multiple cpu deployments.

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
---
 include/net/tc_act/tc_pedit.h |  81 +++++++++++++++----
 net/sched/act_pedit.c         | 144 +++++++++++++++++++++-------------
 2 files changed, 154 insertions(+), 71 deletions(-)

diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 3e02709a1df6..83fe39931781 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -4,22 +4,29 @@
 
 #include <net/act_api.h>
 #include <linux/tc_act/tc_pedit.h>
+#include <linux/types.h>
 
 struct tcf_pedit_key_ex {
 	enum pedit_header_type htype;
 	enum pedit_cmd cmd;
 };
 
-struct tcf_pedit {
-	struct tc_action	common;
-	unsigned char		tcfp_nkeys;
-	unsigned char		tcfp_flags;
-	u32			tcfp_off_max_hint;
+struct tcf_pedit_parms {
 	struct tc_pedit_key	*tcfp_keys;
 	struct tcf_pedit_key_ex	*tcfp_keys_ex;
+	u32 tcfp_off_max_hint;
+	unsigned char tcfp_nkeys;
+	unsigned char tcfp_flags;
+	struct rcu_head rcu;
+};
+
+struct tcf_pedit {
+	struct tc_action common;
+	struct tcf_pedit_parms __rcu *parms;
 };
 
 #define to_pedit(a) ((struct tcf_pedit *)a)
+#define to_pedit_parms(a) (rcu_dereference(to_pedit(a)->parms))
 
 static inline bool is_tcf_pedit(const struct tc_action *a)
 {
@@ -32,37 +39,81 @@ static inline bool is_tcf_pedit(const struct tc_action *a)
 
 static inline int tcf_pedit_nkeys(const struct tc_action *a)
 {
-	return to_pedit(a)->tcfp_nkeys;
+	struct tcf_pedit_parms *parms;
+	int nkeys;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	nkeys = parms->tcfp_nkeys;
+	rcu_read_unlock();
+
+	return nkeys;
 }
 
 static inline u32 tcf_pedit_htype(const struct tc_action *a, int index)
 {
-	if (to_pedit(a)->tcfp_keys_ex)
-		return to_pedit(a)->tcfp_keys_ex[index].htype;
+	u32 htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+	struct tcf_pedit_parms *parms;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	if (parms->tcfp_keys_ex)
+		htype = parms->tcfp_keys_ex[index].htype;
+	rcu_read_unlock();
 
-	return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+	return htype;
 }
 
 static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index)
 {
-	if (to_pedit(a)->tcfp_keys_ex)
-		return to_pedit(a)->tcfp_keys_ex[index].cmd;
+	struct tcf_pedit_parms *parms;
+	u32 cmd = __PEDIT_CMD_MAX;
 
-	return __PEDIT_CMD_MAX;
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	if (parms->tcfp_keys_ex)
+		cmd = parms->tcfp_keys_ex[index].cmd;
+	rcu_read_unlock();
+
+	return cmd;
 }
 
 static inline u32 tcf_pedit_mask(const struct tc_action *a, int index)
 {
-	return to_pedit(a)->tcfp_keys[index].mask;
+	struct tcf_pedit_parms *parms;
+	u32 mask;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	mask = parms->tcfp_keys[index].mask;
+	rcu_read_unlock();
+
+	return mask;
 }
 
 static inline u32 tcf_pedit_val(const struct tc_action *a, int index)
 {
-	return to_pedit(a)->tcfp_keys[index].val;
+	struct tcf_pedit_parms *parms;
+	u32 val;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	val = parms->tcfp_keys[index].val;
+	rcu_read_unlock();
+
+	return val;
 }
 
 static inline u32 tcf_pedit_offset(const struct tc_action *a, int index)
 {
-	return to_pedit(a)->tcfp_keys[index].off;
+	struct tcf_pedit_parms *parms;
+	u32 off;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	off = parms->tcfp_keys[index].off;
+	rcu_read_unlock();
+
+	return off;
 }
 #endif /* __NET_TC_PED_H */
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0378e9f0121..c8e8748dc258 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -134,6 +134,17 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 	return -EINVAL;
 }
 
+static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
+{
+	struct tcf_pedit_parms *parms =
+		container_of(head, struct tcf_pedit_parms, rcu);
+
+	kfree(parms->tcfp_keys_ex);
+	kfree(parms->tcfp_keys);
+
+	kfree(parms);
+}
+
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
 			  struct tcf_proto *tp, u32 flags,
@@ -143,8 +154,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	bool bind = flags & TCA_ACT_FLAGS_BIND;
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
 	struct tcf_chain *goto_ch = NULL;
-	struct tc_pedit_key *keys = NULL;
-	struct tcf_pedit_key_ex *keys_ex;
+	struct tcf_pedit_parms *oparms, *nparms;
 	struct tc_pedit *parm;
 	struct nlattr *pattr;
 	struct tcf_pedit *p;
@@ -181,18 +191,25 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	}
 
-	keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
-	if (IS_ERR(keys_ex))
-		return PTR_ERR(keys_ex);
+	nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+	if (!nparms)
+		return -ENOMEM;
+
+	nparms->tcfp_keys_ex =
+		tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+	if (IS_ERR(nparms->tcfp_keys_ex)) {
+		ret = PTR_ERR(nparms->tcfp_keys_ex);
+		goto out_free;
+	}
 
 	index = parm->index;
 	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (!err) {
-		ret = tcf_idr_create(tn, index, est, a,
-				     &act_pedit_ops, bind, false, flags);
+		ret = tcf_idr_create_from_flags(tn, index, est, a,
+						&act_pedit_ops, bind, flags);
 		if (ret) {
 			tcf_idr_cleanup(tn, index);
-			goto out_free;
+			goto out_free_ex;
 		}
 		ret = ACT_P_CREATED;
 	} else if (err > 0) {
@@ -204,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 		}
 	} else {
 		ret = err;
-		goto out_free;
+		goto out_free_ex;
 	}
 
 	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
@@ -212,68 +229,79 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 		ret = err;
 		goto out_release;
 	}
+
+	nparms->tcfp_off_max_hint = 0;
+	nparms->tcfp_flags = parm->flags;
+
 	p = to_pedit(*a);
 	spin_lock_bh(&p->tcf_lock);
 
+	oparms = rcu_dereference_protected(p->parms, 1);
+
 	if (ret == ACT_P_CREATED ||
-	    (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
-		keys = kmalloc(ksize, GFP_ATOMIC);
-		if (!keys) {
+	    (oparms->tcfp_nkeys && oparms->tcfp_nkeys != parm->nkeys)) {
+		nparms->tcfp_keys = kmalloc(ksize, GFP_ATOMIC);
+		if (!nparms->tcfp_keys) {
 			spin_unlock_bh(&p->tcf_lock);
 			ret = -ENOMEM;
-			goto put_chain;
+			goto out_release;
 		}
-		kfree(p->tcfp_keys);
-		p->tcfp_keys = keys;
-		p->tcfp_nkeys = parm->nkeys;
+		nparms->tcfp_nkeys = parm->nkeys;
+	} else {
+		nparms->tcfp_keys = oparms->tcfp_keys;
+		nparms->tcfp_nkeys = oparms->tcfp_nkeys;
 	}
-	memcpy(p->tcfp_keys, parm->keys, ksize);
-	p->tcfp_off_max_hint = 0;
-	for (i = 0; i < p->tcfp_nkeys; ++i) {
-		u32 cur = p->tcfp_keys[i].off;
+
+	memcpy(nparms->tcfp_keys, parm->keys, ksize);
+
+	for (i = 0; i < nparms->tcfp_nkeys; ++i) {
+		u32 cur = nparms->tcfp_keys[i].off;
 
 		/* sanitize the shift value for any later use */
-		p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
-					      p->tcfp_keys[i].shift);
+		nparms->tcfp_keys[i].shift = min_t(size_t,
+						   BITS_PER_TYPE(int) - 1,
+						   nparms->tcfp_keys[i].shift);
 
 		/* The AT option can read a single byte, we can bound the actual
 		 * value with uchar max.
 		 */
-		cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+		cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
 
 		/* Each key touches 4 bytes starting from the computed offset */
-		p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+		nparms->tcfp_off_max_hint =
+			max(nparms->tcfp_off_max_hint, cur + 4);
 	}
 
-	p->tcfp_flags = parm->flags;
 	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 
-	kfree(p->tcfp_keys_ex);
-	p->tcfp_keys_ex = keys_ex;
+	rcu_assign_pointer(p->parms, nparms);
 
 	spin_unlock_bh(&p->tcf_lock);
+
+	if (oparms)
+		call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
+
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
+
 	return ret;
 
-put_chain:
-	if (goto_ch)
-		tcf_chain_put_by_act(goto_ch);
 out_release:
 	tcf_idr_release(*a, bind);
+out_free_ex:
+	kfree(nparms->tcfp_keys_ex);
 out_free:
-	kfree(keys_ex);
+	kfree(nparms);
 	return ret;
-
 }
 
 static void tcf_pedit_cleanup(struct tc_action *a)
 {
 	struct tcf_pedit *p = to_pedit(a);
-	struct tc_pedit_key *keys = p->tcfp_keys;
+	struct tcf_pedit_parms *parms;
 
-	kfree(keys);
-	kfree(p->tcfp_keys_ex);
+	parms = rcu_dereference_protected(p->parms, 1);
+	call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
 }
 
 static bool offset_valid(struct sk_buff *skb, int offset)
@@ -325,28 +353,30 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
 				    struct tcf_result *res)
 {
 	struct tcf_pedit *p = to_pedit(a);
+	struct tcf_pedit_parms *parms;
 	u32 max_offset;
 	int i;
 
-	spin_lock(&p->tcf_lock);
+	parms = rcu_dereference_bh(p->parms);
 
 	max_offset = (skb_transport_header_was_set(skb) ?
 		      skb_transport_offset(skb) :
 		      skb_network_offset(skb)) +
-		     p->tcfp_off_max_hint;
+		     parms->tcfp_off_max_hint;
 	if (skb_ensure_writable(skb, min(skb->len, max_offset)))
-		goto unlock;
+		goto done;
 
 	tcf_lastuse_update(&p->tcf_tm);
+	tcf_action_update_bstats(&p->common, skb);
 
-	if (p->tcfp_nkeys > 0) {
-		struct tc_pedit_key *tkey = p->tcfp_keys;
-		struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
+	if (parms->tcfp_nkeys > 0) {
+		struct tc_pedit_key *tkey = parms->tcfp_keys;
+		struct tcf_pedit_key_ex *tkey_ex = parms->tcfp_keys_ex;
 		enum pedit_header_type htype =
 			TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
 		enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
 
-		for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
+		for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
 			u32 *ptr, hdata;
 			int offset = tkey->off;
 			int hoffset;
@@ -422,11 +452,10 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
 	}
 
 bad:
+	spin_lock(&p->tcf_lock);
 	p->tcf_qstats.overlimits++;
-done:
-	bstats_update(&p->tcf_bstats, skb);
-unlock:
 	spin_unlock(&p->tcf_lock);
+done:
 	return p->tcf_action;
 }
 
@@ -445,30 +474,33 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_pedit *p = to_pedit(a);
+	struct tcf_pedit_parms *parms;
 	struct tc_pedit *opt;
 	struct tcf_t t;
 	int s;
 
-	s = struct_size(opt, keys, p->tcfp_nkeys);
+	spin_lock_bh(&p->tcf_lock);
+	parms = rcu_dereference_protected(p->parms, 1);
+	s = struct_size(opt, keys, parms->tcfp_nkeys);
 
-	/* netlink spinlocks held above us - must use ATOMIC */
 	opt = kzalloc(s, GFP_ATOMIC);
-	if (unlikely(!opt))
+	if (unlikely(!opt)) {
+		spin_unlock_bh(&p->tcf_lock);
 		return -ENOBUFS;
+	}
 
-	spin_lock_bh(&p->tcf_lock);
-	memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
+	memcpy(opt->keys, parms->tcfp_keys,
+	       flex_array_size(opt, keys, parms->tcfp_nkeys));
 	opt->index = p->tcf_index;
-	opt->nkeys = p->tcfp_nkeys;
-	opt->flags = p->tcfp_flags;
+	opt->nkeys = parms->tcfp_nkeys;
+	opt->flags = parms->tcfp_flags;
 	opt->action = p->tcf_action;
 	opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
 	opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
-	if (p->tcfp_keys_ex) {
-		if (tcf_pedit_key_ex_dump(skb,
-					  p->tcfp_keys_ex,
-					  p->tcfp_nkeys))
+	if (parms->tcfp_keys_ex) {
+		if (tcf_pedit_key_ex_dump(skb, parms->tcfp_keys_ex,
+					  parms->tcfp_nkeys))
 			goto nla_put_failure;
 
 		if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next v4 2/2] net/sched: simplify tcf_pedit_act
  2023-01-30 16:02 [PATCH net-next v4 0/2] net/sched: transition act_pedit to rcu and percpu stats Pedro Tammela
  2023-01-30 16:02 ` [PATCH net-next v4 1/2] " Pedro Tammela
@ 2023-01-30 16:02 ` Pedro Tammela
  2023-01-31 12:37   ` Simon Horman
  1 sibling, 1 reply; 6+ messages in thread
From: Pedro Tammela @ 2023-01-30 16:02 UTC (permalink / raw)
  To: netdev
  Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni,
	simon.horman, Pedro Tammela

Remove the check for a negative number of keys as
this cannot ever happen

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
---
 net/sched/act_pedit.c | 137 +++++++++++++++++++++---------------------
 1 file changed, 67 insertions(+), 70 deletions(-)

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c8e8748dc258..d0098b9ebb1b 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -352,8 +352,12 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
 				    const struct tc_action *a,
 				    struct tcf_result *res)
 {
+	enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+	enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
 	struct tcf_pedit *p = to_pedit(a);
+	struct tcf_pedit_key_ex *tkey_ex;
 	struct tcf_pedit_parms *parms;
+	struct tc_pedit_key *tkey;
 	u32 max_offset;
 	int i;
 
@@ -369,88 +373,81 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
 	tcf_lastuse_update(&p->tcf_tm);
 	tcf_action_update_bstats(&p->common, skb);
 
-	if (parms->tcfp_nkeys > 0) {
-		struct tc_pedit_key *tkey = parms->tcfp_keys;
-		struct tcf_pedit_key_ex *tkey_ex = parms->tcfp_keys_ex;
-		enum pedit_header_type htype =
-			TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
-		enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
-
-		for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
-			u32 *ptr, hdata;
-			int offset = tkey->off;
-			int hoffset;
-			u32 val;
-			int rc;
-
-			if (tkey_ex) {
-				htype = tkey_ex->htype;
-				cmd = tkey_ex->cmd;
-
-				tkey_ex++;
-			}
+	tkey = parms->tcfp_keys;
+	tkey_ex = parms->tcfp_keys_ex;
 
-			rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
-			if (rc) {
-				pr_info("tc action pedit bad header type specified (0x%x)\n",
-					htype);
-				goto bad;
-			}
+	for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
+		int offset = tkey->off;
+		u32 *ptr, hdata;
+		int hoffset;
+		u32 val;
+		int rc;
 
-			if (tkey->offmask) {
-				u8 *d, _d;
-
-				if (!offset_valid(skb, hoffset + tkey->at)) {
-					pr_info("tc action pedit 'at' offset %d out of bounds\n",
-						hoffset + tkey->at);
-					goto bad;
-				}
-				d = skb_header_pointer(skb, hoffset + tkey->at,
-						       sizeof(_d), &_d);
-				if (!d)
-					goto bad;
-				offset += (*d & tkey->offmask) >> tkey->shift;
-			}
+		if (tkey_ex) {
+			htype = tkey_ex->htype;
+			cmd = tkey_ex->cmd;
 
-			if (offset % 4) {
-				pr_info("tc action pedit offset must be on 32 bit boundaries\n");
-				goto bad;
-			}
+			tkey_ex++;
+		}
 
-			if (!offset_valid(skb, hoffset + offset)) {
-				pr_info("tc action pedit offset %d out of bounds\n",
-					hoffset + offset);
-				goto bad;
-			}
+		rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+		if (rc) {
+			pr_info("tc action pedit bad header type specified (0x%x)\n",
+				htype);
+			goto bad;
+		}
 
-			ptr = skb_header_pointer(skb, hoffset + offset,
-						 sizeof(hdata), &hdata);
-			if (!ptr)
-				goto bad;
-			/* just do it, baby */
-			switch (cmd) {
-			case TCA_PEDIT_KEY_EX_CMD_SET:
-				val = tkey->val;
-				break;
-			case TCA_PEDIT_KEY_EX_CMD_ADD:
-				val = (*ptr + tkey->val) & ~tkey->mask;
-				break;
-			default:
-				pr_info("tc action pedit bad command (%d)\n",
-					cmd);
+		if (tkey->offmask) {
+			u8 *d, _d;
+
+			if (!offset_valid(skb, hoffset + tkey->at)) {
+				pr_info("tc action pedit 'at' offset %d out of bounds\n",
+					hoffset + tkey->at);
 				goto bad;
 			}
+			d = skb_header_pointer(skb, hoffset + tkey->at,
+					       sizeof(_d), &_d);
+			if (!d)
+				goto bad;
+			offset += (*d & tkey->offmask) >> tkey->shift;
+		}
 
-			*ptr = ((*ptr & tkey->mask) ^ val);
-			if (ptr == &hdata)
-				skb_store_bits(skb, hoffset + offset, ptr, 4);
+		if (offset % 4) {
+			pr_info("tc action pedit offset must be on 32 bit boundaries\n");
+			goto bad;
 		}
 
-		goto done;
-	} else {
-		WARN(1, "pedit BUG: index %d\n", p->tcf_index);
+		if (!offset_valid(skb, hoffset + offset)) {
+			pr_info("tc action pedit offset %d out of bounds\n",
+				hoffset + offset);
+			goto bad;
+		}
+
+		ptr = skb_header_pointer(skb, hoffset + offset,
+					 sizeof(hdata), &hdata);
+		if (!ptr)
+			goto bad;
+		/* just do it, baby */
+		switch (cmd) {
+		case TCA_PEDIT_KEY_EX_CMD_SET:
+			val = tkey->val;
+			break;
+		case TCA_PEDIT_KEY_EX_CMD_ADD:
+			val = (*ptr + tkey->val) & ~tkey->mask;
+			break;
+		default:
+			pr_info("tc action pedit bad command (%d)\n",
+				cmd);
+			goto bad;
+		}
+
+		*ptr = ((*ptr & tkey->mask) ^ val);
+		if (ptr == &hdata)
+			skb_store_bits(skb, hoffset + offset, ptr, 4);
 	}
 
+	goto done;
+
 bad:
 	spin_lock(&p->tcf_lock);
 	p->tcf_qstats.overlimits++;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4 1/2] net/sched: transition act_pedit to rcu and percpu stats
  2023-01-30 16:02 ` [PATCH net-next v4 1/2] " Pedro Tammela
@ 2023-01-31 12:37   ` Simon Horman
  2023-01-31 14:20     ` Pedro Tammela
  0 siblings, 1 reply; 6+ messages in thread
From: Simon Horman @ 2023-01-31 12:37 UTC (permalink / raw)
  To: Pedro Tammela
  Cc: netdev, jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni

On Mon, Jan 30, 2023 at 01:02:32PM -0300, Pedro Tammela wrote:
> The software pedit action didn't get the same love as some of the
> other actions and it's still using spinlocks and shared stats in the
> datapath.
> Transition the action to rcu and percpu stats as this improves the
> action's performance on multiple cpu deployments.
> 
> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> ---
>  include/net/tc_act/tc_pedit.h |  81 +++++++++++++++----
>  net/sched/act_pedit.c         | 144 +++++++++++++++++++++-------------
>  2 files changed, 154 insertions(+), 71 deletions(-)

Hi Pedro,

thanks for the update.

A few questions from my side below.

> diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
> index a0378e9f0121..c8e8748dc258 100644
> --- a/net/sched/act_pedit.c
> +++ b/net/sched/act_pedit.c
> @@ -134,6 +134,17 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
>  	return -EINVAL;
>  }
>  
> +static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
> +{
> +	struct tcf_pedit_parms *parms =
> +		container_of(head, struct tcf_pedit_parms, rcu);
> +
> +	kfree(parms->tcfp_keys_ex);
> +	kfree(parms->tcfp_keys);
> +
> +	kfree(parms);
> +}
> +
>  static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>  			  struct nlattr *est, struct tc_action **a,
>  			  struct tcf_proto *tp, u32 flags,
> @@ -143,8 +154,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>  	bool bind = flags & TCA_ACT_FLAGS_BIND;
>  	struct nlattr *tb[TCA_PEDIT_MAX + 1];
>  	struct tcf_chain *goto_ch = NULL;
> -	struct tc_pedit_key *keys = NULL;
> -	struct tcf_pedit_key_ex *keys_ex;
> +	struct tcf_pedit_parms *oparms, *nparms;
>  	struct tc_pedit *parm;
>  	struct nlattr *pattr;
>  	struct tcf_pedit *p;
> @@ -181,18 +191,25 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>  		return -EINVAL;
>  	}
>  
> -	keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
> -	if (IS_ERR(keys_ex))
> -		return PTR_ERR(keys_ex);
> +	nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
> +	if (!nparms)
> +		return -ENOMEM;
> +
> +	nparms->tcfp_keys_ex =
> +		tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
> +	if (IS_ERR(nparms->tcfp_keys_ex)) {
> +		ret = PTR_ERR(nparms->tcfp_keys_ex);
> +		goto out_free;
> +	}
>  
>  	index = parm->index;
>  	err = tcf_idr_check_alloc(tn, &index, a, bind);
>  	if (!err) {
> -		ret = tcf_idr_create(tn, index, est, a,
> -				     &act_pedit_ops, bind, false, flags);
> +		ret = tcf_idr_create_from_flags(tn, index, est, a,
> +						&act_pedit_ops, bind, flags);
>  		if (ret) {
>  			tcf_idr_cleanup(tn, index);
> -			goto out_free;
> +			goto out_free_ex;
>  		}
>  		ret = ACT_P_CREATED;
>  	} else if (err > 0) {
> @@ -204,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>  		}
>  	} else {
>  		ret = err;
> -		goto out_free;
> +		goto out_free_ex;
>  	}
>  
>  	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
> @@ -212,68 +229,79 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>  		ret = err;
>  		goto out_release;
>  	}
> +
> +	nparms->tcfp_off_max_hint = 0;
> +	nparms->tcfp_flags = parm->flags;
> +
>  	p = to_pedit(*a);
>  	spin_lock_bh(&p->tcf_lock);
>  
> +	oparms = rcu_dereference_protected(p->parms, 1);
> +
>  	if (ret == ACT_P_CREATED ||
> -	    (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
> -		keys = kmalloc(ksize, GFP_ATOMIC);
> -		if (!keys) {
> +	    (oparms->tcfp_nkeys && oparms->tcfp_nkeys != parm->nkeys)) {
> +		nparms->tcfp_keys = kmalloc(ksize, GFP_ATOMIC);
> +		if (!nparms->tcfp_keys) {
>  			spin_unlock_bh(&p->tcf_lock);
>  			ret = -ENOMEM;
> -			goto put_chain;
> +			goto out_release;

I'm a little unclear on why put_chain is no longer needed.
It seems to me that there can be a reference to goto_ch held here,
as was the case before this patch.

>  		}
> -		kfree(p->tcfp_keys);
> -		p->tcfp_keys = keys;
> -		p->tcfp_nkeys = parm->nkeys;
> +		nparms->tcfp_nkeys = parm->nkeys;
> +	} else {
> +		nparms->tcfp_keys = oparms->tcfp_keys;

I feel that I am missing something obvious:
* Here oparms->tcfp_keys is assigned to nparms->tcfp_keys.
* Later on there is a call to call_rcu(..., tcf_pedit_cleanup_rcu),
  which will free oparms->tcfp_keys some time in the future.
* But the memory bay still be accessed via tcfp_keys.

Is there a life cycle issue here?

> +		nparms->tcfp_nkeys = oparms->tcfp_nkeys;
>  	}
> -	memcpy(p->tcfp_keys, parm->keys, ksize);
> -	p->tcfp_off_max_hint = 0;
> -	for (i = 0; i < p->tcfp_nkeys; ++i) {
> -		u32 cur = p->tcfp_keys[i].off;
> +
> +	memcpy(nparms->tcfp_keys, parm->keys, ksize);
> +
> +	for (i = 0; i < nparms->tcfp_nkeys; ++i) {
> +		u32 cur = nparms->tcfp_keys[i].off;
>  
>  		/* sanitize the shift value for any later use */
> -		p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
> -					      p->tcfp_keys[i].shift);
> +		nparms->tcfp_keys[i].shift = min_t(size_t,
> +						   BITS_PER_TYPE(int) - 1,
> +						   nparms->tcfp_keys[i].shift);
>  
>  		/* The AT option can read a single byte, we can bound the actual
>  		 * value with uchar max.
>  		 */
> -		cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
> +		cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
>  
>  		/* Each key touches 4 bytes starting from the computed offset */
> -		p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
> +		nparms->tcfp_off_max_hint =
> +			max(nparms->tcfp_off_max_hint, cur + 4);
>  	}
>  
> -	p->tcfp_flags = parm->flags;
>  	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
>  
> -	kfree(p->tcfp_keys_ex);
> -	p->tcfp_keys_ex = keys_ex;
> +	rcu_assign_pointer(p->parms, nparms);
>  
>  	spin_unlock_bh(&p->tcf_lock);
> +
> +	if (oparms)
> +		call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);

	Here there is a condition on oparms being non-NULL.
	But further above oparms is dereference unconditionally.
	Is there an inconsistency here?

> +
>  	if (goto_ch)
>  		tcf_chain_put_by_act(goto_ch);
> +
>  	return ret;
>  
> -put_chain:
> -	if (goto_ch)
> -		tcf_chain_put_by_act(goto_ch);
>  out_release:
>  	tcf_idr_release(*a, bind);
> +out_free_ex:
> +	kfree(nparms->tcfp_keys_ex);
>  out_free:
> -	kfree(keys_ex);
> +	kfree(nparms);
>  	return ret;
> -
>  }

...

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4 2/2] net/sched: simplify tcf_pedit_act
  2023-01-30 16:02 ` [PATCH net-next v4 2/2] net/sched: simplify tcf_pedit_act Pedro Tammela
@ 2023-01-31 12:37   ` Simon Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2023-01-31 12:37 UTC (permalink / raw)
  To: Pedro Tammela
  Cc: netdev, jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni

On Mon, Jan 30, 2023 at 01:02:33PM -0300, Pedro Tammela wrote:
> Remove the check for a negative number of keys as
> this cannot ever happen
> 
> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>

Reviewed-by: Simon Horman <simon.horman@corigine.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v4 1/2] net/sched: transition act_pedit to rcu and percpu stats
  2023-01-31 12:37   ` Simon Horman
@ 2023-01-31 14:20     ` Pedro Tammela
  0 siblings, 0 replies; 6+ messages in thread
From: Pedro Tammela @ 2023-01-31 14:20 UTC (permalink / raw)
  To: Simon Horman
  Cc: netdev, jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni

On 31/01/2023 09:37, Simon Horman wrote:
[...]
>>   static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>>   			  struct nlattr *est, struct tc_action **a,
>>   			  struct tcf_proto *tp, u32 flags,
>> @@ -143,8 +154,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>>   	bool bind = flags & TCA_ACT_FLAGS_BIND;
>>   	struct nlattr *tb[TCA_PEDIT_MAX + 1];
>>   	struct tcf_chain *goto_ch = NULL;
>> -	struct tc_pedit_key *keys = NULL;
>> -	struct tcf_pedit_key_ex *keys_ex;
>> +	struct tcf_pedit_parms *oparms, *nparms;
>>   	struct tc_pedit *parm;
>>   	struct nlattr *pattr;
>>   	struct tcf_pedit *p;
>> @@ -181,18 +191,25 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>>   		return -EINVAL;
>>   	}
>>   
>> -	keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
>> -	if (IS_ERR(keys_ex))
>> -		return PTR_ERR(keys_ex);
>> +	nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
>> +	if (!nparms)
>> +		return -ENOMEM;
>> +
>> +	nparms->tcfp_keys_ex =
>> +		tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
>> +	if (IS_ERR(nparms->tcfp_keys_ex)) {
>> +		ret = PTR_ERR(nparms->tcfp_keys_ex);
>> +		goto out_free;
>> +	}
>>   
>>   	index = parm->index;
>>   	err = tcf_idr_check_alloc(tn, &index, a, bind);
>>   	if (!err) {
>> -		ret = tcf_idr_create(tn, index, est, a,
>> -				     &act_pedit_ops, bind, false, flags);
>> +		ret = tcf_idr_create_from_flags(tn, index, est, a,
>> +						&act_pedit_ops, bind, flags);
>>   		if (ret) {
>>   			tcf_idr_cleanup(tn, index);
>> -			goto out_free;
>> +			goto out_free_ex;
>>   		}
>>   		ret = ACT_P_CREATED;
>>   	} else if (err > 0) {
>> @@ -204,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>>   		}
>>   	} else {
>>   		ret = err;
>> -		goto out_free;
>> +		goto out_free_ex;
>>   	}
>>   
>>   	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
>> @@ -212,68 +229,79 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
>>   		ret = err;
>>   		goto out_release;
>>   	}
>> +
>> +	nparms->tcfp_off_max_hint = 0;
>> +	nparms->tcfp_flags = parm->flags;
>> +
>>   	p = to_pedit(*a);
>>   	spin_lock_bh(&p->tcf_lock);
>>   
>> +	oparms = rcu_dereference_protected(p->parms, 1);
>> +
>>   	if (ret == ACT_P_CREATED ||
>> -	    (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
>> -		keys = kmalloc(ksize, GFP_ATOMIC);
>> -		if (!keys) {
>> +	    (oparms->tcfp_nkeys && oparms->tcfp_nkeys != parm->nkeys)) {
>> +		nparms->tcfp_keys = kmalloc(ksize, GFP_ATOMIC);
>> +		if (!nparms->tcfp_keys) {
>>   			spin_unlock_bh(&p->tcf_lock);
>>   			ret = -ENOMEM;
>> -			goto put_chain;
>> +			goto out_release;
> 
> I'm a little unclear on why put_chain is no longer needed.
> It seems to me that there can be a reference to goto_ch held here,
> as was the case before this patch.

Correct, initially I thought it was assigned unconditionally after the 
for loop. I will restore it, thanks!

> 
>>   		}
>> -		kfree(p->tcfp_keys);
>> -		p->tcfp_keys = keys;
>> -		p->tcfp_nkeys = parm->nkeys;
>> +		nparms->tcfp_nkeys = parm->nkeys;
>> +	} else {
>> +		nparms->tcfp_keys = oparms->tcfp_keys;
> 
> I feel that I am missing something obvious:
> * Here oparms->tcfp_keys is assigned to nparms->tcfp_keys.
> * Later on there is a call to call_rcu(..., tcf_pedit_cleanup_rcu),
>    which will free oparms->tcfp_keys some time in the future.
> * But the memory bay still be accessed via tcfp_keys.
> 
> Is there a life cycle issue here?

Correct, this is wrong.
I got the wrong impression we could avoid the memory allocation in the 
update case.


> 
>> +		nparms->tcfp_nkeys = oparms->tcfp_nkeys;
>>   	}
>> -	memcpy(p->tcfp_keys, parm->keys, ksize);
>> -	p->tcfp_off_max_hint = 0;
>> -	for (i = 0; i < p->tcfp_nkeys; ++i) {
>> -		u32 cur = p->tcfp_keys[i].off;
>> +
>> +	memcpy(nparms->tcfp_keys, parm->keys, ksize);
>> +
>> +	for (i = 0; i < nparms->tcfp_nkeys; ++i) {
>> +		u32 cur = nparms->tcfp_keys[i].off;
>>   
>>   		/* sanitize the shift value for any later use */
>> -		p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
>> -					      p->tcfp_keys[i].shift);
>> +		nparms->tcfp_keys[i].shift = min_t(size_t,
>> +						   BITS_PER_TYPE(int) - 1,
>> +						   nparms->tcfp_keys[i].shift);
>>   
>>   		/* The AT option can read a single byte, we can bound the actual
>>   		 * value with uchar max.
>>   		 */
>> -		cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
>> +		cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
>>   
>>   		/* Each key touches 4 bytes starting from the computed offset */
>> -		p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
>> +		nparms->tcfp_off_max_hint =
>> +			max(nparms->tcfp_off_max_hint, cur + 4);
>>   	}
>>   
>> -	p->tcfp_flags = parm->flags;
>>   	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
>>   
>> -	kfree(p->tcfp_keys_ex);
>> -	p->tcfp_keys_ex = keys_ex;
>> +	rcu_assign_pointer(p->parms, nparms);
>>   
>>   	spin_unlock_bh(&p->tcf_lock);
>> +
>> +	if (oparms)
>> +		call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
> 
> 	Here there is a condition on oparms being non-NULL.
> 	But further above oparms is dereference unconditionally.
> 	Is there an inconsistency here?

oparms is NULL when we create the action instance.
I believe this will be way clearer in the next version.

> 
>> +
>>   	if (goto_ch)
>>   		tcf_chain_put_by_act(goto_ch);
>> +
>>   	return ret;
>>   
>> -put_chain:
>> -	if (goto_ch)
>> -		tcf_chain_put_by_act(goto_ch);
>>   out_release:
>>   	tcf_idr_release(*a, bind);
>> +out_free_ex:
>> +	kfree(nparms->tcfp_keys_ex);
>>   out_free:
>> -	kfree(keys_ex);
>> +	kfree(nparms);
>>   	return ret;
>> -
>>   }
> 
> ...


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-01-31 14:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-30 16:02 [PATCH net-next v4 0/2] net/sched: transition act_pedit to rcu and percpu stats Pedro Tammela
2023-01-30 16:02 ` [PATCH net-next v4 1/2] " Pedro Tammela
2023-01-31 12:37   ` Simon Horman
2023-01-31 14:20     ` Pedro Tammela
2023-01-30 16:02 ` [PATCH net-next v4 2/2] net/sched: simplify tcf_pedit_act Pedro Tammela
2023-01-31 12:37   ` Simon Horman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.