All of lore.kernel.org
 help / color / mirror / Atom feed
From: xiangxia.m.yue@gmail.com
To: netdev@vger.kernel.org
Cc: Tonghao Zhang <xiangxia.m.yue@gmail.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Jiri Pirko <jiri@resnulli.us>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Jonathan Lemon <jonathan.lemon@gmail.com>,
	Eric Dumazet <edumazet@google.com>,
	Alexander Lobakin <alobakin@pm.me>,
	Paolo Abeni <pabeni@redhat.com>,
	Talal Ahmad <talalahmad@google.com>,
	Kevin Hao <haokexin@gmail.com>,
	Ilias Apalodimas <ilias.apalodimas@linaro.org>,
	Kees Cook <keescook@chromium.org>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Antoine Tenart <atenart@kernel.org>, Wei Wang <weiwan@google.com>,
	Arnd Bergmann <arnd@arndb.de>
Subject: [net-next v2 2/2] net: sched: support hash/classid selecting tx queue
Date: Wed,  8 Dec 2021 22:34:08 +0800	[thread overview]
Message-ID: <20211208143408.7047-3-xiangxia.m.yue@gmail.com> (raw)
In-Reply-To: <20211208143408.7047-1-xiangxia.m.yue@gmail.com>

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch allows users to select queue_mapping, range
from A to B. And users can use skb-hash or cgroup classid
to select Tx queues. Then the packets can load balance from
A to B queue.

$ tc filter ... action skbedit queue_mapping hash-type normal 0 4

"skbedit queue_mapping QUEUE_MAPPING" [0] is enhanced with two
flags: SKBEDIT_F_QUEUE_MAPPING_HASH, SKBEDIT_F_QUEUE_MAPPING_CLASSID.
The range is an unsigned 16bit value in decimal format.

[0]: https://man7.org/linux/man-pages/man8/tc-skbedit.8.html

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Lobakin <alobakin@pm.me>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Talal Ahmad <talalahmad@google.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Antoine Tenart <atenart@kernel.org>
Cc: Wei Wang <weiwan@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 include/net/tc_act/tc_skbedit.h        |  1 +
 include/uapi/linux/tc_act/tc_skbedit.h |  6 +++
 net/sched/act_skbedit.c                | 58 ++++++++++++++++++++++++--
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 00bfee70609e..ee96e0fa6566 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -17,6 +17,7 @@ struct tcf_skbedit_params {
 	u32 mark;
 	u32 mask;
 	u16 queue_mapping;
+	u16 mapping_mod;
 	u16 ptype;
 	struct rcu_head rcu;
 };
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 800e93377218..8df288078dde 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -29,6 +29,11 @@
 #define SKBEDIT_F_PTYPE			0x8
 #define SKBEDIT_F_MASK			0x10
 #define SKBEDIT_F_INHERITDSFIELD	0x20
+#define SKBEDIT_F_QUEUE_MAPPING_HASH	0x40
+#define SKBEDIT_F_QUEUE_MAPPING_CLASSID	0x80
+
+#define SKBEDIT_F_QUEUE_MAPPING_HASH_MASK (SKBEDIT_F_QUEUE_MAPPING_HASH | \
+					   SKBEDIT_F_QUEUE_MAPPING_CLASSID)
 
 struct tc_skbedit {
 	tc_gen;
@@ -45,6 +50,7 @@ enum {
 	TCA_SKBEDIT_PTYPE,
 	TCA_SKBEDIT_MASK,
 	TCA_SKBEDIT_FLAGS,
+	TCA_SKBEDIT_QUEUE_MAPPING_MAX,
 	__TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 498feedad70a..355b43999a4a 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/cls_cgroup.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/ip.h>
@@ -23,6 +24,25 @@
 static unsigned int skbedit_net_id;
 static struct tc_action_ops act_skbedit_ops;
 
+static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
+			    struct sk_buff *skb)
+{
+	u16 queue_mapping = params->queue_mapping;
+	u16 mapping_mod = params->mapping_mod;
+	u32 hash;
+
+	if (!(params->flags & SKBEDIT_F_QUEUE_MAPPING_HASH_MASK))
+		return netdev_cap_txqueue(skb->dev, queue_mapping);
+
+	if (params->flags & SKBEDIT_F_QUEUE_MAPPING_CLASSID)
+		hash = jhash_1word(task_get_classid(skb), 0);
+	else if (params->flags & SKBEDIT_F_QUEUE_MAPPING_HASH)
+		hash = skb_get_hash(skb);
+
+	queue_mapping = queue_mapping + hash % mapping_mod;
+	return netdev_cap_txqueue(skb->dev, queue_mapping);
+}
+
 static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
 			   struct tcf_result *res)
 {
@@ -57,10 +77,9 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
 			break;
 		}
 	}
-	if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
-	    skb->dev->real_num_tx_queues > params->queue_mapping) {
+	if (params->flags & SKBEDIT_F_QUEUE_MAPPING) {
 		netdev_xmit_skip_txqueue();
-		skb_set_queue_mapping(skb, params->queue_mapping);
+		skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
 	}
 	if (params->flags & SKBEDIT_F_MARK) {
 		skb->mark &= ~params->mask;
@@ -94,6 +113,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
 	[TCA_SKBEDIT_MASK]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_FLAGS]		= { .len = sizeof(u64) },
+	[TCA_SKBEDIT_QUEUE_MAPPING_MAX]	= { .len = sizeof(u16) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -110,6 +130,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	struct tcf_skbedit *d;
 	u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
 	u16 *queue_mapping = NULL, *ptype = NULL;
+	u16 mapping_mod = 0;
 	bool exists = false;
 	int ret = 0, err;
 	u32 index;
@@ -157,6 +178,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 		if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
 			flags |= SKBEDIT_F_INHERITDSFIELD;
+		if (*pure_flags & SKBEDIT_F_QUEUE_MAPPING_HASH_MASK) {
+			u16 *queue_mapping_max;
+
+			if (!tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX])
+				return -EINVAL;
+
+			if (!tb[TCA_SKBEDIT_QUEUE_MAPPING])
+				return -EINVAL;
+
+			queue_mapping_max =
+				nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
+
+			if (*queue_mapping_max < *queue_mapping)
+				return -EINVAL;
+
+			mapping_mod = *queue_mapping_max - *queue_mapping + 1;
+			flags |= *pure_flags &
+				 SKBEDIT_F_QUEUE_MAPPING_HASH_MASK;
+		}
 	}
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
@@ -206,8 +246,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	params_new->flags = flags;
 	if (flags & SKBEDIT_F_PRIORITY)
 		params_new->priority = *priority;
-	if (flags & SKBEDIT_F_QUEUE_MAPPING)
+	if (flags & SKBEDIT_F_QUEUE_MAPPING) {
 		params_new->queue_mapping = *queue_mapping;
+		params_new->mapping_mod = mapping_mod;
+	}
 	if (flags & SKBEDIT_F_MARK)
 		params_new->mark = *mark;
 	if (flags & SKBEDIT_F_PTYPE)
@@ -274,6 +316,14 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 		goto nla_put_failure;
 	if (params->flags & SKBEDIT_F_INHERITDSFIELD)
 		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+	if (params->flags & SKBEDIT_F_QUEUE_MAPPING_HASH_MASK) {
+		if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
+				params->queue_mapping + params->mapping_mod - 1))
+			goto nla_put_failure;
+
+		pure_flags |= params->flags &
+			      SKBEDIT_F_QUEUE_MAPPING_HASH_MASK;
+	}
 	if (pure_flags != 0 &&
 	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
 		goto nla_put_failure;
-- 
2.27.0


  parent reply	other threads:[~2021-12-08 14:34 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-08 14:34 [net-next v2 0/2] net: sched: allow user to select txqueue xiangxia.m.yue
2021-12-08 14:34 ` [net-next v2 1/2] net: sched: use queue_mapping to pick tx queue xiangxia.m.yue
2021-12-08 14:34 ` xiangxia.m.yue [this message]
2021-12-08 22:21   ` [net-next v2 2/2] net: sched: support hash/classid selecting " kernel test robot
2021-12-08 22:21     ` kernel test robot
2021-12-09  1:51     ` Tonghao Zhang
2021-12-09  1:51       ` Tonghao Zhang
2021-12-08 22:32   ` kernel test robot
2021-12-08 22:32     ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211208143408.7047-3-xiangxia.m.yue@gmail.com \
    --to=xiangxia.m.yue@gmail.com \
    --cc=alobakin@pm.me \
    --cc=arnd@arndb.de \
    --cc=atenart@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=haokexin@gmail.com \
    --cc=ilias.apalodimas@linaro.org \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=jonathan.lemon@gmail.com \
    --cc=keescook@chromium.org \
    --cc=kuba@kernel.org \
    --cc=memxor@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=talalahmad@google.com \
    --cc=weiwan@google.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.