All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct
@ 2019-01-29  8:02 Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 2/6 v2] net/sched: cls_flower: add match on ct info Paul Blakey
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Paul Blakey @ 2019-01-29  8:02 UTC (permalink / raw)
  To: Guy Shattah, Marcelo Leitner, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev
  Cc: Paul Blakey

Act ct will send packets to conntrack on a specific zone,
and when commiting a connection, a ct label and ct mark can be set.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
 include/net/tc_act/tc_ct.h        |  37 +++
 include/uapi/linux/tc_act/tc_ct.h |  29 +++
 net/sched/Kconfig                 |  11 +
 net/sched/Makefile                |   1 +
 net/sched/act_ct.c                | 465 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 543 insertions(+)
 create mode 100644 include/net/tc_act/tc_ct.h
 create mode 100644 include/uapi/linux/tc_act/tc_ct.h
 create mode 100644 net/sched/act_ct.c

diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
new file mode 100644
index 0000000..4a16375
--- /dev/null
+++ b/include/net/tc_act/tc_ct.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_CT_H
+#define __NET_TC_CT_H
+
+#include <net/act_api.h>
+#include <uapi/linux/tc_act/tc_ct.h>
+
+#define TCA_ACT_CT_LABEL_SIZE 4
+struct tcf_ct {
+	struct tc_action common;
+	struct net *net;
+	struct nf_conn *tmpl;
+	u32 labels[TCA_ACT_CT_LABEL_SIZE];
+	u32 labels_mask[TCA_ACT_CT_LABEL_SIZE];
+	u32 mark;
+	u32 mark_mask;
+	u16 zone;
+	bool commit;
+};
+
+#define to_ct(a) ((struct tcf_ct *)a)
+
+static inline bool is_tcf_ct(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->type == TCA_ACT_CT)
+		return true;
+#endif
+	return false;
+}
+
+static inline struct tcf_ct *tcf_ct(const struct tc_action *a)
+{
+	return to_ct(a);
+}
+
+#endif /* __NET_TC_CT_H */
diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
new file mode 100644
index 0000000..6dbd771
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_ct.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CT_H
+#define __UAPI_TC_CT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_CT 18
+
+struct tc_ct {
+	tc_gen;
+	__u16 zone;
+	__u32 labels[4];
+	__u32 labels_mask[4];
+	__u32 mark;
+	__u32 mark_mask;
+	bool commit;
+};
+
+enum {
+	TCA_CT_UNSPEC,
+	TCA_CT_PARMS,
+	TCA_CT_TM,
+	TCA_CT_PAD,
+	__TCA_CT_MAX
+};
+#define TCA_CT_MAX (__TCA_CT_MAX - 1)
+
+#endif /* __UAPI_TC_CT_H */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1b9afde..935a327 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -912,6 +912,17 @@ config NET_ACT_TUNNEL_KEY
 	  To compile this code as a module, choose M here: the
 	  module will be called act_tunnel_key.
 
+config NET_ACT_CT
+        tristate "connection tracking action"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to allow sending the packets to conntrack module
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_ct.
+
 config NET_IFE_SKBMARK
         tristate "Support to encoding decoding skb mark on IFE action"
         depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8a40431..c0a02de 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
 obj-$(CONFIG_NET_IFE_SKBTCINDEX)	+= act_meta_skbtcindex.o
 obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
+obj-$(CONFIG_NET_ACT_CT)	+= act_ct.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
new file mode 100644
index 0000000..61155cc
--- /dev/null
+++ b/net/sched/act_ct.c
@@ -0,0 +1,465 @@
+/*
+ * net/sched/act_ct.c  connection tracking action
+ *
+ * Copyright (c) 2018
+ *
+ * Authors:	Yossi Kuperman <yossiku@mellanox.com>
+ *		Paul Blakey <paulb@mellanox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <uapi/linux/tc_act/tc_ct.h>
+#include <net/tc_act/tc_ct.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+
+#include <net/pkt_cls.h>
+
+static unsigned int ct_net_id;
+static struct tc_action_ops act_ct_ops;
+
+/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
+static bool skb_nfct_cached(struct net *net, struct sk_buff *skb, u16 zone_id)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return false;
+	if (!net_eq(net, read_pnet(&ct->ct_net)))
+		return false;
+	if (nf_ct_zone(ct)->id != zone_id)
+		return false;
+	return true;
+}
+
+/* Trim the skb to the length specified by the IP/IPv6 header,
+ * removing any trailing lower-layer padding. This prepares the skb
+ * for higher-layer processing that assumes skb->len excludes padding
+ * (such as nf_ip_checksum). The caller needs to pull the skb to the
+ * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
+ */
+static int tcf_skb_network_trim(struct sk_buff *skb)
+{
+	unsigned int len;
+	int err;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		len = ntohs(ip_hdr(skb)->tot_len);
+		break;
+	case htons(ETH_P_IPV6):
+		len = sizeof(struct ipv6hdr)
+			+ ntohs(ipv6_hdr(skb)->payload_len);
+		break;
+	default:
+		len = skb->len;
+	}
+
+	err = pskb_trim_rcsum(skb, len);
+
+	return err;
+}
+
+static u_int8_t tcf_skb_family(struct sk_buff *skb)
+{
+	u_int8_t family = PF_UNSPEC;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		family = PF_INET;
+		break;
+	case htons(ETH_P_IPV6):
+		family = PF_INET6;
+		break;
+	default:
+        break;
+	}
+
+	return family;
+}
+
+static bool labels_nonzero(const u32 *labels_mask)
+{
+	return !!memchr_inv(labels_mask, 0, 4);
+}
+
+static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+		      struct tcf_result *res)
+{
+	struct net *net = dev_net(skb->dev);
+	struct tcf_ct *ca = to_ct(a);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *tmpl = NULL;
+	struct nf_hook_state state;
+	struct nf_conn *ct;
+	int nh_ofs, err;
+	u_int8_t family;
+	bool cached;
+
+	/* The conntrack module expects to be working at L3. */
+	nh_ofs = skb_network_offset(skb);
+	skb_pull_rcsum(skb, nh_ofs);
+
+	err = tcf_skb_network_trim(skb);
+	if (err)
+		goto drop;
+
+	family = tcf_skb_family(skb);
+	if (family == PF_UNSPEC)
+		goto drop;
+
+	state.hook = NF_INET_PRE_ROUTING,
+	state.net = net,
+	state.pf = family;
+
+	spin_lock(&ca->tcf_lock);
+	tcf_lastuse_update(&ca->tcf_tm);
+	bstats_update(&ca->tcf_bstats, skb);
+	tmpl = ca->tmpl;
+
+	/* If we are recirculating packets to match on ct fields and
+	 * committing with a separate ct action, then we don't need to
+	 * actually run the packet through conntrack twice unless it's for a
+	 * different zone. */
+	cached = skb_nfct_cached(net, skb, ca->zone);
+	if (!cached) {
+		/* Associate skb with specified zone. */
+		if (tmpl) {
+			if (skb_nfct(skb))
+				nf_conntrack_put(skb_nfct(skb));
+			nf_conntrack_get(&tmpl->ct_general);
+			nf_ct_set(skb, tmpl, IP_CT_NEW);
+		}
+
+		err = nf_conntrack_in(skb, &state);
+		if (err != NF_ACCEPT)
+			goto out;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		goto out;
+	nf_ct_deliver_cached_events(ct);
+
+	if (ca->commit) {
+		u32 *labels = ca->labels;
+		u32 *labels_m = ca->labels_mask;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+		if (ca->mark_mask) {
+			u32 ct_mark = ca->mark;
+			u32 mask = ca->mark_mask;
+			u32 new_mark;
+
+			new_mark = ct_mark | (ct->mark & ~(mask));
+			if (ct->mark != new_mark) {
+				ct->mark = new_mark;
+				if (nf_ct_is_confirmed(ct))
+					nf_conntrack_event_cache(IPCT_MARK, ct);
+			}
+		}
+#endif
+		if (!nf_ct_is_confirmed(ct)) {
+			bool have_mask = labels_nonzero(labels_m);
+			struct nf_conn_labels *cl, *master_cl;
+
+			/* Inherit master's labels to the related connection? */
+			master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL;
+
+			if (!master_cl && !have_mask)
+				goto skip; /* Nothing to do. */
+
+			/* Get labels or add them */
+			cl = nf_ct_labels_find(ct);
+			if (!cl) {
+				nf_ct_labels_ext_add(ct);
+				cl = nf_ct_labels_find(ct);
+			}
+			if (!cl)
+				goto out;
+
+			/* Inherit the master's labels, if any.  Must use memcpy for backport
+			 * as struct assignment only copies the length field in older
+			 * kernels.
+	 		*/
+			if (master_cl)
+				memcpy(cl->bits, master_cl->bits, NF_CT_LABELS_MAX_SIZE);
+
+			if (have_mask) {
+				u32 *dst = (u32 *)cl->bits;
+				int i;
+
+				for (i = 0; i < 4; i++)
+					dst[i] = (dst[i] & ~labels_m[i]) | (labels[i] & labels_m[i]);
+			}
+
+			/* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
+			 * IPCT_LABEL bit is set in the event cache.
+			 */
+			nf_conntrack_event_cache(IPCT_LABEL, ct);
+		} else if (labels_nonzero(labels_m)) {
+			struct nf_conn_labels *cl;
+
+			cl = nf_ct_labels_find(ct);
+			if (!cl) {
+				nf_ct_labels_ext_add(ct);
+				cl = nf_ct_labels_find(ct);
+			}
+
+			if (!cl)
+				goto out;
+
+			nf_connlabels_replace(ct, ca->labels, ca->labels_mask, 4);
+		}
+skip:
+		/* This will take care of sending queued events even if the connection
+		 * is already confirmed. */
+		nf_conntrack_confirm(skb);
+	}
+
+out:
+	skb_push(skb, nh_ofs);
+	skb_postpush_rcsum(skb, skb->data, nh_ofs);
+
+	spin_unlock(&ca->tcf_lock);
+	return ca->tcf_action;
+
+drop:
+	spin_lock(&ca->tcf_lock);
+	ca->tcf_qstats.drops++;
+	spin_unlock(&ca->tcf_lock);
+	return TC_ACT_SHOT;
+}
+
+static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
+	[TCA_CT_PARMS] = { .len = sizeof(struct tc_ct) },
+};
+
+static int tcf_ct_init(struct net *net, struct nlattr *nla,
+		       struct nlattr *est, struct tc_action **a,
+		       int ovr, int bind, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+	struct nlattr *tb[TCA_CT_MAX + 1];
+	struct nf_conntrack_zone zone;
+	struct nf_conn *tmpl = NULL;
+	bool exists = false;
+	struct tc_ct *parm;
+	struct tcf_ct *ci;
+	int ret, err;
+
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
+		return -EINVAL;
+	}
+
+	ret = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_CT_PARMS]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
+		return -EINVAL;
+	}
+
+	parm = nla_data(tb[TCA_CT_PARMS]);
+
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
+	if (exists && bind)
+		return 0;
+
+	if (!exists) {
+		ret = tcf_idr_create(tn, parm->index, est, a, &act_ct_ops, bind, false);
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
+			return ret;
+		}
+
+		ci = to_ct(*a);
+		ci->tcf_action = parm->action;
+		ci->net = net;
+		ci->commit = parm->commit;
+		ci->zone = parm->zone;
+#if !IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+		if (parm->mark_mask) {
+			NL_SET_ERR_MSG_MOD(extack, "Mark not supported by kernel config");
+			return -EOPNOTSUPP;
+		}
+#endif
+#if !IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
+		if (labels_nonzero(parm->labels_mask)) {
+			NL_SET_ERR_MSG_MOD(extack, "Labels not supported by kernel config");
+			return -EOPNOTSUPP;
+		}
+#endif
+		if (parm->zone != NF_CT_DEFAULT_ZONE_ID) {
+			nf_ct_zone_init(&zone, parm->zone,
+					NF_CT_DEFAULT_ZONE_DIR, 0);
+
+			tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_ATOMIC);
+			if (!tmpl) {
+				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
+				tcf_idr_cleanup(tn, parm->index);
+				return -ENOMEM;
+			}
+			__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+			nf_conntrack_get(&tmpl->ct_general);
+		}
+
+		ci->tmpl = tmpl;
+		ci->mark = parm->mark;
+		ci->mark_mask = parm->mark_mask;
+		memcpy(ci->labels, parm->labels, sizeof(parm->labels));
+		memcpy(ci->labels_mask, parm->labels_mask, sizeof(parm->labels_mask));
+
+		tcf_idr_insert(tn, *a);
+		ret = ACT_P_CREATED;
+	} else {
+		/* TODO: handle replace */
+		NL_SET_ERR_MSG_MOD(extack, "Ct can only be created");
+		tcf_idr_cleanup(tn, parm->index);
+		return -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+static void tcf_ct_release(struct tc_action *a)
+{
+	struct tcf_ct *ci = to_ct(a);
+
+	if (ci->tmpl)
+		nf_conntrack_put(&ci->tmpl->ct_general);
+}
+
+static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
+				    int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_ct *ci = to_ct(a);
+
+	struct tc_ct opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
+	};
+	struct tcf_t t;
+
+	spin_lock_bh(&ci->tcf_lock);
+	opt.action  = ci->tcf_action,
+	opt.zone   = ci->zone,
+	opt.commit = ci->commit,
+	opt.mark = ci->mark,
+	opt.mark_mask = ci->mark_mask,
+	memcpy(opt.labels, ci->labels, sizeof(opt.labels));
+	memcpy(opt.labels_mask, ci->labels_mask, sizeof(opt.labels_mask));
+
+	if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &ci->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
+		goto nla_put_failure;
+	spin_unlock_bh(&ci->tcf_lock);
+
+	return skb->len;
+nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
+			 struct netlink_callback *cb, int type,
+			 const struct tc_action_ops *ops,
+			 struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_ct_ops = {
+	.kind		=	"ct",
+	.type		=	TCA_ACT_CT,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_ct_act,
+	.dump		=	tcf_ct_dump,
+	.init		=	tcf_ct_init,
+	.cleanup	=	tcf_ct_release,
+	.walk		=	tcf_ct_walker,
+	.lookup		=	tcf_ct_search,
+	.size		=	sizeof(struct tcf_ct),
+};
+
+static __net_init int ct_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, ct_net_id);
+
+	return tc_action_net_init(tn, &act_ct_ops);
+}
+
+static void __net_exit ct_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, ct_net_id);
+}
+
+static struct pernet_operations ct_net_ops = {
+	.init = ct_init_net,
+	.exit_batch = ct_exit_net,
+	.id   = &ct_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+static int __init ct_init_module(void)
+{
+	char *mark = IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) ? "on" : "off";
+	char *label = IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) ? "on" : "off";
+
+	pr_info("ct action on, mark: %s, label: %s\n", mark, label);
+	return tcf_register_action(&act_ct_ops, &ct_net_ops);
+}
+
+static void __exit ct_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
+}
+
+module_init(ct_init_module);
+module_exit(ct_cleanup_module);
+MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
+MODULE_DESCRIPTION("Connection tracking action");
+MODULE_LICENSE("GPL");
+
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH net-next 2/6 v2] net/sched: cls_flower: add match on ct info
  2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
@ 2019-01-29  8:02 ` Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support Paul Blakey
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Paul Blakey @ 2019-01-29  8:02 UTC (permalink / raw)
  To: Guy Shattah, Marcelo Leitner, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev
  Cc: Paul Blakey

New match on ct state, mark, and label from ct_info on the skb.
This can be set via sending the packet to ct via the ct action.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
 include/uapi/linux/pkt_cls.h |  17 ++++++
 net/sched/cls_flower.c       | 126 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 02ac251..121f1ef 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -497,11 +497,28 @@ enum {
 	TCA_FLOWER_KEY_PORT_DST_MIN,	/* be16 */
 	TCA_FLOWER_KEY_PORT_DST_MAX,	/* be16 */
 
+	TCA_FLOWER_KEY_CT_STATE,
+	TCA_FLOWER_KEY_CT_STATE_MASK,
+	TCA_FLOWER_KEY_CT_ZONE,
+	TCA_FLOWER_KEY_CT_ZONE_MASK,
+	TCA_FLOWER_KEY_CT_MARK,
+	TCA_FLOWER_KEY_CT_MARK_MASK,
+	TCA_FLOWER_KEY_CT_LABELS,
+	TCA_FLOWER_KEY_CT_LABELS_MASK,
+
 	__TCA_FLOWER_MAX,
 };
 
 #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
 
+
+#define TCA_FLOWER_KEY_CT_FLAGS_NEW               0x01 /* Beginning of a new connection. */
+#define TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED       0x02 /* Part of an existing connection. */
+#define TCA_FLOWER_KEY_CT_FLAGS_RELATED           0x04 /* Related to an established connection. */
+#define TCA_FLOWER_KEY_CT_FLAGS_INVALID           0x10 /* Could not track connection. */
+#define TCA_FLOWER_KEY_CT_FLAGS_TRACKED           0x20 /* Conntrack has occurred. */
+
+
 enum {
 	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
 	TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f6aa57f..bf74a31 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -29,6 +29,9 @@
 #include <net/dst.h>
 #include <net/dst_metadata.h>
 
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+
 struct fl_flow_key {
 	int	indev_ifindex;
 	struct flow_dissector_key_control control;
@@ -57,6 +60,11 @@ struct fl_flow_key {
 	struct flow_dissector_key_enc_opts enc_opts;
 	struct flow_dissector_key_ports tp_min;
 	struct flow_dissector_key_ports tp_max;
+
+	u8	ct_state;
+	u16	ct_zone;
+	u32	ct_mark;
+	u32	ct_labels[NF_CT_LABELS_MAX_SIZE / sizeof(u32)];
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -265,19 +273,55 @@ static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
 	return __fl_lookup(mask, mkey);
 }
 
+static u8 fl_ct_get_state(enum ip_conntrack_info ctinfo)
+{
+	u8 ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+
+	switch (ctinfo) {
+	case IP_CT_ESTABLISHED:
+	case IP_CT_ESTABLISHED_REPLY:
+		ct_state |= TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+		break;
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		ct_state |= TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+		break;
+	case IP_CT_NEW:
+		ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW;
+		break;
+	default:
+		break;
+	}
+
+	return ct_state;
+}
+
 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
-	struct cls_fl_filter *f;
-	struct fl_flow_mask *mask;
-	struct fl_flow_key skb_key;
+	enum ip_conntrack_info ctinfo;
 	struct fl_flow_key skb_mkey;
+	struct fl_flow_key skb_key;
+	struct fl_flow_mask *mask;
+	struct nf_conn_labels *cl;
+	struct cls_fl_filter *f;
+	struct nf_conn *ct;
 
 	list_for_each_entry_rcu(mask, &head->masks, list) {
 		fl_clear_masked_range(&skb_key, mask);
 
 		skb_key.indev_ifindex = skb->skb_iif;
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct) {
+			skb_key.ct_state = fl_ct_get_state(ctinfo);
+			skb_key.ct_zone = ct->zone.id;
+			skb_key.ct_mark = ct->mark;
+
+			cl = nf_ct_labels_find(ct);
+			if (cl)
+				memcpy(skb_key.ct_labels, cl->bits, sizeof(skb_key.ct_labels));
+		}
 		/* skb_flow_dissect() does not set n_proto in case an unknown
 		 * protocol, so do it rather here.
 		 */
@@ -562,6 +606,14 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
 	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
 	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_CT_STATE]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CT_STATE_MASK]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CT_ZONE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_ZONE_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_MARK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_CT_MARK_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_CT_LABELS]	= { .type = NLA_UNSPEC, .len = 16 },
+	[TCA_FLOWER_KEY_CT_LABELS_MASK]	= { .type = NLA_UNSPEC, .len = 16 },
 };
 
 static const struct nla_policy
@@ -872,6 +924,36 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 	return 0;
 }
 
+static int fl_set_key_ct(struct nlattr **tb, struct fl_flow_key *key,
+			 struct fl_flow_key *mask,
+			 struct netlink_ext_ack *extack)
+{
+	size_t label_len = 0;
+
+	if (tb[TCA_FLOWER_KEY_CT_STATE]) {
+		key->ct_state = nla_get_u8(tb[TCA_FLOWER_KEY_CT_STATE]);
+		mask->ct_state = nla_get_u8(tb[TCA_FLOWER_KEY_CT_STATE_MASK]);
+	}
+
+	if (tb[TCA_FLOWER_KEY_CT_ZONE_MASK]) {
+		key->ct_zone = nla_get_u16(tb[TCA_FLOWER_KEY_CT_ZONE]);
+		mask->ct_zone = nla_get_u16(tb[TCA_FLOWER_KEY_CT_ZONE_MASK]);
+	}
+
+	if (tb[TCA_FLOWER_KEY_CT_MARK_MASK]) {
+		key->ct_mark = nla_get_u32(tb[TCA_FLOWER_KEY_CT_MARK]);
+		mask->ct_mark = nla_get_u32(tb[TCA_FLOWER_KEY_CT_MARK_MASK]);
+	}
+
+	if (tb[TCA_FLOWER_KEY_CT_LABELS_MASK]) {
+		label_len = nla_len(tb[TCA_FLOWER_KEY_CT_LABELS]);
+		memcpy(key->ct_labels, nla_data(tb[TCA_FLOWER_KEY_CT_LABELS]), label_len);
+		memcpy(mask->ct_labels, nla_data(tb[TCA_FLOWER_KEY_CT_LABELS_MASK]), label_len);
+	}
+
+	return 0;
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask,
 		      struct netlink_ext_ack *extack)
@@ -1082,6 +1164,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 			return ret;
 	}
 
+	ret = fl_set_key_ct(tb, key, mask, extack);
+	if (ret)
+		return ret;
+
 	if (tb[TCA_FLOWER_KEY_FLAGS])
 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -1761,6 +1847,37 @@ static int fl_dump_key_geneve_opt(struct sk_buff *skb,
 	return -EMSGSIZE;
 }
 
+static int fl_dump_key_ct(struct sk_buff *skb,
+			  struct fl_flow_key *key,
+			  struct fl_flow_key *mask)
+{
+	if(fl_dump_key_val(skb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
+			   &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
+			   sizeof(key->ct_state)))
+		goto nla_put_failure;
+
+	if (fl_dump_key_val(skb, &key->ct_zone, TCA_FLOWER_KEY_CT_ZONE,
+			    &mask->ct_zone, TCA_FLOWER_KEY_CT_ZONE_MASK,
+			    sizeof(key->ct_zone)))
+		goto nla_put_failure;
+
+	if (fl_dump_key_val(skb, &key->ct_mark, TCA_FLOWER_KEY_CT_MARK,
+			    &mask->ct_mark, TCA_FLOWER_KEY_CT_MARK_MASK,
+			    sizeof(key->ct_mark)))
+		goto nla_put_failure;
+
+	if (fl_dump_key_val(skb, &key->ct_labels, TCA_FLOWER_KEY_CT_LABELS,
+			    &mask->ct_labels, TCA_FLOWER_KEY_CT_LABELS_MASK,
+			    sizeof(key->ct_labels)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+
 static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
 			       struct flow_dissector_key_enc_opts *enc_opts)
 {
@@ -1994,6 +2111,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
 	    fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
 		goto nla_put_failure;
 
+	if (fl_dump_key_ct(skb, key, mask))
+		goto nla_put_failure;
+
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
 		goto nla_put_failure;
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support
  2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 2/6 v2] net/sched: cls_flower: add match on ct info Paul Blakey
@ 2019-01-29  8:02 ` Paul Blakey
  2019-01-29 18:08   ` Marcelo Leitner
  2019-01-29  8:02 ` [RFC PATCH net-next 4/6 v2] net: Add new tc recirc id skb extension Paul Blakey
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Paul Blakey @ 2019-01-29  8:02 UTC (permalink / raw)
  To: Guy Shattah, Marcelo Leitner, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev
  Cc: Paul Blakey

TODO: handle EEXist.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
 include/uapi/linux/pkt_cls.h |  2 ++
 net/sched/cls_flower.c       | 22 ++++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 121f1ef..d848d6d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -506,6 +506,8 @@ enum {
 	TCA_FLOWER_KEY_CT_LABELS,
 	TCA_FLOWER_KEY_CT_LABELS_MASK,
 
+	TCA_FLOWER_EMATCHES,
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index bf74a31..f11fda0 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -104,6 +104,7 @@ struct cls_fl_filter {
 	struct rhash_head ht_node;
 	struct fl_flow_key mkey;
 	struct tcf_exts exts;
+	struct tcf_ematch_tree ematches;
 	struct tcf_result res;
 	struct fl_flow_key key;
 	struct list_head list;
@@ -332,10 +333,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		fl_set_masked_key(&skb_mkey, &skb_key, mask);
 
 		f = fl_lookup(mask, &skb_mkey, &skb_key);
-		if (f && !tc_skip_sw(f->flags)) {
-			*res = f->res;
-			return tcf_exts_exec(skb, &f->exts, res);
-		}
+		if (!f || tc_skip_sw(f->flags))
+			continue;
+
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+
+		*res = f->res;
+		return tcf_exts_exec(skb, &f->exts, res);
 	}
 	return -1;
 }
@@ -388,6 +393,7 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
 static void __fl_destroy_filter(struct cls_fl_filter *f)
 {
 	tcf_exts_destroy(&f->exts);
+	tcf_em_tree_destroy(&f->ematches);
 	tcf_exts_put_net(&f->exts);
 	kfree(f);
 }
@@ -523,6 +529,7 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
 	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
+	[TCA_FLOWER_EMATCHES]		= { .type = NLA_NESTED },
 	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
 					    .len = IFNAMSIZ },
 	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
@@ -1348,6 +1355,10 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 	if (err < 0)
 		return err;
 
+	err = tcf_em_tree_validate(tp, tb[TCA_FLOWER_EMATCHES], &f->ematches);
+	if (err < 0)
+		return err;
+
 	if (tb[TCA_FLOWER_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
@@ -2143,6 +2154,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
 		goto nla_put_failure;
 
+	if (tcf_em_tree_dump(skb, &f->ematches, TCA_FLOWER_EMATCHES) < 0)
+		goto nla_put_failure;
+
 	key = &f->key;
 	mask = &f->mask->key;
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH net-next 4/6 v2] net: Add new tc recirc id skb extension
  2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 2/6 v2] net/sched: cls_flower: add match on ct info Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support Paul Blakey
@ 2019-01-29  8:02 ` Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 5/6 v2] net/sched: em_meta: add match on " Paul Blakey
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Paul Blakey @ 2019-01-29  8:02 UTC (permalink / raw)
  To: Guy Shattah, Marcelo Leitner, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev
  Cc: Paul Blakey

This will be used by followup patch to tc act ct to
recirculate the packet after going to the connection tracking module and
share this recirculation from tc to OVS.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
 include/linux/skbuff.h | 1 +
 net/core/skbuff.c      | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 95d25b0..02768c7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3912,6 +3912,7 @@ enum skb_ext_id {
 #ifdef CONFIG_XFRM
 	SKB_EXT_SEC_PATH,
 #endif
+	SKB_EXT_TC_RECIRC_ID,
 	SKB_EXT_NUM, /* must be last */
 };
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 26d8484..57a2655 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3911,6 +3911,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 #ifdef CONFIG_XFRM
 	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
 #endif
+	[SKB_EXT_TC_RECIRC_ID] = SKB_EXT_CHUNKSIZEOF(uint32_t),
 };
 
 static __always_inline unsigned int skb_ext_total_length(void)
@@ -3922,6 +3923,7 @@ static __always_inline unsigned int skb_ext_total_length(void)
 #ifdef CONFIG_XFRM
 		skb_ext_type_len[SKB_EXT_SEC_PATH] +
 #endif
+		skb_ext_type_len[SKB_EXT_TC_RECIRC_ID] +
 		0;
 }
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH net-next 5/6 v2] net/sched: em_meta: add match on tc recirc id skb extension
  2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
                   ` (2 preceding siblings ...)
  2019-01-29  8:02 ` [RFC PATCH net-next 4/6 v2] net: Add new tc recirc id skb extension Paul Blakey
@ 2019-01-29  8:02 ` Paul Blakey
  2019-01-29  8:02 ` [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support Paul Blakey
  2019-02-01 13:23 ` [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Marcelo Leitner
  5 siblings, 0 replies; 13+ messages in thread
From: Paul Blakey @ 2019-01-29  8:02 UTC (permalink / raw)
  To: Guy Shattah, Marcelo Leitner, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev
  Cc: Paul Blakey

Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
 include/uapi/linux/tc_ematch/tc_em_meta.h | 1 +
 net/sched/em_meta.c                       | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/include/uapi/linux/tc_ematch/tc_em_meta.h b/include/uapi/linux/tc_ematch/tc_em_meta.h
index cf30b5b..25a6fb8 100644
--- a/include/uapi/linux/tc_ematch/tc_em_meta.h
+++ b/include/uapi/linux/tc_ematch/tc_em_meta.h
@@ -81,6 +81,7 @@ enum {
  	TCF_META_ID_SK_WRITE_PENDING,
 	TCF_META_ID_VLAN_TAG,
 	TCF_META_ID_RXHASH,
+	TCF_META_ID_TCRECIRC,
 	__TCF_META_ID_MAX
 };
 #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index d6e9711..041e0b3 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -245,6 +245,13 @@ static inline int var_dev(struct net_device *dev, struct meta_obj *dst)
 	dst->value = skb->tc_index;
 }
 
+META_COLLECTOR(int_tcrecirc)
+{
+	uint32_t *recirc = skb_ext_find(skb, SKB_EXT_TC_RECIRC_ID);
+
+	dst->value = recirc? *recirc : 0;
+}
+
 /**************************************************************************
  * Routing
  **************************************************************************/
@@ -638,6 +645,7 @@ struct meta_ops {
 		[META_ID(MACLEN)]		= META_FUNC(int_maclen),
 		[META_ID(NFMARK)]		= META_FUNC(int_mark),
 		[META_ID(TCINDEX)]		= META_FUNC(int_tcindex),
+		[META_ID(TCRECIRC)]		= META_FUNC(int_tcrecirc),
 		[META_ID(RTCLASSID)]		= META_FUNC(int_rtclassid),
 		[META_ID(RTIIF)]		= META_FUNC(int_rtiif),
 		[META_ID(SK_FAMILY)]		= META_FUNC(int_sk_family),
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support
  2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
                   ` (3 preceding siblings ...)
  2019-01-29  8:02 ` [RFC PATCH net-next 5/6 v2] net/sched: em_meta: add match on " Paul Blakey
@ 2019-01-29  8:02 ` Paul Blakey
  2019-01-29 18:12   ` Marcelo Leitner
  2019-02-01 13:23 ` [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Marcelo Leitner
  5 siblings, 1 reply; 13+ messages in thread
From: Paul Blakey @ 2019-01-29  8:02 UTC (permalink / raw)
  To: Guy Shattah, Marcelo Leitner, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev
  Cc: Paul Blakey

Set or clears (free) the skb tc recirc id extension.
If used with OVS, OVS can clear this recirc id after it reads it.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
 include/net/tc_act/tc_ct.h        |  2 ++
 include/uapi/linux/tc_act/tc_ct.h |  2 ++
 net/sched/act_ct.c                | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index 4a16375..6ea19d8 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -16,6 +16,8 @@ struct tcf_ct {
 	u32 mark_mask;
 	u16 zone;
 	bool commit;
+	uint32_t set_recirc;
+	bool del_recirc;
 };
 
 #define to_ct(a) ((struct tcf_ct *)a)
diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
index 6dbd771..2279a9b 100644
--- a/include/uapi/linux/tc_act/tc_ct.h
+++ b/include/uapi/linux/tc_act/tc_ct.h
@@ -14,6 +14,8 @@ struct tc_ct {
 	__u32 labels_mask[4];
 	__u32 mark;
 	__u32 mark_mask;
+	uint32_t set_recirc;
+	bool del_recirc;
 	bool commit;
 };
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 61155cc..7822385 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -117,6 +117,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 	u_int8_t family;
 	bool cached;
 
+	if (ca->del_recirc) {
+		skb_ext_del(skb, SKB_EXT_TC_RECIRC_ID);
+		return ca->tcf_action;
+	}
+
 	/* The conntrack module expects to be working at L3. */
 	nh_ofs = skb_network_offset(skb);
 	skb_pull_rcsum(skb, nh_ofs);
@@ -243,6 +248,15 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 	skb_postpush_rcsum(skb, skb->data, nh_ofs);
 
 	spin_unlock(&ca->tcf_lock);
+
+	if (ca->set_recirc) {
+		u32 recirc = ca->set_recirc;
+		uint32_t *recircp = skb_ext_add(skb, SKB_EXT_TC_RECIRC_ID);
+
+		if (recircp)
+			*recircp = recirc;
+	}
+
 	return ca->tcf_action;
 
 drop:
@@ -305,6 +319,8 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
 		ci->net = net;
 		ci->commit = parm->commit;
 		ci->zone = parm->zone;
+		ci->set_recirc = parm->set_recirc;
+		ci->del_recirc = parm->del_recirc;
 #if !IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
 		if (parm->mark_mask) {
 			NL_SET_ERR_MSG_MOD(extack, "Mark not supported by kernel config");
@@ -378,6 +394,8 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
 	opt.mark_mask = ci->mark_mask,
 	memcpy(opt.labels, ci->labels, sizeof(opt.labels));
 	memcpy(opt.labels_mask, ci->labels_mask, sizeof(opt.labels_mask));
+	opt.set_recirc = ci->set_recirc;
+	opt.del_recirc = ci->del_recirc;
 
 	if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support
  2019-01-29  8:02 ` [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support Paul Blakey
@ 2019-01-29 18:08   ` Marcelo Leitner
  2019-01-30  8:40     ` Paul Blakey
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Leitner @ 2019-01-29 18:08 UTC (permalink / raw)
  To: Paul Blakey
  Cc: Guy Shattah, Aaron Conole, John Hurley, Simon Horman,
	Justin Pettit, Gregory Rose, Eelco Chaudron, Flavio Leitner,
	Florian Westphal, Jiri Pirko, Rashid Khan, Sushil Kulkarni,
	Andy Gospodarek, Roi Dayan, Yossi Kuperman, Or Gerlitz,
	Rony Efraim, davem, netdev

On Tue, Jan 29, 2019 at 10:02:03AM +0200, Paul Blakey wrote:
> TODO: handle EEXist.
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> ---
>  include/uapi/linux/pkt_cls.h |  2 ++
>  net/sched/cls_flower.c       | 22 ++++++++++++++++++----
>  2 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
> index 121f1ef..d848d6d 100644
> --- a/include/uapi/linux/pkt_cls.h
> +++ b/include/uapi/linux/pkt_cls.h
> @@ -506,6 +506,8 @@ enum {
>  	TCA_FLOWER_KEY_CT_LABELS,
>  	TCA_FLOWER_KEY_CT_LABELS_MASK,
>  
> +	TCA_FLOWER_EMATCHES,
> +
>  	__TCA_FLOWER_MAX,
>  };
>  
> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
> index bf74a31..f11fda0 100644
> --- a/net/sched/cls_flower.c
> +++ b/net/sched/cls_flower.c
> @@ -104,6 +104,7 @@ struct cls_fl_filter {
>  	struct rhash_head ht_node;
>  	struct fl_flow_key mkey;
>  	struct tcf_exts exts;
> +	struct tcf_ematch_tree ematches;
>  	struct tcf_result res;
>  	struct fl_flow_key key;
>  	struct list_head list;
> @@ -332,10 +333,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>  		fl_set_masked_key(&skb_mkey, &skb_key, mask);
>  
>  		f = fl_lookup(mask, &skb_mkey, &skb_key);
> -		if (f && !tc_skip_sw(f->flags)) {
> -			*res = f->res;
> -			return tcf_exts_exec(skb, &f->exts, res);
> -		}
> +		if (!f || tc_skip_sw(f->flags))
> +			continue;
> +
> +		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
> +			continue;

Considering just the recirc_id (and not the other fields supported by
ematch), have you considered integrating recirc_id match on flow
dissector instead?  It would avoid the matching in 2 steps here and
benefit from the hashing.

> +
> +		*res = f->res;
> +		return tcf_exts_exec(skb, &f->exts, res);
>  	}
>  	return -1;
>  }
> @@ -388,6 +393,7 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
>  static void __fl_destroy_filter(struct cls_fl_filter *f)
>  {
>  	tcf_exts_destroy(&f->exts);
> +	tcf_em_tree_destroy(&f->ematches);
>  	tcf_exts_put_net(&f->exts);
>  	kfree(f);
>  }
> @@ -523,6 +529,7 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
>  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
>  	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
>  	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
> +	[TCA_FLOWER_EMATCHES]		= { .type = NLA_NESTED },
>  	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
>  					    .len = IFNAMSIZ },
>  	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
> @@ -1348,6 +1355,10 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
>  	if (err < 0)
>  		return err;
>  
> +	err = tcf_em_tree_validate(tp, tb[TCA_FLOWER_EMATCHES], &f->ematches);
> +	if (err < 0)
> +		return err;
> +
>  	if (tb[TCA_FLOWER_CLASSID]) {
>  		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
>  		tcf_bind_filter(tp, &f->res, base);
> @@ -2143,6 +2154,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
>  	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
>  		goto nla_put_failure;
>  
> +	if (tcf_em_tree_dump(skb, &f->ematches, TCA_FLOWER_EMATCHES) < 0)
> +		goto nla_put_failure;
> +
>  	key = &f->key;
>  	mask = &f->mask->key;
>  
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support
  2019-01-29  8:02 ` [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support Paul Blakey
@ 2019-01-29 18:12   ` Marcelo Leitner
  2019-01-30  8:20     ` Paul Blakey
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Leitner @ 2019-01-29 18:12 UTC (permalink / raw)
  To: Paul Blakey
  Cc: Guy Shattah, Aaron Conole, John Hurley, Simon Horman,
	Justin Pettit, Gregory Rose, Eelco Chaudron, Flavio Leitner,
	Florian Westphal, Jiri Pirko, Rashid Khan, Sushil Kulkarni,
	Andy Gospodarek, Roi Dayan, Yossi Kuperman, Or Gerlitz,
	Rony Efraim, davem, netdev

On Tue, Jan 29, 2019 at 10:02:06AM +0200, Paul Blakey wrote:
> Set or clears (free) the skb tc recirc id extension.
> If used with OVS, OVS can clear this recirc id after it reads it.
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> ---
>  include/net/tc_act/tc_ct.h        |  2 ++
>  include/uapi/linux/tc_act/tc_ct.h |  2 ++
>  net/sched/act_ct.c                | 18 ++++++++++++++++++
>  3 files changed, 22 insertions(+)
> 
> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
> index 4a16375..6ea19d8 100644
> --- a/include/net/tc_act/tc_ct.h
> +++ b/include/net/tc_act/tc_ct.h
> @@ -16,6 +16,8 @@ struct tcf_ct {
>  	u32 mark_mask;
>  	u16 zone;
>  	bool commit;
> +	uint32_t set_recirc;
> +	bool del_recirc;
>  };
>  
>  #define to_ct(a) ((struct tcf_ct *)a)
> diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
> index 6dbd771..2279a9b 100644
> --- a/include/uapi/linux/tc_act/tc_ct.h
> +++ b/include/uapi/linux/tc_act/tc_ct.h
> @@ -14,6 +14,8 @@ struct tc_ct {
>  	__u32 labels_mask[4];
>  	__u32 mark;
>  	__u32 mark_mask;
> +	uint32_t set_recirc;
> +	bool del_recirc;
>  	bool commit;
>  };

Have you considered adding a specific action for this? Asking because
setting recirc_id can be useful outside of ct context too: decap, set
recirc_id, reclassify.

>  
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 61155cc..7822385 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -117,6 +117,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  	u_int8_t family;
>  	bool cached;
>  
> +	if (ca->del_recirc) {
> +		skb_ext_del(skb, SKB_EXT_TC_RECIRC_ID);
> +		return ca->tcf_action;
> +	}
> +
>  	/* The conntrack module expects to be working at L3. */
>  	nh_ofs = skb_network_offset(skb);
>  	skb_pull_rcsum(skb, nh_ofs);
> @@ -243,6 +248,15 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>  	skb_postpush_rcsum(skb, skb->data, nh_ofs);
>  
>  	spin_unlock(&ca->tcf_lock);
> +
> +	if (ca->set_recirc) {
> +		u32 recirc = ca->set_recirc;
> +		uint32_t *recircp = skb_ext_add(skb, SKB_EXT_TC_RECIRC_ID);
> +
> +		if (recircp)
> +			*recircp = recirc;
> +	}
> +
>  	return ca->tcf_action;
>  
>  drop:
> @@ -305,6 +319,8 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
>  		ci->net = net;
>  		ci->commit = parm->commit;
>  		ci->zone = parm->zone;
> +		ci->set_recirc = parm->set_recirc;
> +		ci->del_recirc = parm->del_recirc;
>  #if !IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
>  		if (parm->mark_mask) {
>  			NL_SET_ERR_MSG_MOD(extack, "Mark not supported by kernel config");
> @@ -378,6 +394,8 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
>  	opt.mark_mask = ci->mark_mask,
>  	memcpy(opt.labels, ci->labels, sizeof(opt.labels));
>  	memcpy(opt.labels_mask, ci->labels_mask, sizeof(opt.labels_mask));
> +	opt.set_recirc = ci->set_recirc;
> +	opt.del_recirc = ci->del_recirc;
>  
>  	if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
>  		goto nla_put_failure;
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support
  2019-01-29 18:12   ` Marcelo Leitner
@ 2019-01-30  8:20     ` Paul Blakey
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Blakey @ 2019-01-30  8:20 UTC (permalink / raw)
  To: Marcelo Leitner
  Cc: Paul Blakey, Guy Shattah, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev



On 29/01/2019 20:12, Marcelo Leitner wrote:
> On Tue, Jan 29, 2019 at 10:02:06AM +0200, Paul Blakey wrote:
>> Set or clears (free) the skb tc recirc id extension.
>> If used with OVS, OVS can clear this recirc id after it reads it.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> ---
>>  include/net/tc_act/tc_ct.h        |  2 ++
>>  include/uapi/linux/tc_act/tc_ct.h |  2 ++
>>  net/sched/act_ct.c                | 18 ++++++++++++++++++
>>  3 files changed, 22 insertions(+)
>>
>> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
>> index 4a16375..6ea19d8 100644
>> --- a/include/net/tc_act/tc_ct.h
>> +++ b/include/net/tc_act/tc_ct.h
>> @@ -16,6 +16,8 @@ struct tcf_ct {
>>  	u32 mark_mask;
>>  	u16 zone;
>>  	bool commit;
>> +	uint32_t set_recirc;
>> +	bool del_recirc;
>>  };
>>  
>>  #define to_ct(a) ((struct tcf_ct *)a)
>> diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
>> index 6dbd771..2279a9b 100644
>> --- a/include/uapi/linux/tc_act/tc_ct.h
>> +++ b/include/uapi/linux/tc_act/tc_ct.h
>> @@ -14,6 +14,8 @@ struct tc_ct {
>>  	__u32 labels_mask[4];
>>  	__u32 mark;
>>  	__u32 mark_mask;
>> +	uint32_t set_recirc;
>> +	bool del_recirc;
>>  	bool commit;
>>  };
> 
> Have you considered adding a specific action for this? Asking because
> setting recirc_id can be useful outside of ct context too: decap, set
> recirc_id, reclassify.
> 

Yes I did, I didn't know if it was overkill, till a use case such as you
suggested arises beside CT.

One thing I'm not sure of is, who will delete the recirc id, it seems
it's freed only via skb_ext_put(skb) at skb_release_head_state()
But I might be missing a del from skb_scrub_packet like nf_reset does.

>>  
>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>> index 61155cc..7822385 100644
>> --- a/net/sched/act_ct.c
>> +++ b/net/sched/act_ct.c
>> @@ -117,6 +117,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>>  	u_int8_t family;
>>  	bool cached;
>>  
>> +	if (ca->del_recirc) {
>> +		skb_ext_del(skb, SKB_EXT_TC_RECIRC_ID);
>> +		return ca->tcf_action;
>> +	}
>> +
>>  	/* The conntrack module expects to be working at L3. */
>>  	nh_ofs = skb_network_offset(skb);
>>  	skb_pull_rcsum(skb, nh_ofs);
>> @@ -243,6 +248,15 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
>>  	skb_postpush_rcsum(skb, skb->data, nh_ofs);
>>  
>>  	spin_unlock(&ca->tcf_lock);
>> +
>> +	if (ca->set_recirc) {
>> +		u32 recirc = ca->set_recirc;
>> +		uint32_t *recircp = skb_ext_add(skb, SKB_EXT_TC_RECIRC_ID);
>> +
>> +		if (recircp)
>> +			*recircp = recirc;
>> +	}
>> +
>>  	return ca->tcf_action;
>>  
>>  drop:
>> @@ -305,6 +319,8 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
>>  		ci->net = net;
>>  		ci->commit = parm->commit;
>>  		ci->zone = parm->zone;
>> +		ci->set_recirc = parm->set_recirc;
>> +		ci->del_recirc = parm->del_recirc;
>>  #if !IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
>>  		if (parm->mark_mask) {
>>  			NL_SET_ERR_MSG_MOD(extack, "Mark not supported by kernel config");
>> @@ -378,6 +394,8 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
>>  	opt.mark_mask = ci->mark_mask,
>>  	memcpy(opt.labels, ci->labels, sizeof(opt.labels));
>>  	memcpy(opt.labels_mask, ci->labels_mask, sizeof(opt.labels_mask));
>> +	opt.set_recirc = ci->set_recirc;
>> +	opt.del_recirc = ci->del_recirc;
>>  
>>  	if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
>>  		goto nla_put_failure;
>> -- 
>> 1.8.3.1
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support
  2019-01-29 18:08   ` Marcelo Leitner
@ 2019-01-30  8:40     ` Paul Blakey
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Blakey @ 2019-01-30  8:40 UTC (permalink / raw)
  To: Marcelo Leitner
  Cc: Paul Blakey, Guy Shattah, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev



On 29/01/2019 20:08, Marcelo Leitner wrote:
> On Tue, Jan 29, 2019 at 10:02:03AM +0200, Paul Blakey wrote:
>> TODO: handle EEXist.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> ---
>>  include/uapi/linux/pkt_cls.h |  2 ++
>>  net/sched/cls_flower.c       | 22 ++++++++++++++++++----
>>  2 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
>> index 121f1ef..d848d6d 100644
>> --- a/include/uapi/linux/pkt_cls.h
>> +++ b/include/uapi/linux/pkt_cls.h
>> @@ -506,6 +506,8 @@ enum {
>>  	TCA_FLOWER_KEY_CT_LABELS,
>>  	TCA_FLOWER_KEY_CT_LABELS_MASK,
>>  
>> +	TCA_FLOWER_EMATCHES,
>> +
>>  	__TCA_FLOWER_MAX,
>>  };
>>  
>> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
>> index bf74a31..f11fda0 100644
>> --- a/net/sched/cls_flower.c
>> +++ b/net/sched/cls_flower.c
>> @@ -104,6 +104,7 @@ struct cls_fl_filter {
>>  	struct rhash_head ht_node;
>>  	struct fl_flow_key mkey;
>>  	struct tcf_exts exts;
>> +	struct tcf_ematch_tree ematches;
>>  	struct tcf_result res;
>>  	struct fl_flow_key key;
>>  	struct list_head list;
>> @@ -332,10 +333,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>>  		fl_set_masked_key(&skb_mkey, &skb_key, mask);
>>  
>>  		f = fl_lookup(mask, &skb_mkey, &skb_key);
>> -		if (f && !tc_skip_sw(f->flags)) {
>> -			*res = f->res;
>> -			return tcf_exts_exec(skb, &f->exts, res);
>> -		}
>> +		if (!f || tc_skip_sw(f->flags))
>> +			continue;
>> +
>> +		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
>> +			continue;
> 
> Considering just the recirc_id (and not the other fields supported by
> ematch), have you considered integrating recirc_id match on flow
> dissector instead?  It would avoid the matching in 2 steps here and
> benefit from the hashing.
> 

yes,
although ematch is no op if not used, I actually have to convert flower
to a rhl hashtable as we can have the flower keys but different ematches
which is a pointer (and why I have the TODO in the commit msg), then all
similar flows , different only by recirc id ematch, could be on the same
list, and this would be slow. I'm not sure how real this example is, but
I agree.

So I'll change it for next patch, unless someone thinks different.

Thanks.

>> +
>> +		*res = f->res;
>> +		return tcf_exts_exec(skb, &f->exts, res);
>>  	}
>>  	return -1;
>>  }
>> @@ -388,6 +393,7 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
>>  static void __fl_destroy_filter(struct cls_fl_filter *f)
>>  {
>>  	tcf_exts_destroy(&f->exts);
>> +	tcf_em_tree_destroy(&f->ematches);
>>  	tcf_exts_put_net(&f->exts);
>>  	kfree(f);
>>  }
>> @@ -523,6 +529,7 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
>>  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
>>  	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
>>  	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
>> +	[TCA_FLOWER_EMATCHES]		= { .type = NLA_NESTED },
>>  	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
>>  					    .len = IFNAMSIZ },
>>  	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
>> @@ -1348,6 +1355,10 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
>>  	if (err < 0)
>>  		return err;
>>  
>> +	err = tcf_em_tree_validate(tp, tb[TCA_FLOWER_EMATCHES], &f->ematches);
>> +	if (err < 0)
>> +		return err;
>> +
>>  	if (tb[TCA_FLOWER_CLASSID]) {
>>  		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
>>  		tcf_bind_filter(tp, &f->res, base);
>> @@ -2143,6 +2154,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
>>  	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
>>  		goto nla_put_failure;
>>  
>> +	if (tcf_em_tree_dump(skb, &f->ematches, TCA_FLOWER_EMATCHES) < 0)
>> +		goto nla_put_failure;
>> +
>>  	key = &f->key;
>>  	mask = &f->mask->key;
>>  
>> -- 
>> 1.8.3.1
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct
  2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
                   ` (4 preceding siblings ...)
  2019-01-29  8:02 ` [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support Paul Blakey
@ 2019-02-01 13:23 ` Marcelo Leitner
  2019-02-03  8:26   ` Paul Blakey
  5 siblings, 1 reply; 13+ messages in thread
From: Marcelo Leitner @ 2019-02-01 13:23 UTC (permalink / raw)
  To: Paul Blakey
  Cc: Guy Shattah, Aaron Conole, John Hurley, Simon Horman,
	Justin Pettit, Gregory Rose, Eelco Chaudron, Flavio Leitner,
	Florian Westphal, Jiri Pirko, Rashid Khan, Sushil Kulkarni,
	Andy Gospodarek, Roi Dayan, Yossi Kuperman, Or Gerlitz,
	Rony Efraim, davem, netdev

On Tue, Jan 29, 2019 at 10:02:01AM +0200, Paul Blakey wrote:
...
> diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
> new file mode 100644
> index 0000000..6dbd771
> --- /dev/null
> +++ b/include/uapi/linux/tc_act/tc_ct.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +#ifndef __UAPI_TC_CT_H
> +#define __UAPI_TC_CT_H
> +
> +#include <linux/types.h>
> +#include <linux/pkt_cls.h>
> +
> +#define TCA_ACT_CT 18
> +
> +struct tc_ct {
> +	tc_gen;
> +	__u16 zone;
> +	__u32 labels[4];
> +	__u32 labels_mask[4];
> +	__u32 mark;
> +	__u32 mark_mask;
> +	bool commit;

This is one of the points that our implementations differs. You used a
struct and wrapped it into TCA_CT_PARMS attribute, while I broke it up
into several attributes.

cls_flower and act_bpf, for example, doesn't use structs, but others
do.

Both have pros and cons and I imagine this topic probably was already
discussed but I'm not aware of a recommendation. Do we have one?

> +};
> +
> +enum {
> +	TCA_CT_UNSPEC,
> +	TCA_CT_PARMS,
> +	TCA_CT_TM,
> +	TCA_CT_PAD,
> +	__TCA_CT_MAX
> +};
> +#define TCA_CT_MAX (__TCA_CT_MAX - 1)
> +
> +#endif /* __UAPI_TC_CT_H */
...

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct
  2019-02-01 13:23 ` [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Marcelo Leitner
@ 2019-02-03  8:26   ` Paul Blakey
  2019-02-04 12:48     ` Simon Horman
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Blakey @ 2019-02-03  8:26 UTC (permalink / raw)
  To: Marcelo Leitner
  Cc: Paul Blakey, Guy Shattah, Aaron Conole, John Hurley,
	Simon Horman, Justin Pettit, Gregory Rose, Eelco Chaudron,
	Flavio Leitner, Florian Westphal, Jiri Pirko, Rashid Khan,
	Sushil Kulkarni, Andy Gospodarek, Roi Dayan, Yossi Kuperman,
	Or Gerlitz, Rony Efraim, davem, netdev



On 01/02/2019 15:23, Marcelo Leitner wrote:
> On Tue, Jan 29, 2019 at 10:02:01AM +0200, Paul Blakey wrote:
> ...
>> diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
>> new file mode 100644
>> index 0000000..6dbd771
>> --- /dev/null
>> +++ b/include/uapi/linux/tc_act/tc_ct.h
>> @@ -0,0 +1,29 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +#ifndef __UAPI_TC_CT_H
>> +#define __UAPI_TC_CT_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/pkt_cls.h>
>> +
>> +#define TCA_ACT_CT 18
>> +
>> +struct tc_ct {
>> +	tc_gen;
>> +	__u16 zone;
>> +	__u32 labels[4];
>> +	__u32 labels_mask[4];
>> +	__u32 mark;
>> +	__u32 mark_mask;
>> +	bool commit;
> 
> This is one of the points that our implementations differs. You used a
> struct and wrapped it into TCA_CT_PARMS attribute, while I broke it up
> into several attributes.
> 
> cls_flower and act_bpf, for example, doesn't use structs, but others
> do.
> 
> Both have pros and cons and I imagine this topic probably was already
> discussed but I'm not aware of a recommendation. Do we have one?

I guess flower uses a netlink attribute per key attribute because
a lot of time, most of them won't be used, and you would send less.
we can have ct, ct + snat, ct + dnat, zone and mark.... a lot of this
won't be used sometimes.

Also you can't add nested attributes to the struct easily.

Also netlink attributes can be tested for existence, while a struct
would need a special non valid value, or another field to specify which
fields are used.

both are hard to test if a requested attribute was ignored, besides
checking the netlink echo or dumping the action back. if for example a
older kernel module and newer userspace uses a attribute above
enum TCA_CT_MAX (struct attributes also don't have max len, in nla_parse).


All in all, I think mostly netlink attributes would be better.

> 
>> +};
>> +
>> +enum {
>> +	TCA_CT_UNSPEC,
>> +	TCA_CT_PARMS,
>> +	TCA_CT_TM,
>> +	TCA_CT_PAD,
>> +	__TCA_CT_MAX
>> +};
>> +#define TCA_CT_MAX (__TCA_CT_MAX - 1)
>> +
>> +#endif /* __UAPI_TC_CT_H */
> ...
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct
  2019-02-03  8:26   ` Paul Blakey
@ 2019-02-04 12:48     ` Simon Horman
  0 siblings, 0 replies; 13+ messages in thread
From: Simon Horman @ 2019-02-04 12:48 UTC (permalink / raw)
  To: Paul Blakey
  Cc: Marcelo Leitner, Guy Shattah, Aaron Conole, John Hurley,
	Justin Pettit, Gregory Rose, Eelco Chaudron, Flavio Leitner,
	Florian Westphal, Jiri Pirko, Rashid Khan, Sushil Kulkarni,
	Andy Gospodarek, Roi Dayan, Yossi Kuperman, Or Gerlitz,
	Rony Efraim, davem, netdev

[Repost without HTML; sorry about that]

On Sun, Feb 03, 2019 at 08:26:23AM +0000, Paul Blakey wrote:
> 
> 
> On 01/02/2019 15:23, Marcelo Leitner wrote:
> > On Tue, Jan 29, 2019 at 10:02:01AM +0200, Paul Blakey wrote:
> > ...
> >> diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
> >> new file mode 100644
> >> index 0000000..6dbd771
> >> --- /dev/null
> >> +++ b/include/uapi/linux/tc_act/tc_ct.h
> >> @@ -0,0 +1,29 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> >> +#ifndef __UAPI_TC_CT_H
> >> +#define __UAPI_TC_CT_H
> >> +
> >> +#include <linux/types.h>
> >> +#include <linux/pkt_cls.h>
> >> +
> >> +#define TCA_ACT_CT 18
> >> +
> >> +struct tc_ct {
> >> +	tc_gen;
> >> +	__u16 zone;
> >> +	__u32 labels[4];
> >> +	__u32 labels_mask[4];
> >> +	__u32 mark;
> >> +	__u32 mark_mask;
> >> +	bool commit;
> > 
> > This is one of the points that our implementations differs. You used a
> > struct and wrapped it into TCA_CT_PARMS attribute, while I broke it up
> > into several attributes.
> > 
> > cls_flower and act_bpf, for example, doesn't use structs, but others
> > do.
> > 
> > Both have pros and cons and I imagine this topic probably was already
> > discussed but I'm not aware of a recommendation. Do we have one?
> 
> I guess flower uses a netlink attribute per key attribute because
> a lot of time, most of them won't be used, and you would send less.
> we can have ct, ct + snat, ct + dnat, zone and mark.... a lot of this
> won't be used sometimes.
> 
> Also you can't add nested attributes to the struct easily.
> 
> Also netlink attributes can be tested for existence, while a struct
> would need a special non valid value, or another field to specify which
> fields are used.
> 
> both are hard to test if a requested attribute was ignored, besides
> checking the netlink echo or dumping the action back. if for example a
> older kernel module and newer userspace uses a attribute above
> enum TCA_CT_MAX (struct attributes also don't have max len, in nla_parse).
> 
> 
> All in all, I think mostly netlink attributes would be better.

+1

I believe that Flower uses more attributes because its regarded as being
more flexible and that benefit outweighs the extra cost - f.e. the netlink
messages would tend to be a bit larger if a struct was used.

> 
> > 
> >> +};
> >> +
> >> +enum {
> >> +	TCA_CT_UNSPEC,
> >> +	TCA_CT_PARMS,
> >> +	TCA_CT_TM,
> >> +	TCA_CT_PAD,
> >> +	__TCA_CT_MAX
> >> +};
> >> +#define TCA_CT_MAX (__TCA_CT_MAX - 1)
> >> +
> >> +#endif /* __UAPI_TC_CT_H */
> > ...
> > 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2019-02-04 12:48 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-29  8:02 [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Paul Blakey
2019-01-29  8:02 ` [RFC PATCH net-next 2/6 v2] net/sched: cls_flower: add match on ct info Paul Blakey
2019-01-29  8:02 ` [RFC PATCH net-next 3/6 v2] net/sched: cls_flower: Add ematch support Paul Blakey
2019-01-29 18:08   ` Marcelo Leitner
2019-01-30  8:40     ` Paul Blakey
2019-01-29  8:02 ` [RFC PATCH net-next 4/6 v2] net: Add new tc recirc id skb extension Paul Blakey
2019-01-29  8:02 ` [RFC PATCH net-next 5/6 v2] net/sched: em_meta: add match on " Paul Blakey
2019-01-29  8:02 ` [RFC PATCH net-next 6/6 v2] net/sched: act_ct: Add tc recirc id set/del support Paul Blakey
2019-01-29 18:12   ` Marcelo Leitner
2019-01-30  8:20     ` Paul Blakey
2019-02-01 13:23 ` [RFC PATCH net-next 1/6 v2] net/sched: Introduce act_ct Marcelo Leitner
2019-02-03  8:26   ` Paul Blakey
2019-02-04 12:48     ` Simon Horman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.