Netdev Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method
@ 2019-06-26 10:32 wenxu
  2019-06-26 10:32 ` [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload wenxu
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: wenxu @ 2019-06-26 10:32 UTC (permalink / raw)
  To: pablo, fw; +Cc: netfilter-devel, netdev

From: wenxu <wenxu@ucloud.cn>

Add struct flow_offload_dst to support more offload methods, replacing
dst_cache, which only works for route offload.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 include/net/netfilter/nf_flow_table.h | 12 ++++++++++--
 net/netfilter/nf_flow_table_core.c    | 22 +++++++++++-----------
 net/netfilter/nf_flow_table_ip.c      |  4 ++--
 net/netfilter/nft_flow_offload.c      | 10 +++++-----
 4 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d8c1879..968be64 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -33,6 +33,10 @@ enum flow_offload_tuple_dir {
 	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
 };
 
+struct flow_offload_dst {
+	struct dst_entry		*dst_cache;
+};
+
 struct flow_offload_tuple {
 	union {
 		struct in_addr		src_v4;
@@ -55,7 +59,7 @@ struct flow_offload_tuple {
 
 	u16				mtu;
 
-	struct dst_entry		*dst_cache;
+	struct flow_offload_dst		dst;
 };
 
 struct flow_offload_tuple_rhash {
@@ -85,8 +89,12 @@ struct nf_flow_route {
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
 
+struct nf_flow_data {
+	struct nf_flow_route route;
+};
+
 struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
-					struct nf_flow_route *route);
+					struct nf_flow_data *data);
 void flow_offload_free(struct flow_offload *flow);
 
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index e3d7972..125ce1c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -24,13 +24,13 @@ struct flow_offload_entry {
 
 static void
 flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
-		      struct nf_flow_route *route,
+		      struct nf_flow_data *data,
 		      enum flow_offload_tuple_dir dir)
 {
 	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
 	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
-	struct dst_entry *other_dst = route->tuple[!dir].dst;
-	struct dst_entry *dst = route->tuple[dir].dst;
+	struct dst_entry *other_dst = date->route.tuple[!dir].dst;
+	struct dst_entry *dst = data->route.tuple[dir].dst;
 
 	ft->dir = dir;
 
@@ -57,7 +57,7 @@ struct flow_offload_entry {
 }
 
 struct flow_offload *
-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_data *data)
 {
 	struct flow_offload_entry *entry;
 	struct flow_offload *flow;
@@ -72,16 +72,16 @@ struct flow_offload *
 
 	flow = &entry->flow;
 
-	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
 		goto err_dst_cache_original;
 
-	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
 		goto err_dst_cache_reply;
 
 	entry->ct = ct;
 
-	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
-	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+	flow_offload_fill_dir(flow, ct, data, FLOW_OFFLOAD_DIR_ORIGINAL);
+	flow_offload_fill_dir(flow, ct, data, FLOW_OFFLOAD_DIR_REPLY);
 
 	if (ct->status & IPS_SRC_NAT)
 		flow->flags |= FLOW_OFFLOAD_SNAT;
@@ -91,7 +91,7 @@ struct flow_offload *
 	return flow;
 
 err_dst_cache_reply:
-	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+	dst_release(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
 err_dst_cache_original:
 	kfree(entry);
 err_ct_refcnt:
@@ -139,8 +139,8 @@ void flow_offload_free(struct flow_offload *flow)
 {
 	struct flow_offload_entry *e;
 
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_cache);
+	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_cache);
 	e = container_of(flow, struct flow_offload_entry, flow);
 	if (flow->flags & FLOW_OFFLOAD_DYING)
 		nf_ct_delete(e->ct, 0, 0);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 2413174..0016bb8 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -241,7 +241,7 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst.dst_cache;
 	outdev = rt->dst.dev;
 
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
@@ -457,7 +457,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst.dst_cache;
 	outdev = rt->dst.dev;
 
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index aa5f571..cdb7c46 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -73,7 +73,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	struct nft_flow_offload *priv = nft_expr_priv(expr);
 	struct nf_flowtable *flowtable = &priv->flowtable->data;
 	enum ip_conntrack_info ctinfo;
-	struct nf_flow_route route;
+	struct nf_flow_data data;
 	struct flow_offload *flow;
 	enum ip_conntrack_dir dir;
 	bool is_tcp = false;
@@ -108,10 +108,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 		goto out;
 
 	dir = CTINFO2DIR(ctinfo);
-	if (nft_flow_route(pkt, ct, &route, dir) < 0)
+	if (nft_flow_route(pkt, ct, &data.route, dir) < 0)
 		goto err_flow_route;
 
-	flow = flow_offload_alloc(ct, &route);
+	flow = flow_offload_alloc(ct, &data);
 	if (!flow)
 		goto err_flow_alloc;
 
@@ -124,13 +124,13 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	if (ret < 0)
 		goto err_flow_add;
 
-	dst_release(route.tuple[!dir].dst);
+	dst_release(data.route.tuple[!dir].dst);
 	return;
 
 err_flow_add:
 	flow_offload_free(flow);
 err_flow_alloc:
-	dst_release(route.tuple[!dir].dst);
+	dst_release(data.route.tuple[!dir].dst);
 err_flow_route:
 	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
 out:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-26 10:32 [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method wenxu
@ 2019-06-26 10:32 ` wenxu
  2019-06-26 18:38   ` Florian Westphal
  2019-06-26 18:40   ` Pablo Neira Ayuso
  2019-06-26 10:32 ` [PATCH 3/3 nf-next] netfilter: Flow table support for the bridge family wenxu
  2019-06-26 18:29 ` [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method Pablo Neira Ayuso
  2 siblings, 2 replies; 12+ messages in thread
From: wenxu @ 2019-06-26 10:32 UTC (permalink / raw)
  To: pablo, fw; +Cc: netfilter-devel, netdev

From: wenxu <wenxu@ucloud.cn>

With the nf_conntrack_bridge function, the bridge family can do
conntrack itself. The flow offload function is based on conntrack,
so flows in a bridge with conntrack can be offloaded.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 include/net/netfilter/nf_flow_table.h | 30 +++++++++++-
 net/netfilter/nf_flow_table_core.c    | 53 ++++++++++++++++-----
 net/netfilter/nf_flow_table_ip.c      | 41 +++++++++++++---
 net/netfilter/nft_flow_offload.c      | 89 ++++++++++++++++++++++++++++++++---
 4 files changed, 185 insertions(+), 28 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 968be64..9a0cf27 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -33,8 +33,22 @@ enum flow_offload_tuple_dir {
 	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
 };
 
+enum flow_offload_tuple_type {
+	FLOW_OFFLOAD_TYPE_INET,
+	FLOW_OFFLOAD_TYPE_BRIDGE,
+};
+
+struct dst_br_port {
+	struct net_device *dev;
+	u16	dst_vlan_tag;
+};
+
 struct flow_offload_dst {
-	struct dst_entry		*dst_cache;
+	enum flow_offload_tuple_type type;
+	union {
+		struct dst_entry		*dst_cache;
+		struct dst_br_port		dst_port;
+	};
 };
 
 struct flow_offload_tuple {
@@ -52,6 +66,7 @@ struct flow_offload_tuple {
 	};
 
 	int				iifidx;
+	u16				vlan_tag;
 
 	u8				l3proto;
 	u8				l4proto;
@@ -89,8 +104,19 @@ struct nf_flow_route {
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
 
+struct nf_flow_forward {
+	struct {
+		struct dst_br_port	dst_port;
+		u16 vlan_tag;
+	} tuple[FLOW_OFFLOAD_DIR_MAX];
+};
+
 struct nf_flow_data {
-	struct nf_flow_route route;
+	enum flow_offload_tuple_type type;
+	union {
+		struct nf_flow_route route;
+		struct nf_flow_forward forward;
+	};
 };
 
 struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 125ce1c..19ee69c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -29,16 +29,38 @@ struct flow_offload_entry {
 {
 	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
 	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
-	struct dst_entry *other_dst = date->route.tuple[!dir].dst;
-	struct dst_entry *dst = data->route.tuple[dir].dst;
 
+	struct dst_entry *other_dst;
+	struct dst_entry *dst;
+	struct dst_br_port other_dst_port;
+	struct dst_br_port dst_port;
+
+	if (data->type == FLOW_OFFLOAD_TYPE_BRIDGE) {
+		other_dst_port = data->forward.tuple[!dir].dst_port;
+		dst_port = data->forward.tuple[dir].dst_port;
+
+		ft->iifidx = other_dst_port.dev->ifindex;
+		ft->dst.dst_port = dst_port;
+		ft->vlan_tag = data->forward.tuple[dir].vlan_tag;
+	} else {
+		other_dst = data->route.tuple[!dir].dst;
+		dst = data->route.tuple[dir].dst;
+
+		ft->iifidx = other_dst->dev->ifindex;
+		ft->dst.dst_cache = dst;
+	}
+
+	ft->dst.type = data->type;
 	ft->dir = dir;
 
 	switch (ctt->src.l3num) {
 	case NFPROTO_IPV4:
 		ft->src_v4 = ctt->src.u3.in;
 		ft->dst_v4 = ctt->dst.u3.in;
-		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+		if (data->type == FLOW_OFFLOAD_TYPE_BRIDGE)
+			ft->mtu = dst_port.dev->mtu;
+		else
+			ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
 		break;
 	case NFPROTO_IPV6:
 		ft->src_v6 = ctt->src.u3.in6;
@@ -51,9 +73,6 @@ struct flow_offload_entry {
 	ft->l4proto = ctt->dst.protonum;
 	ft->src_port = ctt->src.u.tcp.port;
 	ft->dst_port = ctt->dst.u.tcp.port;
-
-	ft->iifidx = other_dst->dev->ifindex;
-	ft->dst_cache = dst;
 }
 
 struct flow_offload *
@@ -72,11 +91,13 @@ struct flow_offload *
 
 	flow = &entry->flow;
 
-	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
-		goto err_dst_cache_original;
+	if (data->type == FLOW_OFFLOAD_TYPE_INET) {
+		if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+			goto err_dst_cache_original;
 
-	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
-		goto err_dst_cache_reply;
+		if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+			goto err_dst_cache_reply;
+	}
 
 	entry->ct = ct;
 
@@ -91,7 +112,8 @@ struct flow_offload *
 	return flow;
 
 err_dst_cache_reply:
-	dst_release(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+	if (data->type == FLOW_OFFLOAD_TYPE_INET)
+		dst_release(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
 err_dst_cache_original:
 	kfree(entry);
 err_ct_refcnt:
@@ -139,8 +161,13 @@ void flow_offload_free(struct flow_offload *flow)
 {
 	struct flow_offload_entry *e;
 
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_cache);
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_cache);
+	if (flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.type == FLOW_OFFLOAD_TYPE_INET) {
+		dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_cache);
+		dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_cache);
+	} else {
+		dev_put(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_port.dev);
+		dev_put(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_port.dev);
+	}
 	e = container_of(flow, struct flow_offload_entry, flow);
 	if (flow->flags & FLOW_OFFLOAD_DYING)
 		nf_ct_delete(e->ct, 0, 0);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 0016bb8..9af01ef 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -16,6 +16,8 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
+#include "../bridge/br_private.h"
+
 static int nf_flow_state_check(struct flow_offload *flow, int proto,
 			       struct sk_buff *skb, unsigned int thoff)
 {
@@ -220,6 +222,7 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 {
 	struct flow_offload_tuple_rhash *tuplehash;
 	struct nf_flowtable *flow_table = priv;
+	int family = flow_table->type->family;
 	struct flow_offload_tuple tuple = {};
 	enum flow_offload_tuple_dir dir;
 	struct flow_offload *flow;
@@ -228,6 +231,7 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
+	u16 vlan_tag;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return NF_ACCEPT;
@@ -235,14 +239,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
 		return NF_ACCEPT;
 
+	if (family != NFPROTO_BRIDGE && family != NFPROTO_IPV4)
+		return NF_ACCEPT;
+
+	if (family == NFPROTO_BRIDGE && skb_vlan_tag_present(skb))
+		tuple.vlan_tag = skb_vlan_tag_get_id(skb);
+
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
 	if (tuplehash == NULL)
 		return NF_ACCEPT;
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst.dst_cache;
-	outdev = rt->dst.dev;
+	if (family == NFPROTO_IPV4) {
+		rt = (struct rtable *)flow->tuplehash[dir].tuple.dst.dst_cache;
+		outdev = rt->dst.dev;
+	} else {
+		vlan_tag = flow->tuplehash[dir].tuple.dst.dst_port.dst_vlan_tag;
+		outdev = flow->tuplehash[dir].tuple.dst.dst_port.dev;
+	}
 
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
@@ -258,13 +273,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 		return NF_DROP;
 
 	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
-	iph = ip_hdr(skb);
-	ip_decrease_ttl(iph);
 
 	skb->dev = outdev;
-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+	if (family == NFPROTO_IPV4) {
+		iph = ip_hdr(skb);
+		ip_decrease_ttl(iph);
+
+		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+	} else {
+		const struct net_bridge_port *p;
+
+		if (vlan_tag && (p = br_port_get_rtnl_rcu(state->in)))
+			__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan_tag);
+		else
+			__vlan_hwaccel_clear_tag(skb);
+
+		br_dev_queue_push_xmit(state->net, state->sk, skb);
+	}
 
 	return NF_STOLEN;
 }
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index cdb7c46..c88396a 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -14,6 +14,8 @@
 #include <linux/netfilter/nf_conntrack_common.h>
 #include <net/netfilter/nf_flow_table.h>
 
+#include "../bridge/br_private.h"
+
 struct nft_flow_offload {
 	struct nft_flowtable	*flowtable;
 };
@@ -49,6 +51,58 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
 	return 0;
 }
 
+static int nft_flow_forward(const struct nft_pktinfo *pkt,
+			    const struct nf_conn *ct,
+			    struct nf_flow_forward *forward,
+			    enum ip_conntrack_dir dir)
+{
+	struct net_bridge_vlan_group *vg;
+	const struct net_bridge_port *p;
+	u16 vid = 0;
+
+	if (skb_vlan_tag_present(pkt->skb))
+		vid = skb_vlan_tag_get_id(pkt->skb);
+
+	forward->tuple[dir].dst_port.dst_vlan_tag = vid;
+	forward->tuple[!dir].vlan_tag = vid;
+	forward->tuple[dir].dst_port.dev = dev_get_by_index(dev_net(nft_out(pkt)),
+							    nft_out(pkt)->ifindex);
+	forward->tuple[!dir].dst_port.dev = dev_get_by_index(dev_net(nft_in(pkt)),
+							     nft_in(pkt)->ifindex);
+
+	rtnl_lock();
+	p = br_port_get_rtnl_rcu(nft_out(pkt));
+	if (p) {
+		if (!br_opt_get(p->br, BROPT_VLAN_ENABLED))
+			goto out;
+
+		if (!vid) {
+			vg = nbp_vlan_group_rcu(p);
+			vid = br_get_pvid(vg);
+		}
+
+		if (vid) {
+			struct bridge_vlan_info info;
+
+			if (br_vlan_get_info(nft_in(pkt), vid, &info) == 0 &&
+			    info.flags & BRIDGE_VLAN_INFO_UNTAGGED)
+				vid = 0;
+		}
+	} else {
+		rtnl_unlock();
+		dev_put(forward->tuple[dir].dst_port.dev);
+		dev_put(forward->tuple[!dir].dst_port.dev);
+		return -ENOENT;
+	}
+
+out:
+	rtnl_unlock();
+	forward->tuple[!dir].dst_port.dst_vlan_tag = vid;
+	forward->tuple[dir].vlan_tag = vid;
+
+	return 0;
+}
+
 static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
 {
 	if (skb_sec_path(skb))
@@ -61,6 +115,15 @@ static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
 
 		if (unlikely(opt->optlen))
 			return true;
+	} else if (family == NFPROTO_BRIDGE) {
+		const struct iphdr *iph;
+
+		if (skb->protocol != htons(ETH_P_IP))
+			return true;
+
+		iph = ip_hdr(skb);
+		if (iph->ihl > 5)
+			return true;
 	}
 
 	return false;
@@ -76,11 +139,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	struct nf_flow_data data;
 	struct flow_offload *flow;
 	enum ip_conntrack_dir dir;
+	int family = nft_pf(pkt);
 	bool is_tcp = false;
 	struct nf_conn *ct;
 	int ret;
 
-	if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt)))
+	if (nft_flow_offload_skip(pkt->skb, family))
 		goto out;
 
 	ct = nf_ct_get(pkt->skb, &ctinfo);
@@ -108,8 +172,15 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 		goto out;
 
 	dir = CTINFO2DIR(ctinfo);
-	if (nft_flow_route(pkt, ct, &data.route, dir) < 0)
-		goto err_flow_route;
+	if (family == NFPROTO_BRIDGE) {
+		data.type = FLOW_OFFLOAD_TYPE_BRIDGE;
+		if (nft_flow_forward(pkt, ct, &data.forward, dir) < 0)
+			goto err_flow_data;
+	} else {
+		data.type = FLOW_OFFLOAD_TYPE_INET;
+		if (nft_flow_route(pkt, ct, &data.route, dir) < 0)
+			goto err_flow_data;
+	}
 
 	flow = flow_offload_alloc(ct, &data);
 	if (!flow)
@@ -124,14 +195,20 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	if (ret < 0)
 		goto err_flow_add;
 
-	dst_release(data.route.tuple[!dir].dst);
+	if (family != NFPROTO_BRIDGE)
+		dst_release(data.route.tuple[!dir].dst);
 	return;
 
 err_flow_add:
 	flow_offload_free(flow);
 err_flow_alloc:
-	dst_release(data.route.tuple[!dir].dst);
-err_flow_route:
+	if (family == NFPROTO_BRIDGE) {
+		dev_put(data.forward.tuple[dir].dst_port.dev);
+		dev_put(data.forward.tuple[!dir].dst_port.dev);
+	} else {
+		dst_release(data.route.tuple[!dir].dst);
+	}
+err_flow_data:
 	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
 out:
 	regs->verdict.code = NFT_BREAK;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/3 nf-next] netfilter: Flow table support for the bridge family
  2019-06-26 10:32 [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method wenxu
  2019-06-26 10:32 ` [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload wenxu
@ 2019-06-26 10:32 ` wenxu
  2019-06-26 18:29 ` [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method Pablo Neira Ayuso
  2 siblings, 0 replies; 12+ messages in thread
From: wenxu @ 2019-06-26 10:32 UTC (permalink / raw)
  To: pablo, fw; +Cc: netfilter-devel, netdev

From: wenxu <wenxu@ucloud.cn>

This patch adds the bridge flow table type, which implements the datapath
flow table to forward IPv4 traffic through a bridge.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 net/bridge/netfilter/Kconfig                |  8 +++++
 net/bridge/netfilter/Makefile               |  1 +
 net/bridge/netfilter/nf_flow_table_bridge.c | 46 +++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)
 create mode 100644 net/bridge/netfilter/nf_flow_table_bridge.c

diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index f4fb0b9..cba5f71 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -33,6 +33,14 @@ config NF_CONNTRACK_BRIDGE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_FLOW_TABLE_BRIDGE
+	tristate "Netfilter flow table bridge module"
+	depends on NF_FLOW_TABLE && NF_CONNTRACK_BRIDGE
+	help
+	  This option adds the flow table bridge support.
+
+	  To compile it as a module, choose M here.
+
 endif # NF_TABLES_BRIDGE
 
 menuconfig BRIDGE_NF_EBTABLES
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 9d77673..deb81e6 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT)  += nft_reject_bridge.o
 
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
+obj-$(CONFIG_NF_FLOW_TABLE_BRIDGE) += nf_flow_table_bridge.o
 
 # packet logging
 obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
diff --git a/net/bridge/netfilter/nf_flow_table_bridge.c b/net/bridge/netfilter/nf_flow_table_bridge.c
new file mode 100644
index 0000000..ad3220c
--- /dev/null
+++ b/net/bridge/netfilter/nf_flow_table_bridge.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nf_flow_offload_bridge_hook(void *priv, struct sk_buff *skb,
+			    const struct nf_hook_state *state)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		return nf_flow_offload_ip_hook(priv, skb, state);
+	}
+
+	return NF_ACCEPT;
+}
+
+static struct nf_flowtable_type flowtable_bridge = {
+	.family		= NFPROTO_BRIDGE,
+	.init		= nf_flow_table_init,
+	.free		= nf_flow_table_free,
+	.hook		= nf_flow_offload_bridge_hook,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nf_flow_bridge_module_init(void)
+{
+	nft_register_flowtable_type(&flowtable_bridge);
+
+	return 0;
+}
+
+static void __exit nf_flow_bridge_module_exit(void)
+{
+	nft_unregister_flowtable_type(&flowtable_bridge);
+}
+
+module_init(nf_flow_bridge_module_init);
+module_exit(nf_flow_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_BRIDGE);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method
  2019-06-26 10:32 [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method wenxu
  2019-06-26 10:32 ` [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload wenxu
  2019-06-26 10:32 ` [PATCH 3/3 nf-next] netfilter: Flow table support for the bridge family wenxu
@ 2019-06-26 18:29 ` Pablo Neira Ayuso
  2 siblings, 0 replies; 12+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-26 18:29 UTC (permalink / raw)
  To: wenxu; +Cc: fw, netfilter-devel, netdev

On Wed, Jun 26, 2019 at 06:32:26PM +0800, wenxu@ucloud.cn wrote:
> From: wenxu <wenxu@ucloud.cn>

Fix email subject, to:

netfilter: nf_flow_table: Refactor flow_offload_tuple to destination

> Add struct flow_offload_dst to support more offload method to replace
> dst_cache which only work for route offload.
> 
> Signed-off-by: wenxu <wenxu@ucloud.cn>
> ---
>  include/net/netfilter/nf_flow_table.h | 12 ++++++++++--
>  net/netfilter/nf_flow_table_core.c    | 22 +++++++++++-----------
>  net/netfilter/nf_flow_table_ip.c      |  4 ++--
>  net/netfilter/nft_flow_offload.c      | 10 +++++-----
>  4 files changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index d8c1879..968be64 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -33,6 +33,10 @@ enum flow_offload_tuple_dir {
>  	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
>  };
>  
> +struct flow_offload_dst {
> +	struct dst_entry		*dst_cache;
> +};
> +
>  struct flow_offload_tuple {
>  	union {
>  		struct in_addr		src_v4;
> @@ -55,7 +59,7 @@ struct flow_offload_tuple {
>  
>  	u16				mtu;
>  
> -	struct dst_entry		*dst_cache;
> +	struct flow_offload_dst		dst;
>  };
>  
>  struct flow_offload_tuple_rhash {
> @@ -85,8 +89,12 @@ struct nf_flow_route {
>  	} tuple[FLOW_OFFLOAD_DIR_MAX];
>  };
>  
> +struct nf_flow_data {

Please, call this:

struct nf_flow_dst

instead.

> +	struct nf_flow_route route;
> +};
> +
>  struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
> -					struct nf_flow_route *route);
> +					struct nf_flow_data *data);
>  void flow_offload_free(struct flow_offload *flow);
>  
>  int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index e3d7972..125ce1c 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -24,13 +24,13 @@ struct flow_offload_entry {
>  
>  static void
>  flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
> -		      struct nf_flow_route *route,
> +		      struct nf_flow_data *data,
>  		      enum flow_offload_tuple_dir dir)
>  {
>  	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
>  	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
> -	struct dst_entry *other_dst = route->tuple[!dir].dst;
> -	struct dst_entry *dst = route->tuple[dir].dst;
> +	struct dst_entry *other_dst = date->route.tuple[!dir].dst;
> +	struct dst_entry *dst = data->route.tuple[dir].dst;
>  
>  	ft->dir = dir;
>  
> @@ -57,7 +57,7 @@ struct flow_offload_entry {
>  }
>  
>  struct flow_offload *
> -flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
> +flow_offload_alloc(struct nf_conn *ct, struct nf_flow_data *data)
>  {
>  	struct flow_offload_entry *entry;
>  	struct flow_offload *flow;
> @@ -72,16 +72,16 @@ struct flow_offload *
>  
>  	flow = &entry->flow;
>  
> -	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
> +	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
>  		goto err_dst_cache_original;
>  
> -	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
> +	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
>  		goto err_dst_cache_reply;
>  
>  	entry->ct = ct;
>  
> -	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
> -	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
> +	flow_offload_fill_dir(flow, ct, data, FLOW_OFFLOAD_DIR_ORIGINAL);
> +	flow_offload_fill_dir(flow, ct, data, FLOW_OFFLOAD_DIR_REPLY);
>  
>  	if (ct->status & IPS_SRC_NAT)
>  		flow->flags |= FLOW_OFFLOAD_SNAT;
> @@ -91,7 +91,7 @@ struct flow_offload *
>  	return flow;
>  
>  err_dst_cache_reply:
> -	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
> +	dst_release(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
>  err_dst_cache_original:
>  	kfree(entry);
>  err_ct_refcnt:
> @@ -139,8 +139,8 @@ void flow_offload_free(struct flow_offload *flow)
>  {
>  	struct flow_offload_entry *e;
>  
> -	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
> -	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
> +	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_cache);
> +	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_cache);
>  	e = container_of(flow, struct flow_offload_entry, flow);
>  	if (flow->flags & FLOW_OFFLOAD_DYING)
>  		nf_ct_delete(e->ct, 0, 0);
> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> index 2413174..0016bb8 100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
> @@ -241,7 +241,7 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
>  
>  	dir = tuplehash->tuple.dir;
>  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> -	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
> +	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst.dst_cache;
>  	outdev = rt->dst.dev;
>  
>  	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
> @@ -457,7 +457,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>  
>  	dir = tuplehash->tuple.dir;
>  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> -	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
> +	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst.dst_cache;
>  	outdev = rt->dst.dev;
>  
>  	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
> index aa5f571..cdb7c46 100644
> --- a/net/netfilter/nft_flow_offload.c
> +++ b/net/netfilter/nft_flow_offload.c
> @@ -73,7 +73,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  	struct nft_flow_offload *priv = nft_expr_priv(expr);
>  	struct nf_flowtable *flowtable = &priv->flowtable->data;
>  	enum ip_conntrack_info ctinfo;
> -	struct nf_flow_route route;
> +	struct nf_flow_data data;

Please, reverse xmas tree for variable definition, from longest line
to shortest one.

>  	struct flow_offload *flow;
>  	enum ip_conntrack_dir dir;
>  	bool is_tcp = false;
> @@ -108,10 +108,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  		goto out;
>  
>  	dir = CTINFO2DIR(ctinfo);
> -	if (nft_flow_route(pkt, ct, &route, dir) < 0)
> +	if (nft_flow_route(pkt, ct, &data.route, dir) < 0)
>  		goto err_flow_route;
>  
> -	flow = flow_offload_alloc(ct, &route);
> +	flow = flow_offload_alloc(ct, &data);
>  	if (!flow)
>  		goto err_flow_alloc;
>  
> @@ -124,13 +124,13 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  	if (ret < 0)
>  		goto err_flow_add;
>  
> -	dst_release(route.tuple[!dir].dst);
> +	dst_release(data.route.tuple[!dir].dst);
>  	return;
>  
>  err_flow_add:
>  	flow_offload_free(flow);
>  err_flow_alloc:
> -	dst_release(route.tuple[!dir].dst);
> +	dst_release(data.route.tuple[!dir].dst);
>  err_flow_route:
>  	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
>  out:
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-26 10:32 ` [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload wenxu
@ 2019-06-26 18:38   ` Florian Westphal
  2019-06-26 19:19     ` Florian Westphal
  2019-06-26 18:40   ` Pablo Neira Ayuso
  1 sibling, 1 reply; 12+ messages in thread
From: Florian Westphal @ 2019-06-26 18:38 UTC (permalink / raw)
  To: wenxu; +Cc: pablo, fw, netfilter-devel, netdev

wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> index 0016bb8..9af01ef 100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
> -	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
> +	if (family == NFPROTO_IPV4) {
> +		iph = ip_hdr(skb);
> +		ip_decrease_ttl(iph);
> +
> +		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
> +		skb_dst_set_noref(skb, &rt->dst);
> +		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
> +	} else {
> +		const struct net_bridge_port *p;
> +
> +		if (vlan_tag && (p = br_port_get_rtnl_rcu(state->in)))
> +			__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan_tag);
> +		else
> +			__vlan_hwaccel_clear_tag(skb);
> +
> +		br_dev_queue_push_xmit(state->net, state->sk, skb);

Won't that result in a module dep on bridge?

What's the idea with this patch?

Do you see a performance improvement when bypassing bridge layer? If so,
how much?

I just wonder if it's really cheaper than not using bridge conntrack in
the first place :-)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-26 10:32 ` [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload wenxu
  2019-06-26 18:38   ` Florian Westphal
@ 2019-06-26 18:40   ` Pablo Neira Ayuso
  1 sibling, 0 replies; 12+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-26 18:40 UTC (permalink / raw)
  To: wenxu; +Cc: fw, netfilter-devel, netdev

On Wed, Jun 26, 2019 at 06:32:27PM +0800, wenxu@ucloud.cn wrote:
> From: wenxu <wenxu@ucloud.cn>
> 
> With the nf_conntrack_bridge function, the bridge family can do
> conntrack itself. The flow offload function is based on
> conntrack, so flows in a bridge with conntrack can be
> offloaded.
> 
> Signed-off-by: wenxu <wenxu@ucloud.cn>
> ---
>  include/net/netfilter/nf_flow_table.h | 30 +++++++++++-
>  net/netfilter/nf_flow_table_core.c    | 53 ++++++++++++++++-----
>  net/netfilter/nf_flow_table_ip.c      | 41 +++++++++++++---
>  net/netfilter/nft_flow_offload.c      | 89 ++++++++++++++++++++++++++++++++---
>  4 files changed, 185 insertions(+), 28 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index 968be64..9a0cf27 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -33,8 +33,22 @@ enum flow_offload_tuple_dir {
>  	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
>  };
>  
> +enum flow_offload_tuple_type {
> +	FLOW_OFFLOAD_TYPE_INET,
> +	FLOW_OFFLOAD_TYPE_BRIDGE,
> +};
> +
> +struct dst_br_port {
> +	struct net_device *dev;
> +	u16	dst_vlan_tag;
> +};
> +
>  struct flow_offload_dst {
> -	struct dst_entry		*dst_cache;
> +	enum flow_offload_tuple_type type;
> +	union {
> +		struct dst_entry		*dst_cache;
> +		struct dst_br_port		dst_port;
> +	};
>  };
>  
>  struct flow_offload_tuple {
> @@ -52,6 +66,7 @@ struct flow_offload_tuple {
>  	};
>  
>  	int				iifidx;
> +	u16				vlan_tag;
>  
>  	u8				l3proto;
>  	u8				l4proto;
> @@ -89,8 +104,19 @@ struct nf_flow_route {
>  	} tuple[FLOW_OFFLOAD_DIR_MAX];
>  };
>  
> +struct nf_flow_forward {
> +	struct {
> +		struct dst_br_port	dst_port;
> +		u16 vlan_tag;
> +	} tuple[FLOW_OFFLOAD_DIR_MAX];
> +};
> +
>  struct nf_flow_data {
> -	struct nf_flow_route route;
> +	enum flow_offload_tuple_type type;
> +	union {
> +		struct nf_flow_route route;
> +		struct nf_flow_forward forward;
> +	};
>  };
>  
>  struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 125ce1c..19ee69c 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -29,16 +29,38 @@ struct flow_offload_entry {
>  {
>  	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
>  	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
> -	struct dst_entry *other_dst = date->route.tuple[!dir].dst;
> -	struct dst_entry *dst = data->route.tuple[dir].dst;
>  
> +	struct dst_entry *other_dst;
> +	struct dst_entry *dst;
> +	struct dst_br_port other_dst_port;
> +	struct dst_br_port dst_port;

Please, reverse xmas tree for variable definitions.

> +	if (data->type == FLOW_OFFLOAD_TYPE_BRIDGE) {

Could you add functions for these?

        nf_flow_fill_bridge_dst(...)

> +		other_dst_port = data->forward.tuple[!dir].dst_port;
> +		dst_port = data->forward.tuple[dir].dst_port;
> +
> +		ft->iifidx = other_dst_port.dev->ifindex;
> +		ft->dst.dst_port = dst_port;
> +		ft->vlan_tag = data->forward.tuple[dir].vlan_tag;
> +	} else {

You could probably make an initial patch to add this function, so this
patch becomes smaller and easier to review:

                nf_flow_fill_inet_dst(...)

to wrap the code below.

> +		other_dst = data->route.tuple[!dir].dst;
> +		dst = data->route.tuple[dir].dst;
> +
> +		ft->iifidx = other_dst->dev->ifindex;
> +		ft->dst.dst_cache = dst;
> +	}
> +
> +	ft->dst.type = data->type;
>  	ft->dir = dir;
>  
>  	switch (ctt->src.l3num) {
>  	case NFPROTO_IPV4:
>  		ft->src_v4 = ctt->src.u3.in;
>  		ft->dst_v4 = ctt->dst.u3.in;
> -		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
> +		if (data->type == FLOW_OFFLOAD_TYPE_BRIDGE)
> +			ft->mtu = dst_port.dev->mtu;
> +		else
> +			ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
>  		break;
>  	case NFPROTO_IPV6:
>  		ft->src_v6 = ctt->src.u3.in6;
> @@ -51,9 +73,6 @@ struct flow_offload_entry {
>  	ft->l4proto = ctt->dst.protonum;
>  	ft->src_port = ctt->src.u.tcp.port;
>  	ft->dst_port = ctt->dst.u.tcp.port;
> -
> -	ft->iifidx = other_dst->dev->ifindex;
> -	ft->dst_cache = dst;
>  }
>  
>  struct flow_offload *
> @@ -72,11 +91,13 @@ struct flow_offload *
>  
>  	flow = &entry->flow;
>  
> -	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
> -		goto err_dst_cache_original;
> +	if (data->type == FLOW_OFFLOAD_TYPE_INET) {

Place this code below in a function?

> +		if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
> +			goto err_dst_cache_original;
>  
> -	if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
> -		goto err_dst_cache_reply;
> +		if (!dst_hold_safe(data->route.tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
> +			goto err_dst_cache_reply;
> +	}
>  
>  	entry->ct = ct;
>  
> @@ -91,7 +112,8 @@ struct flow_offload *
>  	return flow;
>  
>  err_dst_cache_reply:
> -	dst_release(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
> +	if (data->type == FLOW_OFFLOAD_TYPE_INET)
> +		dst_release(data->route.tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);

Same thing here, place this code in a function.

>  err_dst_cache_original:
>  	kfree(entry);
>  err_ct_refcnt:
> @@ -139,8 +161,13 @@ void flow_offload_free(struct flow_offload *flow)
>  {
>  	struct flow_offload_entry *e;
>  
> -	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_cache);
> -	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_cache);
> +	if (flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.type == FLOW_OFFLOAD_TYPE_INET) {

Place this code in a function.

Better use switch() rather than if().

> +		dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_cache);
> +		dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_cache);
> +	} else {
> +		dev_put(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst.dst_port.dev);
> +		dev_put(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst.dst_port.dev);
> +	}
>  	e = container_of(flow, struct flow_offload_entry, flow);
>  	if (flow->flags & FLOW_OFFLOAD_DYING)
>  		nf_ct_delete(e->ct, 0, 0);
> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> index 0016bb8..9af01ef 100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
> @@ -16,6 +16,8 @@
>  #include <linux/tcp.h>
>  #include <linux/udp.h>
>  
> +#include "../bridge/br_private.h"
> +
>  static int nf_flow_state_check(struct flow_offload *flow, int proto,
>  			       struct sk_buff *skb, unsigned int thoff)
>  {
> @@ -220,6 +222,7 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
>  {
>  	struct flow_offload_tuple_rhash *tuplehash;
>  	struct nf_flowtable *flow_table = priv;
> +	int family = flow_table->type->family;
>  	struct flow_offload_tuple tuple = {};
>  	enum flow_offload_tuple_dir dir;
>  	struct flow_offload *flow;
> @@ -228,6 +231,7 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
>  	unsigned int thoff;
>  	struct iphdr *iph;
>  	__be32 nexthop;
> +	u16 vlan_tag;
>  
>  	if (skb->protocol != htons(ETH_P_IP))
>  		return NF_ACCEPT;
> @@ -235,14 +239,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
>  	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
>  		return NF_ACCEPT;
>  
> +	if (family != NFPROTO_BRIDGE && family != NFPROTO_IPV4)
> +		return NF_ACCEPT;
> +
> +	if (family == NFPROTO_BRIDGE && skb_vlan_tag_present(skb))
> +		tuple.vlan_tag = skb_vlan_tag_get_id(skb);
> +
>  	tuplehash = flow_offload_lookup(flow_table, &tuple);
>  	if (tuplehash == NULL)
>  		return NF_ACCEPT;
>  
>  	dir = tuplehash->tuple.dir;
>  	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> -	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst.dst_cache;
> -	outdev = rt->dst.dev;
> +	if (family == NFPROTO_IPV4) {
> +		rt = (struct rtable *)flow->tuplehash[dir].tuple.dst.dst_cache;
> +		outdev = rt->dst.dev;
> +	} else {
> +		vlan_tag = flow->tuplehash[dir].tuple.dst.dst_port.dst_vlan_tag;
> +		outdev = flow->tuplehash[dir].tuple.dst.dst_port.dev;
> +	}
>  
>  	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
>  		return NF_ACCEPT;
> @@ -258,13 +273,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
>  		return NF_DROP;
>  
>  	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
> -	iph = ip_hdr(skb);
> -	ip_decrease_ttl(iph);
>  
>  	skb->dev = outdev;
> -	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
> -	skb_dst_set_noref(skb, &rt->dst);
> -	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
> +	if (family == NFPROTO_IPV4) {
> +		iph = ip_hdr(skb);
> +		ip_decrease_ttl(iph);
> +
> +		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
> +		skb_dst_set_noref(skb, &rt->dst);
> +		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
> +	} else {
> +		const struct net_bridge_port *p;
> +
> +		if (vlan_tag && (p = br_port_get_rtnl_rcu(state->in)))
> +			__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan_tag);
> +		else
> +			__vlan_hwaccel_clear_tag(skb);
> +
> +		br_dev_queue_push_xmit(state->net, state->sk, skb);
> +	}

You can probably place the common code into functions, then make
functions that use these helpers to build the bridge and the inet
datapaths independently.

That would avoid all these if() branches, used so often to reuse code,
which make the code hard to follow.

>  	return NF_STOLEN;
>  }
> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
> index cdb7c46..c88396a 100644
> --- a/net/netfilter/nft_flow_offload.c
> +++ b/net/netfilter/nft_flow_offload.c
> @@ -14,6 +14,8 @@
>  #include <linux/netfilter/nf_conntrack_common.h>
>  #include <net/netfilter/nf_flow_table.h>
>  
> +#include "../bridge/br_private.h"
> +
>  struct nft_flow_offload {
>  	struct nft_flowtable	*flowtable;
>  };
> @@ -49,6 +51,58 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
>  	return 0;
>  }
>  
> +static int nft_flow_forward(const struct nft_pktinfo *pkt,
> +			    const struct nf_conn *ct,
> +			    struct nf_flow_forward *forward,
> +			    enum ip_conntrack_dir dir)
> +{
> +	struct net_bridge_vlan_group *vg;
> +	const struct net_bridge_port *p;
> +	u16 vid = 0;
> +
> +	if (skb_vlan_tag_present(pkt->skb))
> +		vid = skb_vlan_tag_get_id(pkt->skb);
> +
> +	forward->tuple[dir].dst_port.dst_vlan_tag = vid;
> +	forward->tuple[!dir].vlan_tag = vid;
> +	forward->tuple[dir].dst_port.dev = dev_get_by_index(dev_net(nft_out(pkt)),
> +							    nft_out(pkt)->ifindex);
> +	forward->tuple[!dir].dst_port.dev = dev_get_by_index(dev_net(nft_in(pkt)),
> +							     nft_in(pkt)->ifindex);
> +
> +	rtnl_lock();

rtnl_lock() from the packet path?

> +	p = br_port_get_rtnl_rcu(nft_out(pkt));
> +	if (p) {
> +		if (!br_opt_get(p->br, BROPT_VLAN_ENABLED))
> +			goto out;
> +
> +		if (!vid) {
> +			vg = nbp_vlan_group_rcu(p);
> +			vid = br_get_pvid(vg);
> +		}
> +
> +		if (vid) {
> +			struct bridge_vlan_info info;
> +
> +			if (br_vlan_get_info(nft_in(pkt), vid, &info) == 0 &&
> +			    info.flags & BRIDGE_VLAN_INFO_UNTAGGED)
> +				vid = 0;
> +		}
> +	} else {
> +		rtnl_unlock();
> +		dev_put(forward->tuple[dir].dst_port.dev);
> +		dev_put(forward->tuple[!dir].dst_port.dev);
> +		return -ENOENT;
> +	}
> +
> +out:
> +	rtnl_unlock();
> +	forward->tuple[!dir].dst_port.dst_vlan_tag = vid;
> +	forward->tuple[dir].vlan_tag = vid;
> +
> +	return 0;
> +}
> +
>  static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
>  {
>  	if (skb_sec_path(skb))
> @@ -61,6 +115,15 @@ static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
>  
>  		if (unlikely(opt->optlen))
>  			return true;
> +	} else if (family == NFPROTO_BRIDGE) {
> +		const struct iphdr *iph;
> +
> +		if (skb->protocol != htons(ETH_P_IP))
> +			return true;
> +
> +		iph = ip_hdr(skb);
> +		if (iph->ihl > 5)
> +			return true;
>  	}
>  
>  	return false;
> @@ -76,11 +139,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  	struct nf_flow_data data;
>  	struct flow_offload *flow;
>  	enum ip_conntrack_dir dir;
> +	int family = nft_pf(pkt);
>  	bool is_tcp = false;
>  	struct nf_conn *ct;
>  	int ret;
>  
> -	if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt)))
> +	if (nft_flow_offload_skip(pkt->skb, family))
>  		goto out;
>  
>  	ct = nf_ct_get(pkt->skb, &ctinfo);
> @@ -108,8 +172,15 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  		goto out;
>  
>  	dir = CTINFO2DIR(ctinfo);
> -	if (nft_flow_route(pkt, ct, &data.route, dir) < 0)
> -		goto err_flow_route;
> +	if (family == NFPROTO_BRIDGE) {
> +		data.type = FLOW_OFFLOAD_TYPE_BRIDGE;
> +		if (nft_flow_forward(pkt, ct, &data.forward, dir) < 0)
> +			goto err_flow_data;
> +	} else {
> +		data.type = FLOW_OFFLOAD_TYPE_INET;
> +		if (nft_flow_route(pkt, ct, &data.route, dir) < 0)
> +			goto err_flow_data;
> +	}
>  
>  	flow = flow_offload_alloc(ct, &data);
>  	if (!flow)
> @@ -124,14 +195,20 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  	if (ret < 0)
>  		goto err_flow_add;
>  
> -	dst_release(data.route.tuple[!dir].dst);
> +	if (family != NFPROTO_BRIDGE)
> +		dst_release(data.route.tuple[!dir].dst);
>  	return;
>  
>  err_flow_add:
>  	flow_offload_free(flow);
>  err_flow_alloc:
> -	dst_release(data.route.tuple[!dir].dst);
> -err_flow_route:
> +	if (family == NFPROTO_BRIDGE) {
> +		dev_put(data.forward.tuple[dir].dst_port.dev);
> +		dev_put(data.forward.tuple[!dir].dst_port.dev);
> +	} else {
> +		dst_release(data.route.tuple[!dir].dst);
> +	}
> +err_flow_data:
>  	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
>  out:
>  	regs->verdict.code = NFT_BREAK;
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-26 18:38   ` Florian Westphal
@ 2019-06-26 19:19     ` Florian Westphal
  2019-06-27  6:22       ` wenxu
  0 siblings, 1 reply; 12+ messages in thread
From: Florian Westphal @ 2019-06-26 19:19 UTC (permalink / raw)
  To: Florian Westphal; +Cc: wenxu, pablo, netfilter-devel, netdev

Florian Westphal <fw@strlen.de> wrote:
> wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
> > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> > index 0016bb8..9af01ef 100644
> > --- a/net/netfilter/nf_flow_table_ip.c
> > +++ b/net/netfilter/nf_flow_table_ip.c
> > -	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
> > +	if (family == NFPROTO_IPV4) {
> > +		iph = ip_hdr(skb);
> > +		ip_decrease_ttl(iph);
> > +
> > +		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
> > +		skb_dst_set_noref(skb, &rt->dst);
> > +		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
> > +	} else {
> > +		const struct net_bridge_port *p;
> > +
> > +		if (vlan_tag && (p = br_port_get_rtnl_rcu(state->in)))
> > +			__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan_tag);
> > +		else
> > +			__vlan_hwaccel_clear_tag(skb);
> > +
> > +		br_dev_queue_push_xmit(state->net, state->sk, skb);
> 
> Won't that result in a module dep on bridge?
> 
> Whats the idea with this patch?
> 
> Do you see a performance improvement when bypassing bridge layer? If so,
> how much?
> 
> I just wonder if its really cheaper than not using bridge conntrack in
> the first place :-)

Addendum: Did you look at the nftables fwd expression?  Maybe you can use
it as a simpler way to speed things up?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-26 19:19     ` Florian Westphal
@ 2019-06-27  6:22       ` wenxu
  2019-06-27 12:58         ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: wenxu @ 2019-06-27  6:22 UTC (permalink / raw)
  To: Florian Westphal; +Cc: pablo, netfilter-devel, netdev


On 6/27/2019 3:19 AM, Florian Westphal wrote:
> Florian Westphal <fw@strlen.de> wrote:
>> wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
>>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
>>> index 0016bb8..9af01ef 100644
>>> --- a/net/netfilter/nf_flow_table_ip.c
>>> +++ b/net/netfilter/nf_flow_table_ip.c
>>> -	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
>>> +	if (family == NFPROTO_IPV4) {
>>> +		iph = ip_hdr(skb);
>>> +		ip_decrease_ttl(iph);
>>> +
>>> +		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
>>> +		skb_dst_set_noref(skb, &rt->dst);
>>> +		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
>>> +	} else {
>>> +		const struct net_bridge_port *p;
>>> +
>>> +		if (vlan_tag && (p = br_port_get_rtnl_rcu(state->in)))
>>> +			__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan_tag);
>>> +		else
>>> +			__vlan_hwaccel_clear_tag(skb);
>>> +
>>> +		br_dev_queue_push_xmit(state->net, state->sk, skb);
>> Won't that result in a module dep on bridge?
I  will fix it in version 2
>>
>> Whats the idea with this patch?
>>
>> Do you see a performance improvement when bypassing bridge layer? If so,
>> how much?
>>
>> I just wonder if its really cheaper than not using bridge conntrack in
>> the first place :-)

This patch builds on the conntrack support in the bridge.  It bypasses the fdb lookup
and the conntrack lookup to get the performance improvement. More importantly, it prepares
for hardware offload in the future, once nf_tables adds hardware offload support.

>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-27  6:22       ` wenxu
@ 2019-06-27 12:58         ` Pablo Neira Ayuso
  2019-06-28  3:37           ` wenxu
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-27 12:58 UTC (permalink / raw)
  To: wenxu; +Cc: Florian Westphal, netfilter-devel, netdev

On Thu, Jun 27, 2019 at 02:22:36PM +0800, wenxu wrote:
> On 6/27/2019 3:19 AM, Florian Westphal wrote:
> > Florian Westphal <fw@strlen.de> wrote:
[...]
> >> Whats the idea with this patch?
> >>
> >> Do you see a performance improvement when bypassing bridge layer? If so,
> >> how much?
> >>
> >> I just wonder if its really cheaper than not using bridge conntrack in
> >> the first place :-)
> 
> This patch is based on the conntrack function in bridge.  It will
> bypass the fdb lookup and conntrack lookup to get the performance 
> improvement. The more important things for hardware offload in the
> future with nf_tables add hardware offload support

Florian would like to see numbers / benchmark.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-27 12:58         ` Pablo Neira Ayuso
@ 2019-06-28  3:37           ` wenxu
  2019-06-28  6:06             ` Florian Westphal
  0 siblings, 1 reply; 12+ messages in thread
From: wenxu @ 2019-06-28  3:37 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Florian Westphal; +Cc: netfilter-devel, netdev


On 6/27/2019 8:58 PM, Pablo Neira Ayuso wrote:
> On Thu, Jun 27, 2019 at 02:22:36PM +0800, wenxu wrote:
>> On 6/27/2019 3:19 AM, Florian Westphal wrote:
>>> Florian Westphal <fw@strlen.de> wrote:
> [...]
>>>> Whats the idea with this patch?
>>>>
>>>> Do you see a performance improvement when bypassing bridge layer? If so,
>>>> how much?
>>>>
>>>> I just wonder if its really cheaper than not using bridge conntrack in
>>>> the first place :-)
>> This patch is based on the conntrack function in bridge.  It will
>> bypass the fdb lookup and conntrack lookup to get the performance 
>> improvement. The more important things for hardware offload in the
>> future with nf_tables add hardware offload support
> Florian would like to see numbers / benchmark.


I just did a simple performance test with the following setup.

ip netns add ns21
ip netns add ns22
ip l add dev veth21 type veth peer name eth0 netns ns21
ip l add dev veth22 type veth peer name eth0 netns ns22
ifconfig veth21 up
ifconfig veth22 up
ip netns exec ns21 ip a a dev eth0 10.0.0.7/24
ip netns exec ns22 ip a a dev eth0 10.0.0.8/24
ip netns exec ns21 ifconfig eth0 up
ip netns exec ns22 ifconfig eth0 up

ip l add dev br0 type bridge vlan_filtering 1
brctl addif br0 veth21
brctl addif br0 veth22

ifconfig br0 up

bridge vlan add dev veth21 vid 200 pvid untagged
bridge vlan add dev veth22 vid 200 pvid untagged

nft add table bridge firewall
nft add chain bridge firewall zones { type filter hook prerouting priority - 300 \; }
nft add rule bridge firewall zones counter ct zone set iif map { "veth21" : 2, "veth22" : 2 }

nft add chain bridge firewall rule-200-ingress
nft add rule bridge firewall rule-200-ingress ct zone 2 ct state established,related counter accept
nft add rule bridge firewall rule-200-ingress ct zone 2 ct state invalid counter drop
nft add rule bridge firewall rule-200-ingress ct zone 2 tcp dport 23 ct state new counter accept
nft add rule bridge firewall rule-200-ingress counter drop

nft add chain bridge firewall rule-200-egress
nft add rule bridge firewall rule-200-egress ct zone 2 ct state established,related counter accept
nft add rule bridge firewall rule-200-egress ct zone 2 ct state invalid counter drop
nft add rule bridge firewall rule-200-egress ct zone 2 tcp dport 23 ct state new counter drop
nft add rule bridge firewall rule-200-egress counter accept

nft add chain bridge firewall rules-all { type filter hook prerouting priority - 150 \; }
nft add rule bridge firewall rules-all counter meta protocol ip iif vmap { "veth22" : jump rule-200-ingress, "veth21" : jump rule-200-egress }



ns21 communicates with ns22.


Run iperf from ns21 to 10.0.0.8 (in ns22) with dport 22.


First run, with offload enabled:

nft add flowtable bridge firewall fb2 { hook ingress priority 0 \; devices = { veth21, veth22 } \; }
nft add chain bridge firewall ftb-all {type filter hook forward priority 0 \; policy accept \; }
nft add rule bridge firewall ftb-all counter ct zone 2 ip protocol tcp flow offload @fb2

# iperf -c 10.0.0.8 -p 22 -t 60 -i2
------------------------------------------------------------
Client connecting to 10.0.0.8, TCP port 22
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[  3] local 10.0.0.7 port 60014 connected with 10.0.0.8 port 22
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 2.0 sec  10.8 GBytes  46.5 Gbits/sec
[  3]  2.0- 4.0 sec  10.9 GBytes  46.7 Gbits/sec
[  3]  4.0- 6.0 sec  10.9 GBytes  46.8 Gbits/sec
[  3]  6.0- 8.0 sec  11.0 GBytes  47.2 Gbits/sec
[  3]  8.0-10.0 sec  11.0 GBytes  47.1 Gbits/sec
[  3] 10.0-12.0 sec  11.0 GBytes  47.1 Gbits/sec
[  3] 12.0-14.0 sec  11.7 GBytes  50.4 Gbits/sec
[  3] 14.0-16.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 16.0-18.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 18.0-20.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 20.0-22.0 sec  12.0 GBytes  51.5 Gbits/sec
[  3] 22.0-24.0 sec  12.0 GBytes  51.4 Gbits/sec
[  3] 24.0-26.0 sec  12.0 GBytes  51.3 Gbits/sec
[  3] 26.0-28.0 sec  12.0 GBytes  51.7 Gbits/sec
[  3] 28.0-30.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 30.0-32.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 32.0-34.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 34.0-36.0 sec  12.0 GBytes  51.5 Gbits/sec
[  3] 36.0-38.0 sec  12.0 GBytes  51.5 Gbits/sec
[  3] 38.0-40.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 40.0-42.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 42.0-44.0 sec  12.0 GBytes  51.5 Gbits/sec
[  3] 44.0-46.0 sec  12.0 GBytes  51.4 Gbits/sec
[  3] 46.0-48.0 sec  12.0 GBytes  51.4 Gbits/sec
[  3] 48.0-50.0 sec  12.0 GBytes  51.5 Gbits/sec
[  3] 50.0-52.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 52.0-54.0 sec  12.0 GBytes  51.6 Gbits/sec
[  3] 54.0-56.0 sec  12.0 GBytes  51.5 Gbits/sec
[  3] 56.0-58.0 sec  11.9 GBytes  51.2 Gbits/sec
[  3] 58.0-60.0 sec  11.8 GBytes  50.7 Gbits/sec
[  3]  0.0-60.0 sec   353 GBytes  50.5 Gbits/sec


The second run, without any offload:
# iperf -c 10.0.0.8 -p 22 -t 60 -i2
------------------------------------------------------------
Client connecting to 10.0.0.8, TCP port 22
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[  3] local 10.0.0.7 port 60536 connected with 10.0.0.8 port 22
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 2.0 sec  8.88 GBytes  38.1 Gbits/sec
[  3]  2.0- 4.0 sec  9.02 GBytes  38.7 Gbits/sec
[  3]  4.0- 6.0 sec  9.02 GBytes  38.8 Gbits/sec
[  3]  6.0- 8.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3]  8.0-10.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 10.0-12.0 sec  9.04 GBytes  38.8 Gbits/sec
[  3] 12.0-14.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 14.0-16.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 16.0-18.0 sec  9.06 GBytes  38.9 Gbits/sec
[  3] 18.0-20.0 sec  9.07 GBytes  39.0 Gbits/sec
[  3] 20.0-22.0 sec  9.07 GBytes  38.9 Gbits/sec
[  3] 22.0-24.0 sec  9.06 GBytes  38.9 Gbits/sec
[  3] 24.0-26.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 26.0-28.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 28.0-30.0 sec  9.06 GBytes  38.9 Gbits/sec
[  3] 30.0-32.0 sec  9.06 GBytes  38.9 Gbits/sec
[  3] 32.0-34.0 sec  9.07 GBytes  38.9 Gbits/sec
[  3] 34.0-36.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 36.0-38.0 sec  9.03 GBytes  38.8 Gbits/sec
[  3] 38.0-40.0 sec  9.03 GBytes  38.8 Gbits/sec
[  3] 40.0-42.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 42.0-44.0 sec  9.03 GBytes  38.8 Gbits/sec
[  3] 44.0-46.0 sec  9.04 GBytes  38.8 Gbits/sec
[  3] 46.0-48.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 48.0-50.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 50.0-52.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 52.0-54.0 sec  9.06 GBytes  38.9 Gbits/sec
[  3] 54.0-56.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 56.0-58.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3] 58.0-60.0 sec  9.05 GBytes  38.9 Gbits/sec
[  3]  0.0-60.0 sec   271 GBytes  38.8 Gbits/sec




>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-28  3:37           ` wenxu
@ 2019-06-28  6:06             ` Florian Westphal
  2019-06-28  9:51               ` wenxu
  0 siblings, 1 reply; 12+ messages in thread
From: Florian Westphal @ 2019-06-28  6:06 UTC (permalink / raw)
  To: wenxu; +Cc: Pablo Neira Ayuso, Florian Westphal, netfilter-devel, netdev

wenxu <wenxu@ucloud.cn> wrote:
> ns21 iperf to 10.0.0.8 with dport 22 in ns22
> first time with OFFLOAD enable
> 
> nft add flowtable bridge firewall fb2 { hook ingress priority 0 \; devices = { veth21, veth22 } \; }
> nft add chain bridge firewall ftb-all {type filter hook forward priority 0 \; policy accept \; }
> nft add rule bridge firewall ftb-all counter ct zone 2 ip protocol tcp flow offload @fb2
> 
> # iperf -c 10.0.0.8 -p 22 -t 60 -i2
[..]
> [  3]  0.0-60.0 sec   353 GBytes  50.5 Gbits/sec
> 
> The second time on any offload:
> # iperf -c 10.0.0.8 -p 22 -t 60 -i2
> [  3]  0.0-60.0 sec   271 GBytes  38.8 Gbits/sec

Wow, this is pretty impressive.  Do you have numbers without
offload and no connection tracking?

Is this with CONFIG_RETPOLINE=y (just curious)?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload
  2019-06-28  6:06             ` Florian Westphal
@ 2019-06-28  9:51               ` wenxu
  0 siblings, 0 replies; 12+ messages in thread
From: wenxu @ 2019-06-28  9:51 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Pablo Neira Ayuso, netfilter-devel, netdev


On 6/28/2019 2:06 PM, Florian Westphal wrote:
> wenxu <wenxu@ucloud.cn> wrote:
>> ns21 iperf to 10.0.0.8 with dport 22 in ns22
>> first time with OFFLOAD enable
>>
>> nft add flowtable bridge firewall fb2 { hook ingress priority 0 \; devices = { veth21, veth22 } \; }
>> nft add chain bridge firewall ftb-all {type filter hook forward priority 0 \; policy accept \; }
>> nft add rule bridge firewall ftb-all counter ct zone 2 ip protocol tcp flow offload @fb2
>>
>> # iperf -c 10.0.0.8 -p 22 -t 60 -i2
> [..]
>> [  3]  0.0-60.0 sec   353 GBytes  50.5 Gbits/sec
>>
>> The second time on any offload:
>> # iperf -c 10.0.0.8 -p 22 -t 60 -i2
>> [  3]  0.0-60.0 sec   271 GBytes  38.8 Gbits/sec
> Wow, this is pretty impressive.  Do you have numbers without
> offload and no connection tracking?

There is no other connection  on the bridge in zone 2

>
> Is this with CONFIG_RETPOLINE=y (just curious)?
Yes, it is enabled.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, back to index

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-26 10:32 [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method wenxu
2019-06-26 10:32 ` [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload wenxu
2019-06-26 18:38   ` Florian Westphal
2019-06-26 19:19     ` Florian Westphal
2019-06-27  6:22       ` wenxu
2019-06-27 12:58         ` Pablo Neira Ayuso
2019-06-28  3:37           ` wenxu
2019-06-28  6:06             ` Florian Westphal
2019-06-28  9:51               ` wenxu
2019-06-26 18:40   ` Pablo Neira Ayuso
2019-06-26 10:32 ` [PATCH 3/3 nf-next] netfilter: Flow table support for the bridge family wenxu
2019-06-26 18:29 ` [PATCH 1/3 nf-next] netfilter:nf_flow_table: Refactor flow_offload_tuple to support more offload method Pablo Neira Ayuso

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org netdev@archiver.kernel.org
	public-inbox-index netdev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox