All of lore.kernel.org
 help / color / mirror / Atom feed
From: Felix Fietkau <nbd@nbd.name>
To: netdev@vger.kernel.org
Subject: [RFC 2/2] net: bridge: add a software fast-path implementation
Date: Thu, 10 Feb 2022 15:24:01 +0100	[thread overview]
Message-ID: <20220210142401.4912-2-nbd@nbd.name> (raw)
In-Reply-To: <20220210142401.4912-1-nbd@nbd.name>

This opt-in feature creates a per-port cache of dest_mac+src_mac+vlan tuples
with enough information to quickly push frames to the correct destination port.
It can be enabled per-port.

Cache entries are automatically created when an skb is forwarded from one port
to another, and only if there is room and both ports have the offload flag set.

Whenever a fdb entry changes, all corresponding cache entries associated with
it are automatically flushed.

In my test on MT7622 when bridging 1.85 Gbit/s from Ethernet to WLAN, this
significantly improves bridging performance, especially with VLAN filtering
enabled:

CPU usage:
- no offload, no VLAN: 79%
- no offload, VLAN: 84%
- offload, no VLAN: 73-74%
- offload, VLAN: 74%

MT7622 has support for hardware offloading of packets from LAN to WLAN, both
routed and bridged. For bridging it needs source/destination MAC address entries
like the ones stored in this offload cache. This code will be extended later
in order to create appropriate flow_offload rules to handle this.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/linux/if_bridge.h       |   1 +
 include/uapi/linux/if_link.h    |   3 +
 net/bridge/Kconfig              |  10 +
 net/bridge/Makefile             |   1 +
 net/bridge/br.c                 |   8 +
 net/bridge/br_device.c          |   4 +
 net/bridge/br_fdb.c             |  20 +-
 net/bridge/br_forward.c         |   3 +
 net/bridge/br_if.c              |   4 +
 net/bridge/br_input.c           |   5 +
 net/bridge/br_netlink.c         |  31 ++-
 net/bridge/br_offload.c         | 466 ++++++++++++++++++++++++++++++++
 net/bridge/br_private.h         |  30 +-
 net/bridge/br_private_offload.h |  53 ++++
 net/bridge/br_stp.c             |   3 +
 net/bridge/br_vlan_tunnel.c     |   3 +
 net/core/rtnetlink.c            |   2 +-
 17 files changed, 641 insertions(+), 6 deletions(-)
 create mode 100644 net/bridge/br_offload.c
 create mode 100644 net/bridge/br_private_offload.h

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 18d3b264b754..944630df0ec3 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -59,6 +59,7 @@ struct br_ip_list {
 #define BR_MRP_LOST_IN_CONT	BIT(19)
 #define BR_TX_FWD_OFFLOAD	BIT(20)
 #define BR_BPDU_FILTER		BIT(21)
+#define BR_OFFLOAD		BIT(22)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4c847c2d6afa..a7349414a27f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -482,6 +482,8 @@ enum {
 	IFLA_BR_VLAN_STATS_PER_PORT,
 	IFLA_BR_MULTI_BOOLOPT,
 	IFLA_BR_MCAST_QUERIER_STATE,
+	IFLA_BR_OFFLOAD_CACHE_SIZE,
+	IFLA_BR_OFFLOAD_CACHE_RESERVED,
 	__IFLA_BR_MAX,
 };
 
@@ -538,6 +540,7 @@ enum {
 	IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
 	IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
 	IFLA_BRPORT_BPDU_FILTER,
+	IFLA_BRPORT_OFFLOAD,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 3c8ded7d3e84..3f93da1f66da 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -34,6 +34,16 @@ config BRIDGE
 
 	  If unsure, say N.
 
+config BRIDGE_OFFLOAD
+	bool "Offloading support"
+	depends on BRIDGE
+	help
+	  If you say Y here, you can turn on a per-port offload flag, which
+	  will cache src/destination mac address flows between ports and handle
+	  them faster.
+
+	  If unsure, say N.
+
 config BRIDGE_IGMP_SNOOPING
 	bool "IGMP/MLD snooping"
 	depends on BRIDGE
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 7fb9a021873b..166f76b5f258 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -11,6 +11,7 @@ bridge-y	:= br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
 			br_netlink_tunnel.o br_arp_nd_proxy.o
 
 bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
+bridge-$(CONFIG_BRIDGE_OFFLOAD) += br_offload.o
 
 bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1fac72cc617f..bd46e5e20b30 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -18,6 +18,7 @@
 #include <net/switchdev.h>
 
 #include "br_private.h"
+#include "br_private_offload.h"
 
 /*
  * Handle changes in state of network devices enslaved to a bridge.
@@ -381,6 +382,10 @@ static int __init br_init(void)
 	if (err)
 		goto err_out;
 
+	err = br_offload_init();
+	if (err)
+		goto err_out0;
+
 	err = register_pernet_subsys(&br_net_ops);
 	if (err)
 		goto err_out1;
@@ -430,6 +435,8 @@ static int __init br_init(void)
 err_out2:
 	unregister_pernet_subsys(&br_net_ops);
 err_out1:
+	br_offload_fini();
+err_out0:
 	br_fdb_fini();
 err_out:
 	stp_proto_unregister(&br_stp_proto);
@@ -452,6 +459,7 @@ static void __exit br_deinit(void)
 #if IS_ENABLED(CONFIG_ATM_LANE)
 	br_fdb_test_addr_hook = NULL;
 #endif
+	br_offload_fini();
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8d6bab244c4a..10c4e4039c7b 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -524,6 +524,10 @@ void br_dev_setup(struct net_device *dev)
 	br->bridge_hello_time = br->hello_time = 2 * HZ;
 	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
 	br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	br->offload_cache_size = 128;
+	br->offload_cache_reserved = 8;
+#endif
 	dev->max_mtu = ETH_MAX_MTU;
 
 	br_netfilter_rtable_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd473..49abfc13a323 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,6 +23,7 @@
 #include <net/switchdev.h>
 #include <trace/events/bridge.h>
 #include "br_private.h"
+#include "br_private_offload.h"
 
 static const struct rhashtable_params br_fdb_rht_params = {
 	.head_offset = offsetof(struct net_bridge_fdb_entry, rhnode),
@@ -185,6 +186,8 @@ static void fdb_notify(struct net_bridge *br,
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	br_offload_fdb_update(fdb);
+
 	if (swdev_notify)
 		br_switchdev_fdb_notify(br, fdb, type);
 
@@ -393,6 +396,10 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
 	fdb->key.vlan_id = vid;
 	fdb->flags = flags;
 	fdb->updated = fdb->used = jiffies;
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	INIT_HLIST_HEAD(&fdb->offload_in);
+	INIT_HLIST_HEAD(&fdb->offload_out);
+#endif
 	err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode,
 					    br_fdb_rht_params);
 	if (err) {
@@ -527,8 +534,10 @@ void br_fdb_cleanup(struct work_struct *work)
 	 */
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
-		unsigned long this_timer = f->updated + delay;
+		unsigned long this_timer;
 
+		br_offload_fdb_refresh_time(br, f);
+		this_timer = f->updated + delay;
 		if (test_bit(BR_FDB_STATIC, &f->flags) ||
 		    test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) {
 			if (test_bit(BR_FDB_NOTIFY, &f->flags)) {
@@ -651,8 +660,11 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 		if (num >= maxnum)
 			break;
 
-		if (has_expired(br, f))
-			continue;
+		if (has_expired(br, f)) {
+			if (!br_offload_fdb_refresh_time(br, f) ||
+			    has_expired(br, f))
+				continue;
+		}
 
 		/* ignore pseudo entry for local MAC address */
 		if (!f->dst)
@@ -797,6 +809,7 @@ int br_fdb_dump(struct sk_buff *skb,
 		if (!filter_dev && f->dst)
 			goto skip;
 
+		br_offload_fdb_refresh_time(br, f);
 		err = fdb_fill_info(skb, br, f,
 				    NETLINK_CB(cb->skb).portid,
 				    cb->nlh->nlmsg_seq,
@@ -831,6 +844,7 @@ int br_fdb_get(struct sk_buff *skb,
 		goto errout;
 	}
 
+	br_offload_fdb_refresh_time(br, f);
 	err = fdb_fill_info(skb, br, f, portid, seq,
 			    RTM_NEWNEIGH, 0);
 errout:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 9fe5c888f27d..6d9025106d9d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -16,6 +16,7 @@
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
 #include "br_private.h"
+#include "br_private_offload.h"
 
 /* Don't forward packets to originating port or forwarding disabled */
 static inline int should_deliver(const struct net_bridge_port *p,
@@ -32,6 +33,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	br_offload_output(skb);
+
 	skb_push(skb, ETH_HLEN);
 	if (!is_skb_forwardable(skb->dev, skb))
 		goto drop;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 55f47cadb114..c68c7f6cc429 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -25,6 +25,7 @@
 #include <net/net_namespace.h>
 
 #include "br_private.h"
+#include "br_private_offload.h"
 
 /*
  * Determine initial path cost based on speed.
@@ -772,6 +773,9 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
 
 	if (mask & BR_NEIGH_SUPPRESS)
 		br_recalculate_neigh_suppress_enabled(br);
+
+	if (mask & BR_OFFLOAD)
+		br_offload_port_state(p);
 }
 
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d8263c4849c1..b606ca06ff2d 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -22,6 +22,7 @@
 #include <linux/rculist.h>
 #include "br_private.h"
 #include "br_private_tunnel.h"
+#include "br_private_offload.h"
 
 static int
 br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -164,6 +165,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 			dst->used = now;
 		br_forward(dst->dst, skb, local_rcv, false);
 	} else {
+		br_offload_skb_disable(skb);
 		if (!mcast_hit)
 			br_flood(br, skb, pkt_type, local_rcv, false);
 		else
@@ -293,6 +295,9 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 
 	memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
 
+	if (br_offload_input(p, skb))
+		return RX_HANDLER_CONSUMED;
+
 	p = br_port_get_rcu(skb->dev);
 	if (p->flags & BR_VLAN_TUNNEL)
 		br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p));
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 11215c55adc2..994aca4b633a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -19,6 +19,7 @@
 #include "br_private_cfm.h"
 #include "br_private_tunnel.h"
 #include "br_private_mcast_eht.h"
+#include "br_private_offload.h"
 
 static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
 				u32 filter_mask)
@@ -185,6 +186,7 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(1)	/* IFLA_BRPORT_NEIGH_SUPPRESS */
 		+ nla_total_size(1)	/* IFLA_BRPORT_ISOLATED */
 		+ nla_total_size(1)	/* IFLA_BRPORT_BPDU_FILTER */
+		+ nla_total_size(1)	/* IFLA_BRPORT_OFFLOAD */
 		+ nla_total_size(sizeof(struct ifla_bridge_id))	/* IFLA_BRPORT_ROOT_ID */
 		+ nla_total_size(sizeof(struct ifla_bridge_id))	/* IFLA_BRPORT_BRIDGE_ID */
 		+ nla_total_size(sizeof(u16))	/* IFLA_BRPORT_DESIGNATED_PORT */
@@ -271,7 +273,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 	    nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
 		       !!(p->flags & BR_MRP_LOST_IN_CONT)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_BPDU_FILTER, !!(p->flags & BR_BPDU_FILTER)))
+	    nla_put_u8(skb, IFLA_BRPORT_BPDU_FILTER, !!(p->flags & BR_BPDU_FILTER)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_OFFLOAD, !!(p->flags & BR_OFFLOAD)))
 		return -EMSGSIZE;
 
 	timerval = br_timer_value(&p->message_age_timer);
@@ -832,6 +835,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
 	[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
 	[IFLA_BRPORT_BPDU_FILTER] = { .type = NLA_U8 },
+	[IFLA_BRPORT_OFFLOAD] = { .type = NLA_U8 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -897,6 +901,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
 	br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS);
 	br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
 	br_set_port_flag(p, tb, IFLA_BRPORT_BPDU_FILTER, BR_BPDU_FILTER);
+	br_set_port_flag(p, tb, IFLA_BRPORT_OFFLOAD, BR_OFFLOAD);
 
 	changed_mask = old_flags ^ p->flags;
 
@@ -1165,6 +1170,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
 	[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
 	[IFLA_BR_MULTI_BOOLOPT] =
 		NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)),
+	[IFLA_BR_OFFLOAD_CACHE_SIZE] = { .type = NLA_U32 },
+	[IFLA_BR_OFFLOAD_CACHE_RESERVED] = { .type = NLA_U32 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1424,6 +1431,19 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 		br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val);
 	}
 #endif
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	if (data[IFLA_BR_OFFLOAD_CACHE_SIZE]) {
+		u32 val = nla_get_u32(data[IFLA_BR_OFFLOAD_CACHE_SIZE]);
+
+		br_offload_set_cache_size(br, val);
+	}
+
+	if (data[IFLA_BR_OFFLOAD_CACHE_RESERVED]) {
+		u32 val = nla_get_u32(data[IFLA_BR_OFFLOAD_CACHE_RESERVED]);
+
+		br_offload_set_cache_reserved(br, val);
+	}
+#endif
 
 	if (data[IFLA_BR_MULTI_BOOLOPT]) {
 		struct br_boolopt_multi *bm;
@@ -1512,6 +1532,10 @@ static size_t br_get_size(const struct net_device *brdev)
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_IPTABLES */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_IP6TABLES */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_ARPTABLES */
+#endif
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_OFFLOAD_CACHE_SIZE */
+	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_OFFLOAD_CACHE_RESERVED */
 #endif
 	       nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */
 	       0;
@@ -1636,6 +1660,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 		       br_opt_get(br, BROPT_NF_CALL_ARPTABLES) ? 1 : 0))
 		return -EMSGSIZE;
 #endif
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	if (nla_put_u32(skb, IFLA_BR_OFFLOAD_CACHE_SIZE, br->offload_cache_size) ||
+	    nla_put_u32(skb, IFLA_BR_OFFLOAD_CACHE_RESERVED, br->offload_cache_reserved))
+		return -EMSGSIZE;
+#endif
 
 	return 0;
 }
diff --git a/net/bridge/br_offload.c b/net/bridge/br_offload.c
new file mode 100644
index 000000000000..8cb9266e6cf9
--- /dev/null
+++ b/net/bridge/br_offload.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include "br_private.h"
+#include "br_private_offload.h"
+
+/*
+ * Global lock, always taken with BHs disabled in this file.  It is held
+ * around flow insertion and removal, around walks of the per-fdb flow lists
+ * and around changes to a port's offload.enabled flag.
+ */
+static DEFINE_SPINLOCK(offload_lock);
+
+/*
+ * Lookup key of a cached flow.
+ *
+ * flow_params uses sizeof(struct bridge_flow_key) as key_len, so the whole
+ * struct takes part in hashing; br_offload_prepare_key() therefore clears
+ * it with memset() before filling in the fields.
+ */
+struct bridge_flow_key {
+	u8 dest[ETH_ALEN];	/* destination MAC from the Ethernet header */
+	u8 src[ETH_ALEN];	/* source MAC from the Ethernet header */
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	u16 vlan_tag;		/* ingress VLAN id, valid if vlan_present */
+	bool vlan_present;	/* ingress frame carried a bridge-proto tag */
+#endif
+};
+
+/* One cached src/dest(/vlan) flow between two bridge ports. */
+struct bridge_flow {
+	/* port whose offload.rht this flow is inserted into */
+	struct net_bridge_port *port;
+	/* rhashtable linkage */
+	struct rhash_head node;
+	/* hash key, see struct bridge_flow_key */
+	struct bridge_flow_key key;
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	/* VLAN tag state of the skb at egress, replayed by br_offload_input() */
+	bool vlan_out_present;
+	u16 vlan_out;
+#endif
+
+	/* jiffies of last use; set to 0 once the flow has been unlinked */
+	unsigned long used;
+	/* fdb entries found for the source (in) and destination (out) MAC */
+	struct net_bridge_fdb_entry *fdb_in, *fdb_out;
+	/* linkage on fdb_in->offload_in and fdb_out->offload_out */
+	struct hlist_node fdb_list_in, fdb_list_out;
+
+	/* used to defer the final free via call_rcu() */
+	struct rcu_head rcu;
+};
+
+/* rhashtable layout for struct bridge_flow, keyed by the embedded key. */
+static const struct rhashtable_params flow_params = {
+	.automatic_shrinking = true,
+	.head_offset = offsetof(struct bridge_flow, node),
+	.key_len = sizeof(struct bridge_flow_key),
+	.key_offset = offsetof(struct bridge_flow, key),
+};
+
+/* Slab cache for struct bridge_flow, created in br_offload_init(). */
+static struct kmem_cache *offload_cache __read_mostly;
+
+/* RCU callback: hand the flow embedding @head back to the slab cache. */
+static void
+flow_rcu_free(struct rcu_head *head)
+{
+	kmem_cache_free(offload_cache,
+			container_of(head, struct bridge_flow, rcu));
+}
+
+/*
+ * Release a flow that is no longer reachable through its hash table:
+ * mark it dead, unlink it from both fdb lists and free it via call_rcu().
+ */
+static void
+__br_offload_flow_free(struct bridge_flow *flow)
+{
+	/* br_offload_gc_work() skips flows whose timestamp is zero */
+	flow->used = 0;
+
+	hlist_del(&flow->fdb_list_in);
+	hlist_del(&flow->fdb_list_out);
+	call_rcu(&flow->rcu, flow_rcu_free);
+}
+
+/*
+ * Remove @flow from its port's hash table and release it.  Nothing is
+ * released if the removal from the table fails.
+ */
+static void
+br_offload_flow_free(struct bridge_flow *flow)
+{
+	struct rhashtable *ht = &flow->port->offload.rht;
+
+	if (!rhashtable_remove_fast(ht, &flow->node, flow_params))
+		__br_offload_flow_free(flow);
+}
+
+/*
+ * Bring @fdb->updated forward to @flow's last-use time if the flow was
+ * used more recently.  Returns true when the fdb timestamp was changed.
+ */
+static bool
+br_offload_flow_fdb_refresh_time(struct bridge_flow *flow,
+				 struct net_bridge_fdb_entry *fdb)
+{
+	if (time_after(flow->used, fdb->updated)) {
+		fdb->updated = flow->used;
+		return true;
+	}
+
+	return false;
+}
+
+
+/* Propagate @flow's last-use time to both fdb entries it references. */
+static void
+br_offload_flow_refresh_time(struct bridge_flow *flow)
+{
+	struct net_bridge_fdb_entry *in = flow->fdb_in;
+	struct net_bridge_fdb_entry *out = flow->fdb_out;
+
+	br_offload_flow_fdb_refresh_time(flow, in);
+	br_offload_flow_fdb_refresh_time(flow, out);
+}
+
+/*
+ * Per-element callback passed to rhashtable_free_and_destroy(): push the
+ * flow's timestamp to its fdb entries, then release it.  @arg is unused.
+ */
+static void
+br_offload_destroy_cb(void *ptr, void *arg)
+{
+	struct bridge_flow *f = ptr;
+
+	br_offload_flow_refresh_time(f);
+	__br_offload_flow_free(f);
+}
+
+/*
+ * True when the number of flows cached on @p plus the bridge's reserved
+ * headroom has reached the bridge's cache size.
+ */
+static bool
+br_offload_need_gc(struct net_bridge_port *p)
+{
+	struct net_bridge *br = p->br;
+
+	return atomic_read(&p->offload.rht.nelems) +
+	       br->offload_cache_reserved >= br->offload_cache_size;
+}
+
+/*
+ * Deferred garbage collection for one port's flow cache.
+ *
+ * Walks the port's flow table to find the live flow with the oldest
+ * timestamp, evicts it if the cache still needs trimming, and requeues
+ * itself while the port stays enabled and more eviction is required.
+ */
+static void
+br_offload_gc_work(struct work_struct *work)
+{
+	struct rhashtable_iter hti;
+	struct net_bridge_port *p;
+	struct bridge_flow *gc_flow = NULL;
+	struct bridge_flow *flow;
+	unsigned long gc_used = 0;
+
+	p = container_of(work, struct net_bridge_port, offload.gc_work);
+
+	if (!br_offload_need_gc(p))
+		return;
+
+	/*
+	 * The candidate chosen during the walk is dereferenced again below,
+	 * after the walk itself has finished.  Keep an outer RCU read-side
+	 * section open until then so that a flow removed concurrently (and
+	 * freed through call_rcu()) stays valid for the re-check.
+	 */
+	rcu_read_lock();
+
+	rhashtable_walk_enter(&p->offload.rht, &hti);
+	rhashtable_walk_start(&hti);
+	while ((flow = rhashtable_walk_next(&hti)) != NULL) {
+		unsigned long used;
+
+		if (IS_ERR(flow))
+			continue;
+
+		/* a zero timestamp marks a flow that is already being freed */
+		used = READ_ONCE(flow->used);
+		if (!used)
+			continue;
+
+		if (gc_flow && !time_before(used, gc_used))
+			continue;
+
+		gc_flow = flow;
+		gc_used = used;
+	}
+	rhashtable_walk_stop(&hti);
+	rhashtable_walk_exit(&hti);
+
+	if (!gc_flow)
+		goto out;
+
+	spin_lock_bh(&offload_lock);
+	/* only evict if the flow was neither used nor freed in the meantime */
+	if (br_offload_need_gc(p) && gc_flow->used == gc_used)
+		br_offload_flow_free(gc_flow);
+	if (p->offload.enabled && br_offload_need_gc(p))
+		queue_work(system_long_wq, work);
+	spin_unlock_bh(&offload_lock);
+
+out:
+	rcu_read_unlock();
+}
+
+/*
+ * Re-evaluate whether the flow cache of @p should be active.
+ *
+ * The cache is active only while the port is in BR_STATE_FORWARDING and has
+ * BR_OFFLOAD set.  Enabling sets up the flow table (and, the first time,
+ * the GC work item); disabling destroys the table, releasing every cached
+ * flow, and then waits for a GC run that may still be pending.
+ *
+ * NOTE(review): rhashtable_init() and rhashtable_free_and_destroy() are
+ * called with offload_lock held and BHs disabled -- confirm that neither
+ * can sleep in this context.
+ */
+void br_offload_port_state(struct net_bridge_port *p)
+{
+	struct net_bridge_port_offload *o = &p->offload;
+	bool enabled = true;
+	bool flush = false;
+
+	if (p->state != BR_STATE_FORWARDING ||
+	    !(p->flags & BR_OFFLOAD))
+		enabled = false;
+
+	spin_lock_bh(&offload_lock);
+	if (o->enabled == enabled)
+		goto out;
+
+	if (enabled) {
+		if (!o->gc_work.func)
+			INIT_WORK(&o->gc_work, br_offload_gc_work);
+		/* keep the port disabled if the table cannot be set up */
+		if (rhashtable_init(&o->rht, &flow_params))
+			goto out;
+	} else {
+		flush = true;
+		rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
+	}
+
+	o->enabled = enabled;
+
+out:
+	spin_unlock_bh(&offload_lock);
+
+	if (flush)
+		flush_work(&o->gc_work);
+}
+
+/*
+ * Drop every cached flow that references @fdb, either as its source or as
+ * its destination entry.  Each flow's last-use time is pushed to its fdb
+ * entries before the flow is released.
+ */
+void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
+{
+	struct bridge_flow *flow;
+	struct hlist_node *next;
+
+	spin_lock_bh(&offload_lock);
+
+	/* flows for which @fdb is the source-side entry */
+	hlist_for_each_entry_safe(flow, next, &fdb->offload_in, fdb_list_in) {
+		br_offload_flow_refresh_time(flow);
+		br_offload_flow_free(flow);
+	}
+
+	/* flows for which @fdb is the destination-side entry */
+	hlist_for_each_entry_safe(flow, next, &fdb->offload_out, fdb_list_out) {
+		br_offload_flow_refresh_time(flow);
+		br_offload_flow_free(flow);
+	}
+
+	spin_unlock_bh(&offload_lock);
+}
+
+/*
+ * Fold the last-use times of all flows attached to @fdb into @fdb->updated
+ * and release flows that have been idle for longer than the bridge's
+ * ageing time.
+ *
+ * Returns true if @fdb->updated was moved forward by at least one flow.
+ */
+bool br_offload_fdb_refresh_time(struct net_bridge *br,
+				 struct net_bridge_fdb_entry *fdb)
+{
+	unsigned long expiry = jiffies - br->ageing_time;
+	struct bridge_flow *flow;
+	struct hlist_node *next;
+	bool refreshed = false;
+
+	spin_lock_bh(&offload_lock);
+
+	hlist_for_each_entry_safe(flow, next, &fdb->offload_in, fdb_list_in) {
+		refreshed |= br_offload_flow_fdb_refresh_time(flow, fdb);
+		if (time_before(flow->used, expiry))
+			br_offload_flow_free(flow);
+	}
+
+	hlist_for_each_entry_safe(flow, next, &fdb->offload_out, fdb_list_out) {
+		refreshed |= br_offload_flow_fdb_refresh_time(flow, fdb);
+		if (time_before(flow->used, expiry))
+			br_offload_flow_free(flow);
+	}
+
+	spin_unlock_bh(&offload_lock);
+
+	return refreshed;
+}
+
+/*
+ * Build the flow lookup key for @skb as seen on port @p.
+ *
+ * The key is fully zeroed first, then the destination and source MAC are
+ * copied from the Ethernet header.  The VLAN fields are filled in only when
+ * VLAN filtering is enabled on the bridge and the skb carries a tag of the
+ * bridge's VLAN protocol.
+ */
+static void
+br_offload_prepare_key(struct net_bridge_port *p, struct bridge_flow_key *key,
+		       struct sk_buff *skb)
+{
+	memset(key, 0, sizeof(*key));
+	memcpy(key, eth_hdr(skb), 2 * ETH_ALEN);
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	if (br_opt_get(p->br, BROPT_VLAN_ENABLED) &&
+	    skb_vlan_tag_present(skb) &&
+	    skb->vlan_proto == p->br->vlan_proto) {
+		key->vlan_present = true;
+		key->vlan_tag = skb_vlan_tag_get_id(skb);
+	}
+#endif
+}
+
+/*
+ * Learn a new flow from a frame that went through the regular forwarding
+ * path.  Called from br_dev_queue_push_xmit().
+ *
+ * Nothing happens unless br_offload_input() flagged the skb as a cache miss
+ * (cb->offload) and the egress port has offload enabled.  Otherwise the fdb
+ * entries for the source and destination MAC are looked up and a flow keyed
+ * on the ingress MAC/VLAN tuple is inserted, remembering the VLAN tag state
+ * of the skb at egress.
+ *
+ * NOTE(review): the size limit and GC trigger look at the egress port's
+ * table while the flow is inserted into flow->port's table -- confirm this
+ * is intended.
+ */
+void br_offload_output(struct sk_buff *skb)
+{
+	struct net_bridge_port_offload *o;
+	struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb;
+	struct net_bridge_port *p, *inp;
+	struct net_device *dev;
+	struct net_bridge_fdb_entry *fdb_in, *fdb_out;
+	struct net_bridge_vlan_group *vg;
+	struct bridge_flow_key key;
+	struct bridge_flow *flow;
+	u16 vlan;
+
+	if (!cb->offload)
+		return;
+
+	rcu_read_lock();
+
+	p = br_port_get_rcu(skb->dev);
+	if (!p)
+		goto out;
+
+	o = &p->offload;
+	if (!o->enabled)
+		goto out;
+
+	if (atomic_read(&p->offload.rht.nelems) >= p->br->offload_cache_size)
+		goto out;
+
+	dev = dev_get_by_index_rcu(dev_net(p->br->dev), cb->input_ifindex);
+	if (!dev)
+		goto out;
+
+	/* the ingress device may have left the bridge in the meantime */
+	inp = br_port_get_rcu(dev);
+	if (!inp)
+		goto out;
+
+	vg = nbp_vlan_group_rcu(inp);
+	vlan = cb->input_vlan_present ? cb->input_vlan_tag : br_get_pvid(vg);
+	fdb_in = br_fdb_find_rcu(p->br, eth_hdr(skb)->h_source, vlan);
+	/* fdb_in->dst becomes flow->port below, so it must be a real port */
+	if (!fdb_in || !fdb_in->dst)
+		goto out;
+
+	vg = nbp_vlan_group_rcu(p);
+	vlan = skb_vlan_tag_present(skb) ? skb_vlan_tag_get_id(skb) : br_get_pvid(vg);
+	fdb_out = br_fdb_find_rcu(p->br, eth_hdr(skb)->h_dest, vlan);
+	/* br_offload_input() cannot use a flow without a destination port */
+	if (!fdb_out || !fdb_out->dst)
+		goto out;
+
+	br_offload_prepare_key(p, &key, skb);
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	/* the key must describe the frame as it looked on ingress */
+	key.vlan_present = cb->input_vlan_present;
+	key.vlan_tag = cb->input_vlan_tag;
+#endif
+
+	flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
+	if (!flow)
+		goto out;
+
+	flow->port = fdb_in->dst;
+	memcpy(&flow->key, &key, sizeof(key));
+
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	flow->vlan_out_present = skb_vlan_tag_present(skb);
+	flow->vlan_out = skb_vlan_tag_get(skb);
+#endif
+
+	flow->fdb_in = fdb_in;
+	flow->fdb_out = fdb_out;
+	flow->used = jiffies;
+
+	spin_lock_bh(&offload_lock);
+	/* never insert into the table of a port whose offload is disabled */
+	if (!o->enabled || !flow->port->offload.enabled ||
+	    atomic_read(&p->offload.rht.nelems) >= p->br->offload_cache_size ||
+	    rhashtable_insert_fast(&flow->port->offload.rht, &flow->node, flow_params)) {
+		kmem_cache_free(offload_cache, flow);
+		goto out_unlock;
+	}
+
+	hlist_add_head(&flow->fdb_list_in, &fdb_in->offload_in);
+	hlist_add_head(&flow->fdb_list_out, &fdb_out->offload_out);
+
+	if (br_offload_need_gc(p))
+		queue_work(system_long_wq, &p->offload.gc_work);
+
+out_unlock:
+	spin_unlock_bh(&offload_lock);
+
+out:
+	rcu_read_unlock();
+}
+
+/*
+ * Fast-path receive hook, called from br_handle_frame().
+ *
+ * Looks up the frame's MAC/VLAN tuple in the flow cache of ingress port @p.
+ * On a hit the skb is retagged as recorded in the flow and transmitted
+ * directly on the destination port's device.  On a miss the skb is flagged
+ * (cb->offload) and the ingress details are recorded so that
+ * br_offload_output() can create a flow once the slow path has picked an
+ * egress port.
+ *
+ * Returns true if the skb was consumed here (transmitted or freed), false
+ * if the caller must continue with regular bridge processing.
+ */
+bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	struct net_bridge_port_offload *o = &p->offload;
+	struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb;
+	struct bridge_flow_key key;
+	struct net_bridge_port *dst;
+	struct bridge_flow *flow;
+	unsigned long now = jiffies;
+	bool ret = false;
+
+	if (skb->len < sizeof(key))
+		return false;
+
+	if (!o->enabled)
+		return false;
+
+	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
+		return false;
+
+	br_offload_prepare_key(p, &key, skb);
+
+	rcu_read_lock();
+	flow = rhashtable_lookup(&o->rht, &key, flow_params);
+	if (!flow) {
+		cb->offload = 1;
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+		cb->input_vlan_present = key.vlan_present != 0;
+		cb->input_vlan_tag = key.vlan_tag;
+#endif
+		/*
+		 * br_offload_output() resolves the ingress port from this
+		 * index regardless of VLAN filtering support, so it has to
+		 * be recorded unconditionally.
+		 */
+		cb->input_ifindex = p->dev->ifindex;
+		goto out;
+	}
+
+	/* the source address is now known on a different port: slow path */
+	if (flow->fdb_in->dst != p)
+		goto out;
+
+	dst = flow->fdb_out->dst;
+	if (!dst)
+		goto out;
+
+	/* from here on the skb is consumed, whether it is sent or dropped */
+	ret = true;
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	if (!flow->vlan_out_present && key.vlan_present) {
+		__vlan_hwaccel_clear_tag(skb);
+	} else if (flow->vlan_out_present) {
+		if (skb_vlan_tag_present(skb) &&
+		    skb->vlan_proto != p->br->vlan_proto) {
+			/* Protocol-mismatch, empty out vlan_tci for new tag */
+			skb_push(skb, ETH_HLEN);
+			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
+							skb_vlan_tag_get(skb));
+			if (unlikely(!skb))
+				goto out;
+
+			skb_pull(skb, ETH_HLEN);
+			skb_reset_mac_len(skb);
+		}
+
+		__vlan_hwaccel_put_tag(skb, p->br->vlan_proto,
+				       flow->vlan_out);
+	}
+#endif
+
+	skb->dev = dst->dev;
+	skb_push(skb, ETH_HLEN);
+
+	if (skb_warn_if_lro(skb) || !is_skb_forwardable(skb->dev, skb)) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	/* skip the store when the timestamp is already current */
+	if (flow->used != now)
+		flow->used = now;
+	skb_forward_csum(skb);
+	dev_queue_xmit(skb);
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+/* Queue the GC work of every port of @br whose cache needs trimming. */
+static void
+br_offload_check_gc(struct net_bridge *br)
+{
+	struct net_bridge_port *port;
+
+	spin_lock_bh(&br->lock);
+	list_for_each_entry(port, &br->port_list, list) {
+		if (!br_offload_need_gc(port))
+			continue;
+
+		queue_work(system_long_wq, &port->offload.gc_work);
+	}
+	spin_unlock_bh(&br->lock);
+}
+
+
+/*
+ * Set the per-port flow cache size of @br to @val and kick GC on any port
+ * that is now over the limit.  Always returns 0.
+ */
+int br_offload_set_cache_size(struct net_bridge *br, unsigned long val)
+{
+	br->offload_cache_size = val;
+	br_offload_check_gc(br);
+
+	return 0;
+}
+
+/*
+ * Set the number of cache slots of @br kept in reserve (the headroom that
+ * br_offload_need_gc() adds to the current fill level) to @val and kick GC
+ * on any port that now needs trimming.  Always returns 0.
+ */
+int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val)
+{
+	br->offload_cache_reserved = val;
+	br_offload_check_gc(br);
+
+	return 0;
+}
+
+/*
+ * Create the slab cache backing struct bridge_flow.
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int __init br_offload_init(void)
+{
+	offload_cache = kmem_cache_create("bridge_offload_cache",
+					  sizeof(struct bridge_flow),
+					  0, SLAB_HWCACHE_ALIGN, NULL);
+
+	return offload_cache ? 0 : -ENOMEM;
+}
+
+/* Destroy the slab cache created by br_offload_init(). */
+void br_offload_fini(void)
+{
+	kmem_cache_destroy(offload_cache);
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2661dda1a92b..40021fe4b8c8 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -268,7 +268,15 @@ struct net_bridge_fdb_entry {
 	unsigned long			updated ____cacheline_aligned_in_smp;
 	unsigned long			used;
 
-	struct rcu_head			rcu;
+	union {
+#ifdef CONFIG_BRIDGE_OFFLOAD
+		struct {
+			struct hlist_head		offload_in;
+			struct hlist_head		offload_out;
+		};
+#endif
+		struct rcu_head			rcu;
+	};
 };
 
 #define MDB_PG_FLAGS_PERMANENT	BIT(0)
@@ -343,6 +351,12 @@ struct net_bridge_mdb_entry {
 	struct rcu_head			rcu;
 };
 
+struct net_bridge_port_offload {
+	struct rhashtable		rht;
+	struct work_struct		gc_work;
+	bool				enabled;
+};
+
 struct net_bridge_port {
 	struct net_bridge		*br;
 	struct net_device		*dev;
@@ -404,6 +418,9 @@ struct net_bridge_port {
 	u16				backup_redirected_cnt;
 
 	struct bridge_stp_xstats	stp_xstats;
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	struct net_bridge_port_offload	offload;
+#endif
 };
 
 #define kobj_to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
@@ -555,6 +572,11 @@ struct br_input_skb_cb {
 	u8 br_netfilter_broute:1;
 #endif
 
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	u32				offload_cache_size;
+	u32				offload_cache_reserved;
+#endif
+
 #ifdef CONFIG_NET_SWITCHDEV
 	/* Set if TX data plane offloading is used towards at least one
 	 * hardware domain.
@@ -580,6 +602,12 @@ struct br_input_skb_cb {
 #else
 # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb)	(0)
 #endif
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	u8 offload:1;
+	u8 input_vlan_present:1;
+	u16 input_vlan_tag;
+	int input_ifindex;
+#endif
 
 #define br_printk(level, br, format, args...)	\
 	printk(level "%s: " format, (br)->dev->name, ##args)
diff --git a/net/bridge/br_private_offload.h b/net/bridge/br_private_offload.h
new file mode 100644
index 000000000000..f66edd0539ab
--- /dev/null
+++ b/net/bridge/br_private_offload.h
@@ -0,0 +1,53 @@
+#ifndef __BR_OFFLOAD_H
+#define __BR_OFFLOAD_H
+
+/*
+ * Bridge software fast-path API.  With CONFIG_BRIDGE_OFFLOAD disabled the
+ * hooks below collapse into no-op inline stubs.
+ */
+#ifdef CONFIG_BRIDGE_OFFLOAD
+bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb);
+void br_offload_output(struct sk_buff *skb);
+void br_offload_port_state(struct net_bridge_port *p);
+void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb);
+bool br_offload_fdb_refresh_time(struct net_bridge *br,
+				 struct net_bridge_fdb_entry *fdb);
+int br_offload_init(void);
+void br_offload_fini(void);
+int br_offload_set_cache_size(struct net_bridge *br, unsigned long val);
+int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val);
+#else /* !CONFIG_BRIDGE_OFFLOAD */
+/* Never consumes the skb. */
+static inline bool br_offload_input(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	return false;
+}
+static inline void br_offload_output(struct sk_buff *skb)
+{
+}
+static inline void br_offload_port_state(struct net_bridge_port *p)
+{
+}
+static inline void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
+{
+}
+/* Never refreshes the fdb timestamp. */
+static inline bool br_offload_fdb_refresh_time(struct net_bridge *br,
+					       struct net_bridge_fdb_entry *fdb)
+{
+	return false;
+}
+/* Nothing to set up, always succeeds. */
+static inline int br_offload_init(void)
+{
+	return 0;
+}
+static inline void br_offload_fini(void)
+{
+}
+#endif /* CONFIG_BRIDGE_OFFLOAD */
+
+/*
+ * Clear the cache-miss flag on @skb so that br_offload_output() will not
+ * create a flow from it.  Compiles to nothing without CONFIG_BRIDGE_OFFLOAD.
+ */
+static inline void br_offload_skb_disable(struct sk_buff *skb)
+{
+#ifdef CONFIG_BRIDGE_OFFLOAD
+	struct br_input_skb_cb *cb = (struct br_input_skb_cb *)skb->cb;
+
+	/* only write to the control block when the flag is actually set */
+	if (!cb->offload)
+		return;
+
+	cb->offload = 0;
+#endif
+}
+
+#endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1d80f34a139c..b57788b53d24 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -12,6 +12,7 @@
 
 #include "br_private.h"
 #include "br_private_stp.h"
+#include "br_private_offload.h"
 
 /* since time values in bpdu are in jiffies and then scaled (1/256)
  * before sending, make sure that is at least one STP tick.
@@ -52,6 +53,8 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
 				(unsigned int) p->port_no, p->dev->name,
 				br_port_state_names[p->state]);
 
+	br_offload_port_state(p);
+
 	if (p->br->stp_enabled == BR_KERNEL_STP) {
 		switch (p->state) {
 		case BR_STATE_BLOCKING:
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6399a8a69d07..ffc65dc4eea8 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -15,6 +15,7 @@
 
 #include "br_private.h"
 #include "br_private_tunnel.h"
+#include "br_private_offload.h"
 
 static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg,
 				    const void *ptr)
@@ -180,6 +181,7 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
 	skb_dst_drop(skb);
 
 	__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid);
+	br_offload_skb_disable(skb);
 }
 
 int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
@@ -201,6 +203,7 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
 	if (err)
 		return err;
 
+	br_offload_skb_disable(skb);
 	tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
 	if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
 		skb_dst_set(skb, &tunnel_dst->dst);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00328f0dd22b..da8d3b72a77e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -55,7 +55,7 @@
 #include <net/net_namespace.h>
 
 #define RTNL_MAX_TYPE		50
-#define RTNL_SLAVE_MAX_TYPE	40
+#define RTNL_SLAVE_MAX_TYPE	41
 
 struct rtnl_link {
 	rtnl_doit_func		doit;
-- 
2.32.0 (Apple Git-132)


  reply	other threads:[~2022-02-10 14:24 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-10 14:24 [RFC 1/2] net: bridge: add knob for filtering rx/tx BPDU packets on a port Felix Fietkau
2022-02-10 14:24 ` Felix Fietkau [this message]
2022-02-10 15:02   ` [RFC 2/2] net: bridge: add a software fast-path implementation Nikolay Aleksandrov
2022-02-10 16:53     ` Felix Fietkau
2022-02-11  8:50       ` Nikolay Aleksandrov
2022-03-28 15:15         ` Felix Fietkau
2022-03-28 18:20           ` Nikolay Aleksandrov
2022-03-29 11:07             ` Felix Fietkau
2022-02-10 14:55 ` [RFC 1/2] net: bridge: add knob for filtering rx/tx BPDU packets on a port Nikolay Aleksandrov
2022-02-10 16:06   ` Felix Fietkau
2022-02-11  8:16     ` Nikolay Aleksandrov
2022-02-11 17:01   ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220210142401.4912-2-nbd@nbd.name \
    --to=nbd@nbd.name \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.