[RFC PATCH bpf-next 07/14] xdp_flow: Add flow handling and basic actions in bpf prog

From: Toshiaki Makita <toshiaki.makita1@gmail.com>
To: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	Jesper Dangaard Brouer <hawk@kernel.org>,
	John Fastabend <john.fastabend@gmail.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Jiri Pirko <jiri@resnulli.us>
Cc: Toshiaki Makita <toshiaki.makita1@gmail.com>,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	William Tu <u9012063@gmail.com>
Subject: [RFC PATCH bpf-next 07/14] xdp_flow: Add flow handling and basic actions in bpf prog
Date: Tue, 13 Aug 2019 21:05:51 +0900	[thread overview]
Message-ID: <20190813120558.6151-8-toshiaki.makita1@gmail.com> (raw)
In-Reply-To: <20190813120558.6151-1-toshiaki.makita1@gmail.com>

BPF prog for XDP parses the packet and extracts the flow key. Then find
an entry from flow tables.
Only "accept" and "drop" actions are implemented at this point.

Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
---
 net/xdp_flow/xdp_flow_kern_bpf.c | 297 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 296 insertions(+), 1 deletion(-)

diff --git a/net/xdp_flow/xdp_flow_kern_bpf.c b/net/xdp_flow/xdp_flow_kern_bpf.c
index c101156..ceb8a92 100644
--- a/net/xdp_flow/xdp_flow_kern_bpf.c
+++ b/net/xdp_flow/xdp_flow_kern_bpf.c
@@ -1,9 +1,27 @@
 // SPDX-License-Identifier: GPL-2.0
 #define KBUILD_MODNAME "foo"
 #include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
 #include <bpf_helpers.h>
 #include "umh_bpf.h"
 
+/* Used when the action only modifies the packet */
+#define _XDP_CONTINUE -1
+
+struct bpf_map_def SEC("maps") debug_stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 256,
+};
+
 struct bpf_map_def SEC("maps") flow_masks_head = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(u32),
@@ -25,10 +43,287 @@ struct bpf_map_def SEC("maps") flow_tables = {
 	.max_entries = MAX_FLOW_MASKS,
 };
 
+static inline void account_debug(int idx)
+{
+	long *cnt;
+
+	cnt = bpf_map_lookup_elem(&debug_stats, &idx);
+	if (cnt)
+		*cnt += 1;
+}
+
+static inline void account_action(int act)
+{
+	account_debug(act + 1);
+}
+
+static inline int action_accept(void)
+{
+	account_action(XDP_FLOW_ACTION_ACCEPT);
+	return XDP_PASS;
+}
+
+static inline int action_drop(void)
+{
+	account_action(XDP_FLOW_ACTION_DROP);
+	return XDP_DROP;
+}
+
+static inline int action_redirect(struct xdp_flow_action *action)
+{
+	account_action(XDP_FLOW_ACTION_REDIRECT);
+
+	// TODO: implement this
+	return XDP_ABORTED;
+}
+
+static inline int action_vlan_push(struct xdp_md *ctx,
+				   struct xdp_flow_action *action)
+{
+	account_action(XDP_FLOW_ACTION_VLAN_PUSH);
+
+	// TODO: implement this
+	return XDP_ABORTED;
+}
+
+static inline int action_vlan_pop(struct xdp_md *ctx,
+				  struct xdp_flow_action *action)
+{
+	account_action(XDP_FLOW_ACTION_VLAN_POP);
+
+	// TODO: implement this
+	return XDP_ABORTED;
+}
+
+static inline int action_vlan_mangle(struct xdp_md *ctx,
+				     struct xdp_flow_action *action)
+{
+	account_action(XDP_FLOW_ACTION_VLAN_MANGLE);
+
+	// TODO: implement this
+	return XDP_ABORTED;
+}
+
+static inline int action_mangle(struct xdp_md *ctx,
+				struct xdp_flow_action *action)
+{
+	account_action(XDP_FLOW_ACTION_MANGLE);
+
+	// TODO: implement this
+	return XDP_ABORTED;
+}
+
+static inline int action_csum(struct xdp_md *ctx,
+			      struct xdp_flow_action *action)
+{
+	account_action(XDP_FLOW_ACTION_CSUM);
+
+	// TODO: implement this
+	return XDP_ABORTED;
+}
+
+static inline void __ether_addr_copy(u8 *dst, const u8 *src)
+{
+	u16 *a = (u16 *)dst;
+	const u16 *b = (const u16 *)src;
+
+	a[0] = b[0];
+	a[1] = b[1];
+	a[2] = b[2];
+}
+
+static inline int parse_ipv4(void *data, u64 *nh_off, void *data_end,
+			     struct xdp_flow_key *key)
+{
+	struct iphdr *iph = data + *nh_off;
+
+	if (iph + 1 > data_end)
+		return -1;
+
+	key->ipv4.src = iph->saddr;
+	key->ipv4.dst = iph->daddr;
+	key->ip.ttl = iph->ttl;
+	key->ip.tos = iph->tos;
+	*nh_off += iph->ihl * 4;
+
+	return iph->protocol;
+}
+
+static inline int parse_ipv6(void *data, u64 *nh_off, void *data_end,
+			     struct xdp_flow_key *key)
+{
+	struct ipv6hdr *ip6h = data + *nh_off;
+
+	if (ip6h + 1 > data_end)
+		return -1;
+
+	key->ipv6.src = ip6h->saddr;
+	key->ipv6.dst = ip6h->daddr;
+	key->ip.ttl = ip6h->hop_limit;
+	key->ip.tos = ipv6_get_dsfield(ip6h);
+	*nh_off += sizeof(*ip6h);
+
+	if (ip6h->nexthdr == NEXTHDR_HOP ||
+	    ip6h->nexthdr == NEXTHDR_ROUTING ||
+	    ip6h->nexthdr == NEXTHDR_FRAGMENT ||
+	    ip6h->nexthdr == NEXTHDR_AUTH ||
+	    ip6h->nexthdr == NEXTHDR_NONE ||
+	    ip6h->nexthdr == NEXTHDR_DEST)
+		return 0;
+
+	return ip6h->nexthdr;
+}
+
+#define for_each_flow_mask(entry, head, idx, cnt) \
+	for (entry = bpf_map_lookup_elem(&flow_masks, (head)), \
+	     idx = *(head), cnt = 0; \
+	     entry != NULL && cnt < MAX_FLOW_MASKS; \
+	     idx = entry->next, \
+	     entry = bpf_map_lookup_elem(&flow_masks, &idx), cnt++)
+
+static inline void flow_mask(struct xdp_flow_key *mkey,
+			     const struct xdp_flow_key *key,
+			     const struct xdp_flow_key *mask)
+{
+	long *lmkey = (long *)mkey;
+	long *lmask = (long *)mask;
+	long *lkey = (long *)key;
+	int i;
+
+	for (i = 0; i < sizeof(*mkey); i += sizeof(long))
+		*lmkey++ = *lkey++ & *lmask++;
+}
+
 SEC("xdp_flow")
 int xdp_flow_prog(struct xdp_md *ctx)
 {
-	return XDP_PASS;
+	void *data_end = (void *)(long)ctx->data_end;
+	struct xdp_flow_actions *actions = NULL;
+	void *data = (void *)(long)ctx->data;
+	int cnt, idx, action_idx, zero = 0;
+	struct xdp_flow_mask_entry *entry;
+	struct ethhdr *eth = data;
+	struct xdp_flow_key key;
+	int rc = XDP_DROP;
+	long *value;
+	u16 h_proto;
+	u32 ipproto;
+	u64 nh_off;
+	int *head;
+
+	account_debug(0);
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+
+	__builtin_memset(&key, 0, sizeof(key));
+	h_proto = eth->h_proto;
+	__ether_addr_copy(key.eth.dst, eth->h_dest);
+	__ether_addr_copy(key.eth.src, eth->h_source);
+
+	if (eth_type_vlan(h_proto)) {
+		struct vlan_hdr *vhdr;
+
+		vhdr = data + nh_off;
+		nh_off += sizeof(*vhdr);
+		if (data + nh_off > data_end)
+			return XDP_DROP;
+		key.vlan.tpid = h_proto;
+		key.vlan.tci = vhdr->h_vlan_TCI;
+		h_proto = vhdr->h_vlan_encapsulated_proto;
+	}
+	key.eth.type = h_proto;
+
+	if (h_proto == htons(ETH_P_IP))
+		ipproto = parse_ipv4(data, &nh_off, data_end, &key);
+	else if (h_proto == htons(ETH_P_IPV6))
+		ipproto = parse_ipv6(data, &nh_off, data_end, &key);
+	else
+		ipproto = 0;
+	if (ipproto < 0)
+		return XDP_DROP;
+	key.ip.proto = ipproto;
+
+	if (ipproto == IPPROTO_TCP) {
+		struct tcphdr *th = data + nh_off;
+
+		if (th + 1 > data_end)
+			return XDP_DROP;
+
+		key.l4port.src = th->source;
+		key.l4port.dst = th->dest;
+		key.tcp.flags = (*(__be16 *)&tcp_flag_word(th) & htons(0x0FFF));
+	} else if (ipproto == IPPROTO_UDP) {
+		struct udphdr *uh = data + nh_off;
+
+		if (uh + 1 > data_end)
+			return XDP_DROP;
+
+		key.l4port.src = uh->source;
+		key.l4port.dst = uh->dest;
+	}
+
+	head = bpf_map_lookup_elem(&flow_masks_head, &zero);
+	if (!head)
+		return XDP_PASS;
+
+	for_each_flow_mask(entry, head, idx, cnt) {
+		struct xdp_flow_key mkey;
+		void *flow_table;
+
+		flow_table = bpf_map_lookup_elem(&flow_tables, &idx);
+		if (!flow_table)
+			return XDP_ABORTED;
+
+		flow_mask(&mkey, &key, &entry->mask);
+		actions = bpf_map_lookup_elem(flow_table, &mkey);
+		if (actions)
+			break;
+	}
+
+	if (!actions)
+		return XDP_PASS;
+
+	for (action_idx = 0;
+	     action_idx < actions->num_actions &&
+	     action_idx < MAX_XDP_FLOW_ACTIONS;
+	     action_idx++) {
+		struct xdp_flow_action *action;
+		int act;
+
+		action = &actions->actions[action_idx];
+
+		switch (action->id) {
+		case XDP_FLOW_ACTION_ACCEPT:
+			return action_accept();
+		case XDP_FLOW_ACTION_DROP:
+			return action_drop();
+		case XDP_FLOW_ACTION_REDIRECT:
+			return action_redirect(action);
+		case XDP_FLOW_ACTION_VLAN_PUSH:
+			act = action_vlan_push(ctx, action);
+			break;
+		case XDP_FLOW_ACTION_VLAN_POP:
+			act = action_vlan_pop(ctx, action);
+			break;
+		case XDP_FLOW_ACTION_VLAN_MANGLE:
+			act = action_vlan_mangle(ctx, action);
+			break;
+		case XDP_FLOW_ACTION_MANGLE:
+			act = action_mangle(ctx, action);
+			break;
+		case XDP_FLOW_ACTION_CSUM:
+			act = action_csum(ctx, action);
+			break;
+		default:
+			return XDP_ABORTED;
+		}
+		if (act != _XDP_CONTINUE)
+			return act;
+	}
+
+	return XDP_ABORTED;
 }
 
 char _license[] SEC("license") = "GPL";
-- 
1.8.3.1