From mboxrd@z Thu Jan 1 00:00:00 1970 From: Petar Penkov Subject: [bpf-next, v2 2/3] flow_dissector: implements eBPF parser Date: Fri, 7 Sep 2018 17:11:09 -0700 Message-ID: <20180908001110.200828-3-peterpenkov96@gmail.com> References: <20180908001110.200828-1-peterpenkov96@gmail.com> Mime-Version: 1.0 Content-Transfer-Encoding: 8bit Cc: davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net, simon.horman@netronome.com, ecree@solarflare.com, songliubraving@fb.com, tom@herbertland.com, Petar Penkov , Willem de Bruijn To: netdev@vger.kernel.org Return-path: Received: from mail-pf1-f196.google.com ([209.85.210.196]:42904 "EHLO mail-pf1-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726346AbeIHEyx (ORCPT ); Sat, 8 Sep 2018 00:54:53 -0400 Received: by mail-pf1-f196.google.com with SMTP id l9-v6so7729804pff.9 for ; Fri, 07 Sep 2018 17:11:27 -0700 (PDT) In-Reply-To: <20180908001110.200828-1-peterpenkov96@gmail.com> Sender: netdev-owner@vger.kernel.org List-ID: From: Petar Penkov This eBPF program extracts basic/control/ip address/ports keys from incoming packets. It supports recursive parsing for IP encapsulation, and VLAN, along with IPv4/IPv6 and extension headers. This program is meant to show how flow dissection and key extraction can be done in eBPF. Link: http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf Signed-off-by: Petar Penkov Signed-off-by: Willem de Bruijn --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bpf_flow.c | 386 +++++++++++++++++++++++++ 2 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/bpf_flow.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fff7fb1285fc..e65f50f9185e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o + test_skb_cgroup_id_kern.o bpf_flow.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c new file mode 100644 index 000000000000..f1e33a574841 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +#define PROG(F) SEC(#F) int bpf_func_##F + +/* These are the identifiers of the BPF programs that will be used in tail + * calls. Name is limited to 16 characters, with the terminating character and + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. + */ +enum { + IP, + IPV6, + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ + IPV6FR, /* Fragmentation IPv6 Extension Header */ + MPLS, + VLAN, +}; + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct gre_hdr { + __be16 flags; + __be16 proto; +}; + +struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; +}; + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8 +}; + +struct bpf_dissect_cb { + __u16 nhoff; +}; + +static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, + __u16 hdr_size, + void *buffer) +{ + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + void *data_end = (__u8 *)(long)skb->data_end; + void *data = (__u8 *)(long)skb->data; + __u8 *hdr; + + /* Verifies this variable offset does not overflow */ + if (cb->nhoff > (USHRT_MAX - hdr_size)) + return NULL; + + hdr = data + cb->nhoff; + if (hdr + hdr_size <= data_end) + return hdr; + + if (bpf_skb_load_bytes(skb, cb->nhoff, buffer, hdr_size)) + return NULL; + + return buffer; +} + +/* Dispatches on ETHERTYPE */ +static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) +{ + struct bpf_flow_keys *keys = (struct bpf_flow_keys *)(long)(skb->flow_keys); + + keys->n_proto = proto; + switch (proto) { + case bpf_htons(ETH_P_IP): + bpf_tail_call(skb, &jmp_table, IP); + break; + case bpf_htons(ETH_P_IPV6): + bpf_tail_call(skb, &jmp_table, IPV6); + break; + case bpf_htons(ETH_P_MPLS_MC): + case bpf_htons(ETH_P_MPLS_UC): + bpf_tail_call(skb, &jmp_table, MPLS); + break; + case bpf_htons(ETH_P_8021Q): + case bpf_htons(ETH_P_8021AD): + bpf_tail_call(skb, &jmp_table, VLAN); + break; + default: + /* Protocol not supported */ + return BPF_DROP; + } + + return BPF_DROP; +} + +SEC("dissect") +int dissect(struct __sk_buff *skb) +{ + if (!skb->vlan_present) + return parse_eth_proto(skb, skb->protocol); + else + return parse_eth_proto(skb, skb->vlan_proto); +} + +/* Parses on IPPROTO_* */ +static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) +{ + struct bpf_flow_keys *keys = (struct bpf_flow_keys *)(long)(skb->flow_keys); + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + void *data_end = (void *)(long)skb->data_end; + struct icmphdr *icmp, _icmp; + struct gre_hdr *gre, _gre; + struct ethhdr *eth, _eth; + struct tcphdr *tcp, _tcp; + struct udphdr *udp, _udp; + + keys->ip_proto = proto; + switch (proto) { + case IPPROTO_ICMP: + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); + if (!icmp) + return BPF_DROP; + return BPF_OK; + case IPPROTO_IPIP: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); + case IPPROTO_IPV6: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); + case IPPROTO_GRE: + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); + if (!gre) + return BPF_DROP; + + if (bpf_htons(gre->flags & GRE_VERSION)) + /* Only inspect standard GRE packets with version 0 */ + return BPF_OK; + + cb->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + if (GRE_IS_CSUM(gre->flags)) + cb->nhoff += 4; /* Step over chksum and Padding */ + if (GRE_IS_KEY(gre->flags)) + cb->nhoff += 4; /* Step over key */ + if (GRE_IS_SEQ(gre->flags)) + cb->nhoff += 4; /* Step over sequence number */ + + keys->is_encap = true; + + if (gre->proto == bpf_htons(ETH_P_TEB)) { + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), + &_eth); + if (!eth) + return BPF_DROP; + + cb->nhoff += sizeof(*eth); + + return parse_eth_proto(skb, eth->h_proto); + } else { + return parse_eth_proto(skb, gre->proto); + } + case IPPROTO_TCP: + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); + if (!tcp) + return BPF_DROP; + + if (tcp->doff < 5) + return BPF_DROP; + + if ((__u8 *)tcp + (tcp->doff << 2) > data_end) + return BPF_DROP; + + keys->thoff = cb->nhoff; + keys->sport = tcp->source; + keys->dport = tcp->dest; + return BPF_OK; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); + if (!udp) + return BPF_DROP; + + keys->thoff = cb->nhoff; + keys->sport = udp->source; + keys->dport = udp->dest; + return BPF_OK; + default: + return BPF_DROP; + } + + return BPF_DROP; +} + +static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) +{ + struct bpf_flow_keys *keys = (struct bpf_flow_keys *)(long)(skb->flow_keys); + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + + keys->ip_proto = nexthdr; + switch (nexthdr) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + bpf_tail_call(skb, &jmp_table, IPV6OP); + break; + case IPPROTO_FRAGMENT: + bpf_tail_call(skb, &jmp_table, IPV6FR); + break; + default: + return parse_ip_proto(skb, nexthdr); + } + + return BPF_DROP; +} + +PROG(IP)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = (struct bpf_flow_keys *)(long)(skb->flow_keys); + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + void *data_end = (__u8 *)(long)skb->data_end; + void *data = (__u8 *)(long)skb->data; + bool done = false; + struct iphdr *iph, _iph; + + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); + if (!iph) + return BPF_DROP; + + /* IP header cannot be smaller than 20 bytes */ + if (iph->ihl < 5) + return BPF_DROP; + + keys->addr_proto = ETH_P_IP; + keys->ipv4_src = iph->saddr; + keys->ipv4_dst = iph->daddr; + + cb->nhoff += iph->ihl << 2; + if (data + cb->nhoff > data_end) + return BPF_DROP; + + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { + keys->is_frag = true; + if (iph->frag_off & bpf_htons(IP_OFFSET)) + /* From second fragment on, packets do not have headers + * we can parse. + */ + done = true; + else + keys->is_first_frag = true; + } + + if (done) + return BPF_OK; + + return parse_ip_proto(skb, iph->protocol); +} + +PROG(IPV6)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = (struct bpf_flow_keys *)(long)(skb->flow_keys); + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + struct ipv6hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + keys->addr_proto = ETH_P_IPV6; + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); + + cb->nhoff += sizeof(struct ipv6hdr); + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6OP)(struct __sk_buff *skb) +{ + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + struct ipv6_opt_hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + /* hlen is in 8-octects and does not include the first 8 bytes + * of the header + */ + cb->nhoff += (1 + ip6h->hdrlen) << 3; + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6FR)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = (struct bpf_flow_keys *)(long)(skb->flow_keys); + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + struct frag_hdr *fragh, _fragh; + + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); + if (!fragh) + return BPF_DROP; + + cb->nhoff += sizeof(*fragh); + keys->is_frag = true; + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->is_first_frag = true; + + return parse_ipv6_proto(skb, fragh->nexthdr); +} + +PROG(MPLS)(struct __sk_buff *skb) +{ + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + void *data = (void *)(long)skb->data; + struct mpls_label *mpls, _mpls; + __u8 *version; + + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); + if (!mpls) + return BPF_DROP; + + return BPF_OK; +} + +PROG(VLAN)(struct __sk_buff *skb) +{ + struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb); + struct vlan_hdr *vlan, _vlan; + __be16 proto; + + /* Peek back to see if single or double-tagging */ + if (bpf_skb_load_bytes(skb, cb->nhoff - sizeof(proto), &proto, + sizeof(proto))) + return BPF_DROP; + + /* Account for double-tagging */ + if (proto == bpf_htons(ETH_P_8021AD)) { + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + cb->nhoff += sizeof(*vlan); + } + + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + cb->nhoff += sizeof(*vlan); + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); +} + +char __license[] SEC("license") = "GPL"; -- 2.19.0.rc2.392.g5ba43deb5a-goog