All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jussi Maki <joamaki@gmail.com>
To: bpf@vger.kernel.org
Cc: daniel@iogearbox.net, andrii.nakryiko@gmail.com,
	Jussi Maki <joamaki@gmail.com>
Subject: [PATCH bpf v2 1/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer
Date: Mon, 17 May 2021 10:11:27 +0000	[thread overview]
Message-ID: <20210517101128.641827-2-joamaki@gmail.com> (raw)
In-Reply-To: <20210517101128.641827-1-joamaki@gmail.com>

Add a currently failing test that uses bpf_skb_change_head in combination
with bpf_redirect_peer to redirect a packet from an L3 device to veth.

The test uses a BPF program that adds L2 headers to the packet coming
from an L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. The test fails as skb->mac_len is not set properly and
thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify,
causing tcp_v4_rcv to point the TCP header into the middle of the IP header.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
 .../selftests/bpf/prog_tests/tc_redirect.c    | 238 +++++++++++++++++-
 .../selftests/bpf/progs/test_tc_peer.c        |  26 ++
 2 files changed, 251 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..aa844f282e8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,16 +11,18 @@
  */
 
 #define _GNU_SOURCE
-#include <fcntl.h>
+
+#include "test_progs.h"
+
 #include <linux/limits.h>
 #include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <sys/stat.h>
-#include <sys/types.h>
 
-#include "test_progs.h"
 #include "network_helpers.h"
 #include "test_tc_neigh_fib.skel.h"
 #include "test_tc_neigh.skel.h"
@@ -32,16 +34,23 @@
 
 #define IP4_SRC "172.16.1.100"
 #define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
 #define IP4_PORT 9004
 
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
 #define IP6_PORT 9006
 
 #define IP4_SLL "169.254.0.1"
 #define IP4_DLL "169.254.0.2"
 #define IP4_NET "169.254.0.0"
 
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
 #define IFADDR_STR_LEN 18
 #define PING_ARGS "-c 3 -w 10 -q"
 
@@ -92,7 +101,8 @@ static int modify_proc(const char *path, const char *newval)
 
 	strncpy(mod->path, path, PATH_MAX);
 
-	if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
+	mod->oldlen = fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f);
+	if (mod->oldlen < 0) {
 		log_err("reading from %s failed", path);
 		goto fail;
 	}
@@ -238,14 +248,15 @@ static int get_ifindex(const char *name)
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-	char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
 
 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+	SYS("ip link set veth_dst address " MAC_DST);
+
 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
 		goto fail;
-	if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
-		goto fail;
 
 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
 	if (result->ifindex_veth_src_fwd < 0)
@@ -306,10 +317,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
 
-	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
-	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
+	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
 
 	setns_root();
 	return 0;
@@ -560,6 +569,206 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 	setns_root();
 }
 
+
+/* Open TUN interface @name (IFF_TUN | IFF_NO_PI), bring it up; return its fd or -1. */
+static int tun_open(char *name)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd < 0)
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr)); /* zeroed, so ifr_name stays NUL-terminated below */
+	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+	if (*name)
+		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
+
+	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+		goto fail;
+
+	SYS("ip link set dev %s up", name);
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) /* note: evaluates each argument twice */
+enum { /* relay direction chosen per iteration in tun_relay_loop() */
+	SRC_TO_TARGET = 0, /* read from src_fd, write to target_fd */
+	TARGET_TO_SRC = 1, /* read from target_fd, write to src_fd */
+};
+
+/* Relay packets between two TUN fds in both directions; returns 1 on an I/O error. */
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+	fd_set rfds;
+
+	for (;;) {
+		char buf[1500];
+		int direction, nread, nwrite;
+
+		/* select() overwrites the set, so rebuild it on every pass. */
+		FD_ZERO(&rfds);
+		FD_SET(src_fd, &rfds);
+		FD_SET(target_fd, &rfds);
+
+		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+			fprintf(stderr, "select failed: %s\n", strerror(errno));
+			return 1;
+		}
+
+		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+		if (nread < 0) {
+			fprintf(stderr, "read failed: %s\n", strerror(errno));
+			return 1;
+		}
+
+		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+		if (nwrite != nread) {
+			fprintf(stderr, "write failed: %s\n", strerror(errno));
+			return 1;
+		}
+	}
+}
+
+/* Subtest: redirect from an L3 TUN device to veth using the tc_src_l3 program. */
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel;
+	int err, tunnel_pid = -1, src_fd = -1, target_fd = -1; /* -1: not created yet */
+
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load")) {
+		test_tc_peer__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) {
+		test_tc_peer__destroy(skel);
+		return;
+	}
+
+	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+	 * expose the L2 headers encapsulating the IP packet to BPF and hence
+	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
+	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+	 * but that requires much more complicated setup.
+	 */
+	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns " NS_SRC))
+		goto fail;
+
+	src_fd = tun_open("tun_src");
+	if (!ASSERT_GE(src_fd, 0, "alloc tun_src"))
+		goto fail;
+
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns " NS_FWD))
+		goto fail;
+
+	target_fd = tun_open("tun_fwd");
+	if (!ASSERT_GE(target_fd, 0, "alloc tun_fwd"))
+		goto fail;
+
+	tunnel_pid = fork();
+	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+		goto fail;
+
+	if (tunnel_pid == 0)
+		exit(tun_relay_loop(src_fd, target_fd));
+
+	setns_root();
+
+	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets */
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns " NS_FWD))
+		goto fail;
+
+	SYS("tc qdisc add dev tun_fwd clsact");
+	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+
+	/* Setup route and neigh tables */
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_FWD "/128 dev veth_dst scope global");
+
+	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_FWD " dev veth_dst lladdr " MAC_DST_FWD);
+
+	/* Enable forwarding back towards src, but not the other way in order to require the
+	 * BPF redirection.
+	 */
+	err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
+	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv4/conf/veth_src_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set veth_src_fwd.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv4/conf/tun_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set tun_fwd.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
+	if (!ASSERT_OK(err, "set ipv6.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/veth_src_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set ipv6 veth_src_fwd.forwarding"))
+		goto fail;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/tun_fwd/forwarding", "0");
+	if (!ASSERT_OK(err, "set ipv6 tun_fwd.forwarding"))
+		goto fail;
+
+	setns_root();
+
+	test_connectivity();
+
+fail:
+	setns_by_name(NS_FWD);
+	restore_proc();
+	setns_root();
+	if (tunnel_pid > 0) {
+		kill(tunnel_pid, SIGTERM);
+		waitpid(tunnel_pid, NULL, 0);
+	}
+	if (src_fd >= 0)
+		close(src_fd);
+	if (target_fd >= 0)
+		close(target_fd);
+	bpf_program__unpin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	test_tc_peer__destroy(skel);
+}
+
 void test_tc_redirect(void)
 {
 	struct netns_setup_result setup_result;
@@ -577,6 +786,9 @@ void test_tc_redirect(void)
 	if (test__start_subtest("tc_redirect_peer"))
 		test_tc_redirect_peer(&setup_result);
 
+	if (test__start_subtest("tc_redirect_peer_l3"))
+		test_tc_redirect_peer_l3(&setup_result);
+
 	if (test__start_subtest("tc_redirect_neigh"))
 		test_tc_redirect_neigh(&setup_result);
 
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bb..aea7bec5a1ab 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,18 @@
 #include <linux/bpf.h>
 #include <linux/stddef.h>
 #include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 static volatile const __u32 IFINDEX_SRC;
 static volatile const __u32 IFINDEX_DST;
 
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
 SEC("classifier/chk_egress")
 int tc_chk(struct __sk_buff *skb)
 {
@@ -29,4 +35,24 @@ int tc_src(struct __sk_buff *skb)
 	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb) /* prepend an L2 header, then redirect to peer */
+{
+	__u16 proto = skb->protocol; /* read before the packet is modified */
+
+	if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0) /* grow head by ETH_HLEN */
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0) /* bytes 0-5 */
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0) /* bytes 6-11 */
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+		return TC_ACT_SHOT; /* the store above targets bytes 12-13 */
+
+	return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
 char __license[] SEC("license") = "GPL";
-- 
2.30.2


  reply	other threads:[~2021-05-17 10:11 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-04-27 13:55 ` [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head Jussi Maki
2021-04-27 21:41   ` Andrii Nakryiko
2021-04-28 10:39     ` Jussi Maki
2021-04-28 10:49       ` Daniel Borkmann
2021-04-28 13:39         ` Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-17 10:11   ` Jussi Maki [this message]
2021-05-17 10:11   ` [PATCH bpf v2 2/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-18 14:23   ` [PATCH bpf v3 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-18 14:23   ` [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-19 15:33     ` Daniel Borkmann
2021-05-19 15:47 ` [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-19 15:47   ` [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-20 22:07     ` Daniel Borkmann
2021-05-25 10:22       ` [PATCH bpf v5] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-19 15:47   ` [PATCH bpf v4 2/2] " Jussi Maki
2021-05-25 10:29 ` [PATCH bpf v5] " Jussi Maki
2021-05-25 16:03   ` Daniel Borkmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210517101128.641827-2-joamaki@gmail.com \
    --to=joamaki@gmail.com \
    --cc=andrii.nakryiko@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.