All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brenden Blanco <bblanco@plumgrid.com>
To: davem@davemloft.net
Cc: Brenden Blanco <bblanco@plumgrid.com>,
	netdev@vger.kernel.org, tom@herbertland.com,
	alexei.starovoitov@gmail.com, gerlitz@mellanox.com,
	daniel@iogearbox.net, john.fastabend@gmail.com,
	brouer@redhat.com
Subject: [RFC PATCH 5/5] Add sample for adding simple drop program to link
Date: Fri,  1 Apr 2016 18:21:58 -0700	[thread overview]
Message-ID: <1459560118-5582-6-git-send-email-bblanco@plumgrid.com> (raw)
In-Reply-To: <1459560118-5582-1-git-send-email-bblanco@plumgrid.com>

Add a sample program that only drops packets at the
BPF_PROG_TYPE_PHYS_DEV hook of a link. With the drop-only program,
observed single core rate is ~14.6Mpps.

Other tests were run, for instance without the dropcnt increment or
without reading from the packet header, the packet rate was mostly
unchanged.

$ perf record -a samples/bpf/netdrvx1 $(</sys/class/net/eth0/ifindex)
proto 17:   14597724 drops/s

./pktgen_sample03_burst_single_flow.sh -i $DEV -d $IP -m $MAC -t 4
Running... ctrl^C to stop
Device: eth4@0
Result: OK: 6486875(c6485849+d1026) usec, 23689465 (60byte,0frags)
  3651906pps 1752Mb/sec (1752914880bps) errors: 0
Device: eth4@1
Result: OK: 6486874(c6485656+d1217) usec, 23689489 (60byte,0frags)
  3651911pps 1752Mb/sec (1752917280bps) errors: 0
Device: eth4@2
Result: OK: 6486851(c6485730+d1120) usec, 23687853 (60byte,0frags)
  3651672pps 1752Mb/sec (1752802560bps) errors: 0
Device: eth4@3
Result: OK: 6486879(c6485807+d1071) usec, 23688954 (60byte,0frags)
  3651825pps 1752Mb/sec (1752876000bps) errors: 0

perf report --no-children:
  18.36%  ksoftirqd/1    [mlx4_en]         [k] mlx4_en_process_rx_cq
  15.98%  swapper        [kernel.vmlinux]  [k] poll_idle
  12.71%  ksoftirqd/1    [mlx4_en]         [k] mlx4_en_alloc_frags
   6.87%  ksoftirqd/1    [mlx4_en]         [k] mlx4_en_free_frag
   4.20%  ksoftirqd/1    [kernel.vmlinux]  [k] get_page_from_freelist
   4.09%  swapper        [mlx4_en]         [k] mlx4_en_process_rx_cq
   3.32%  ksoftirqd/1    [kernel.vmlinux]  [k] sk_load_byte_positive_offset
   2.39%  ksoftirqd/1    [mdio]            [k] 0x00000000000074cd
   2.23%  swapper        [mlx4_en]         [k] mlx4_en_alloc_frags
   2.20%  ksoftirqd/1    [kernel.vmlinux]  [k] free_pages_prepare
   2.08%  ksoftirqd/1    [mlx4_en]         [k] mlx4_call_bpf
   1.57%  ksoftirqd/1    [kernel.vmlinux]  [k] percpu_array_map_lookup_elem
   1.35%  ksoftirqd/1    [mdio]            [k] 0x00000000000074fa
   1.09%  ksoftirqd/1    [kernel.vmlinux]  [k] free_one_page
   1.02%  ksoftirqd/1    [kernel.vmlinux]  [k] bpf_map_lookup_elem
   0.90%  ksoftirqd/1    [kernel.vmlinux]  [k] __alloc_pages_nodemask
   0.88%  swapper        [kernel.vmlinux]  [k] intel_idle
   0.82%  ksoftirqd/1    [mdio]            [k] 0x00000000000074be
   0.80%  swapper        [mlx4_en]         [k] mlx4_en_free_frag

machine specs:
 receiver - Intel E5-1630 v3 @ 3.70GHz
 sender - Intel E5645 @ 2.40GHz
 Mellanox ConnectX-3 @40G

Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
---
 samples/bpf/Makefile        |   4 ++
 samples/bpf/bpf_load.c      |   8 +++
 samples/bpf/netdrvx1_kern.c |  26 ++++++++
 samples/bpf/netdrvx1_user.c | 155 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 193 insertions(+)
 create mode 100644 samples/bpf/netdrvx1_kern.c
 create mode 100644 samples/bpf/netdrvx1_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 502c9fc..ad36bb8 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -19,6 +19,7 @@ hostprogs-y += lathist
 hostprogs-y += offwaketime
 hostprogs-y += spintest
 hostprogs-y += map_perf_test
+hostprogs-y += netdrvx1
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -38,6 +39,7 @@ lathist-objs := bpf_load.o libbpf.o lathist_user.o
 offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
 spintest-objs := bpf_load.o libbpf.o spintest_user.o
 map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
+netdrvx1-objs := bpf_load.o libbpf.o netdrvx1_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -56,6 +58,7 @@ always += lathist_kern.o
 always += offwaketime_kern.o
 always += spintest_kern.o
 always += map_perf_test_kern.o
+always += netdrvx1_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -75,6 +78,7 @@ HOSTLOADLIBES_lathist += -lelf
 HOSTLOADLIBES_offwaketime += -lelf
 HOSTLOADLIBES_spintest += -lelf
 HOSTLOADLIBES_map_perf_test += -lelf -lrt
+HOSTLOADLIBES_netdrvx1 += -lelf
 
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 58f86bd..9308fbc 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -49,6 +49,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_socket = strncmp(event, "socket", 6) == 0;
 	bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
 	bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+	bool is_phys_dev = strncmp(event, "phys_dev", 8) == 0;
 	enum bpf_prog_type prog_type;
 	char buf[256];
 	int fd, efd, err, id;
@@ -63,6 +64,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 	} else if (is_kprobe || is_kretprobe) {
 		prog_type = BPF_PROG_TYPE_KPROBE;
+	} else if (is_phys_dev) {
+		prog_type = BPF_PROG_TYPE_PHYS_DEV;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -76,6 +79,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
 	prog_fd[prog_cnt++] = fd;
 
+	if (is_phys_dev)
+		return 0;
+
 	if (is_socket) {
 		event += 6;
 		if (*event != '/')
@@ -304,6 +310,7 @@ int load_bpf_file(char *path)
 
 			if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
 			    memcmp(shname_prog, "kretprobe/", 10) == 0 ||
+			    memcmp(shname_prog, "phys_dev", 8) == 0 ||
 			    memcmp(shname_prog, "socket", 6) == 0)
 				load_and_attach(shname_prog, insns, data_prog->d_size);
 		}
@@ -320,6 +327,7 @@ int load_bpf_file(char *path)
 
 		if (memcmp(shname, "kprobe/", 7) == 0 ||
 		    memcmp(shname, "kretprobe/", 10) == 0 ||
+		    memcmp(shname, "phys_dev", 8) == 0 ||
 		    memcmp(shname, "socket", 6) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
diff --git a/samples/bpf/netdrvx1_kern.c b/samples/bpf/netdrvx1_kern.c
new file mode 100644
index 0000000..9837d8a
--- /dev/null
+++ b/samples/bpf/netdrvx1_kern.c
@@ -0,0 +1,26 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") dropcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 256,
+};
+
+SEC("phys_dev1")
+int bpf_prog1(struct xdp_metadata *ctx)
+{
+	int index = load_byte(ctx, ETH_HLEN + offsetof(struct iphdr, protocol));
+	long *value;
+
+	value = bpf_map_lookup_elem(&dropcnt, &index);
+	if (value)
+		*value += 1;
+
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/netdrvx1_user.c b/samples/bpf/netdrvx1_user.c
new file mode 100644
index 0000000..9e6ec9a
--- /dev/null
+++ b/samples/bpf/netdrvx1_user.c
@@ -0,0 +1,155 @@
+#include <linux/bpf.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+
+static int set_link_bpf_fd(int ifindex, int fd)
+{
+	struct sockaddr_nl sa;
+	int sock, seq = 0, len, ret = -1;
+	char buf[4096];
+	struct rtattr *rta;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	struct nlmsghdr *nh;
+	struct nlmsgerr *err;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+	rta = (struct rtattr *)(((char *) &req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	rta->rta_type = 42/*IFLA_BPF_FD*/;
+	rta->rta_len = RTA_LENGTH(sizeof(unsigned int));
+	req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len)
+		+ RTA_LENGTH(sizeof(fd));
+	memcpy(RTA_DATA(rta), &fd, sizeof(fd));
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	len = recv(sock, buf, sizeof(buf), 0);
+	if (len < 0) {
+		printf("recv from netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+	     nh = NLMSG_NEXT(nh, len)) {
+		if (nh->nlmsg_pid != getpid()) {
+			printf("Wrong pid %d, expected %d\n",
+			       nh->nlmsg_pid, getpid());
+			goto cleanup;
+		}
+		if (nh->nlmsg_seq != seq) {
+			printf("Wrong seq %d, expected %d\n",
+			       nh->nlmsg_seq, seq);
+			goto cleanup;
+		}
+		switch (nh->nlmsg_type) {
+		case NLMSG_ERROR:
+			err = (struct nlmsgerr *)NLMSG_DATA(nh);
+			if (!err->error)
+				continue;
+			printf("nlmsg error %s\n", strerror(-err->error));
+			goto cleanup;
+		case NLMSG_DONE:
+			break;
+		}
+	}
+
+	ret = 0;
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+/* simple per-protocol drop counter
+ */
+static void poll_stats(int secs)
+{
+	unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	__u64 values[nr_cpus];
+	__u32 key;
+	int i;
+
+	sleep(secs);
+
+	for (key = 0; key < 256; key++) {
+		__u64 sum = 0;
+
+		assert(bpf_lookup_elem(map_fd[0], &key, values) == 0);
+		for (i = 0; i < nr_cpus; i++)
+			sum += values[i];
+		if (sum)
+			printf("proto %u: %10llu drops/s\n", key, sum/secs);
+	}
+}
+
+int main(int ac, char **argv)
+{
+	char filename[256];
+	int ifindex;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (ac != 2) {
+		printf("usage: %s IFINDEX\n", argv[0]);
+		return 1;
+	}
+
+	ifindex = strtoul(argv[1], NULL, 0);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	if (set_link_bpf_fd(ifindex, prog_fd[0]) < 0) {
+		printf("link set bpf fd failed\n");
+		return 1;
+	}
+
+	poll_stats(5);
+
+	set_link_bpf_fd(ifindex, -1);
+
+	return 0;
+}
-- 
2.8.0

  parent reply	other threads:[~2016-04-02  1:22 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-02  1:21 [RFC PATCH 0/5] Add driver bpf hook for early packet drop Brenden Blanco
2016-04-02  1:21 ` [RFC PATCH 1/5] bpf: add PHYS_DEV prog type for early driver filter Brenden Blanco
2016-04-02 16:39   ` Tom Herbert
2016-04-03  7:02     ` Brenden Blanco
2016-04-04 22:07       ` Thomas Graf
2016-04-05  8:19         ` Jesper Dangaard Brouer
2016-04-04  8:49   ` Daniel Borkmann
2016-04-04 13:07     ` Jesper Dangaard Brouer
2016-04-04 13:36       ` Daniel Borkmann
2016-04-04 14:09         ` Tom Herbert
2016-04-04 15:12           ` Jesper Dangaard Brouer
2016-04-04 15:29             ` Brenden Blanco
2016-04-04 16:07               ` John Fastabend
2016-04-04 16:17                 ` Brenden Blanco
2016-04-04 20:00                   ` Alexei Starovoitov
2016-04-04 22:04                     ` Thomas Graf
2016-04-05  2:25                       ` Alexei Starovoitov
2016-04-05  8:11                         ` Jesper Dangaard Brouer
2016-04-05  9:29                     ` Jesper Dangaard Brouer
2016-04-05 22:06                       ` Alexei Starovoitov
2016-04-04 14:33       ` Eric Dumazet
2016-04-04 15:18         ` Edward Cree
2016-04-02  1:21 ` [RFC PATCH 2/5] net: add ndo to set bpf prog in adapter rx Brenden Blanco
2016-04-02  1:21 ` [RFC PATCH 3/5] rtnl: add option for setting link bpf prog Brenden Blanco
2016-04-02  1:21 ` [RFC PATCH 4/5] mlx4: add support for fast rx drop bpf program Brenden Blanco
2016-04-02  2:08   ` Eric Dumazet
2016-04-02  2:47     ` Alexei Starovoitov
2016-04-04 14:57       ` Jesper Dangaard Brouer
2016-04-04 15:22         ` Eric Dumazet
2016-04-04 18:50           ` Alexei Starovoitov
2016-04-05 14:15             ` Or Gerlitz
2016-04-06  4:05               ` Brenden Blanco
2016-04-03  6:15     ` Brenden Blanco
2016-04-05  2:20       ` Brenden Blanco
2016-04-05  2:44         ` Eric Dumazet
2016-04-05 18:59         ` Eran Ben Elisha
2016-04-02  8:23   ` Jesper Dangaard Brouer
2016-04-03  6:11     ` Brenden Blanco
2016-04-04 18:27       ` Alexei Starovoitov
2016-04-05  6:04         ` Jesper Dangaard Brouer
2016-04-02 18:40   ` Johannes Berg
2016-04-03  6:38     ` Brenden Blanco
2016-04-04  7:35       ` Johannes Berg
2016-04-04  9:57         ` Daniel Borkmann
2016-04-04 18:46           ` Alexei Starovoitov
2016-04-04 21:01             ` Daniel Borkmann
2016-04-05  1:17               ` Alexei Starovoitov
2016-04-04  8:33   ` Jesper Dangaard Brouer
2016-04-04  9:22   ` Daniel Borkmann
2016-04-02  1:21 ` Brenden Blanco [this message]
2016-04-06 19:48   ` [RFC PATCH 5/5] Add sample for adding simple drop program to link Jesper Dangaard Brouer
2016-04-06 20:01     ` Jesper Dangaard Brouer
2016-04-06 23:11       ` Alexei Starovoitov
2016-04-06 20:03     ` Daniel Borkmann
2016-04-02 16:47 ` [RFC PATCH 0/5] Add driver bpf hook for early packet drop Tom Herbert
2016-04-03  5:41   ` Brenden Blanco
2016-04-04  7:48     ` Jesper Dangaard Brouer
2016-04-04 18:10       ` Alexei Starovoitov
2016-04-02 18:41 ` Johannes Berg
2016-04-02 22:57   ` Tom Herbert
2016-04-03  2:28     ` Lorenzo Colitti
2016-04-04  7:37       ` Johannes Berg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1459560118-5582-6-git-send-email-bblanco@plumgrid.com \
    --to=bblanco@plumgrid.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=brouer@redhat.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=gerlitz@mellanox.com \
    --cc=john.fastabend@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=tom@herbertland.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.