All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] samples/bpf: Add xdp_sample_pkts example
@ 2018-05-30 16:45 Toke Høiland-Jørgensen
  2018-05-31  5:03 ` Song Liu
  0 siblings, 1 reply; 7+ messages in thread
From: Toke Høiland-Jørgensen @ 2018-05-30 16:45 UTC (permalink / raw)
  To: netdev; +Cc: Toke Høiland-Jørgensen

This adds an example program showing how to sample packets from XDP using
the perf event buffer. The example userspace program just prints the
ethernet header for every packet sampled.

Most of the userspace code is borrowed from other examples, most notably
trace_output.

Note that the example only works when everything runs on CPU0; so
suitable smp_affinity needs to be set on the device. Some drivers seem
to reset smp_affinity when loading an XDP program, so it may be
necessary to change it after starting the example userspace program.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 samples/bpf/Makefile               |   4 +
 samples/bpf/xdp_sample_pkts_kern.c |  48 ++++++++++++
 samples/bpf/xdp_sample_pkts_user.c | 147 +++++++++++++++++++++++++++++++++++++
 3 files changed, 199 insertions(+)
 create mode 100644 samples/bpf/xdp_sample_pkts_kern.c
 create mode 100644 samples/bpf/xdp_sample_pkts_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 1303af1..6f0c6d2 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
+hostprogs-y += xdp_sample_pkts
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+xdp_sample_pkts-objs := bpf_load.o xdp_sample_pkts_user.o $(TRACE_HELPERS)
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
+always += xdp_sample_pkts_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
 
 HOST_LOADLIBES		+= $(LIBBPF) -lelf
 HOSTLOADLIBES_tracex4		+= -lrt
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
new file mode 100644
index 0000000..c58183a
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -0,0 +1,48 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define SAMPLE_SIZE 64ul
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = 2,
+};
+
+SEC("xdp_sample")
+int xdp_sample_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+
+        /* Metadata will be in the perf event before the packet data. */
+	struct S {
+		u16 cookie;
+		u16 pkt_len;
+	} __attribute__((packed)) metadata;
+
+	if (data + SAMPLE_SIZE < data_end) {
+		/* The XDP perf_event_output handler will use the upper 32 bits
+		 * of the flags argument as a number of bytes to include of the
+		 * packet payload in the event data. If the size is too big, the
+		 * call to bpf_perf_event_output will fail and return -EFAULT.
+		 *
+		 * See bpf_xdp_event_output in net/core/filter.c.
+		 */
+		u64 flags = SAMPLE_SIZE << 32;
+
+		metadata.cookie = 0xdead;
+		metadata.pkt_len = (u16)(data_end - data);
+
+		bpf_perf_event_output(ctx, &my_map, flags,
+				      &metadata, sizeof(metadata));
+	}
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
new file mode 100644
index 0000000..f996917
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -0,0 +1,147 @@
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <net/if.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include <libbpf.h>
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+static int pmu_fd, if_idx = 0;
+static char *if_name;
+
+static int do_attach(int idx, int fd, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, fd, 0);
+	if (err < 0)
+		printf("ERROR: failed to attach program to %s\n", name);
+
+	return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, -1, 0);
+	if (err < 0)
+		printf("ERROR: failed to detach program from %s\n", name);
+
+	return err;
+}
+
+#define SAMPLE_SIZE 64
+
+static int print_bpf_output(void *data, int size)
+{
+	struct {
+		__u16 cookie;
+		__u16 pkt_len;
+		__u8  pkt_data[SAMPLE_SIZE];
+	} __attribute__((packed)) *e = data;
+	int i;
+
+	if (e->cookie != 0xdead) {
+		printf("BUG cookie %x sized %d\n",
+		       e->cookie, size);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+
+	printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
+	for (i = 0; i < 14 && i < e->pkt_len; i++)
+		printf("%02x ", e->pkt_data[i]);
+	printf("\n");
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_bpf_perf_event(void)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+	};
+	int key = 0;
+
+	pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+
+	assert(pmu_fd >= 0);
+	assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
+	ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+static void sig_handler(int signo)
+{
+	do_detach(if_idx, if_name);
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int ret, err;
+
+	if (argc < 2) {
+		printf("Usage: %s <ifname>\n", argv[0]);
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if_idx = if_nametoindex(argv[1]);
+	if (!if_idx)
+		if_idx = strtoul(argv[1], NULL, 0);
+
+	if (!if_idx) {
+		fprintf(stderr, "Invalid ifname\n");
+		return 1;
+	}
+	if_name = argv[1];
+	err = do_attach(if_idx, prog_fd[0], argv[1]);
+	if (err)
+		return err;
+
+	if (signal(SIGINT, sig_handler) ||
+	    signal(SIGHUP, sig_handler) ||
+	    signal(SIGTERM, sig_handler)) {
+		perror("signal");
+		return 1;
+	}
+
+	test_bpf_perf_event();
+
+	if (perf_event_mmap(pmu_fd) < 0)
+		return 1;
+
+	ret = perf_event_poller(pmu_fd, print_bpf_output);
+	kill(0, SIGINT);
+	return ret;
+}
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] samples/bpf: Add xdp_sample_pkts example
  2018-05-30 16:45 [PATCH] samples/bpf: Add xdp_sample_pkts example Toke Høiland-Jørgensen
@ 2018-05-31  5:03 ` Song Liu
  2018-05-31  9:44   ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 7+ messages in thread
From: Song Liu @ 2018-05-31  5:03 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Networking

On Wed, May 30, 2018 at 9:45 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
> This adds an example program showing how to sample packets from XDP using
> the perf event buffer. The example userspace program just prints the
> ethernet header for every packet sampled.
>
> Most of the userspace code is borrowed from other examples, most notably
> trace_output.
>
> Note that the example only works when everything runs on CPU0; so
> suitable smp_affinity needs to be set on the device. Some drivers seem
> to reset smp_affinity when loading an XDP program, so it may be
> necessary to change it after starting the example userspace program.

Why does this only work when everything runs on CPU0? Is this something
we can improve?

Thanks,
Song

>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
> ---
>  samples/bpf/Makefile               |   4 +
>  samples/bpf/xdp_sample_pkts_kern.c |  48 ++++++++++++
>  samples/bpf/xdp_sample_pkts_user.c | 147 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 199 insertions(+)
>  create mode 100644 samples/bpf/xdp_sample_pkts_kern.c
>  create mode 100644 samples/bpf/xdp_sample_pkts_user.c
>
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 1303af1..6f0c6d2 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
>  hostprogs-y += xdpsock
>  hostprogs-y += xdp_fwd
>  hostprogs-y += task_fd_query
> +hostprogs-y += xdp_sample_pkts
>
>  # Libbpf dependencies
>  LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
> @@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o
>  xdpsock-objs := bpf_load.o xdpsock_user.o
>  xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
>  task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
> +xdp_sample_pkts-objs := bpf_load.o xdp_sample_pkts_user.o $(TRACE_HELPERS)
>
>  # Tell kbuild to always build the programs
>  always := $(hostprogs-y)
> @@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
>  always += xdpsock_kern.o
>  always += xdp_fwd_kern.o
>  always += task_fd_query_kern.o
> +always += xdp_sample_pkts_kern.o
>
>  HOSTCFLAGS += -I$(objtree)/usr/include
>  HOSTCFLAGS += -I$(srctree)/tools/lib/
> @@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
>  HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
>  HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
>  HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
> +HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
>
>  HOST_LOADLIBES         += $(LIBBPF) -lelf
>  HOSTLOADLIBES_tracex4          += -lrt
> diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
> new file mode 100644
> index 0000000..c58183a
> --- /dev/null
> +++ b/samples/bpf/xdp_sample_pkts_kern.c
> @@ -0,0 +1,48 @@
> +#include <linux/ptrace.h>
> +#include <linux/version.h>
> +#include <uapi/linux/bpf.h>
> +#include "bpf_helpers.h"
> +
> +#define SAMPLE_SIZE 64ul
> +
> +struct bpf_map_def SEC("maps") my_map = {
> +       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
> +       .key_size = sizeof(int),
> +       .value_size = sizeof(u32),
> +       .max_entries = 2,
> +};
> +
> +SEC("xdp_sample")
> +int xdp_sample_prog(struct xdp_md *ctx)
> +{
> +       void *data_end = (void *)(long)ctx->data_end;
> +       void *data = (void *)(long)ctx->data;
> +
> +        /* Metadata will be in the perf event before the packet data. */
> +       struct S {
> +               u16 cookie;
> +               u16 pkt_len;
> +       } __attribute__((packed)) metadata;
> +
> +       if (data + SAMPLE_SIZE < data_end) {
> +               /* The XDP perf_event_output handler will use the upper 32 bits
> +                * of the flags argument as a number of bytes to include of the
> +                * packet payload in the event data. If the size is too big, the
> +                * call to bpf_perf_event_output will fail and return -EFAULT.
> +                *
> +                * See bpf_xdp_event_output in net/core/filter.c.
> +                */
> +               u64 flags = SAMPLE_SIZE << 32;
> +
> +               metadata.cookie = 0xdead;
> +               metadata.pkt_len = (u16)(data_end - data);
> +
> +               bpf_perf_event_output(ctx, &my_map, flags,
> +                                     &metadata, sizeof(metadata));
> +       }
> +
> +       return XDP_PASS;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> +u32 _version SEC("version") = LINUX_VERSION_CODE;
> diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
> new file mode 100644
> index 0000000..f996917
> --- /dev/null
> +++ b/samples/bpf/xdp_sample_pkts_user.c
> @@ -0,0 +1,147 @@
> +/* This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <linux/perf_event.h>
> +#include <linux/bpf.h>
> +#include <net/if.h>
> +#include <errno.h>
> +#include <assert.h>
> +#include <sys/syscall.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <time.h>
> +#include <signal.h>
> +#include <libbpf.h>
> +#include "bpf_load.h"
> +#include "bpf_util.h"
> +#include <bpf/bpf.h>
> +
> +#include "perf-sys.h"
> +#include "trace_helpers.h"
> +
> +static int pmu_fd, if_idx = 0;
> +static char *if_name;
> +
> +static int do_attach(int idx, int fd, const char *name)
> +{
> +       int err;
> +
> +       err = bpf_set_link_xdp_fd(idx, fd, 0);
> +       if (err < 0)
> +               printf("ERROR: failed to attach program to %s\n", name);
> +
> +       return err;
> +}
> +
> +static int do_detach(int idx, const char *name)
> +{
> +       int err;
> +
> +       err = bpf_set_link_xdp_fd(idx, -1, 0);
> +       if (err < 0)
> +               printf("ERROR: failed to detach program from %s\n", name);
> +
> +       return err;
> +}
> +
> +#define SAMPLE_SIZE 64
> +
> +static int print_bpf_output(void *data, int size)
> +{
> +       struct {
> +               __u16 cookie;
> +               __u16 pkt_len;
> +               __u8  pkt_data[SAMPLE_SIZE];
> +       } __attribute__((packed)) *e = data;
> +       int i;
> +
> +       if (e->cookie != 0xdead) {
> +               printf("BUG cookie %x sized %d\n",
> +                      e->cookie, size);
> +               return LIBBPF_PERF_EVENT_ERROR;
> +       }
> +
> +       printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
> +       for (i = 0; i < 14 && i < e->pkt_len; i++)
> +               printf("%02x ", e->pkt_data[i]);
> +       printf("\n");
> +
> +       return LIBBPF_PERF_EVENT_CONT;
> +}
> +
> +static void test_bpf_perf_event(void)
> +{
> +       struct perf_event_attr attr = {
> +               .sample_type = PERF_SAMPLE_RAW,
> +               .type = PERF_TYPE_SOFTWARE,
> +               .config = PERF_COUNT_SW_BPF_OUTPUT,
> +       };
> +       int key = 0;
> +
> +       pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
> +
> +       assert(pmu_fd >= 0);
> +       assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
> +       ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
> +}
> +
> +static void sig_handler(int signo)
> +{
> +       do_detach(if_idx, if_name);
> +       exit(0);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +       char filename[256];
> +       int ret, err;
> +
> +       if (argc < 2) {
> +               printf("Usage: %s <ifname>\n", argv[0]);
> +               return 1;
> +       }
> +
> +       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
> +
> +       if (load_bpf_file(filename)) {
> +               printf("%s", bpf_log_buf);
> +               return 1;
> +       }
> +
> +       if_idx = if_nametoindex(argv[1]);
> +       if (!if_idx)
> +               if_idx = strtoul(argv[1], NULL, 0);
> +
> +       if (!if_idx) {
> +               fprintf(stderr, "Invalid ifname\n");
> +               return 1;
> +       }
> +       if_name = argv[1];
> +       err = do_attach(if_idx, prog_fd[0], argv[1]);
> +       if (err)
> +               return err;
> +
> +       if (signal(SIGINT, sig_handler) ||
> +           signal(SIGHUP, sig_handler) ||
> +           signal(SIGTERM, sig_handler)) {
> +               perror("signal");
> +               return 1;
> +       }
> +
> +       test_bpf_perf_event();
> +
> +       if (perf_event_mmap(pmu_fd) < 0)
> +               return 1;
> +
> +       ret = perf_event_poller(pmu_fd, print_bpf_output);
> +       kill(0, SIGINT);
> +       return ret;
> +}
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] samples/bpf: Add xdp_sample_pkts example
  2018-05-31  5:03 ` Song Liu
@ 2018-05-31  9:44   ` Toke Høiland-Jørgensen
  2018-06-02  4:22     ` Daniel Borkmann
  0 siblings, 1 reply; 7+ messages in thread
From: Toke Høiland-Jørgensen @ 2018-05-31  9:44 UTC (permalink / raw)
  To: Song Liu; +Cc: Networking

Song Liu <liu.song.a23@gmail.com> writes:

> On Wed, May 30, 2018 at 9:45 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>> This adds an example program showing how to sample packets from XDP using
>> the perf event buffer. The example userspace program just prints the
>> ethernet header for every packet sampled.
>>
>> Most of the userspace code is borrowed from other examples, most notably
>> trace_output.
>>
>> Note that the example only works when everything runs on CPU0; so
>> suitable smp_affinity needs to be set on the device. Some drivers seem
>> to reset smp_affinity when loading an XDP program, so it may be
>> necessary to change it after starting the example userspace program.
>
> Why does this only works when everything runs on CPU0? Is this
> something we can improve?

Yeah, good question. Basically, the call from XDP to
bpf_perf_event_output() will fail with -EOPNOTSUPP. I tracked this down
to this if statement in __bpf_perf_event_output() in bpf_trace.c:

>	if (unlikely(event->oncpu != cpu))
>		return -EOPNOTSUPP;

I *think* that the way to fix this is for the userspace program to open
a perf file descriptor for each CPU in the system and poll all of them,
in which case the XDP program can pass the BPF_F_CURRENT_CPU flag to
access the right one. I would love for someone more knowledgeable about
perf internals to confirm this, though.

And, well, the polling function in trace_helpers.c doesn't currently
support this, and I didn't have the time to fix that while writing
this example :)

-Toke

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] samples/bpf: Add xdp_sample_pkts example
  2018-05-31  9:44   ` Toke Høiland-Jørgensen
@ 2018-06-02  4:22     ` Daniel Borkmann
  2018-06-04 12:31       ` Daniel Borkmann
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2018-06-02  4:22 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, Song Liu; +Cc: Networking

On 05/31/2018 11:44 AM, Toke Høiland-Jørgensen wrote:
> Song Liu <liu.song.a23@gmail.com> writes:
> 
>> On Wed, May 30, 2018 at 9:45 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>> This adds an example program showing how to sample packets from XDP using
>>> the perf event buffer. The example userspace program just prints the
>>> ethernet header for every packet sampled.
>>>
>>> Most of the userspace code is borrowed from other examples, most notably
>>> trace_output.
>>>
>>> Note that the example only works when everything runs on CPU0; so
>>> suitable smp_affinity needs to be set on the device. Some drivers seem
>>> to reset smp_affinity when loading an XDP program, so it may be
>>> necessary to change it after starting the example userspace program.
>>
>> Why does this only works when everything runs on CPU0? Is this
>> something we can improve?
> 
> Yeah, good question. Basically, the call from XDP to
> bpf_perf_event_output() will fail with -EOPNOTSUPP. I tracked this down
> to this if statement in __bpf_perf_event_output() in bpf_trace.c:
> 
>> 	if (unlikely(event->oncpu != cpu))
>> 		return -EOPNOTSUPP;
> 
> I *think* that the way to fix this is for the userspace program to open
> a perf file descriptor for each CPU in the system and poll all of them,
> in which case the XDP program can pass the BPF_F_CURRENT_CPU flag to
> access the right one.
That is correct, you need one perf fd per cpu, and map them accordingly
into the map slots when you use BPF_F_CURRENT_CPU.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] samples/bpf: Add xdp_sample_pkts example
  2018-06-02  4:22     ` Daniel Borkmann
@ 2018-06-04 12:31       ` Daniel Borkmann
  2018-06-04 13:02         ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2018-06-04 12:31 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, Song Liu; +Cc: Networking

On 06/02/2018 06:22 AM, Daniel Borkmann wrote:
> On 05/31/2018 11:44 AM, Toke Høiland-Jørgensen wrote:
>> Song Liu <liu.song.a23@gmail.com> writes:
>>
>>> On Wed, May 30, 2018 at 9:45 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>>> This adds an example program showing how to sample packets from XDP using
>>>> the perf event buffer. The example userspace program just prints the
>>>> ethernet header for every packet sampled.
>>>>
>>>> Most of the userspace code is borrowed from other examples, most notably
>>>> trace_output.
>>>>
>>>> Note that the example only works when everything runs on CPU0; so
>>>> suitable smp_affinity needs to be set on the device. Some drivers seem
>>>> to reset smp_affinity when loading an XDP program, so it may be
>>>> necessary to change it after starting the example userspace program.
>>>
>>> Why does this only works when everything runs on CPU0? Is this
>>> something we can improve?
>>
>> Yeah, good question. Basically, the call from XDP to
>> bpf_perf_event_output() will fail with -EOPNOTSUPP. I tracked this down
>> to this if statement in __bpf_perf_event_output() in bpf_trace.c:
>>
>>> 	if (unlikely(event->oncpu != cpu))
>>> 		return -EOPNOTSUPP;
>>
>> I *think* that the way to fix this is for the userspace program to open
>> a perf file descriptor for each CPU in the system and poll all of them,
>> in which case the XDP program can pass the BPF_F_CURRENT_CPU flag to
>> access the right one.
> That is correct, you need one perf fd per cpu, and map them accordingly
> into the map slots when you use BPF_F_CURRENT_CPU.

Given this is a sample that users are likely to copy from, I think it would
be great if you could fix this up so you can just pass in BPF_F_CURRENT_CPU
eventually. Thanks for working on this, Toke!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] samples/bpf: Add xdp_sample_pkts example
  2018-06-04 12:31       ` Daniel Borkmann
@ 2018-06-04 13:02         ` Toke Høiland-Jørgensen
  2018-06-04 13:12           ` Daniel Borkmann
  0 siblings, 1 reply; 7+ messages in thread
From: Toke Høiland-Jørgensen @ 2018-06-04 13:02 UTC (permalink / raw)
  To: Daniel Borkmann, Song Liu; +Cc: Networking

Daniel Borkmann <daniel@iogearbox.net> writes:

> On 06/02/2018 06:22 AM, Daniel Borkmann wrote:
>> On 05/31/2018 11:44 AM, Toke Høiland-Jørgensen wrote:
>>> Song Liu <liu.song.a23@gmail.com> writes:
>>>
>>>> On Wed, May 30, 2018 at 9:45 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>>>> This adds an example program showing how to sample packets from XDP using
>>>>> the perf event buffer. The example userspace program just prints the
>>>>> ethernet header for every packet sampled.
>>>>>
>>>>> Most of the userspace code is borrowed from other examples, most notably
>>>>> trace_output.
>>>>>
>>>>> Note that the example only works when everything runs on CPU0; so
>>>>> suitable smp_affinity needs to be set on the device. Some drivers seem
>>>>> to reset smp_affinity when loading an XDP program, so it may be
>>>>> necessary to change it after starting the example userspace program.
>>>>
>>>> Why does this only works when everything runs on CPU0? Is this
>>>> something we can improve?
>>>
>>> Yeah, good question. Basically, the call from XDP to
>>> bpf_perf_event_output() will fail with -EOPNOTSUPP. I tracked this down
>>> to this if statement in __bpf_perf_event_output() in bpf_trace.c:
>>>
>>>> 	if (unlikely(event->oncpu != cpu))
>>>> 		return -EOPNOTSUPP;
>>>
>>> I *think* that the way to fix this is for the userspace program to open
>>> a perf file descriptor for each CPU in the system and poll all of them,
>>> in which case the XDP program can pass the BPF_F_CURRENT_CPU flag to
>>> access the right one.
>> That is correct, you need one perf fd per cpu, and map them accordingly
>> into the map slots when you use BPF_F_CURRENT_CPU.
>
> Given this is a sample that users are likely to copy from, I think it would
> be great if you could fix this up so you can just pass in BPF_F_CURRENT_CPU
> eventually. Thanks for working on this, Toke!

You're welcome! And yup, I was planning to. I'll need to add a new
function to the trace helpers that can poll more than one fd; just
haven't gotten around to it yet. :)

-Toke

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] samples/bpf: Add xdp_sample_pkts example
  2018-06-04 13:02         ` Toke Høiland-Jørgensen
@ 2018-06-04 13:12           ` Daniel Borkmann
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Borkmann @ 2018-06-04 13:12 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, Song Liu; +Cc: Networking

On 06/04/2018 03:02 PM, Toke Høiland-Jørgensen wrote:
> Daniel Borkmann <daniel@iogearbox.net> writes:
>> On 06/02/2018 06:22 AM, Daniel Borkmann wrote:
>>> On 05/31/2018 11:44 AM, Toke Høiland-Jørgensen wrote:
>>>> Song Liu <liu.song.a23@gmail.com> writes:
>>>>> On Wed, May 30, 2018 at 9:45 AM, Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>>>>>> This adds an example program showing how to sample packets from XDP using
>>>>>> the perf event buffer. The example userspace program just prints the
>>>>>> ethernet header for every packet sampled.
>>>>>>
>>>>>> Most of the userspace code is borrowed from other examples, most notably
>>>>>> trace_output.
>>>>>>
>>>>>> Note that the example only works when everything runs on CPU0; so
>>>>>> suitable smp_affinity needs to be set on the device. Some drivers seem
>>>>>> to reset smp_affinity when loading an XDP program, so it may be
>>>>>> necessary to change it after starting the example userspace program.
>>>>>
>>>>> Why does this only works when everything runs on CPU0? Is this
>>>>> something we can improve?
>>>>
>>>> Yeah, good question. Basically, the call from XDP to
>>>> bpf_perf_event_output() will fail with -EOPNOTSUPP. I tracked this down
>>>> to this if statement in __bpf_perf_event_output() in bpf_trace.c:
>>>>
>>>>> 	if (unlikely(event->oncpu != cpu))
>>>>> 		return -EOPNOTSUPP;
>>>>
>>>> I *think* that the way to fix this is for the userspace program to open
>>>> a perf file descriptor for each CPU in the system and poll all of them,
>>>> in which case the XDP program can pass the BPF_F_CURRENT_CPU flag to
>>>> access the right one.
>>> That is correct, you need one perf fd per cpu, and map them accordingly
>>> into the map slots when you use BPF_F_CURRENT_CPU.
>>
>> Given this is a sample that users are likely to copy from, I think it would
>> be great if you could fix this up so you can just pass in BPF_F_CURRENT_CPU
>> eventually. Thanks for working on this, Toke!
> 
> You're welcome! And yup, I was planning to. I'll need to add a new
> function to the trace helpers that can poll more than one fd; just
> haven't gotten around to it yet. :)

Ok, great, looking forward!

Cheers,
Daniel

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-06-04 13:12 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-30 16:45 [PATCH] samples/bpf: Add xdp_sample_pkts example Toke Høiland-Jørgensen
2018-05-31  5:03 ` Song Liu
2018-05-31  9:44   ` Toke Høiland-Jørgensen
2018-06-02  4:22     ` Daniel Borkmann
2018-06-04 12:31       ` Daniel Borkmann
2018-06-04 13:02         ` Toke Høiland-Jørgensen
2018-06-04 13:12           ` Daniel Borkmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.