* [PATCH bpf-next] bpf: add bpf_xdp_output() helper
@ 2020-03-06 8:59 Eelco Chaudron
2020-03-07 2:57 ` John Fastabend
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Eelco Chaudron @ 2020-03-06 8:59 UTC (permalink / raw)
To: bpf; +Cc: davem, netdev, ast, daniel, kafai, songliubraving, yhs, andriin, toke
Introduce new helper that reuses existing xdp perf_event output
implementation, but can be called from raw_tracepoint programs
that receive 'struct xdp_buff *' as a tracepoint argument.
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
include/uapi/linux/bpf.h | 27 ++++++++++
kernel/bpf/verifier.c | 4 +-
kernel/trace/bpf_trace.c | 3 +
net/core/filter.c | 16 ++++++
tools/include/uapi/linux/bpf.h | 27 ++++++++++
.../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 53 ++++++++++++++++++++
.../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 24 +++++++++
7 files changed, 150 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 40b2d9476268..41a90e2d5821 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2914,6 +2914,30 @@ union bpf_attr {
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3035,7 +3059,8 @@ union bpf_attr {
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
- FN(read_branch_records),
+ FN(read_branch_records), \
+ FN(xdp_output),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ae32517d4ccd..66eb4b836000 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_perf_event_read &&
func_id != BPF_FUNC_perf_event_output &&
func_id != BPF_FUNC_skb_output &&
- func_id != BPF_FUNC_perf_event_read_value)
+ func_id != BPF_FUNC_perf_event_read_value &&
+ func_id != BPF_FUNC_xdp_output)
goto error;
break;
case BPF_MAP_TYPE_STACK_TRACE:
@@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
case BPF_FUNC_skb_output:
+ case BPF_FUNC_xdp_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 363e0a2c75cf..87c024ccdd1d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1143,6 +1143,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
};
extern const struct bpf_func_proto bpf_skb_output_proto;
+extern const struct bpf_func_proto bpf_xdp_output_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
@@ -1218,6 +1219,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_NET
case BPF_FUNC_skb_output:
return &bpf_skb_output_proto;
+ case BPF_FUNC_xdp_output:
+ return &bpf_xdp_output_proto;
#endif
default:
return raw_tp_prog_func_proto(func_id, prog);
diff --git a/net/core/filter.c b/net/core/filter.c
index cd0a532db4e7..22219544410f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+ if (unlikely(!xdp ||
+ xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, xdp->data,
@@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static int bpf_xdp_output_btf_ids[5];
+const struct bpf_func_proto bpf_xdp_output_proto = {
+ .func = bpf_xdp_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_xdp_output_btf_ids,
+};
+
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
{
return skb->sk ? sock_gen_cookie(skb->sk) : 0;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 40b2d9476268..41a90e2d5821 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2914,6 +2914,30 @@ union bpf_attr {
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3035,7 +3059,8 @@ union bpf_attr {
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
- FN(read_branch_records),
+ FN(read_branch_records), \
+ FN(xdp_output),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 4ba011031d4c..a0f688c37023 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -4,17 +4,51 @@
#include "test_xdp.skel.h"
#include "test_xdp_bpf2bpf.skel.h"
+struct meta {
+ int ifindex;
+ int pkt_len;
+};
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+ int duration = 0;
+ struct meta *meta = (struct meta *)data;
+ struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+ if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+ "check_size", "size %u < %zu\n",
+ size, sizeof(pkt_v4) + sizeof(*meta)))
+ return;
+
+ if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+ "meta->ifindex = %d\n", meta->ifindex))
+ return;
+
+ if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+ "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ return;
+
+ if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+ "check_packet_content", "content not the same\n"))
+ return;
+
+ *(bool *)ctx = true;
+}
+
void test_xdp_bpf2bpf(void)
{
__u32 duration = 0, retval, size;
char buf[128];
int err, pkt_fd, map_fd;
+ bool passed = false;
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
struct iptnl_info value4 = {.family = AF_INET};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
struct bpf_program *prog;
+ struct perf_buffer *pb = NULL;
+ struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
@@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void)
if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
goto out;
+ /* Set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+ pb_opts.ctx = &passed;
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+ 1, &pb_opts);
+ if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ goto out;
+
/* Run test program */
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
@@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void)
err, errno, retval, size))
goto out;
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
+ if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+ goto out;
+
+ CHECK_FAIL(!passed);
+
/* Verify test results */
if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
"result", "fentry failed err %llu\n",
@@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void)
"fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
out:
+ if (pb)
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index 42dd2fedd588..a038e827f850 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -3,6 +3,8 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+char _license[] SEC("license") = "GPL";
+
struct net_device {
/* Structure does not need to contain all entries,
* as "preserve_access_index" will use BTF to fix this...
@@ -27,10 +29,32 @@ struct xdp_buff {
struct xdp_rxq_info *rxq;
} __attribute__((preserve_access_index));
+struct meta {
+ int ifindex;
+ int pkt_len;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
__u64 test_result_fentry = 0;
SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
+ struct meta meta;
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ meta.ifindex = xdp->rxq->dev->ifindex;
+ meta.pkt_len = data_end - data;
+ bpf_xdp_output(xdp, &perf_buf_map,
+ ((__u64) meta.pkt_len << 32) |
+ BPF_F_CURRENT_CPU,
+ &meta, sizeof(meta));
+
test_result_fentry = xdp->rxq->dev->ifindex;
return 0;
}
^ permalink raw reply related [flat|nested] 4+ messages in thread
* RE: [PATCH bpf-next] bpf: add bpf_xdp_output() helper
2020-03-06 8:59 [PATCH bpf-next] bpf: add bpf_xdp_output() helper Eelco Chaudron
@ 2020-03-07 2:57 ` John Fastabend
2020-03-09 10:06 ` Toke Høiland-Jørgensen
2020-03-13 1:06 ` Alexei Starovoitov
2 siblings, 0 replies; 4+ messages in thread
From: John Fastabend @ 2020-03-07 2:57 UTC (permalink / raw)
To: Eelco Chaudron, bpf
Cc: davem, netdev, ast, daniel, kafai, songliubraving, yhs, andriin, toke
Eelco Chaudron wrote:
> Introduce new helper that reuses existing xdp perf_event output
> implementation, but can be called from raw_tracepoint programs
> that receive 'struct xdp_buff *' as a tracepoint argument.
>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
> include/uapi/linux/bpf.h | 27 ++++++++++
> kernel/bpf/verifier.c | 4 +-
> kernel/trace/bpf_trace.c | 3 +
> net/core/filter.c | 16 ++++++
> tools/include/uapi/linux/bpf.h | 27 ++++++++++
> .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 53 ++++++++++++++++++++
> .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 24 +++++++++
> 7 files changed, 150 insertions(+), 4 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 40b2d9476268..41a90e2d5821 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2914,6 +2914,30 @@ union bpf_attr {
> * of sizeof(struct perf_branch_entry).
> *
> * **-ENOENT** if architecture does not support branch records.
> + *
> + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
> + * Description
Feels a bit odd to have flags in the middle of the signature, but it follows
bpf_perf_event_output(), so I guess it's better to have the two use the
same signature than to break it here.
> + * Write raw *data* blob into a special BPF perf event held by
> + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
> + * event must have the following attributes: **PERF_SAMPLE_RAW**
> + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
> + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
> + *
> + * The *flags* are used to indicate the index in *map* for which
> + * the value must be put, masked with **BPF_F_INDEX_MASK**.
> + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
> + * to indicate that the index of the current CPU core should be
> + * used.
> + *
> + * The value to write, of *size*, is passed through eBPF stack and
> + * pointed by *data*.
> + *
> + * *ctx* is a pointer to in-kernel struct xdp_buff.
> + *
> + * This helper is similar to **bpf_perf_eventoutput**\ () but
> + * restricted to raw_tracepoint bpf programs.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> */
Otherwise,
Acked-by: John Fastabend <john.fastabend@gmail.com>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next] bpf: add bpf_xdp_output() helper
2020-03-06 8:59 [PATCH bpf-next] bpf: add bpf_xdp_output() helper Eelco Chaudron
2020-03-07 2:57 ` John Fastabend
@ 2020-03-09 10:06 ` Toke Høiland-Jørgensen
2020-03-13 1:06 ` Alexei Starovoitov
2 siblings, 0 replies; 4+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-03-09 10:06 UTC (permalink / raw)
To: Eelco Chaudron, bpf
Cc: davem, netdev, ast, daniel, kafai, songliubraving, yhs, andriin
Eelco Chaudron <echaudro@redhat.com> writes:
> Introduce new helper that reuses existing xdp perf_event output
> implementation, but can be called from raw_tracepoint programs
> that receive 'struct xdp_buff *' as a tracepoint argument.
>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next] bpf: add bpf_xdp_output() helper
2020-03-06 8:59 [PATCH bpf-next] bpf: add bpf_xdp_output() helper Eelco Chaudron
2020-03-07 2:57 ` John Fastabend
2020-03-09 10:06 ` Toke Høiland-Jørgensen
@ 2020-03-13 1:06 ` Alexei Starovoitov
2 siblings, 0 replies; 4+ messages in thread
From: Alexei Starovoitov @ 2020-03-13 1:06 UTC (permalink / raw)
To: Eelco Chaudron
Cc: bpf, David S. Miller, Network Development, Alexei Starovoitov,
Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
Andrii Nakryiko, Toke Høiland-Jørgensen
On Fri, Mar 6, 2020 at 12:59 AM Eelco Chaudron <echaudro@redhat.com> wrote:
>
> Introduce new helper that reuses existing xdp perf_event output
> implementation, but can be called from raw_tracepoint programs
> that receive 'struct xdp_buff *' as a tracepoint argument.
>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Applied. Thanks
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-03-13 1:07 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-06 8:59 [PATCH bpf-next] bpf: add bpf_xdp_output() helper Eelco Chaudron
2020-03-07 2:57 ` John Fastabend
2020-03-09 10:06 ` Toke Høiland-Jørgensen
2020-03-13 1:06 ` Alexei Starovoitov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).