All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Borkmann <daniel@iogearbox.net>
To: davem@davemloft.net
Cc: alexei.starovoitov@gmail.com, tgraf@suug.ch,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Daniel Borkmann <daniel@iogearbox.net>,
	Alexei Starovoitov <ast@kernel.org>
Subject: [PATCH net-next v2 2/2] bpf: add event output helper for notifications/sampling/logging
Date: Mon, 18 Apr 2016 21:01:24 +0200	[thread overview]
Message-ID: <0a779688ae07a9a7dec50a4abc2b0dfa6246263e.1461005139.git.daniel@iogearbox.net> (raw)
In-Reply-To: <cover.1461005139.git.daniel@iogearbox.net>
In-Reply-To: <cover.1461005139.git.daniel@iogearbox.net>

This patch adds a new helper for cls/act programs that can push events
to user space applications. For networking, this can be f.e. for sampling,
debugging, logging purposes or pushing of arbitrary wake-up events. The
idea is similar to a43eec304259 ("bpf: introduce bpf_perf_event_output()
helper") and 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example").

The eBPF program utilizes a perf event array map that user space populates
with fds from perf_event_open(), the eBPF program calls into the helper
f.e. as skb_event_output(skb, &my_map, BPF_F_CURRENT_CPU, raw, sizeof(raw))
so that the raw data is pushed into the fd f.e. at the map index of the
current CPU.

User space can poll/mmap/etc on this and has a data channel for receiving
events that can be post-processed. The nice thing is that since the eBPF
program and user space application making use of it are tightly coupled,
they can define their own arbitrary raw data format and what/when they
want to push.

While f.e. packet headers could be one part of the meta data that is being
pushed, this is not a substitute for things like packet sockets as whole
packet is not being pushed and push is only done in a single direction.
Intention is more of a generically usable, efficient event pipe to applications.
Workflow is that tc can pin the map and applications can attach themselves
e.g. after cls/act setup to one or multiple map slots, demuxing is done by
the eBPF program.

Adding this facility is with minimal effort, it reuses the helper
introduced in a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
and we get its functionality for free by overloading its BPF_FUNC_ identifier
for cls/act programs, ctx is currently unused, but will be made use of in
future. Example will be added to iproute2's BPF example files.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h      |  2 ++
 kernel/bpf/core.c        |  7 +++++++
 kernel/trace/bpf_trace.c | 27 +++++++++++++++++++++++++++
 net/core/filter.c        |  2 ++
 4 files changed, 38 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5fb3c61..f63afdc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -169,7 +169,9 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 void bpf_fd_array_map_clear(struct bpf_map *map);
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
+const struct bpf_func_proto *bpf_get_event_output_proto(void);
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index be0abf6..e4248fe 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -764,14 +764,21 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
+
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
+
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
 	return NULL;
 }
 
+const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
+{
+	return NULL;
+}
+
 /* Always built-in helper functions. */
 const struct bpf_func_proto bpf_tail_call_proto = {
 	.func		= NULL,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6bfe55c..75cb154 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -277,6 +277,33 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 	.arg5_type	= ARG_CONST_STACK_SIZE,
 };
 
+static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
+
+static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+
+	perf_fetch_caller_regs(regs);
+
+	return bpf_perf_event_output((long)regs, r2, flags, r4, size);
+}
+
+static const struct bpf_func_proto bpf_event_output_proto = {
+	.func		= bpf_event_output,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_STACK,
+	.arg5_type	= ARG_CONST_STACK_SIZE,
+};
+
+const struct bpf_func_proto *bpf_get_event_output_proto(void)
+{
+	return &bpf_event_output_proto;
+}
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 5d2ac2b..218e5de 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2039,6 +2039,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_redirect_proto;
 	case BPF_FUNC_get_route_realm:
 		return &bpf_get_route_realm_proto;
+	case BPF_FUNC_perf_event_output:
+		return bpf_get_event_output_proto();
 	default:
 		return sk_filter_func_proto(func_id);
 	}
-- 
1.9.3

  parent reply	other threads:[~2016-04-18 19:01 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-18 19:01 [PATCH net-next v2 0/2] BPF updates Daniel Borkmann
2016-04-18 19:01 ` [PATCH net-next v2 1/2] bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output Daniel Borkmann
2016-04-18 19:01 ` Daniel Borkmann [this message]
2016-04-20  0:26 ` [PATCH net-next v2 0/2] BPF updates David Miller
  -- strict thread matches above, loose matches on Subject: below --
2016-04-26 16:53 [PATCH v2 0/5] mmc: tmio: make CTL_STATUS handling consistent Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 1/5] mmc: tmio: give read32/write32 functions more descriptive names Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 2/5] mmc: tmio: use BIT() within defines Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 3/5] mmc: tmio: use CTL_STATUS consistently Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 4/5] mmc: tmio/sdhi: distinguish between SCLKDIVEN and ILL_FUNC Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 5/5] mmc: tmio: document CTL_STATUS handling Wolfram Sang
2016-04-27  8:31 ` [PATCH v2 0/5] mmc: tmio: make CTL_STATUS handling consistent Ulf Hansson
2016-04-08 15:45 [patch net-next 0/5] mlxsw: small driver update Jiri Pirko
2016-04-08 15:45 ` [patch net-next 1/5] mlxsw: Move devlink port registration into common core code Jiri Pirko
2016-04-08 15:45 ` [patch net-next 2/5] mlxsw: Pass mlxsw_core as a param of mlxsw_core_skb_transmit* Jiri Pirko
2016-04-08 15:45 ` [patch net-next 3/5] mlxsw: Do not pass around driver_priv directly Jiri Pirko
2016-04-08 15:45 ` [patch net-next 4/5] mlxsw: reg: Share direction enum between SBPR, SBCM, SBPM Jiri Pirko
2016-04-08 15:45 ` [patch net-next 5/5] mlxsw: reg: Fix SBPM register name Jiri Pirko
2016-04-08 15:51 ` [patch net-next 0/5] mlxsw: small driver update Jiri Pirko
2016-04-08 17:07   ` David Miller
2016-04-08 17:11     ` Jiri Pirko
2016-04-07 22:31 [RFC PATCH 00/11] GSO partial and TSO FIXEDID support Alexander Duyck
2016-04-07 22:31 ` [RFC PATCH 01/11] GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 02/11] ethtool: Add support for toggling any of the GSO offloads Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 03/11] GSO: Add GSO type for fixed IPv4 ID Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 04/11] GRO: Add support for TCP with fixed IPv4 ID field, limit tunnel IP ID values Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 05/11] GSO: Support partial segmentation offload Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 06/11] VXLAN: Add option to mangle IP IDs on inner headers when using TSO Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 07/11] GENEVE: " Alexander Duyck
2016-04-07 23:22   ` Jesse Gross
2016-04-07 23:52     ` Alexander Duyck
2016-04-08 21:40       ` Jesse Gross
2016-04-08 22:04         ` Alexander Duyck
2016-04-09 15:52           ` Jesse Gross
2016-04-09 17:36             ` Alexander Duyck
2016-04-09 18:02               ` Eric Dumazet
2016-04-09 18:32                 ` Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 08/11] Documentation: Add documentation for TSO and GSO features Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 09/11] i40e/i40evf: Add support for GSO partial with UDP_TUNNEL_CSUM and GRE_CSUM Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 10/11] ixgbe/ixgbevf: Add support for GSO partial Alexander Duyck
2016-04-07 22:33 ` [RFC PATCH 11/11] igb/igbvf: " Alexander Duyck
2016-04-07 18:39 [PATCH v5 net-next 00/15] MTU/buffer reconfig changes Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 01/15] nfp: correct RX buffer length calculation Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 02/15] nfp: move link state interrupt request/free calls Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 03/15] nfp: break up nfp_net_{alloc|free}_rings Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 04/15] nfp: make *x_ring_init do all the init Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 05/15] nfp: allocate ring SW structs dynamically Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 06/15] nfp: cleanup tx ring flush and rename to reset Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 07/15] nfp: reorganize initial filling of RX rings Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 08/15] nfp: preallocate RX buffers early in .ndo_open Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 09/15] nfp: move filling ring information to FW config Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 10/15] nfp: slice .ndo_open() and .ndo_stop() up Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 11/15] nfp: sync ring state during FW reconfiguration Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 12/15] nfp: propagate list buffer size in struct rx_ring Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 13/15] nfp: convert .ndo_change_mtu() to prepare/commit paradigm Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 14/15] nfp: pass ring count as function parameter Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 15/15] nfp: allow ring size reconfiguration at runtime Jakub Kicinski
2016-04-08 19:34 ` [PATCH v5 net-next 00/15] MTU/buffer reconfig changes David Miller
2016-04-08 19:34   ` [PATCH v5 net-next 00/15] MTU/buffer reconfig changes, [PATCH net-next v2 1/2] bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output, [PATCH v2 1/5] mmc: tmio: give read32/write32 functions more descriptive names, Re: [patch net-next 0/5] mlxsw: small driver update, Re: [RFC PATCH 07/11] GENEVE: Add option to mangle IP IDs on inner headers when using TSO, Re: [PATCH/RFC v2] gpio: rcar: Add Runtime PM handling for interrupts, [v3,4/6] arm64: dts: salvator-x: enable PWM David Miller, Daniel Borkmann, Wolfram Sang, David Miller, Jesse Gross, Laurent Pinchart, Ulrich Hecht
2016-03-31 11:39 [v3,6/6] clk: shmobile: r8a7795: add PWM clock Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 6/6] " Ulrich Hecht
2016-03-31 11:39 [v3,2/6] arm64: defconfig : add PWM driver support Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 2/6] " Ulrich Hecht
2016-03-31 11:39 [PATCH v3 0/6] R8A7795/Salvator-X PWM support Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 1/6] pwm: rcar: Use ARCH_RENESAS Ulrich Hecht
2016-04-06  1:18   ` [v3,1/6] " Simon Horman
2016-07-11  9:44   ` [PATCH v3 1/6] " Thierry Reding
2016-03-31 11:39 ` [PATCH v3 3/6] arm64: dts: r8a7795: Add PWM device nodes Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 4/6] arm64: dts: salvator-x: enable PWM Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 5/6] pwm: add R-Car H3 device tree bindings Ulrich Hecht
2016-04-06  1:23   ` [v3,5/6] " Simon Horman
2016-07-11  9:45   ` [PATCH v3 5/6] " Thierry Reding
2016-02-18 16:06 [PATCH/RFC v2] gpio: rcar: Add Runtime PM handling for interrupts Geert Uytterhoeven
2016-02-19  9:18 ` Linus Walleij
2016-02-19 11:59   ` Marc Zyngier
2016-04-11 16:26     ` Laurent Pinchart
2016-04-11 16:55       ` Marc Zyngier
2016-04-11 17:18         ` Geert Uytterhoeven
2016-04-12  8:06           ` Linus Walleij
2016-02-25  9:07 ` Linus Walleij
2016-02-25  9:37   ` Geert Uytterhoeven
2016-02-25 14:19     ` Linus Walleij
2016-02-25 14:26       ` Geert Uytterhoeven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0a779688ae07a9a7dec50a4abc2b0dfa6246263e.1461005139.git.daniel@iogearbox.net \
    --to=daniel@iogearbox.net \
    --cc=alexei.starovoitov@gmail.com \
    --cc=ast@kernel.org \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.