* [RFC PATCH 11/11] dtrace: make use of writable buffers in BPF
From: Kris Van Hees @ 2019-05-20 23:55 UTC
  To: netdev, bpf, dtrace-devel, linux-kernel
  Cc: rostedt, mhiramat, acme, ast, daniel

This commit modifies the tiny proof-of-concept DTrace utility to use
the writable-buffer support in BPF, along with the new helpers for
buffer reservation and commit.  The dtrace_finalize_context() helper
is updated and is now marked with ctx_update, because it modifies the
context: it sets the buffer pointer to NULL and the buffer size to 0.
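
For illustration, the reserve/write/commit pattern from a BPF program
looks as follows (a minimal sketch mirroring the probe1_bpf.c changes
below; BUF_ID and the 48-byte sample size are taken from that sample
program):

	err = bpf_buffer_reserve(ctx, BUF_ID, &buffer_map, 48);
	if (err < 0)
		return -1;
	buf = ctx->buf;
	if (buf + 48 > ctx->buf_end)	/* bounds check required by the verifier */
		return -1;
	/* ... store the sample data through buf ... */
	bpf_buffer_commit(ctx, BUF_ID, &buffer_map);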

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Reviewed-by: Nick Alcock <nick.alcock@oracle.com>
---
 include/uapi/linux/dtrace.h |   4 +
 kernel/trace/dtrace/bpf.c   | 150 ++++++++++++++++++++++++++++++++++++
 tools/dtrace/dt_buffer.c    |  54 +++++--------
 tools/dtrace/probe1_bpf.c   |  47 ++++++-----
 4 files changed, 198 insertions(+), 57 deletions(-)

diff --git a/include/uapi/linux/dtrace.h b/include/uapi/linux/dtrace.h
index bbe2562c11f2..3fcc075a429f 100644
--- a/include/uapi/linux/dtrace.h
+++ b/include/uapi/linux/dtrace.h
@@ -33,6 +33,10 @@ struct dtrace_bpf_context {
 	u32 gid;	/* from_kgid(&init_user_ns, current_real_cred()->gid */
 	u32 euid;	/* from_kuid(&init_user_ns, current_real_cred()->euid */
 	u32 egid;	/* from_kgid(&init_user_ns, current_real_cred()->egid */
+
+	/* General output buffer */
+	__bpf_md_ptr(u8 *, buf);
+	__bpf_md_ptr(u8 *, buf_end);
 };
 
 /*
diff --git a/kernel/trace/dtrace/bpf.c b/kernel/trace/dtrace/bpf.c
index 95f4103d749e..93bd2f0319cc 100644
--- a/kernel/trace/dtrace/bpf.c
+++ b/kernel/trace/dtrace/bpf.c
@@ -7,6 +7,7 @@
 #include <linux/filter.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/perf_event.h>
 
 /*
  * Actual kernel definition of the DTrace BPF context.
@@ -16,6 +17,9 @@ struct dtrace_bpf_ctx {
 	u32				ecb_id;
 	u32				probe_id;
 	struct task_struct		*task;
+	struct perf_output_handle	handle;
+	u64				buf_len;
+	u8				*buf;
 };
 
 /*
@@ -55,6 +59,8 @@ BPF_CALL_2(dtrace_finalize_context, struct dtrace_bpf_ctx *, ctx,
 
 	ctx->ecb_id = ecb->id;
 	ctx->probe_id = ecb->probe_id;
+	ctx->buf_len = 0;
+	ctx->buf = NULL;
 
 	return 0;
 }
@@ -62,17 +68,119 @@ BPF_CALL_2(dtrace_finalize_context, struct dtrace_bpf_ctx *, ctx,
 static const struct bpf_func_proto dtrace_finalize_context_proto = {
 	.func           = dtrace_finalize_context,
 	.gpl_only       = false,
+	.ctx_update	= true,
 	.ret_type       = RET_INTEGER,
 	.arg1_type      = ARG_PTR_TO_CTX,		/* ctx */
 	.arg2_type      = ARG_CONST_MAP_PTR,		/* map */
 };
 
+BPF_CALL_4(dtrace_buffer_reserve, struct dtrace_bpf_ctx *, ctx,
+				  int, id, struct bpf_map *, map, int, size)
+{
+	struct bpf_array	*arr = container_of(map, struct bpf_array, map);
+	int			cpu = smp_processor_id();
+	struct bpf_event_entry	*ee;
+	struct perf_event	*ev;
+	int			err;
+
+	/*
+	 * Make sure the writable-buffer id is valid.  We use the default which
+	 * is the offset of the start-of-buffer pointer in the public context.
+	 */
+	if (id != offsetof(struct dtrace_bpf_context, buf))
+		return -EINVAL;
+
+	/*
+	 * Verify whether we have an uncommitted reserve.  If so, we deny this
+	 * request.
+	 */
+	if (ctx->handle.rb)
+		return -EBUSY;
+
+	/*
+	 * Perform sanity checks.
+	 */
+	if (cpu >= arr->map.max_entries)
+		return -E2BIG;
+	ee = READ_ONCE(arr->ptrs[cpu]);
+	if (!ee)
+		return -ENOENT;
+	ev = ee->event;
+	if (unlikely(ev->attr.type != PERF_TYPE_SOFTWARE ||
+		     ev->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
+		return -EINVAL;
+	if (unlikely(ev->oncpu != cpu))
+		return -EOPNOTSUPP;
+
+	size = round_up(size, sizeof(u64));
+
+	err = perf_output_begin_forward_in_page(&ctx->handle, ev, size);
+	if (err < 0)
+		return err;
+
+	ctx->buf_len = size;
+	ctx->buf = ctx->handle.addr;
+
+	return 0;
+}
+
+static const struct bpf_func_proto dtrace_buffer_reserve_proto = {
+	.func           = dtrace_buffer_reserve,
+	.gpl_only       = false,
+	.ctx_update	= true,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,		/* ctx */
+	.arg2_type      = ARG_ANYTHING,			/* id */
+	.arg3_type      = ARG_CONST_MAP_PTR,		/* map */
+	.arg4_type      = ARG_ANYTHING,			/* size */
+};
+
+BPF_CALL_3(dtrace_buffer_commit, struct dtrace_bpf_ctx *, ctx,
+				 int, id, struct bpf_map *, map)
+{
+	/*
+	 * Make sure the writable-buffer id is valid.  We use the default which
+	 * is the offset of the start-of-buffer pointer in the public context.
+	 */
+	if (id != offsetof(struct dtrace_bpf_context, buf))
+		return -EINVAL;
+
+	/*
+	 * Verify that we have an uncommitted reserve.  If not, there is really
+	 * nothing to be done here.
+	 */
+	if (!ctx->handle.rb)
+		return 0;
+
+	perf_output_end(&ctx->handle);
+
+	ctx->handle.rb = NULL;
+	ctx->buf_len = 0;
+	ctx->buf = NULL;
+
+	return 0;
+}
+
+static const struct bpf_func_proto dtrace_buffer_commit_proto = {
+	.func           = dtrace_buffer_commit,
+	.gpl_only       = false,
+	.ctx_update	= true,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,		/* ctx */
+	.arg2_type      = ARG_ANYTHING,			/* id */
+	.arg3_type      = ARG_CONST_MAP_PTR,		/* map */
+};
+
 static const struct bpf_func_proto *
 dtrace_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_finalize_context:
 		return &dtrace_finalize_context_proto;
+	case BPF_FUNC_buffer_reserve:
+		return &dtrace_buffer_reserve_proto;
+	case BPF_FUNC_buffer_commit:
+		return &dtrace_buffer_commit_proto;
 	case BPF_FUNC_perf_event_output:
 		return bpf_get_perf_event_output_proto();
 	case BPF_FUNC_trace_printk:
@@ -131,6 +239,22 @@ static bool dtrace_is_valid_access(int off, int size, enum bpf_access_type type,
 		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u32)))
 			return true;
 		break;
+	case bpf_ctx_range(struct dtrace_bpf_context, buf):
+		info->reg_type = PTR_TO_BUFFER;
+		info->buf_id = offsetof(struct dtrace_bpf_context, buf);
+
+		bpf_ctx_record_field_size(info, sizeof(u64));
+		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u64)))
+			return true;
+		break;
+	case bpf_ctx_range(struct dtrace_bpf_context, buf_end):
+		info->reg_type = PTR_TO_BUFFER_END;
+		info->buf_id = offsetof(struct dtrace_bpf_context, buf);
+
+		bpf_ctx_record_field_size(info, sizeof(u64));
+		if (bpf_ctx_narrow_access_ok(off, size, sizeof(u64)))
+			return true;
+		break;
 	default:
 		if (size == sizeof(unsigned long))
 			return true;
@@ -152,6 +276,10 @@ static bool dtrace_is_valid_access(int off, int size, enum bpf_access_type type,
  *	si->dst_reg = ((type *)si->src_reg)->member
  *	target_size = sizeof(((type *)si->src_reg)->member)
  *
+ *  BPF_LDX_CTX_FIELD_DST(type, member, dst, si, target_size)
+ *	dst = ((type *)si->src_reg)->member
+ *	target_size = sizeof(((type *)si->src_reg)->member)
+ *
  *  BPF_LDX_LNK_FIELD(type, member, si, target_size)
  *	si->dst_reg = ((type *)si->dst_reg)->member
  *	target_size = sizeof(((type *)si->dst_reg)->member)
@@ -172,6 +300,13 @@ static bool dtrace_is_valid_access(int off, int size, enum bpf_access_type type,
 			*(target_size) = FIELD_SIZEOF(type, member); \
 			offsetof(type, member); \
 		    }))
+#define BPF_LDX_CTX_FIELD_DST(type, member, dst, si, target_size) \
+	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
+		    (dst), (si)->src_reg, \
+		    ({ \
+			*(target_size) = FIELD_SIZEOF(type, member); \
+			offsetof(type, member); \
+		    }))
 #define BPF_LDX_LNK_FIELD(type, member, si, target_size) \
 	BPF_LDX_MEM(BPF_FIELD_SIZEOF(type, member), \
 		    (si)->dst_reg, (si)->dst_reg, \
@@ -261,6 +396,18 @@ static u32 dtrace_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_LDX_LNK_PTR(struct task_struct, cred, si);
 		*insn++ = BPF_LDX_LNK_FIELD(struct cred, egid, si, target_size);
 		break;
+	case offsetof(struct dtrace_bpf_context, buf):
+		*insn++ = BPF_LDX_CTX_FIELD(struct dtrace_bpf_ctx, buf, si,
+					    target_size);
+		break;
+	case offsetof(struct dtrace_bpf_context, buf_end):
+		/* buf_end = ctx->buf + ctx->buf_len */
+		*insn++ = BPF_LDX_CTX_FIELD(struct dtrace_bpf_ctx, buf, si,
+					    target_size);
+		*insn++ = BPF_LDX_CTX_FIELD_DST(struct dtrace_bpf_ctx, buf_len,
+						BPF_REG_AX, si, target_size);
+		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+		break;
 	default:
 		*insn++ = BPF_LDX_CTX_PTR(struct dtrace_bpf_ctx, regs, si);
 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
@@ -308,6 +455,9 @@ static void *dtrace_convert_ctx(enum bpf_prog_type stype, void *ctx)
 		gctx = this_cpu_ptr(&dtrace_ctx);
 		gctx->regs = (struct pt_regs *)ctx;
 		gctx->task = current;
+		gctx->handle.rb = NULL;
+		gctx->buf_len = 0;
+		gctx->buf = NULL;
 
 		return gctx;
 	}
diff --git a/tools/dtrace/dt_buffer.c b/tools/dtrace/dt_buffer.c
index 65c107ca8ac4..28fac9036d69 100644
--- a/tools/dtrace/dt_buffer.c
+++ b/tools/dtrace/dt_buffer.c
@@ -282,33 +282,27 @@ static void write_rb_tail(volatile struct perf_event_mmap_page *rb_page,
  */
 static int output_event(u64 *buf)
 {
-	u8				*data = (u8 *)buf;
-	struct perf_event_header	*hdr;
-	u32				size;
-	u64				probe_id, task;
-	u32				pid, ppid, cpu, euid, egid, tag;
+	u8	*data = (u8 *)buf;
+	u32	probe_id;
+	u32	flags;
+	u64	task;
+	u32	pid, ppid, cpu, euid, egid, tag;
 
-	hdr = (struct perf_event_header *)data;
-	data += sizeof(struct perf_event_header);
+	probe_id = *(u32 *)&(data[0]);
 
-	if (hdr->type != PERF_RECORD_SAMPLE)
-		return 1;
+	if (probe_id == PERF_RECORD_LOST) {
+		u16	size;
+		u64	lost;
 
-	size = *(u32 *)data;
-	data += sizeof(u32);
+		size = *(u16 *)&(data[6]);
+		lost = *(u64 *)&(data[16]);
 
-	/*
-	 * The sample should only take up 48 bytes, but as a result of how the
-	 * BPF program stores the data (filling in a struct that resides on the
-	 * stack, and sending that off using bpf_perf_event_output()), there is
-	 * some internal padding
-	 */
-	if (size != 52) {
-		printf("Sample size is wrong (%d vs expected %d)\n", size, 52);
-		goto out;
+		printf("[%lu probes dropped]\n", lost);
+
+		return size;
 	}
 
-	probe_id = *(u64 *)&(data[0]);
+	flags = *(u32 *)&(data[4]);
 	pid = *(u32 *)&(data[8]);
 	ppid = *(u32 *)&(data[12]);
 	cpu = *(u32 *)&(data[16]);
@@ -318,19 +312,14 @@ static int output_event(u64 *buf)
 	tag = *(u32 *)&(data[40]);
 
 	if (probe_id != 123)
-		printf("Corrupted data (probe_id = %ld)\n", probe_id);
+		printf("Corrupted data (probe_id = %u)\n", probe_id);
 	if (tag != 0xdace)
 		printf("Corrupted data (tag = %x)\n", tag);
 
-	printf("CPU-%d: EPID %ld PID %d PPID %d EUID %d EGID %d TASK %08lx\n",
-	       cpu, probe_id, pid, ppid, euid, egid, task);
+	printf("CPU-%d: [%u/%u] PID %d PPID %d EUID %u EGID %u TASK %08lx\n",
+	       cpu, probe_id, flags, pid, ppid, euid, egid, task);
 
-out:
-	/*
-	 * We processed the perf_event_header, the size, and ;size; bytes of
-	 * probe data.
-	 */
-	return sizeof(struct perf_event_header) + sizeof(u32) + size;
+	return 48;
 }
 
 /*
@@ -351,10 +340,9 @@ static void process_data(struct dtrace_buffer *buf)
 
 		/*
 		 * Ensure that the buffer contains enough data for at least one
-		 * sample (header + sample size + sample data).
+		 * sample.
 		 */
-		if (head - tail < sizeof(struct perf_event_header) +
-				  sizeof(u32) + 48)
+		if (head - tail < 48)
 			break;
 
 		if (*ptr)
diff --git a/tools/dtrace/probe1_bpf.c b/tools/dtrace/probe1_bpf.c
index 5b34edb61412..a3196261e66e 100644
--- a/tools/dtrace/probe1_bpf.c
+++ b/tools/dtrace/probe1_bpf.c
@@ -37,25 +37,16 @@ struct bpf_map_def SEC("maps") buffer_map = {
 	.max_entries = 2,
 };
 
-struct sample {
-	u64 probe_id;
-	u32 pid;
-	u32 ppid;
-	u32 cpu;
-	u32 euid;
-	u32 egid;
-	u64 task;
-	u32 tag;
-};
-
 #define DPROG(F)	SEC("dtrace/"__stringify(F)) int bpf_func_##F
+#define BUF_ID		offsetof(struct dtrace_bpf_context, buf)
 
 /* we jump here when syscall number == __NR_write */
 DPROG(__NR_write)(struct dtrace_bpf_context *ctx)
 {
 	int			cpu = bpf_get_smp_processor_id();
 	struct dtrace_ecb	*ecb;
-	struct sample		smpl;
+	u8			*buf, *buf_end;
+	int			err;
 
 	bpf_finalize_context(ctx, &probemap);
 
@@ -63,17 +54,25 @@ DPROG(__NR_write)(struct dtrace_bpf_context *ctx)
 	if (!ecb)
 		return 0;
 
-	memset(&smpl, 0, sizeof(smpl));
-	smpl.probe_id = ecb->probe_id;
-	smpl.pid = ctx->pid;
-	smpl.ppid = ctx->ppid;
-	smpl.cpu = ctx->cpu;
-	smpl.euid = ctx->euid;
-	smpl.egid = ctx->egid;
-	smpl.task = ctx->task;
-	smpl.tag = 0xdace;
-
-	bpf_perf_event_output(ctx, &buffer_map, cpu, &smpl, sizeof(smpl));
+	err = bpf_buffer_reserve(ctx, BUF_ID, &buffer_map, 48);
+	if (err < 0)
+		return -1;
+	buf = ctx->buf;
+	buf_end = ctx->buf_end;
+	if (buf + 48 > buf_end)
+		return -1;
+
+	*(u32 *)(&buf[0]) = ecb->probe_id;
+	*(u32 *)(&buf[4]) = 0;
+	*(u32 *)(&buf[8]) = ctx->pid;
+	*(u32 *)(&buf[12]) = ctx->ppid;
+	*(u32 *)(&buf[16]) = ctx->cpu;
+	*(u32 *)(&buf[20]) = ctx->euid;
+	*(u32 *)(&buf[24]) = ctx->egid;
+	*(u64 *)(&buf[32]) = ctx->task;
+	*(u32 *)(&buf[40]) = 0xdace;
+
+	bpf_buffer_commit(ctx, BUF_ID, &buffer_map);
 
 	return 0;
 }
@@ -84,7 +83,7 @@ int bpf_prog1(struct pt_regs *ctx)
 	struct dtrace_ecb	ecb;
 	int			cpu = bpf_get_smp_processor_id();
 
-	ecb.id = 1;
+	ecb.id = 3;
 	ecb.probe_id = 123;
 
 	bpf_map_update_elem(&probemap, &cpu, &ecb, BPF_ANY);
-- 
2.20.1

