All of lore.kernel.org
 help / color / mirror / Atom feed
* Help with the BPF verifier
@ 2018-11-01 18:52 Arnaldo Carvalho de Melo
  2018-11-01 19:10 ` David Miller
  2018-11-01 20:05 ` Edward Cree
  0 siblings, 2 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2018-11-01 18:52 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Daniel Borkmann, Jiri Olsa, Martin Lau, Alexei Starovoitov,
	Linux Networking Development Mailing List

tl;dr: I'm trying to work around clang optimizations so that the
       verifier accepts my proggie.

Hi,

	So I'm moving to use raw_syscalls:sys_exit to collect pointer
contents, using maps to tell the bpf program what to copy, how many
bytes, filters, etc.

	I'm at the start of it; at this point I need to use an index to
get to the right syscall arg that is a filename, starting just with
"open" and "openat", which have the filename in different args. To get
this first part working I'm doing it directly in the bpf restricted C
program; later this will move to maps, etc. If I set the index as a
constant, just for testing, it works. Look at the "open" and "openat"
calls below; later we'll see why openat is failing to augment its
"filename" arg while "open" works:

[root@seventh perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
         ? (         ): sleep/10152  ... [continued]: execve()) = 0
     0.045 ( 0.004 ms): sleep/10152 brk() = 0x55ccff356000
     0.074 ( 0.007 ms): sleep/10152 access(filename: , mode: R) = -1 ENOENT No such file or directory
     0.089 ( 0.006 ms): sleep/10152 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3
     0.097 ( 0.003 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd283f0) = 0
     0.103 ( 0.006 ms): sleep/10152 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) = 0x7f8ffee9c000
     0.111 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.135 ( 0.007 ms): sleep/10152 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3
     0.144 ( 0.003 ms): sleep/10152 read(fd: 3, buf: 0x7ffecdd285b8, count: 832) = 832
     0.150 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd28450) = 0
     0.155 ( 0.005 ms): sleep/10152 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7f8ffee9a000
     0.166 ( 0.007 ms): sleep/10152 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7f8ffe8dc000
     0.175 ( 0.010 ms): sleep/10152 mprotect(start: 0x7f8ffea89000, len: 2093056) = 0
     0.188 ( 0.010 ms): sleep/10152 mmap(addr: 0x7f8ffec88000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7f8ffec88000
     0.204 ( 0.005 ms): sleep/10152 mmap(addr: 0x7f8ffec8e000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f8ffec8e000
     0.218 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.239 ( 0.002 ms): sleep/10152 arch_prctl(option: 4098, arg2: 140256433779968) = 0
     0.312 ( 0.009 ms): sleep/10152 mprotect(start: 0x7f8ffec88000, len: 16384, prot: READ) = 0
     0.343 ( 0.005 ms): sleep/10152 mprotect(start: 0x55ccff1c6000, len: 4096, prot: READ) = 0
     0.354 ( 0.006 ms): sleep/10152 mprotect(start: 0x7f8ffeeb6000, len: 4096, prot: READ) = 0
     0.362 ( 0.019 ms): sleep/10152 munmap(addr: 0x7f8ffee9c000, len: 103334) = 0
     0.476 ( 0.002 ms): sleep/10152 brk() = 0x55ccff356000
     0.480 ( 0.004 ms): sleep/10152 brk(brk: 0x55ccff377000) = 0x55ccff377000
     0.487 ( 0.002 ms): sleep/10152 brk() = 0x55ccff377000
     0.497 ( 0.008 ms): sleep/10152 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3
     0.507 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7f8ffec8daa0) = 0
     0.511 ( 0.006 ms): sleep/10152 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) = 0x7f8ff7d0d000
     0.524 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.574 (1000.140 ms): sleep/10152 nanosleep(rqtp: 0x7ffecdd29130) = 0
  1000.753 ( 0.007 ms): sleep/10152 close(fd: 1) = 0
  1000.767 ( 0.004 ms): sleep/10152 close(fd: 2) = 0
  1000.781 (         ): sleep/10152 exit_group()
[root@seventh perf]# 

     1	// SPDX-License-Identifier: GPL-2.0
     2	/*
     3	 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
     4	 *
     5	 * Test it with:
     6	 *
     7	 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
     8	 *
     9	 * This exactly matches what is marshalled into the raw_syscall:sys_enter
    10	 * payload expected by the 'perf trace' beautifiers.
    11	 *
    12	 * For now it just uses the existing tracepoint augmentation code in 'perf
    13	 * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
    14	 * code that will combine entry/exit in a strace like way.
    15	 */
       
    16	#include <stdio.h>
    17	#include <linux/socket.h>
       
    18	/* bpf-output associated map */
    19	struct bpf_map SEC("maps") __augmented_syscalls__ = {
    20		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    21		.key_size = sizeof(int),
    22		.value_size = sizeof(u32),
    23		.max_entries = __NR_CPUS__,
    24	};
       
    25	struct syscall_enter_args {
    26		unsigned long long common_tp_fields;
    27		long		   syscall_nr;
    28		unsigned long	   args[6];
    29	};
       
    30	struct syscall_exit_args {
    31		unsigned long long common_tp_fields;
    32		long		   syscall_nr;
    33		long		   ret;
    34	};
       
    35	struct augmented_filename {
    36		unsigned int	size;
    37		int		reserved;
    38		char		value[256];
    39	};
       
    40	#define SYS_OPEN 2
    41	#define SYS_OPENAT 257
       
    42	SEC("raw_syscalls:sys_enter")
    43	int sys_enter(struct syscall_enter_args *args)
    44	{
    45		struct {
    46			struct syscall_enter_args args;
    47			struct augmented_filename filename;
    48		} augmented_args;
    49		unsigned int len = sizeof(augmented_args);
    50		unsigned int filename_arg = 6;
       
    51		probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
       
    52		switch (augmented_args.args.syscall_nr) {
    53		case SYS_OPEN:	 filename_arg = 0; break;
    54		case SYS_OPENAT: filename_arg = 1; break;
    55		}
       
    56		if (filename_arg <= 5) {
    57			augmented_args.filename.reserved = 0;
    58			augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
    59								      sizeof(augmented_args.filename.value),
    60								      (const void *)args->args[0]);
    61			if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
    62				len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
    63				len &= sizeof(augmented_args.filename.value) - 1;
    64			}
    65		} else {
    66			len = sizeof(augmented_args.args);
    67		}
       
    68		perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
    69		return 0;
    70	}
       
    71	SEC("raw_syscalls:sys_exit")
    72	int sys_exit(struct syscall_exit_args *args)
    73	{
    74		return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
    75	}
       
    76	license(GPL);

If I change the index in line #60 to 1, then "openat" works and "open"
doesn't. What I wanted was to use filename_arg there as the index;
for now it comes from that switch, but really it'll come from userspace,
which knows the syscall tables for each arch, etc.

But if I do that, i.e. apply this patch to that program:

--- /wb/augmented_raw_syscalls.c.old	2018-11-01 15:43:55.000394234 -0300
+++ /wb/augmented_raw_syscalls.c	2018-11-01 15:44:15.102367838 -0300
@@ -67,7 +67,7 @@
 		augmented_args.filename.reserved = 0;
 		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
 							      sizeof(augmented_args.filename.value),
-							      (const void *)args->args[0]);
+							      (const void *)args->args[filename_arg]);
 		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
 			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
 			len &= sizeof(augmented_args.filename.value) - 1;

Then I end up with the verifier complaining. I tried various ways to
work around the compiler so that filename_arg is seen as safe to use as
an index, but I couldn't find the right trick. Ideas?

This is what I end up with when I apply that patch:

[root@seventh perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
                     \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

 Usage: perf trace [<options>] [<command>]
    or: perf trace [<options>] -- <command> [<options>]
    or: perf trace record [<options>] [<command>]
    or: perf trace record [<options>] -- <command> [<options>]

    -e, --event <event>   event/syscall selector. use 'perf list' to list available events
[root@seventh perf]# 

Using -v, as suggested, I get:

[root@seventh perf]# trace -v -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
bpf: builtin compilation failed: -95, try external compiler
Kernel build dir is set to /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
set env: KBUILD_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
unset env: KBUILD_OPTS
include option is set to  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h 
set env: NR_CPUS=4
set env: LINUX_VERSION_CODE=0x41300
set env: CLANG_EXEC=/usr/local/bin/clang
unset env: CLANG_OPTIONS
set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h 
set env: PERF_BPF_INC_OPTIONS=-I/home/acme/lib/perf/include/bpf
set env: WORKING_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
set env: CLANG_SOURCE=/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c
llvm compiling command template: $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE
llvm compiling command : /usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41300  -I/home/acme/lib/perf/include/bpf  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h  -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build -c /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c -target bpf  -O2 -o - 
libbpf: loading object 'tools/perf/examples/bpf/augmented_raw_syscalls.c' from buffer
libbpf: section(1) .strtab, size 168, link 0, flags 0, type=3
libbpf: skip section(1) .strtab
libbpf: section(2) .text, size 0, link 0, flags 6, type=1
libbpf: skip section(2) .text
libbpf: section(3) raw_syscalls:sys_enter, size 376, link 0, flags 6, type=1
libbpf: found program raw_syscalls:sys_enter
libbpf: section(4) .relraw_syscalls:sys_enter, size 16, link 10, flags 0, type=9
libbpf: section(5) raw_syscalls:sys_exit, size 16, link 0, flags 6, type=1
libbpf: found program raw_syscalls:sys_exit
libbpf: section(6) maps, size 56, link 0, flags 3, type=1
libbpf: section(7) license, size 4, link 0, flags 3, type=1
libbpf: license of tools/perf/examples/bpf/augmented_raw_syscalls.c is GPL
libbpf: section(8) version, size 4, link 0, flags 3, type=1
libbpf: kernel version of tools/perf/examples/bpf/augmented_raw_syscalls.c is 41300
libbpf: section(9) .llvm_addrsig, size 6, link 10, flags 80000000, type=1879002115
libbpf: skip section(9) .llvm_addrsig
libbpf: section(10) .symtab, size 240, link 1, flags 0, type=2
libbpf: maps in tools/perf/examples/bpf/augmented_raw_syscalls.c: 2 maps in 56 bytes
libbpf: map 0 is "__augmented_syscalls__"
libbpf: map 1 is "__bpf_stdout__"
libbpf: collecting relocating info for: 'raw_syscalls:sys_enter'
libbpf: relo for 4 value 28 name 124
libbpf: relocation: insn_idx=39
libbpf: relocation: find map 1 (__augmented_syscalls__) for insn 39
bpf: config program 'raw_syscalls:sys_enter'
bpf: config program 'raw_syscalls:sys_exit'
libbpf: create map __bpf_stdout__: fd=3
libbpf: create map __augmented_syscalls__: fd=4
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf: 
0: (bf) r6 = r1
1: (bf) r1 = r10
2: (07) r1 += -328
3: (b7) r7 = 64
4: (b7) r2 = 64
5: (bf) r3 = r6
6: (85) call bpf_probe_read#4
7: (b7) r2 = 1
8: (79) r3 = *(u64 *)(r10 -320)
9: (15) if r3 == 0x101 goto pc+1
 R0=inv(id=0) R2=inv1 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
10: (b7) r2 = 6
11: (b7) r1 = 0
12: (15) if r3 == 0x2 goto pc+1
 R0=inv(id=0) R1=inv0 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
13: (bf) r1 = r2
14: (25) if r1 > 0x5 goto pc+21
 R0=inv(id=0) R1=inv6 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
15: (b7) r2 = 0
16: (63) *(u32 *)(r10 -260) = r2
17: (67) r1 <<= 32
18: (77) r1 >>= 32
19: (67) r1 <<= 3
20: (bf) r2 = r6
21: (0f) r2 += r1
22: (79) r3 = *(u64 *)(r2 +16)
R2 invalid mem access 'inv'

libbpf: -- END LOG --
libbpf: failed to load program 'raw_syscalls:sys_enter'
libbpf: failed to load object 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading)
event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
                     \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

 Usage: perf trace [<options>] [<command>]
    or: perf trace [<options>] -- <command> [<options>]
    or: perf trace record [<options>] [<command>]
    or: perf trace record [<options>] -- <command> [<options>]

    -e, --event <event>   event/syscall selector. use 'perf list' to list available events
[root@seventh perf]# 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-11-05 21:53 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-01 18:52 Help with the BPF verifier Arnaldo Carvalho de Melo
2018-11-01 19:10 ` David Miller
2018-11-01 19:13   ` Arnaldo Carvalho de Melo
2018-11-01 19:28     ` David Miller
2018-11-01 20:05 ` Edward Cree
2018-11-02 15:02   ` Arnaldo Carvalho de Melo
2018-11-02 15:42     ` Edward Cree
2018-11-02 21:27       ` Yonghong Song
2018-11-05 12:33         ` Arnaldo Carvalho de Melo
2018-11-03 11:29       ` Arnaldo Carvalho de Melo
2018-11-03 11:32         ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.