bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes
@ 2021-10-13 16:35 Hengqi Chen
  2021-10-15 19:30 ` Martin KaFai Lau
  0 siblings, 1 reply; 5+ messages in thread
From: Hengqi Chen @ 2021-10-13 16:35 UTC (permalink / raw)
  To: bpf; +Cc: alan.maguire

Hi, BPF community,


I would like to report a possible bug in bpf-next,
hope I don't make any stupid mistake. Here is the details:

I have two VMs:

One has the kernel built against the following commit:

0693b27644f04852e46f7f034e3143992b658869 (bpf-next)

The ksnoop tool (from BCC repo) works well on this VM.


Another has the kernel built against the following commit:

5319255b8df9271474bc9027cabf82253934f28d (bpf-next)

On this VM, the ksnoop tool failed with the following message:


libbpf: load bpf program failed: Permission denied

libbpf: -- BEGIN DUMP LOG ---

libbpf: 

R1 type=ctx expected=fp

; return ksnoop(ctx, false);

0: (b7) r2 = 0

1: (85) call pc+2

caller:

 R10=fp0

callee:

 frame1: R1=ctx(id=0,off=0,imm=0) R2_w=invP0 R10=fp0

; static int ksnoop(struct pt_regs *ctx, bool entry)

4: (7b) *(u64 *)(r10 -168) = r2

5: (bf) r9 = r1

; task = bpf_get_current_task();

6: (85) call bpf_get_current_task#35

; task = bpf_get_current_task();

7: (7b) *(u64 *)(r10 -16) = r0

8: (bf) r2 = r10

; 

9: (07) r2 += -16

; func_stack = bpf_map_lookup_elem(&ksnoop_func_stack, &task);

10: (18) r1 = 0xffff8c70353f3c00

12: (85) call bpf_map_lookup_elem#1

13: (bf) r7 = r0

; if (!func_stack) {

14: (55) if r7 != 0x0 goto pc+35

 frame1: R0=invP0 R7_w=invP0 R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-168=00000000

15: (b7) r1 = 0

; struct func_stack new_stack = {};

16: (7b) *(u64 *)(r10 -24) = r1

17: (7b) *(u64 *)(r10 -32) = r1

18: (7b) *(u64 *)(r10 -40) = r1

19: (7b) *(u64 *)(r10 -48) = r1

20: (7b) *(u64 *)(r10 -56) = r1

21: (7b) *(u64 *)(r10 -64) = r1

22: (7b) *(u64 *)(r10 -72) = r1

23: (7b) *(u64 *)(r10 -80) = r1

24: (7b) *(u64 *)(r10 -88) = r1

25: (7b) *(u64 *)(r10 -96) = r1

26: (7b) *(u64 *)(r10 -104) = r1

27: (7b) *(u64 *)(r10 -112) = r1

28: (7b) *(u64 *)(r10 -120) = r1

29: (7b) *(u64 *)(r10 -128) = r1

30: (7b) *(u64 *)(r10 -136) = r1

31: (7b) *(u64 *)(r10 -144) = r1

32: (7b) *(u64 *)(r10 -152) = r1

; new_stack.task = task;

33: (79) r1 = *(u64 *)(r10 -16)

; new_stack.task = task;

34: (7b) *(u64 *)(r10 -160) = r1

35: (bf) r7 = r10

; struct func_stack new_stack = {};

36: (07) r7 += -16

37: (bf) r3 = r10

38: (07) r3 += -160

; bpf_map_update_elem(&ksnoop_func_stack, &task, &new_stack,

39: (18) r1 = 0xffff8c70353f3c00

41: (bf) r2 = r7

42: (b7) r4 = 1

43: (85) call bpf_map_update_elem#2

; func_stack = bpf_map_lookup_elem(&ksnoop_func_stack, &task);

44: (18) r1 = 0xffff8c70353f3c00

46: (bf) r2 = r7

47: (85) call bpf_map_lookup_elem#1

48: (bf) r7 = r0

49: (15) if r7 == 0x0 goto pc+483

 frame1: R0_w=map_value(id=0,off=0,ks=8,vs=144,imm=0) R7_w=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

; stack_depth = func_stack->stack_depth;

50: (71) r6 = *(u8 *)(r7 +136)

 frame1: R0_w=map_value(id=0,off=0,ks=8,vs=144,imm=0) R7_w=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

; if (stack_depth > FUNC_MAX_STACK_DEPTH)

51: (25) if r6 > 0x10 goto pc+481

 frame1: R0_w=map_value(id=0,off=0,ks=8,vs=144,imm=0) R6_w=invP(id=0,umax_value=16,var_off=(0x0; 0xff),s32_max_value=255,u32_max_value=255) R7_w=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

; if (entry) {

52: (79) r1 = *(u64 *)(r10 -168)

53: (15) if r1 == 0x0 goto pc+74

; if (stack_depth == 0 || stack_depth >= FUNC_MAX_STACK_DEPTH)

128: (15) if r6 == 0x0 goto pc+404

 frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1=invP0 R6=invP(id=0,umax_value=16,var_off=(0x0; 0x1f)) R7=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

129: (25) if r6 > 0xf goto pc+403

 frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1=invP0 R6=invP(id=0,umax_value=15,var_off=(0x0; 0x1f),s32_max_value=16,u32_max_value=16) R7=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

130: (b7) r1 = 0

; if (stack_depth > 0)

131: (15) if r6 == 0x0 goto pc+2

 frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1_w=invP0 R6=invP(id=0,umax_value=15,var_off=(0x0; 0xf)) R7=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

132: (bf) r1 = r6

133: (07) r1 += -1

; ip = func_stack->ips[stack_depth];

134: (bf) r2 = r1

135: (57) r2 &= 255

136: (67) r2 <<= 3

; last_ip = func_stack->ips[last_stack_depth];

137: (bf) r3 = r7

138: (07) r3 += 8

; ip = func_stack->ips[stack_depth];

139: (bf) r4 = r3

140: (0f) r4 += r2

; last_ip = func_stack->ips[last_stack_depth];

141: (67) r6 <<= 3

142: (0f) r3 += r6

; ip = func_stack->ips[stack_depth];

143: (79) r2 = *(u64 *)(r4 +0)

 frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1_w=invP(id=4,smin_value=-1,smax_value=14) R2_w=invP(id=0,umax_value=2040,var_off=(0x0; 0x7f8)) R3_w=map_value(id=0,off=8,ks=8,vs=144,umax_value=120,var_off=(0x0; 0x78)) R4_w=map_value(id=0,off=8,ks=8,vs=144,umax_value=2040,var_off=(0x0; 0x7f8)) R6_w=invP(id=0,umax_value=120,var_off=(0x0; 0x78)) R7=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

invalid access to map value, value_size=144 off=2048 size=8

R4 max value is outside of the allowed memory range

processed 65 insns (limit 1000000) max_states_per_insn 0 total_states 3 peak_states 3 mark_read 2



libbpf: -- END LOG --

libbpf: failed to load program 'kprobe_return'

libbpf: failed to load object 'ksnoop_bpf'

libbpf: failed to load BPF skeleton 'ksnoop_bpf': -4007

Error: Could not load ksnoop BPF: Unknown error 4007

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes
  2021-10-13 16:35 BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes Hengqi Chen
@ 2021-10-15 19:30 ` Martin KaFai Lau
  2021-10-16  9:57   ` Hengqi Chen
  0 siblings, 1 reply; 5+ messages in thread
From: Martin KaFai Lau @ 2021-10-15 19:30 UTC (permalink / raw)
  To: Hengqi Chen; +Cc: bpf, alan.maguire

On Thu, Oct 14, 2021 at 12:35:42AM +0800, Hengqi Chen wrote:
> Hi, BPF community,
> 
> 
> I would like to report a possible bug in bpf-next,
> hope I don't make any stupid mistake. Here is the details:
> 
> I have two VMs:
> 
> One has the kernel built against the following commit:
> 
> 0693b27644f04852e46f7f034e3143992b658869 (bpf-next)
> 
> The ksnoop tool (from BCC repo) works well on this VM.
> 
> 
> Another has the kernel built against the following commit:
> 
> 5319255b8df9271474bc9027cabf82253934f28d (bpf-next)
> 
> On this VM, the ksnoop tool failed with the following message:
I see the error in both mentioned bpf-next commits above.
I use the latest llvm and bcc from github.

Can you confirm which llvm version (or llvm git commit) you are using
in both the good and the bad case?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes
  2021-10-15 19:30 ` Martin KaFai Lau
@ 2021-10-16  9:57   ` Hengqi Chen
  2021-10-18 13:53     ` Alan Maguire
  0 siblings, 1 reply; 5+ messages in thread
From: Hengqi Chen @ 2021-10-16  9:57 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: bpf, alan.maguire, Yonghong Song



On 2021/10/16 3:30 AM, Martin KaFai Lau wrote:
> On Thu, Oct 14, 2021 at 12:35:42AM +0800, Hengqi Chen wrote:
>> Hi, BPF community,
>>
>>
>> I would like to report a possible bug in bpf-next,
>> hope I don't make any stupid mistake. Here is the details:
>>
>> I have two VMs:
>>
>> One has the kernel built against the following commit:
>>
>> 0693b27644f04852e46f7f034e3143992b658869 (bpf-next)
>>
>> The ksnoop tool (from BCC repo) works well on this VM.
>>
>>
>> Another has the kernel built against the following commit:
>>
>> 5319255b8df9271474bc9027cabf82253934f28d (bpf-next)
>>
>> On this VM, the ksnoop tool failed with the following message:
> I see the error in both mentioned bpf-next commits above.
> I use the latest llvm and bcc from github.
> 
> Can you confirm which llvm version (or llvm git commit) you are using
> in both the good and the bad case?
> 

Indeed, this could be the problem of LLVM, not the kernel.

The following is the version info of my environment:

The good one:

	llvm-config-14 --version
	14.0.0

	clang -v     
	Ubuntu clang version 14.0.0-++20210915052613+c78ed20784ee-1~exp1~20210915153417.547
	Target: x86_64-pc-linux-gnu
	Thread model: posix
	InstalledDir: /usr/bin
	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
	Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
	Candidate multilib: .;@m64
	Selected multilib: .;@m64

The bad one:

	llvm-config-14 --version
	14.0.0

	clang -v         
	Ubuntu clang version 14.0.0-++20211008104411+f4145c074cb8-1~exp1~20211008085218.709
	Target: x86_64-pc-linux-gnu
	Thread model: posix
	InstalledDir: /usr/bin
	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/10
	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/11
	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
	Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/11
	Candidate multilib: .;@m64
	Selected multilib: .;@m64

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes
  2021-10-16  9:57   ` Hengqi Chen
@ 2021-10-18 13:53     ` Alan Maguire
  2021-10-19  3:59       ` Yonghong Song
  0 siblings, 1 reply; 5+ messages in thread
From: Alan Maguire @ 2021-10-18 13:53 UTC (permalink / raw)
  To: Hengqi Chen; +Cc: Martin KaFai Lau, bpf, alan.maguire, Yonghong Song

On Sat, 16 Oct 2021, Hengqi Chen wrote:

> 
> 
> On 2021/10/16 3:30 AM, Martin KaFai Lau wrote:
> > On Thu, Oct 14, 2021 at 12:35:42AM +0800, Hengqi Chen wrote:
> >> Hi, BPF community,
> >>
> >>
> >> I would like to report a possible bug in bpf-next,
> >> hope I don't make any stupid mistake. Here is the details:
> >>
> >> I have two VMs:
> >>
> >> One has the kernel built against the following commit:
> >>
> >> 0693b27644f04852e46f7f034e3143992b658869 (bpf-next)
> >>
> >> The ksnoop tool (from BCC repo) works well on this VM.
> >>
> >>
> >> Another has the kernel built against the following commit:
> >>
> >> 5319255b8df9271474bc9027cabf82253934f28d (bpf-next)
> >>
> >> On this VM, the ksnoop tool failed with the following message:
> > I see the error in both mentioned bpf-next commits above.
> > I use the latest llvm and bcc from github.
> > 
> > Can you confirm which llvm version (or llvm git commit) you are using
> > in both the good and the bad case?
> > 
> 
> Indeed, this could be the problem of LLVM, not the kernel.
> 
> The following is the version info of my environment:
> 
> The good one:
> 
> 	llvm-config-14 --version
> 	14.0.0
> 
> 	clang -v     
> 	Ubuntu clang version 14.0.0-++20210915052613+c78ed20784ee-1~exp1~20210915153417.547
> 	Target: x86_64-pc-linux-gnu
> 	Thread model: posix
> 	InstalledDir: /usr/bin
> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
> 	Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
> 	Candidate multilib: .;@m64
> 	Selected multilib: .;@m64
> 
> The bad one:
> 
> 	llvm-config-14 --version
> 	14.0.0
> 
> 	clang -v         
> 	Ubuntu clang version 14.0.0-++20211008104411+f4145c074cb8-1~exp1~20211008085218.709
> 	Target: x86_64-pc-linux-gnu
> 	Thread model: posix
> 	InstalledDir: /usr/bin
> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/10
> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/11
> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
> 	Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/11
> 	Candidate multilib: .;@m64
> 	Selected multilib: .;@m64
> 

Thanks for reporting! I've reproduced this and have a potential ksnoop
fix (below) which works at my end, but it would be good to confirm
it resolves the issue for you too.  The root cause of the verification
failure is the access of the ips[] array associated with the per-task map 
retained to track function call history; it uses a __u8 index to represent
stack depth, and the decrement operation seems to convince the
verifier that the value will wrap from 0 to 0xff, and thus lead to an
out-of-bounds map access as a result.  Adding a mask value to ensure the 
indexing does not fall out of range resolves the verification problems.

As to why we see this now, I'm not sure.  The accesses of the ips[]
values were all guarded by bounds checks, though looking at BPF code
around the verification error it looks like LLVM optimized those out.
If LLVM is doing more optimizations like that these days, that could
be a potential reason we see this now.

From 2133464fe9b92be51ec80e4db7fb23ff9e77c40e Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 18 Oct 2021 14:20:40 +0100
Subject: [PATCH] ksnoop: fix verification failures on 5.15 kernel

hengqi.chen@gmail.com reported:

I have two VMs:

One has the kernel built against the following commit:

0693b27644f04852e46f7f034e3143992b658869 (bpf-next)

The ksnoop tool (from BCC repo) works well on this VM.

Another has the kernel built against the following commit:

5319255b8df9271474bc9027cabf82253934f28d (bpf-next)

On this VM, the ksnoop tool failed with the following message:

[snip]

; last_ip = func_stack->ips[last_stack_depth];

141: (67) r6 <<= 3

142: (0f) r3 += r6

; ip = func_stack->ips[stack_depth];

143: (79) r2 = *(u64 *)(r4 +0)

 frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1_w=invP(id=4,smin_value=-1,smax_value=14) R2_w=invP(id=0,umax_value=2040,var_off=(0x0; 0x7f8)) R3_w=map_value(id=0,off=8,ks=8,vs=144,umax_value=120,var_off=(0x0; 0x78)) R4_w=map_value(id=0,off=8,ks=8,vs=144,umax_value=2040,var_off=(0x0; 0x7f8)) R6_w=invP(id=0,umax_value=120,var_off=(0x0; 0x78)) R7=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000

invalid access to map value, value_size=144 off=2048 size=8

R4 max value is outside of the allowed memory range

processed 65 insns (limit 1000000) max_states_per_insn 0 total_states 3 peak_states 3 mark_read 2

libbpf: -- END LOG --

libbpf: failed to load program 'kprobe_return'

libbpf: failed to load object 'ksnoop_bpf'

libbpf: failed to load BPF skeleton 'ksnoop_bpf': -4007

Error: Could not load ksnoop BPF: Unknown error 4007

The above invalid map access appears to stem from the fact the
"stack_depth" variable (used to retrieve the instruction pointer
from the recorded call stack) is decremented.  The off=2048
value is a clue; this suggests an index resulting from an underflow
of the __u8 index value.  Adding a bitmask to the decrement operation
solves the problem.  It appears that the guards on stack_depth size
around the array dereference were optimized out.

Reported-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 libbpf-tools/ksnoop.bpf.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/libbpf-tools/ksnoop.bpf.c b/libbpf-tools/ksnoop.bpf.c
index f20b138..51dfe57 100644
--- a/libbpf-tools/ksnoop.bpf.c
+++ b/libbpf-tools/ksnoop.bpf.c
@@ -19,6 +19,8 @@
  * data should be collected.
  */
 #define FUNC_MAX_STACK_DEPTH	16
+/* used to convince verifier we do not stray outside of array bounds */
+#define FUNC_STACK_DEPTH_MASK	(FUNC_MAX_STACK_DEPTH - 1)
 
 #ifndef ENOSPC
 #define ENOSPC			28
@@ -99,7 +101,9 @@ static struct trace *get_trace(struct pt_regs *ctx, bool entry)
 		    last_stack_depth < FUNC_MAX_STACK_DEPTH)
 			last_ip = func_stack->ips[last_stack_depth];
 		/* push ip onto stack. return will pop it. */
-		func_stack->ips[stack_depth++] = ip;
+		func_stack->ips[stack_depth] = ip;
+		/* mask used in case bounds checks are optimized out */
+		stack_depth = (stack_depth + 1) & FUNC_STACK_DEPTH_MASK;
 		func_stack->stack_depth = stack_depth;
 		/* rather than zero stack entries on popping, we zero the
 		 * (stack_depth + 1)'th entry when pushing the current
@@ -118,8 +122,13 @@ static struct trace *get_trace(struct pt_regs *ctx, bool entry)
 		if (last_stack_depth >= 0 &&
 		    last_stack_depth < FUNC_MAX_STACK_DEPTH)
 			last_ip = func_stack->ips[last_stack_depth];
-		if (stack_depth > 0)
-			stack_depth = stack_depth - 1;
+		if (stack_depth > 0) {
+			/* logical OR convinces verifier that we don't
+			 * end up with a < 0 value, translating to 0xff
+			 * and an outside of map element access.
+			 */
+			stack_depth = (stack_depth - 1) & FUNC_STACK_DEPTH_MASK;
+		}
 		/* retrieve ip from stack as IP in pt_regs is
 		 * bpf kretprobe trampoline address.
 		 */
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes
  2021-10-18 13:53     ` Alan Maguire
@ 2021-10-19  3:59       ` Yonghong Song
  0 siblings, 0 replies; 5+ messages in thread
From: Yonghong Song @ 2021-10-19  3:59 UTC (permalink / raw)
  To: Alan Maguire, Hengqi Chen; +Cc: Martin KaFai Lau, bpf



On 10/18/21 6:53 AM, Alan Maguire wrote:
> On Sat, 16 Oct 2021, Hengqi Chen wrote:
> 
>>
>>
>> On 2021/10/16 3:30 AM, Martin KaFai Lau wrote:
>>> On Thu, Oct 14, 2021 at 12:35:42AM +0800, Hengqi Chen wrote:
>>>> Hi, BPF community,
>>>>
>>>>
>>>> I would like to report a possible bug in bpf-next,
>>>> hope I don't make any stupid mistake. Here is the details:
>>>>
>>>> I have two VMs:
>>>>
>>>> One has the kernel built against the following commit:
>>>>
>>>> 0693b27644f04852e46f7f034e3143992b658869 (bpf-next)
>>>>
>>>> The ksnoop tool (from BCC repo) works well on this VM.
>>>>
>>>>
>>>> Another has the kernel built against the following commit:
>>>>
>>>> 5319255b8df9271474bc9027cabf82253934f28d (bpf-next)
>>>>
>>>> On this VM, the ksnoop tool failed with the following message:
>>> I see the error in both mentioned bpf-next commits above.
>>> I use the latest llvm and bcc from github.
>>>
>>> Can you confirm which llvm version (or llvm git commit) you are using
>>> in both the good and the bad case?
>>>
>>
>> Indeed, this could be the problem of LLVM, not the kernel.
>>
>> The following is the version info of my environment:
>>
>> The good one:
>>
>> 	llvm-config-14 --version
>> 	14.0.0
>>
>> 	clang -v
>> 	Ubuntu clang version 14.0.0-++20210915052613+c78ed20784ee-1~exp1~20210915153417.547
>> 	Target: x86_64-pc-linux-gnu
>> 	Thread model: posix
>> 	InstalledDir: /usr/bin
>> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
>> 	Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
>> 	Candidate multilib: .;@m64
>> 	Selected multilib: .;@m64
>>
>> The bad one:
>>
>> 	llvm-config-14 --version
>> 	14.0.0
>>
>> 	clang -v
>> 	Ubuntu clang version 14.0.0-++20211008104411+f4145c074cb8-1~exp1~20211008085218.709
>> 	Target: x86_64-pc-linux-gnu
>> 	Thread model: posix
>> 	InstalledDir: /usr/bin
>> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/10
>> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/11
>> 	Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/9
>> 	Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/11
>> 	Candidate multilib: .;@m64
>> 	Selected multilib: .;@m64
>>
> 
> Thanks for reporting! I've reproduced this and have a potential ksnoop
> fix (below) which works at my end, but it would be good to confirm
> it resolves the issue for you too.  The root cause of the verification
> failure is the access of the ips[] array associated with the per-task map
> retained to track function call history; it uses a __u8 index to represent
> stack depth, and the decrement operation seems to convince the
> verifier that the value will wrap from 0 to 0xff, and thus lead to an
> out-of-bounds map access as a result.  Adding a mask value to ensure the
> indexing does not fall out of range resolves the verification problems.
> 
> As to why we see this now, I'm not sure.  The accesses of the ips[]
> values were all guarded by bounds checks, though looking at BPF code
> around the verification error it looks like LLVM optimized those out.
> If LLVM is doing more optimizations like that these days, that could
> be a potential reason we see this now.
> 
>  From 2133464fe9b92be51ec80e4db7fb23ff9e77c40e Mon Sep 17 00:00:00 2001
> From: Alan Maguire <alan.maguire@oracle.com>
> Date: Mon, 18 Oct 2021 14:20:40 +0100
> Subject: [PATCH] ksnoop: fix verification failures on 5.15 kernel
> 
> hengqi.chen@gmail.com reported:
> 
> I have two VMs:
> 
> One has the kernel built against the following commit:
> 
> 0693b27644f04852e46f7f034e3143992b658869 (bpf-next)
> 
> The ksnoop tool (from BCC repo) works well on this VM.
> 
> Another has the kernel built against the following commit:
> 
> 5319255b8df9271474bc9027cabf82253934f28d (bpf-next)
> 
> On this VM, the ksnoop tool failed with the following message:
> 
> [snip]
> 
> ; last_ip = func_stack->ips[last_stack_depth];
> 
> 141: (67) r6 <<= 3
> 
> 142: (0f) r3 += r6
> 
> ; ip = func_stack->ips[stack_depth];
> 
> 143: (79) r2 = *(u64 *)(r4 +0)
> 
>   frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1_w=invP(id=4,smin_value=-1,smax_value=14) R2_w=invP(id=0,umax_value=2040,var_off=(0x0; 0x7f8)) R3_w=map_value(id=0,off=8,ks=8,vs=144,umax_value=120,var_off=(0x0; 0x78)) R4_w=map_value(id=0,off=8,ks=8,vs=144,umax_value=2040,var_off=(0x0; 0x7f8)) R6_w=invP(id=0,umax_value=120,var_off=(0x0; 0x78)) R7=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000
> 
> invalid access to map value, value_size=144 off=2048 size=8
> 
> R4 max value is outside of the allowed memory range
> 
> processed 65 insns (limit 1000000) max_states_per_insn 0 total_states 3 peak_states 3 mark_read 2
> 
> libbpf: -- END LOG --
> 
> libbpf: failed to load program 'kprobe_return'
> 
> libbpf: failed to load object 'ksnoop_bpf'
> 
> libbpf: failed to load BPF skeleton 'ksnoop_bpf': -4007
> 
> Error: Could not load ksnoop BPF: Unknown error 4007
> 
> The above invalid map access appears to stem from the fact the
> "stack_depth" variable (used to retrieve the instruction pointer
> from the recorded call stack) is decremented.  The off=2048
> value is a clue; this suggests an index resulting from an underflow
> of the __u8 index value.  Adding a bitmask to the decrement operation
> solves the problem.  It appears that the guards on stack_depth size
> around the array dereference were optimized out.

This is a case that compiler optimization hurts verifier :-)

  frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1=invP0 
R6=invP(id=0,umax_value=15,var_off=(0x0; 
0x1f),s32_max_value=16,u32_max_value=16) R7
=map_value(id=0,off=0,ks=8,vs=144,imm=0) R9=ctx(id=0,off=0,imm=0) 
R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm 
fp-48=mmmmm
mmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm 
fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm 
fp-120=mmmmmmmm fp-
128=mmmmmmmm fp-136=mmmmmmmm fp-144=mmmmmmmm fp-152=mmmmmmmm 
fp-160=mmmmmmmm fp-168=00000000 

130: (b7) r1 = 0 

; if (stack_depth > 0) 

131: (15) if r6 == 0x0 goto pc+2 

  frame1: R0=map_value(id=0,off=0,ks=8,vs=144,imm=0) R1_w=invP0 
R6=invP(id=0,umax_value=15,var_off=(0x0; 0xf)) 
R7=map_value(id=0,off=0,ks=8,vs=144
,imm=0) R9=ctx(id=0,off=0,imm=0) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm 
fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm
  fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm 
fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm fp-128=mmmmmmmm 
fp-136=mmmmmmmm fp-1
44=mmmmmmmm fp-152=mmmmmmmm fp-160=mmmmmmmm fp-168=00000000
132: (bf) r1 = r6 


At insn 132, even we have r6 != 0, but range of R6 still shows it
could be 0. This makes later r1 - 1 will give a bigger range than
it actually was.

Your workaround below looks good to me. Could you send a pull
request to bcc? Thanks.

> 
> Reported-by: Hengqi Chen <hengqi.chen@gmail.com>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>   libbpf-tools/ksnoop.bpf.c | 15 ++++++++++++---
>   1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/libbpf-tools/ksnoop.bpf.c b/libbpf-tools/ksnoop.bpf.c
> index f20b138..51dfe57 100644
> --- a/libbpf-tools/ksnoop.bpf.c
> +++ b/libbpf-tools/ksnoop.bpf.c
> @@ -19,6 +19,8 @@
>    * data should be collected.
>    */
>   #define FUNC_MAX_STACK_DEPTH	16
> +/* used to convince verifier we do not stray outside of array bounds */
> +#define FUNC_STACK_DEPTH_MASK	(FUNC_MAX_STACK_DEPTH - 1)
>   
>   #ifndef ENOSPC
>   #define ENOSPC			28
> @@ -99,7 +101,9 @@ static struct trace *get_trace(struct pt_regs *ctx, bool entry)
>   		    last_stack_depth < FUNC_MAX_STACK_DEPTH)
>   			last_ip = func_stack->ips[last_stack_depth];
>   		/* push ip onto stack. return will pop it. */
> -		func_stack->ips[stack_depth++] = ip;
> +		func_stack->ips[stack_depth] = ip;
> +		/* mask used in case bounds checks are optimized out */
> +		stack_depth = (stack_depth + 1) & FUNC_STACK_DEPTH_MASK;
>   		func_stack->stack_depth = stack_depth;
>   		/* rather than zero stack entries on popping, we zero the
>   		 * (stack_depth + 1)'th entry when pushing the current
> @@ -118,8 +122,13 @@ static struct trace *get_trace(struct pt_regs *ctx, bool entry)
>   		if (last_stack_depth >= 0 &&
>   		    last_stack_depth < FUNC_MAX_STACK_DEPTH)
>   			last_ip = func_stack->ips[last_stack_depth];
> -		if (stack_depth > 0)
> -			stack_depth = stack_depth - 1;
> +		if (stack_depth > 0) {
> +			/* logical OR convinces verifier that we don't
> +			 * end up with a < 0 value, translating to 0xff
> +			 * and an outside of map element access.
> +			 */
> +			stack_depth = (stack_depth - 1) & FUNC_STACK_DEPTH_MASK;
> +		}
>   		/* retrieve ip from stack as IP in pt_regs is
>   		 * bpf kretprobe trampoline address.
>   		 */
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-10-19  3:59 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-13 16:35 BUG: Ksnoop tool failed to pass the BPF verifier with recent kernel changes Hengqi Chen
2021-10-15 19:30 ` Martin KaFai Lau
2021-10-16  9:57   ` Hengqi Chen
2021-10-18 13:53     ` Alan Maguire
2021-10-19  3:59       ` Yonghong Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).