From: Lorenz Bauer <lmb@cloudflare.com>
To: Alexei Starovoitov <ast@kernel.org>
Cc: kernel-team <kernel-team@cloudflare.com>,
bpf <bpf@vger.kernel.org>,
regressions@lists.linux.dev, Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>
Subject: Verifier rejects previously accepted program
Date: Wed, 3 Nov 2021 11:55:39 +0000 [thread overview]
Message-ID: <CACAyw99hVEJFoiBH_ZGyy=+oO-jyydoz6v1DeKPKs2HVsUH28w@mail.gmail.com> (raw)
Hi Alexei,
I started testing our load balancer on 5.15 today, unfortunately BPF
which is happy on 5.10 gets rejected on the latest kernel.
BPF program is too large. Processed 1000001 insn
processed 1000001 insns (limit 1000000) max_states_per_insn 28
total_states 40641 peak_states 1104 mark_read 53
It looks like the verifier has to spend more instructions on verifying
a gnarly loop we have (start and end are u8*):
#pragma clang loop unroll(full)
for (int b = 1 << 10; b >= 4; b >>= 1) {
if (start + b > end) {
continue;
}
// If we do 8 byte reads, we have to handle overflows which is
slower than 4 byte reads.
for (int i = 0; i < b; i += 4) {
csum += *(uint32_t *)(start + i);
}
start += b;
}
if (start + 2 <= end) {
csum += *(uint16_t *)(start);
start += 2;
}
if (start + 1 <= end) {
csum += *(start);
}
clang compiles this to:
; if (start + b > end) {
1906: bf 75 00 00 00 00 00 00 r5 = r7
1907: 07 05 00 00 4a 04 00 00 r5 += 1098
1908: 2d 15 0d 00 00 00 00 00 if r5 > r1 goto +13 <LBB0_374>
1909: b7 03 00 00 00 00 00 00 r3 = 0
1910: b7 00 00 00 4a 00 00 00 r0 = 74
; for (int i = 0; i < b; i += 4) {
1911: b7 08 00 00 4a 00 00 00 r8 = 74
0000000000003bc0 <LBB0_373>:
; csum += *(uint32_t *)(start + i);
1912: bf 74 00 00 00 00 00 00 r4 = r7
1913: 0f 04 00 00 00 00 00 00 r4 += r0
1914: 61 44 00 00 00 00 00 00 r4 = *(u32 *)(r4 + 0)
1915: 0f 43 00 00 00 00 00 00 r3 += r4
; for (int i = 0; i < b; i += 4) {
1916: 07 08 00 00 04 00 00 00 r8 += 4
1917: bf 09 00 00 00 00 00 00 r9 = r0
1918: 07 09 00 00 b6 ff ff ff r9 += -74
1919: bf 80 00 00 00 00 00 00 r0 = r8
1920: bf 54 00 00 00 00 00 00 r4 = r5
1921: a5 09 f6 ff fc 03 00 00 if r9 < 1020 goto -10 <LBB0_373>
0000000000003c10 <LBB0_374>:
; if (start + b > end) {
1922: bf 40 00 00 00 00 00 00 r0 = r4
1923: 07 00 00 00 00 02 00 00 r0 += 512
1924: bf 45 00 00 00 00 00 00 r5 = r4
1925: 2d 10 09 00 00 00 00 00 if r0 > r1 goto +9 <LBB0_377>
1926: b7 08 00 00 00 00 00 00 r8 = 0
0000000000003c38 <LBB0_376>:
1927: bf 89 00 00 00 00 00 00 r9 = r8
; csum += *(uint32_t *)(start + i);
1928: bf 45 00 00 00 00 00 00 r5 = r4
1929: 0f 95 00 00 00 00 00 00 r5 += r9
1930: 61 55 00 00 00 00 00 00 r5 = *(u32 *)(r5 + 0)
1931: 0f 53 00 00 00 00 00 00 r3 += r5
; for (int i = 0; i < b; i += 4) {
1932: 07 08 00 00 04 00 00 00 r8 += 4
1933: bf 05 00 00 00 00 00 00 r5 = r0
; for (int i = 0; i < b; i += 4) {
1934: a5 09 f8 ff fc 01 00 00 if r9 < 508 goto -8 <LBB0_376>
0000000000003c78 <LBB0_377>:
; if (start + b > end) {
1935: bf 50 00 00 00 00 00 00 r0 = r5
1936: 07 00 00 00 00 01 00 00 r0 += 256
1937: bf 54 00 00 00 00 00 00 r4 = r5
1938: 2d 10 09 00 00 00 00 00 if r0 > r1 goto +9 <LBB0_380>
1939: b7 08 00 00 00 00 00 00 r8 = 0
0000000000003ca0 <LBB0_379>:
1940: bf 89 00 00 00 00 00 00 r9 = r8
; csum += *(uint32_t *)(start + i);
1941: bf 54 00 00 00 00 00 00 r4 = r5
1942: 0f 94 00 00 00 00 00 00 r4 += r9
1943: 61 44 00 00 00 00 00 00 r4 = *(u32 *)(r4 + 0)
1944: 0f 43 00 00 00 00 00 00 r3 += r4
; for (int i = 0; i < b; i += 4) {
1945: 07 08 00 00 04 00 00 00 r8 += 4
1946: bf 04 00 00 00 00 00 00 r4 = r0
; for (int i = 0; i < b; i += 4) {
1947: a5 09 f8 ff fc 00 00 00 if r9 < 252 goto -8 <LBB0_379>
If b is <= 128 clang simply unrolls the loop. With log_level = 2 the
verifier output is full of tracing of those loops. A "short" excerpt:
; for (int i = 0; i < b; i += 4) {
1911: (b7) r8 = 74
1912: R0_w=inv74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4=pkt(id=0,off=74,r=1098,imm=0) R5_w=pkt(id=0,off=1098,r=1098,imm=0)
R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
; csum += *(uint32_t *)(start + i);
1912: (bf) r4 = r7
1913: R0_w=inv74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4_w=pkt(id=0,off=0,r=1098,imm=0) R5_w=pkt(id=0,off=1098,r=1098,imm=0)
R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
1913: (0f) r4 += r0
last_idx 1913 first_idx 1901
regs=1 stack=0 before 1912: (bf) r4 = r7
regs=1 stack=0 before 1911: (b7) r8 = 74
regs=1 stack=0 before 1910: (b7) r0 = 74
1914: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4_w=pkt(id=0,off=74,r=1098,imm=0)
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
1914: (61) r4 = *(u32 *)(r4 +0)
1915: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
; csum += *(uint32_t *)(start + i);
1915: (0f) r3 += r4
1916: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
; for (int i = 0; i < b; i += 4) {
1916: (07) r8 += 4
1917: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
1917: (bf) r9 = r0
1918: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP74 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
1918: (07) r9 += -74
1919: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP0 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
1919: (bf) r0 = r8
1920: R0_w=inv78 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP0 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
1920: (bf) r4 = r5
1921: R0_w=inv78 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=pkt(id=0,off=1098,r=1098,imm=0)
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP0 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
; for (int i = 0; i < b; i += 4) {
1921: (a5) if r9 < 0x3fc goto pc-10
1912: R0=inv78 R1=pkt_end(id=0,off=0,imm=0)
R2=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4=pkt(id=0,off=1098,r=1098,imm=0) R5=pkt(id=0,off=1098,r=1098,imm=0)
R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=1098,imm=0) R8=inv78
R9=invP0 R10=fp0 fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0
fp-48=mmmm0000 fp-56=00000000 fp-64=00000000 fp-72=0000mmmm
fp-80=mmmmmmmm fp-88=map_value fp-96=mmmmmmmm fp-104=map_value
fp-112=inv fp-120=fp fp-128=map_value
I've bisected the problem to commit 3e8ce29850f1 ("bpf: Prevent
pointer mismatch in bpf_timer_init.") The commit seems unrelated to
loop processing though (it does touch the verifier however). Either I
got the bisection wrong or there is something subtle going on.
Lorenz
#regzb introduced: 3e8ce29850f1 ("bpf: Prevent pointer mismatch in
bpf_timer_init.")
--
Lorenz Bauer | Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
www.cloudflare.com
next reply other threads:[~2021-11-03 11:56 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-03 11:55 Lorenz Bauer [this message]
2021-11-04 16:50 ` Verifier rejects previously accepted program Alexei Starovoitov
2021-11-04 23:30 ` sdf
2021-11-05 1:20 ` Alexei Starovoitov
2021-11-05 4:13 ` Stanislav Fomichev
2021-11-05 10:41 ` Lorenz Bauer
2021-11-05 19:49 ` Alexei Starovoitov
2021-11-08 13:21 ` Lorenz Bauer
2021-11-10 4:25 ` Alexei Starovoitov
2021-11-10 11:41 ` Lorenz Bauer
2021-11-10 16:50 ` Alexei Starovoitov
2021-11-10 17:05 ` Lorenz Bauer
2021-11-10 18:01 ` Thorsten Leemhuis
2021-11-10 19:16 ` Alexei Starovoitov
2021-11-10 19:49 ` Thorsten Leemhuis
2021-11-16 9:26 ` Lorenz Bauer
2021-11-16 10:59 ` Thorsten Leemhuis
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CACAyw99hVEJFoiBH_ZGyy=+oO-jyydoz6v1DeKPKs2HVsUH28w@mail.gmail.com' \
--to=lmb@cloudflare.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@cloudflare.com \
--cc=regressions@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).