regressions.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
From: Lorenz Bauer <lmb@cloudflare.com>
To: Alexei Starovoitov <ast@kernel.org>
Cc: kernel-team <kernel-team@cloudflare.com>,
	bpf <bpf@vger.kernel.org>,
	 regressions@lists.linux.dev, Andrii Nakryiko <andrii@kernel.org>,
	 Daniel Borkmann <daniel@iogearbox.net>
Subject: Verifier rejects previously accepted program
Date: Wed, 3 Nov 2021 11:55:39 +0000	[thread overview]
Message-ID: <CACAyw99hVEJFoiBH_ZGyy=+oO-jyydoz6v1DeKPKs2HVsUH28w@mail.gmail.com> (raw)

Hi Alexei,

I started testing our load balancer on 5.15 today, unfortunately BPF
which is happy on 5.10 gets rejected on the latest kernel.

    BPF program is too large. Processed 1000001 insn
    processed 1000001 insns (limit 1000000) max_states_per_insn 28
total_states 40641 peak_states 1104 mark_read 53

It looks like the verifier has to spend more instructions on verifying
a gnarly loop we have (start and end are u8*):

#pragma clang loop unroll(full)
    for (int b = 1 << 10; b >= 4; b >>= 1) {
        if (start + b > end) {
            continue;
        }

        // If we do 8 byte reads, we have to handle overflows which is
slower than 4 byte reads.
        for (int i = 0; i < b; i += 4) {
            csum += *(uint32_t *)(start + i);
        }

        start += b;
    }
    if (start + 2 <= end) {
        csum += *(uint16_t *)(start);
        start += 2;
    }
    if (start + 1 <= end) {
        csum += *(start);
    }

clang compiles this to:

;               if (start + b > end) {
    1906:       bf 75 00 00 00 00 00 00 r5 = r7
    1907:       07 05 00 00 4a 04 00 00 r5 += 1098
    1908:       2d 15 0d 00 00 00 00 00 if r5 > r1 goto +13 <LBB0_374>
    1909:       b7 03 00 00 00 00 00 00 r3 = 0
    1910:       b7 00 00 00 4a 00 00 00 r0 = 74
;               for (int i = 0; i < b; i += 4) {
    1911:       b7 08 00 00 4a 00 00 00 r8 = 74

0000000000003bc0 <LBB0_373>:
;                       csum += *(uint32_t *)(start + i);
    1912:       bf 74 00 00 00 00 00 00 r4 = r7
    1913:       0f 04 00 00 00 00 00 00 r4 += r0
    1914:       61 44 00 00 00 00 00 00 r4 = *(u32 *)(r4 + 0)
    1915:       0f 43 00 00 00 00 00 00 r3 += r4
;               for (int i = 0; i < b; i += 4) {
    1916:       07 08 00 00 04 00 00 00 r8 += 4
    1917:       bf 09 00 00 00 00 00 00 r9 = r0
    1918:       07 09 00 00 b6 ff ff ff r9 += -74
    1919:       bf 80 00 00 00 00 00 00 r0 = r8
    1920:       bf 54 00 00 00 00 00 00 r4 = r5
    1921:       a5 09 f6 ff fc 03 00 00 if r9 < 1020 goto -10 <LBB0_373>

0000000000003c10 <LBB0_374>:
;               if (start + b > end) {
    1922:       bf 40 00 00 00 00 00 00 r0 = r4
    1923:       07 00 00 00 00 02 00 00 r0 += 512
    1924:       bf 45 00 00 00 00 00 00 r5 = r4
    1925:       2d 10 09 00 00 00 00 00 if r0 > r1 goto +9 <LBB0_377>
    1926:       b7 08 00 00 00 00 00 00 r8 = 0

0000000000003c38 <LBB0_376>:
    1927:       bf 89 00 00 00 00 00 00 r9 = r8
;                       csum += *(uint32_t *)(start + i);
    1928:       bf 45 00 00 00 00 00 00 r5 = r4
    1929:       0f 95 00 00 00 00 00 00 r5 += r9
    1930:       61 55 00 00 00 00 00 00 r5 = *(u32 *)(r5 + 0)
    1931:       0f 53 00 00 00 00 00 00 r3 += r5
;               for (int i = 0; i < b; i += 4) {
    1932:       07 08 00 00 04 00 00 00 r8 += 4
    1933:       bf 05 00 00 00 00 00 00 r5 = r0
;               for (int i = 0; i < b; i += 4) {
    1934:       a5 09 f8 ff fc 01 00 00 if r9 < 508 goto -8 <LBB0_376>

0000000000003c78 <LBB0_377>:
;               if (start + b > end) {
    1935:       bf 50 00 00 00 00 00 00 r0 = r5
    1936:       07 00 00 00 00 01 00 00 r0 += 256
    1937:       bf 54 00 00 00 00 00 00 r4 = r5
    1938:       2d 10 09 00 00 00 00 00 if r0 > r1 goto +9 <LBB0_380>
    1939:       b7 08 00 00 00 00 00 00 r8 = 0

0000000000003ca0 <LBB0_379>:
    1940:       bf 89 00 00 00 00 00 00 r9 = r8
;                       csum += *(uint32_t *)(start + i);
    1941:       bf 54 00 00 00 00 00 00 r4 = r5
    1942:       0f 94 00 00 00 00 00 00 r4 += r9
    1943:       61 44 00 00 00 00 00 00 r4 = *(u32 *)(r4 + 0)
    1944:       0f 43 00 00 00 00 00 00 r3 += r4
;               for (int i = 0; i < b; i += 4) {
    1945:       07 08 00 00 04 00 00 00 r8 += 4
    1946:       bf 04 00 00 00 00 00 00 r4 = r0
;               for (int i = 0; i < b; i += 4) {
    1947:       a5 09 f8 ff fc 00 00 00 if r9 < 252 goto -8 <LBB0_379>

If b is <= 128 clang simply unrolls the loop. With log_level = 2 the
verifier output is full of tracing of those loops. A "short" excerpt:

    ; for (int i = 0; i < b; i += 4) {
    1911: (b7) r8 = 74
    1912: R0_w=inv74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4=pkt(id=0,off=74,r=1098,imm=0) R5_w=pkt(id=0,off=1098,r=1098,imm=0)
R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    ; csum += *(uint32_t *)(start + i);
    1912: (bf) r4 = r7
    1913: R0_w=inv74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4_w=pkt(id=0,off=0,r=1098,imm=0) R5_w=pkt(id=0,off=1098,r=1098,imm=0)
R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    1913: (0f) r4 += r0
    last_idx 1913 first_idx 1901
    regs=1 stack=0 before 1912: (bf) r4 = r7
    regs=1 stack=0 before 1911: (b7) r8 = 74
    regs=1 stack=0 before 1910: (b7) r0 = 74
    1914: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4_w=pkt(id=0,off=74,r=1098,imm=0)
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    1914: (61) r4 = *(u32 *)(r4 +0)
    1915: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0) R3_w=inv0
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    ; csum += *(uint32_t *)(start + i);
    1915: (0f) r3 += r4
    1916: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv74
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    ; for (int i = 0; i < b; i += 4) {
    1916: (07) r8 += 4
    1917: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78
R9=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    1917: (bf) r9 = r0
    1918: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP74 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    1918: (07) r9 += -74
    1919: R0_w=invP74 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP0 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    1919: (bf) r0 = r8
    1920: R0_w=inv78 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP0 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    1920: (bf) r4 = r5
    1921: R0_w=inv78 R1=pkt_end(id=0,off=0,imm=0)
R2_w=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4_w=pkt(id=0,off=1098,r=1098,imm=0)
R5_w=pkt(id=0,off=1098,r=1098,imm=0) R6=ctx(id=0,off=0,imm=0)
R7=pkt(id=0,off=0,r=1098,imm=0) R8_w=inv78 R9_w=invP0 R10=fp0
fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0 fp-48=mmmm0000
fp-56=00000000 fp-64=00000000 fp-72=0000mmmm fp-80=mmmmmmmm
fp-88=map_value fp-96=mmmmmmmm fp-104=map_value fp-112=inv fp-120=fp
fp-128=map_value
    ; for (int i = 0; i < b; i += 4) {
    1921: (a5) if r9 < 0x3fc goto pc-10
    1912: R0=inv78 R1=pkt_end(id=0,off=0,imm=0)
R2=map_value(id=0,off=0,ks=4,vs=368,imm=0)
R3=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff))
R4=pkt(id=0,off=1098,r=1098,imm=0) R5=pkt(id=0,off=1098,r=1098,imm=0)
R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=1098,imm=0) R8=inv78
R9=invP0 R10=fp0 fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmm00m0
fp-48=mmmm0000 fp-56=00000000 fp-64=00000000 fp-72=0000mmmm
fp-80=mmmmmmmm fp-88=map_value fp-96=mmmmmmmm fp-104=map_value
fp-112=inv fp-120=fp fp-128=map_value

I've bisected the problem to commit 3e8ce29850f1 ("bpf: Prevent
pointer mismatch in bpf_timer_init.") The commit seems unrelated to
loop processing though (it does touch the verifier however). Either I
got the bisection wrong or there is something subtle going on.

Lorenz

#regzb introduced: 3e8ce29850f1 ("bpf: Prevent pointer mismatch in
bpf_timer_init.")

--
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

             reply	other threads:[~2021-11-03 11:56 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-03 11:55 Lorenz Bauer [this message]
2021-11-04 16:50 ` Verifier rejects previously accepted program Alexei Starovoitov
2021-11-04 23:30   ` sdf
2021-11-05  1:20     ` Alexei Starovoitov
2021-11-05  4:13       ` Stanislav Fomichev
2021-11-05 10:41   ` Lorenz Bauer
2021-11-05 19:49     ` Alexei Starovoitov
2021-11-08 13:21       ` Lorenz Bauer
2021-11-10  4:25         ` Alexei Starovoitov
2021-11-10 11:41           ` Lorenz Bauer
2021-11-10 16:50             ` Alexei Starovoitov
2021-11-10 17:05               ` Lorenz Bauer
2021-11-10 18:01               ` Thorsten Leemhuis
2021-11-10 19:16                 ` Alexei Starovoitov
2021-11-10 19:49                   ` Thorsten Leemhuis
2021-11-16  9:26 ` Lorenz Bauer
2021-11-16 10:59   ` Thorsten Leemhuis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CACAyw99hVEJFoiBH_ZGyy=+oO-jyydoz6v1DeKPKs2HVsUH28w@mail.gmail.com' \
    --to=lmb@cloudflare.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@cloudflare.com \
    --cc=regressions@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).