All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop
@ 2021-02-26 17:48 Yonghong Song
  2021-02-26 21:08 ` Andrii Nakryiko
  0 siblings, 1 reply; 4+ messages in thread
From: Yonghong Song @ 2021-02-26 17:48 UTC (permalink / raw)
  To: bpf; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, zhenwei pi

The orignal bcc pull request
  https://github.com/iovisor/bcc/pull/3270
exposed a verifier failure with Clang 12/13 while
Clang 4 works fine. Further investigation exposed two issues.
  Issue 1: LLVM may generate code which uses less refined
     value. The issue is fixed in llvm patch
     https://reviews.llvm.org/D97479
  Issue 2: Spills with initial value 0 are marked as precise
     which makes later state pruning less effective.
     This is my rough initial analysis and further investigation
     is needed to find how to improve verifier pruning
     in such cases.

With the above llvm patch, for the new loop6.c test, which has
smaller loop bound compared to original test, I got
  $ test_progs -s -n 10/16
  ...
  stack depth 64
  processed 405099 insns (limit 1000000) max_states_per_insn 92
      total_states 8866 peak_states 889 mark_read 6
  #10/16 loop6.o:OK

Use the original loop bound, i.e., commenting out "#define WORKAROUND",
I got
  $ test_progs -s -n 10/16
  ...
  BPF program is too large. Processed 1000001 insn
  stack depth 64
  processed 1000001 insns (limit 1000000) max_states_per_insn 91
      total_states 23176 peak_states 5069 mark_read 6
  ...
  #10/16 loop6.o:FAIL

The purpose of this patch is to provide a regression
test for the above llvm fix and also provide a test
case for further analyzing the verifier pruning issue.

Cc: zhenwei pi <pizhenwei@bytedance.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/testing/selftests/bpf/README.rst        |  39 +++++++
 .../bpf/prog_tests/bpf_verif_scale.c          |   1 +
 tools/testing/selftests/bpf/progs/loop6.c     | 101 ++++++++++++++++++
 3 files changed, 141 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/loop6.c

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index fd148b8410fa..dbc8f6cc5c67 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
 
 __  https://reviews.llvm.org/D78466
 
+bpf_verif_scale/loop6.o test failure with Clang 12
+==================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+  * ``bpf_verif_scale/loop6.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+  R1 type=ctx expected=fp
+  The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+      15:       bc 51 00 00 00 00 00 00 w1 = w5
+      16:       04 01 00 00 ff ff ff ff w1 += -1
+      17:       67 05 00 00 20 00 00 00 r5 <<= 32
+      18:       77 05 00 00 20 00 00 00 r5 >>= 32
+      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+      20:       b7 05 00 00 06 00 00 00 r5 = 6
+  00000000000000a8 <LBB0_4>:
+      21:       b7 02 00 00 00 00 00 00 r2 = 0
+      22:       b7 01 00 00 00 00 00 00 r1 = 0
+  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__  https://reviews.llvm.org/D97479
+
 BPF CO-RE-based tests and Clang version
 =======================================
 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e698ee6bb6c2..3d002c245d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
 		{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
 		{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
 		{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
+		{ "loop6.o", BPF_PROG_TYPE_KPROBE },
 
 		/* partial unroll. 19k insn in a loop.
 		 * Total program size 20.8k insn.
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..fe535922bed8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS	6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX		10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX		128
+#endif
+
+#define SG_CHAIN	0x01UL
+#define SG_END		0x02UL
+
+struct scatterlist {
+	unsigned long   page_link;
+	unsigned int    offset;
+	unsigned int    length;
+};
+
+#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg)		((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg)	\
+	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+	struct scatterlist sg;
+
+	bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+	if (sg_is_last(&sg))
+		return NULL;
+
+	sgp++;
+
+	bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+	if (sg_is_chain(&sg))
+		sgp = sg_chain_ptr(&sg);
+
+	return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+	struct scatterlist *sgp;
+
+	bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+	return sgp;
+}
+
+int config = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+int nested_loops(volatile struct pt_regs* ctx)
+{
+	struct scatterlist **sgs = PT_REGS_PARM2(ctx);
+	unsigned int num1 = PT_REGS_PARM3(ctx);
+	unsigned int num2 = PT_REGS_PARM4(ctx);
+	struct scatterlist *sgp = NULL;
+	__u64 length1 = 0, length2 = 0;
+	unsigned int i, n, len;
+
+	if (config != 0)
+		return 0;
+
+	for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num1); i++) {
+		for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+		     sgp = __sg_next(sgp)) {
+			bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+			length1 += len;
+			n++;
+		}
+	}
+
+	for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num2); i++) {
+		for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+		     sgp = __sg_next(sgp)) {
+			bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+			length2 += len;
+			n++;
+		}
+	}
+
+	config = 1;
+	result = length2 - length1;
+	return 0;
+}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop
  2021-02-26 17:48 [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop Yonghong Song
@ 2021-02-26 21:08 ` Andrii Nakryiko
  2021-02-26 22:31   ` Yonghong Song
  0 siblings, 1 reply; 4+ messages in thread
From: Andrii Nakryiko @ 2021-02-26 21:08 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team, zhenwei pi

On Fri, Feb 26, 2021 at 9:50 AM Yonghong Song <yhs@fb.com> wrote:
>
> The orignal bcc pull request
>   https://github.com/iovisor/bcc/pull/3270
> exposed a verifier failure with Clang 12/13 while
> Clang 4 works fine. Further investigation exposed two issues.
>   Issue 1: LLVM may generate code which uses less refined
>      value. The issue is fixed in llvm patch
>      https://reviews.llvm.org/D97479
>   Issue 2: Spills with initial value 0 are marked as precise
>      which makes later state pruning less effective.
>      This is my rough initial analysis and further investigation
>      is needed to find how to improve verifier pruning
>      in such cases.
>
> With the above llvm patch, for the new loop6.c test, which has
> smaller loop bound compared to original test, I got
>   $ test_progs -s -n 10/16
>   ...
>   stack depth 64
>   processed 405099 insns (limit 1000000) max_states_per_insn 92
>       total_states 8866 peak_states 889 mark_read 6
>   #10/16 loop6.o:OK
>
> Use the original loop bound, i.e., commenting out "#define WORKAROUND",
> I got
>   $ test_progs -s -n 10/16
>   ...
>   BPF program is too large. Processed 1000001 insn
>   stack depth 64
>   processed 1000001 insns (limit 1000000) max_states_per_insn 91
>       total_states 23176 peak_states 5069 mark_read 6
>   ...
>   #10/16 loop6.o:FAIL
>
> The purpose of this patch is to provide a regression
> test for the above llvm fix and also provide a test
> case for further analyzing the verifier pruning issue.
>
> Cc: zhenwei pi <pizhenwei@bytedance.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  tools/testing/selftests/bpf/README.rst        |  39 +++++++
>  .../bpf/prog_tests/bpf_verif_scale.c          |   1 +
>  tools/testing/selftests/bpf/progs/loop6.c     | 101 ++++++++++++++++++
>  3 files changed, 141 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/loop6.c
>
> diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
> index fd148b8410fa..dbc8f6cc5c67 100644
> --- a/tools/testing/selftests/bpf/README.rst
> +++ b/tools/testing/selftests/bpf/README.rst
> @@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
>
>  __  https://reviews.llvm.org/D78466
>
> +bpf_verif_scale/loop6.o test failure with Clang 12
> +==================================================
> +
> +With Clang 12, the following bpf_verif_scale test failed:
> +  * ``bpf_verif_scale/loop6.o``
> +
> +The verifier output looks like
> +
> +.. code-block:: c
> +
> +  R1 type=ctx expected=fp
> +  The sequence of 8193 jumps is too complex.
> +
> +The reason is compiler generating the following code
> +
> +.. code-block:: c
> +
> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
> +      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
> +      15:       bc 51 00 00 00 00 00 00 w1 = w5
> +      16:       04 01 00 00 ff ff ff ff w1 += -1
> +      17:       67 05 00 00 20 00 00 00 r5 <<= 32
> +      18:       77 05 00 00 20 00 00 00 r5 >>= 32
> +      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
> +      20:       b7 05 00 00 06 00 00 00 r5 = 6
> +  00000000000000a8 <LBB0_4>:
> +      21:       b7 02 00 00 00 00 00 00 r2 = 0
> +      22:       b7 01 00 00 00 00 00 00 r1 = 0
> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
> +      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
> +      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
> +
> +Note that insn #15 has w1 = w5 and w1 is refined later but
> +r5(w5) is eventually saved on stack at insn #24 for later use.
> +This cause later verifier failure. The bug has been `fixed`__ in
> +Clang 13.
> +
> +__  https://reviews.llvm.org/D97479
> +
>  BPF CO-RE-based tests and Clang version
>  =======================================
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> index e698ee6bb6c2..3d002c245d2b 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> @@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
>                 { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
>                 { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
>                 { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
> +               { "loop6.o", BPF_PROG_TYPE_KPROBE },
>
>                 /* partial unroll. 19k insn in a loop.
>                  * Total program size 20.8k insn.
> diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
> new file mode 100644
> index 000000000000..fe535922bed8
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/loop6.c
> @@ -0,0 +1,101 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/ptrace.h>
> +#include <stddef.h>
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +/* typically virtio scsi has max SGs of 6 */
> +#define VIRTIO_MAX_SGS 6
> +
> +/* Verifier will fail with SG_MAX = 128. The failure can be
> + * workarounded with a smaller SG_MAX, e.g. 10.
> + */
> +#define WORKAROUND
> +#ifdef WORKAROUND
> +#define SG_MAX         10
> +#else
> +/* typically virtio blk has max SEG of 128 */
> +#define SG_MAX         128
> +#endif
> +
> +#define SG_CHAIN       0x01UL
> +#define SG_END         0x02UL
> +
> +struct scatterlist {
> +       unsigned long   page_link;
> +       unsigned int    offset;
> +       unsigned int    length;
> +};
> +
> +#define sg_is_chain(sg)                ((sg)->page_link & SG_CHAIN)
> +#define sg_is_last(sg)         ((sg)->page_link & SG_END)
> +#define sg_chain_ptr(sg)       \
> +       ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
> +
> +static inline struct scatterlist *__sg_next(struct scatterlist *sgp)

nit: here and below, it doesn't have to be inline, does it?

> +{
> +       struct scatterlist sg;
> +
> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
> +       if (sg_is_last(&sg))
> +               return NULL;
> +
> +       sgp++;
> +
> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
> +       if (sg_is_chain(&sg))
> +               sgp = sg_chain_ptr(&sg);
> +
> +       return sgp;
> +}
> +
> +static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
> +{
> +       struct scatterlist *sgp;
> +
> +       bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
> +       return sgp;
> +}
> +
> +int config = 0;
> +int result = 0;
> +
> +SEC("kprobe/virtqueue_add_sgs")
> +int nested_loops(volatile struct pt_regs* ctx)

libbpf provides BPF_KPROBE macro, similar to BPF_PROG for
fentry/fexit. Can you please use that instead? You won't need
PT_REGS_PARM macroses below, which will lead to nicer and shorter
code.

> +{
> +       struct scatterlist **sgs = PT_REGS_PARM2(ctx);
> +       unsigned int num1 = PT_REGS_PARM3(ctx);
> +       unsigned int num2 = PT_REGS_PARM4(ctx);
> +       struct scatterlist *sgp = NULL;
> +       __u64 length1 = 0, length2 = 0;
> +       unsigned int i, n, len;
> +
> +       if (config != 0)
> +               return 0;
> +
> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num1); i++) {
> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
> +                    sgp = __sg_next(sgp)) {
> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
> +                       length1 += len;
> +                       n++;
> +               }
> +       }
> +
> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num2); i++) {
> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
> +                    sgp = __sg_next(sgp)) {
> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
> +                       length2 += len;
> +                       n++;
> +               }
> +       }
> +
> +       config = 1;
> +       result = length2 - length1;
> +       return 0;
> +}
> --
> 2.24.1
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop
  2021-02-26 21:08 ` Andrii Nakryiko
@ 2021-02-26 22:31   ` Yonghong Song
  2021-02-26 23:20     ` Andrii Nakryiko
  0 siblings, 1 reply; 4+ messages in thread
From: Yonghong Song @ 2021-02-26 22:31 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team, zhenwei pi



On 2/26/21 1:08 PM, Andrii Nakryiko wrote:
> On Fri, Feb 26, 2021 at 9:50 AM Yonghong Song <yhs@fb.com> wrote:
>>
>> The orignal bcc pull request
>>    https://github.com/iovisor/bcc/pull/3270
>> exposed a verifier failure with Clang 12/13 while
>> Clang 4 works fine. Further investigation exposed two issues.
>>    Issue 1: LLVM may generate code which uses less refined
>>       value. The issue is fixed in llvm patch
>>       https://reviews.llvm.org/D97479
>>    Issue 2: Spills with initial value 0 are marked as precise
>>       which makes later state pruning less effective.
>>       This is my rough initial analysis and further investigation
>>       is needed to find how to improve verifier pruning
>>       in such cases.
>>
>> With the above llvm patch, for the new loop6.c test, which has
>> smaller loop bound compared to original test, I got
>>    $ test_progs -s -n 10/16
>>    ...
>>    stack depth 64
>>    processed 405099 insns (limit 1000000) max_states_per_insn 92
>>        total_states 8866 peak_states 889 mark_read 6
>>    #10/16 loop6.o:OK
>>
>> Use the original loop bound, i.e., commenting out "#define WORKAROUND",
>> I got
>>    $ test_progs -s -n 10/16
>>    ...
>>    BPF program is too large. Processed 1000001 insn
>>    stack depth 64
>>    processed 1000001 insns (limit 1000000) max_states_per_insn 91
>>        total_states 23176 peak_states 5069 mark_read 6
>>    ...
>>    #10/16 loop6.o:FAIL
>>
>> The purpose of this patch is to provide a regression
>> test for the above llvm fix and also provide a test
>> case for further analyzing the verifier pruning issue.
>>
>> Cc: zhenwei pi <pizhenwei@bytedance.com>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   tools/testing/selftests/bpf/README.rst        |  39 +++++++
>>   .../bpf/prog_tests/bpf_verif_scale.c          |   1 +
>>   tools/testing/selftests/bpf/progs/loop6.c     | 101 ++++++++++++++++++
>>   3 files changed, 141 insertions(+)
>>   create mode 100644 tools/testing/selftests/bpf/progs/loop6.c
>>
>> diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
>> index fd148b8410fa..dbc8f6cc5c67 100644
>> --- a/tools/testing/selftests/bpf/README.rst
>> +++ b/tools/testing/selftests/bpf/README.rst
>> @@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
>>
>>   __  https://reviews.llvm.org/D78466
>>
>> +bpf_verif_scale/loop6.o test failure with Clang 12
>> +==================================================
>> +
>> +With Clang 12, the following bpf_verif_scale test failed:
>> +  * ``bpf_verif_scale/loop6.o``
>> +
>> +The verifier output looks like
>> +
>> +.. code-block:: c
>> +
>> +  R1 type=ctx expected=fp
>> +  The sequence of 8193 jumps is too complex.
>> +
>> +The reason is compiler generating the following code
>> +
>> +.. code-block:: c
>> +
>> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
>> +      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
>> +      15:       bc 51 00 00 00 00 00 00 w1 = w5
>> +      16:       04 01 00 00 ff ff ff ff w1 += -1
>> +      17:       67 05 00 00 20 00 00 00 r5 <<= 32
>> +      18:       77 05 00 00 20 00 00 00 r5 >>= 32
>> +      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
>> +      20:       b7 05 00 00 06 00 00 00 r5 = 6
>> +  00000000000000a8 <LBB0_4>:
>> +      21:       b7 02 00 00 00 00 00 00 r2 = 0
>> +      22:       b7 01 00 00 00 00 00 00 r1 = 0
>> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
>> +      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
>> +      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
>> +
>> +Note that insn #15 has w1 = w5 and w1 is refined later but
>> +r5(w5) is eventually saved on stack at insn #24 for later use.
>> +This cause later verifier failure. The bug has been `fixed`__ in
>> +Clang 13.
>> +
>> +__  https://reviews.llvm.org/D97479
>> +
>>   BPF CO-RE-based tests and Clang version
>>   =======================================
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
>> index e698ee6bb6c2..3d002c245d2b 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
>> @@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
>>                  { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
>>                  { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
>>                  { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
>> +               { "loop6.o", BPF_PROG_TYPE_KPROBE },
>>
>>                  /* partial unroll. 19k insn in a loop.
>>                   * Total program size 20.8k insn.
>> diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
>> new file mode 100644
>> index 000000000000..fe535922bed8
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/loop6.c
>> @@ -0,0 +1,101 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#include <linux/ptrace.h>
>> +#include <stddef.h>
>> +#include <linux/bpf.h>
>> +#include <bpf/bpf_helpers.h>
>> +#include <bpf/bpf_tracing.h>
>> +
>> +char _license[] SEC("license") = "GPL";
>> +
>> +/* typically virtio scsi has max SGs of 6 */
>> +#define VIRTIO_MAX_SGS 6
>> +
>> +/* Verifier will fail with SG_MAX = 128. The failure can be
>> + * workarounded with a smaller SG_MAX, e.g. 10.
>> + */
>> +#define WORKAROUND
>> +#ifdef WORKAROUND
>> +#define SG_MAX         10
>> +#else
>> +/* typically virtio blk has max SEG of 128 */
>> +#define SG_MAX         128
>> +#endif
>> +
>> +#define SG_CHAIN       0x01UL
>> +#define SG_END         0x02UL
>> +
>> +struct scatterlist {
>> +       unsigned long   page_link;
>> +       unsigned int    offset;
>> +       unsigned int    length;
>> +};
>> +
>> +#define sg_is_chain(sg)                ((sg)->page_link & SG_CHAIN)
>> +#define sg_is_last(sg)         ((sg)->page_link & SG_END)
>> +#define sg_chain_ptr(sg)       \
>> +       ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
>> +
>> +static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
> 
> nit: here and below, it doesn't have to be inline, does it?

I will keep inline here. With __noinline, current loop6.c failed
at verifier:
   BPF program is too large. Processed 1000001 insn
This may be another issue we need to investigate later.

> 
>> +{
>> +       struct scatterlist sg;
>> +
>> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
>> +       if (sg_is_last(&sg))
>> +               return NULL;
>> +
>> +       sgp++;
>> +
>> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
>> +       if (sg_is_chain(&sg))
>> +               sgp = sg_chain_ptr(&sg);
>> +
>> +       return sgp;
>> +}
>> +
>> +static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
>> +{
>> +       struct scatterlist *sgp;
>> +
>> +       bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
>> +       return sgp;
>> +}
>> +
>> +int config = 0;
>> +int result = 0;
>> +
>> +SEC("kprobe/virtqueue_add_sgs")
>> +int nested_loops(volatile struct pt_regs* ctx)
> 
> libbpf provides BPF_KPROBE macro, similar to BPF_PROG for
> fentry/fexit. Can you please use that instead? You won't need
> PT_REGS_PARM macroses below, which will lead to nicer and shorter
> code.

Sure. Will use. Indeed better.

> 
>> +{
>> +       struct scatterlist **sgs = PT_REGS_PARM2(ctx);
>> +       unsigned int num1 = PT_REGS_PARM3(ctx);
>> +       unsigned int num2 = PT_REGS_PARM4(ctx);
>> +       struct scatterlist *sgp = NULL;
>> +       __u64 length1 = 0, length2 = 0;
>> +       unsigned int i, n, len;
>> +
>> +       if (config != 0)
>> +               return 0;
>> +
>> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num1); i++) {
>> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
>> +                    sgp = __sg_next(sgp)) {
>> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
>> +                       length1 += len;
>> +                       n++;
>> +               }
>> +       }
>> +
>> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num2); i++) {
>> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
>> +                    sgp = __sg_next(sgp)) {
>> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
>> +                       length2 += len;
>> +                       n++;
>> +               }
>> +       }
>> +
>> +       config = 1;
>> +       result = length2 - length1;
>> +       return 0;
>> +}
>> --
>> 2.24.1
>>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop
  2021-02-26 22:31   ` Yonghong Song
@ 2021-02-26 23:20     ` Andrii Nakryiko
  0 siblings, 0 replies; 4+ messages in thread
From: Andrii Nakryiko @ 2021-02-26 23:20 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team, zhenwei pi

On Fri, Feb 26, 2021 at 2:31 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 2/26/21 1:08 PM, Andrii Nakryiko wrote:
> > On Fri, Feb 26, 2021 at 9:50 AM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> The orignal bcc pull request
> >>    https://github.com/iovisor/bcc/pull/3270
> >> exposed a verifier failure with Clang 12/13 while
> >> Clang 4 works fine. Further investigation exposed two issues.
> >>    Issue 1: LLVM may generate code which uses less refined
> >>       value. The issue is fixed in llvm patch
> >>       https://reviews.llvm.org/D97479
> >>    Issue 2: Spills with initial value 0 are marked as precise
> >>       which makes later state pruning less effective.
> >>       This is my rough initial analysis and further investigation
> >>       is needed to find how to improve verifier pruning
> >>       in such cases.
> >>
> >> With the above llvm patch, for the new loop6.c test, which has
> >> smaller loop bound compared to original test, I got
> >>    $ test_progs -s -n 10/16
> >>    ...
> >>    stack depth 64
> >>    processed 405099 insns (limit 1000000) max_states_per_insn 92
> >>        total_states 8866 peak_states 889 mark_read 6
> >>    #10/16 loop6.o:OK
> >>
> >> Use the original loop bound, i.e., commenting out "#define WORKAROUND",
> >> I got
> >>    $ test_progs -s -n 10/16
> >>    ...
> >>    BPF program is too large. Processed 1000001 insn
> >>    stack depth 64
> >>    processed 1000001 insns (limit 1000000) max_states_per_insn 91
> >>        total_states 23176 peak_states 5069 mark_read 6
> >>    ...
> >>    #10/16 loop6.o:FAIL
> >>
> >> The purpose of this patch is to provide a regression
> >> test for the above llvm fix and also provide a test
> >> case for further analyzing the verifier pruning issue.
> >>
> >> Cc: zhenwei pi <pizhenwei@bytedance.com>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   tools/testing/selftests/bpf/README.rst        |  39 +++++++
> >>   .../bpf/prog_tests/bpf_verif_scale.c          |   1 +
> >>   tools/testing/selftests/bpf/progs/loop6.c     | 101 ++++++++++++++++++
> >>   3 files changed, 141 insertions(+)
> >>   create mode 100644 tools/testing/selftests/bpf/progs/loop6.c
> >>
> >> diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
> >> index fd148b8410fa..dbc8f6cc5c67 100644
> >> --- a/tools/testing/selftests/bpf/README.rst
> >> +++ b/tools/testing/selftests/bpf/README.rst
> >> @@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
> >>
> >>   __  https://reviews.llvm.org/D78466
> >>
> >> +bpf_verif_scale/loop6.o test failure with Clang 12
> >> +==================================================
> >> +
> >> +With Clang 12, the following bpf_verif_scale test failed:
> >> +  * ``bpf_verif_scale/loop6.o``
> >> +
> >> +The verifier output looks like
> >> +
> >> +.. code-block:: c
> >> +
> >> +  R1 type=ctx expected=fp
> >> +  The sequence of 8193 jumps is too complex.
> >> +
> >> +The reason is compiler generating the following code
> >> +
> >> +.. code-block:: c
> >> +
> >> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
> >> +      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
> >> +      15:       bc 51 00 00 00 00 00 00 w1 = w5
> >> +      16:       04 01 00 00 ff ff ff ff w1 += -1
> >> +      17:       67 05 00 00 20 00 00 00 r5 <<= 32
> >> +      18:       77 05 00 00 20 00 00 00 r5 >>= 32
> >> +      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
> >> +      20:       b7 05 00 00 06 00 00 00 r5 = 6
> >> +  00000000000000a8 <LBB0_4>:
> >> +      21:       b7 02 00 00 00 00 00 00 r2 = 0
> >> +      22:       b7 01 00 00 00 00 00 00 r1 = 0
> >> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
> >> +      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
> >> +      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
> >> +
> >> +Note that insn #15 has w1 = w5 and w1 is refined later but
> >> +r5(w5) is eventually saved on stack at insn #24 for later use.
> >> +This cause later verifier failure. The bug has been `fixed`__ in
> >> +Clang 13.
> >> +
> >> +__  https://reviews.llvm.org/D97479
> >> +
> >>   BPF CO-RE-based tests and Clang version
> >>   =======================================
> >>
> >> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> >> index e698ee6bb6c2..3d002c245d2b 100644
> >> --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> >> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> >> @@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
> >>                  { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
> >>                  { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
> >>                  { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
> >> +               { "loop6.o", BPF_PROG_TYPE_KPROBE },
> >>
> >>                  /* partial unroll. 19k insn in a loop.
> >>                   * Total program size 20.8k insn.
> >> diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
> >> new file mode 100644
> >> index 000000000000..fe535922bed8
> >> --- /dev/null
> >> +++ b/tools/testing/selftests/bpf/progs/loop6.c
> >> @@ -0,0 +1,101 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +
> >> +#include <linux/ptrace.h>
> >> +#include <stddef.h>
> >> +#include <linux/bpf.h>
> >> +#include <bpf/bpf_helpers.h>
> >> +#include <bpf/bpf_tracing.h>
> >> +
> >> +char _license[] SEC("license") = "GPL";
> >> +
> >> +/* typically virtio scsi has max SGs of 6 */
> >> +#define VIRTIO_MAX_SGS 6
> >> +
> >> +/* Verifier will fail with SG_MAX = 128. The failure can be
> >> + * workarounded with a smaller SG_MAX, e.g. 10.
> >> + */
> >> +#define WORKAROUND
> >> +#ifdef WORKAROUND
> >> +#define SG_MAX         10
> >> +#else
> >> +/* typically virtio blk has max SEG of 128 */
> >> +#define SG_MAX         128
> >> +#endif
> >> +
> >> +#define SG_CHAIN       0x01UL
> >> +#define SG_END         0x02UL
> >> +
> >> +struct scatterlist {
> >> +       unsigned long   page_link;
> >> +       unsigned int    offset;
> >> +       unsigned int    length;
> >> +};
> >> +
> >> +#define sg_is_chain(sg)                ((sg)->page_link & SG_CHAIN)
> >> +#define sg_is_last(sg)         ((sg)->page_link & SG_END)
> >> +#define sg_chain_ptr(sg)       \
> >> +       ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
> >> +
> >> +static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
> >
> > nit: here and below, it doesn't have to be inline, does it?
>
> I will keep inline here. With __noinline, current loop6.c failed
> at verifier:
>    BPF program is too large. Processed 1000001 insn
> This may be another issue we need to investigate later.
>

sure, no worries. At least we learned something new :)

> >
> >> +{
> >> +       struct scatterlist sg;
> >> +
> >> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
> >> +       if (sg_is_last(&sg))
> >> +               return NULL;
> >> +
> >> +       sgp++;
> >> +
> >> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
> >> +       if (sg_is_chain(&sg))
> >> +               sgp = sg_chain_ptr(&sg);
> >> +
> >> +       return sgp;
> >> +}
> >> +
> >> +static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
> >> +{
> >> +       struct scatterlist *sgp;
> >> +
> >> +       bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
> >> +       return sgp;
> >> +}
> >> +
> >> +int config = 0;
> >> +int result = 0;
> >> +
> >> +SEC("kprobe/virtqueue_add_sgs")
> >> +int nested_loops(volatile struct pt_regs* ctx)
> >
> > libbpf provides BPF_KPROBE macro, similar to BPF_PROG for
> > fentry/fexit. Can you please use that instead? You won't need
> > PT_REGS_PARM macroses below, which will lead to nicer and shorter
> > code.
>
> Sure. Will use. Indeed better.
>
> >
> >> +{
> >> +       struct scatterlist **sgs = PT_REGS_PARM2(ctx);
> >> +       unsigned int num1 = PT_REGS_PARM3(ctx);
> >> +       unsigned int num2 = PT_REGS_PARM4(ctx);
> >> +       struct scatterlist *sgp = NULL;
> >> +       __u64 length1 = 0, length2 = 0;
> >> +       unsigned int i, n, len;
> >> +
> >> +       if (config != 0)
> >> +               return 0;
> >> +
> >> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num1); i++) {
> >> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
> >> +                    sgp = __sg_next(sgp)) {
> >> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
> >> +                       length1 += len;
> >> +                       n++;
> >> +               }
> >> +       }
> >> +
> >> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num2); i++) {
> >> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
> >> +                    sgp = __sg_next(sgp)) {
> >> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
> >> +                       length2 += len;
> >> +                       n++;
> >> +               }
> >> +       }
> >> +
> >> +       config = 1;
> >> +       result = length2 - length1;
> >> +       return 0;
> >> +}
> >> --
> >> 2.24.1
> >>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-02-26 23:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-26 17:48 [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop Yonghong Song
2021-02-26 21:08 ` Andrii Nakryiko
2021-02-26 22:31   ` Yonghong Song
2021-02-26 23:20     ` Andrii Nakryiko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.