All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
@ 2022-01-27  2:49 Hou Tao
  2022-02-01  0:02 ` Andrii Nakryiko
  0 siblings, 1 reply; 7+ messages in thread
From: Hou Tao @ 2022-01-27  2:49 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Martin KaFai Lau, Yonghong Song, Daniel Borkmann,
	Andrii Nakryiko, David S . Miller, Jakub Kicinski, netdev, bpf,
	houtao1

4096 is OK for x86-64, but for other archs with greater than 4KB
page size (e.g. 64KB under arm64), test_verifier for test case
"check valid spill/fill, ptr to mem" will fail, so just use
getpagesize() to initialize the ring buffer size. Do this for
test_progs as well.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 tools/testing/selftests/bpf/prog_tests/d_path.c | 14 ++++++++++++--
 .../testing/selftests/bpf/prog_tests/test_ima.c | 17 +++++++++++++----
 tools/testing/selftests/bpf/progs/ima.c         |  1 -
 .../bpf/progs/test_d_path_check_types.c         |  1 -
 tools/testing/selftests/bpf/test_verifier.c     |  2 +-
 5 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 911345c526e6..abfa3697e34d 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -171,10 +171,20 @@ static void test_d_path_check_rdonly_mem(void)
 static void test_d_path_check_types(void)
 {
 	struct test_d_path_check_types *skel;
+	int err;
+
+	skel = test_d_path_check_types__open();
+	if (!ASSERT_OK_PTR(skel, "d_path_check_types open failed"))
+		return;
 
-	skel = test_d_path_check_types__open_and_load();
-	ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+	err = bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+	if (!ASSERT_OK(err, "set max entries"))
+		goto cleanup;
 
+	err = test_d_path_check_types__load(skel);
+	ASSERT_EQ(err, -EACCES, "unexpected_load_passing_wrong_type");
+
+cleanup:
 	test_d_path_check_types__destroy(skel);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index 97d8a6f84f4a..ffc4d8b6e753 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -48,11 +48,19 @@ void test_test_ima(void)
 	char cmd[256];
 
 	int err, duration = 0;
-	struct ima *skel = NULL;
+	struct ima *skel;
 
-	skel = ima__open_and_load();
-	if (CHECK(!skel, "skel_load", "skeleton failed\n"))
-		goto close_prog;
+	skel = ima__open();
+	if (!ASSERT_OK_PTR(skel, "skel open"))
+		return;
+
+	err = bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+	if (!ASSERT_OK(err, "set max entries"))
+		goto destroy_skel;
+
+	err = ima__load(skel);
+	if (!ASSERT_OK(err, "skel load"))
+		goto destroy_skel;
 
 	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
 				   process_sample, NULL, NULL);
@@ -86,5 +94,6 @@ void test_test_ima(void)
 	CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
 close_prog:
 	ring_buffer__free(ringbuf);
+destroy_skel:
 	ima__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index 96060ff4ffc6..e192a9f16aea 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -13,7 +13,6 @@ u32 monitored_pid = 0;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } ringbuf SEC(".maps");
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
index 7e02b7361307..1b68d4a65abb 100644
--- a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -8,7 +8,6 @@ extern const int bpf_prog_active __ksym;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } ringbuf SEC(".maps");
 
 SEC("fentry/security_inode_getattr")
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 29bbaa58233c..6acb5e747715 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -931,7 +931,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	}
 	if (*fixup_map_ringbuf) {
 		map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
-					   0, 4096);
+					   0, getpagesize());
 		do {
 			prog[*fixup_map_ringbuf].imm = map_fds[20];
 			fixup_map_ringbuf++;
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
  2022-01-27  2:49 [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size Hou Tao
@ 2022-02-01  0:02 ` Andrii Nakryiko
  2022-02-01  8:43   ` Hou Tao
  0 siblings, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2022-02-01  0:02 UTC (permalink / raw)
  To: Hou Tao
  Cc: Alexei Starovoitov, Martin KaFai Lau, Yonghong Song,
	Daniel Borkmann, Andrii Nakryiko, David S . Miller,
	Jakub Kicinski, Networking, bpf

On Wed, Jan 26, 2022 at 6:34 PM Hou Tao <houtao1@huawei.com> wrote:
>
> 4096 is OK for x86-64, but for other archs with greater than 4KB
> page size (e.g. 64KB under arm64), test_verifier for test case
> "check valid spill/fill, ptr to mem" will fail, so just use
> getpagesize() to initialize the ring buffer size. Do this for
> test_progs as well.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  tools/testing/selftests/bpf/prog_tests/d_path.c | 14 ++++++++++++--
>  .../testing/selftests/bpf/prog_tests/test_ima.c | 17 +++++++++++++----
>  tools/testing/selftests/bpf/progs/ima.c         |  1 -
>  .../bpf/progs/test_d_path_check_types.c         |  1 -
>  tools/testing/selftests/bpf/test_verifier.c     |  2 +-
>  5 files changed, 26 insertions(+), 9 deletions(-)
>

[...]

> @@ -86,5 +94,6 @@ void test_test_ima(void)
>         CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
>  close_prog:
>         ring_buffer__free(ringbuf);
> +destroy_skel:
>         ima__destroy(skel);
>  }
> diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> index 96060ff4ffc6..e192a9f16aea 100644
> --- a/tools/testing/selftests/bpf/progs/ima.c
> +++ b/tools/testing/selftests/bpf/progs/ima.c
> @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
>
>  struct {
>         __uint(type, BPF_MAP_TYPE_RINGBUF);
> -       __uint(max_entries, 1 << 12);

Should we just bump it to 64/128/256KB instead? It's quite annoying to
do a split open and then load just due to this...

I'm also wondering if we should either teach the kernel to round up to
the closest power-of-2 of page_size internally, or teach libbpf to do this
for RINGBUF maps. Thoughts?


>  } ringbuf SEC(".maps");
>
>  char _license[] SEC("license") = "GPL";
> diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
> index 7e02b7361307..1b68d4a65abb 100644
> --- a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
> +++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
> @@ -8,7 +8,6 @@ extern const int bpf_prog_active __ksym;
>
>  struct {
>         __uint(type, BPF_MAP_TYPE_RINGBUF);
> -       __uint(max_entries, 1 << 12);
>  } ringbuf SEC(".maps");
>
>  SEC("fentry/security_inode_getattr")
> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> index 29bbaa58233c..6acb5e747715 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -931,7 +931,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
>         }
>         if (*fixup_map_ringbuf) {
>                 map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
> -                                          0, 4096);
> +                                          0, getpagesize());
>                 do {
>                         prog[*fixup_map_ringbuf].imm = map_fds[20];
>                         fixup_map_ringbuf++;
> --
> 2.29.2
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
  2022-02-01  0:02 ` Andrii Nakryiko
@ 2022-02-01  8:43   ` Hou Tao
  2022-02-02  1:29     ` Andrii Nakryiko
  0 siblings, 1 reply; 7+ messages in thread
From: Hou Tao @ 2022-02-01  8:43 UTC (permalink / raw)
  To: andrii.nakryiko
  Cc: andrii, ast, bpf, daniel, davem, houtao1, kafai, kuba, netdev, yhs

Hi Andrii,

> >
> > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > page size (e.g. 64KB under arm64), test_verifier for test case
> > "check valid spill/fill, ptr to mem" will fail, so just use
> > getpagesize() to initialize the ring buffer size. Do this for
> > test_progs as well.
> >
[...]

> > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > index 96060ff4ffc6..e192a9f16aea 100644
> > --- a/tools/testing/selftests/bpf/progs/ima.c
> > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> >
> >  struct {
> >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > -       __uint(max_entries, 1 << 12);
> 
> Should we just bump it to 64/128/256KB instead? It's quite annoying to
> do a split open and then load just due to this...
>
Agreed.

> I'm also wondering if we should either teach kernel to round up to
> closes power-of-2 of page_size internally, or teach libbpf to do this
> for RINGBUF maps. Thoughts?
>
It seems that max_entries doesn't need to be page-aligned. For example,
if max_entries is 4096 and the page size is 65536, we can allocate a
65536-byte page and set rb->mask to 4095 and it will work. The only
downside is that 60KB of memory is wasted, but that is an implementation
detail and can be improved if subpage mapping can be supported.

So how about removing the page-alignment constraint in the kernel?

Regards,
Tao

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
  2022-02-01  8:43   ` Hou Tao
@ 2022-02-02  1:29     ` Andrii Nakryiko
  2022-02-02  2:36       ` Hou Tao
  0 siblings, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2022-02-02  1:29 UTC (permalink / raw)
  To: Hou Tao
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	David S. Miller, Hou Tao, Martin Lau, Jakub Kicinski, Networking,
	Yonghong Song

On Tue, Feb 1, 2022 at 12:43 AM Hou Tao <hotforest@gmail.com> wrote:
>
> Hi Andrii,
>
> > >
> > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > getpagesize() to initialize the ring buffer size. Do this for
> > > test_progs as well.
> > >
> [...]
>
> > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > index 96060ff4ffc6..e192a9f16aea 100644
> > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > >
> > >  struct {
> > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > -       __uint(max_entries, 1 << 12);
> >
> > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > do a split open and then load just due to this...
> >
> Agreed.
>
> > I'm also wondering if we should either teach kernel to round up to
> > closes power-of-2 of page_size internally, or teach libbpf to do this
> > for RINGBUF maps. Thoughts?
> >
> It seems that max_entries doesn't need to be page-aligned. For example
> if max_entries is 4096 and page size is 65536, we can allocate a
> 65536-sized page and set rb->mask 4095 and it will work. The only
> downside is 60KB memory is waster, but it is the implementation
> details and can be improved if subpage mapping can be supported.
>
> So how about removing the page-aligned restraint in kernel ?
>

No, if you read BPF ringbuf code carefully you'll see that we map the
entire ringbuf data twice in the memory (see [0] for lame ASCII
diagram), so that records that are wrapped at the end of the ringbuf
and go back to the start are still accessible as a linear array. It's
a very important guarantee, so it has to be a multiple of the page size. But
auto-increasing it to the closest power-of-2 of page size seems like a
pretty low-impact change. Hard to imagine breaking anything except
some carefully crafted tests for ENOSPC behavior.

  [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89

> Regards,
> Tao

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
  2022-02-02  1:29     ` Andrii Nakryiko
@ 2022-02-02  2:36       ` Hou Tao
  2022-02-02  6:45         ` Andrii Nakryiko
  0 siblings, 1 reply; 7+ messages in thread
From: Hou Tao @ 2022-02-02  2:36 UTC (permalink / raw)
  To: andrii.nakryiko
  Cc: andrii, ast, bpf, daniel, davem, hotforest, houtao1, kafai, kuba,
	netdev, yhs

Hi,

> >
> > Hi Andrii,
> >
> > > >
> > > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > > getpagesize() to initialize the ring buffer size. Do this for
> > > > test_progs as well.
> > > >
> > [...]
> >
> > > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > > index 96060ff4ffc6..e192a9f16aea 100644
> > > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > > >
> > > >  struct {
> > > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > > -       __uint(max_entries, 1 << 12);
> > >
> > > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > > do a split open and then load just due to this...
> > >
> > Agreed.
> >
> > > I'm also wondering if we should either teach kernel to round up to
> > > closes power-of-2 of page_size internally, or teach libbpf to do this
> > > for RINGBUF maps. Thoughts?
> > >
> > It seems that max_entries doesn't need to be page-aligned. For example
> > if max_entries is 4096 and page size is 65536, we can allocate a
> > 65536-sized page and set rb->mask 4095 and it will work. The only
> > downside is 60KB memory is waster, but it is the implementation
> > details and can be improved if subpage mapping can be supported.
> >
> > So how about removing the page-aligned restraint in kernel ?
> >
> 
> No, if you read BPF ringbuf code carefully you'll see that we map the
> entire ringbuf data twice in the memory (see [0] for lame ASCII
> diagram), so that records that are wrapped at the end of the ringbuf
> and go back to the start are still accessible as a linear array. It's
> a very important guarantee, so it has to be page size multiple. But
> auto-increasing it to the closest power-of-2 of page size seems like a
> pretty low-impact change. Hard to imagine breaking anything except
> some carefully crafted tests for ENOSPC behavior.
>

Yes, I know the double-map trick. What I tried to say is:
(1) remove the page-alignment constraint for max_entries
(2) still allocate page-aligned memory for the ringbuf

instead of rounding max_entries up to the closest power-of-2 page size
directly, so max_entries from userspace is unchanged and the double-map trick
still works.

> [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89

> > Regards,
> > Tao


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
  2022-02-02  2:36       ` Hou Tao
@ 2022-02-02  6:45         ` Andrii Nakryiko
  2022-02-03 11:12           ` Hou Tao
  0 siblings, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2022-02-02  6:45 UTC (permalink / raw)
  To: Hou Tao
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	David S. Miller, Hou Tao, Martin Lau, Jakub Kicinski, Networking,
	Yonghong Song

On Tue, Feb 1, 2022 at 6:36 PM Hou Tao <hotforest@gmail.com> wrote:
>
> Hi,
>
> > >
> > > Hi Andrii,
> > >
> > > > >
> > > > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > > > getpagesize() to initialize the ring buffer size. Do this for
> > > > > test_progs as well.
> > > > >
> > > [...]
> > >
> > > > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > > > index 96060ff4ffc6..e192a9f16aea 100644
> > > > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > > > >
> > > > >  struct {
> > > > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > > > -       __uint(max_entries, 1 << 12);
> > > >
> > > > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > > > do a split open and then load just due to this...
> > > >
> > > Agreed.
> > >
> > > > I'm also wondering if we should either teach kernel to round up to
> > > > closes power-of-2 of page_size internally, or teach libbpf to do this
> > > > for RINGBUF maps. Thoughts?
> > > >
> > > It seems that max_entries doesn't need to be page-aligned. For example
> > > if max_entries is 4096 and page size is 65536, we can allocate a
> > > 65536-sized page and set rb->mask 4095 and it will work. The only
> > > downside is 60KB memory is waster, but it is the implementation
> > > details and can be improved if subpage mapping can be supported.
> > >
> > > So how about removing the page-aligned restraint in kernel ?
> > >
> >
> > No, if you read BPF ringbuf code carefully you'll see that we map the
> > entire ringbuf data twice in the memory (see [0] for lame ASCII
> > diagram), so that records that are wrapped at the end of the ringbuf
> > and go back to the start are still accessible as a linear array. It's
> > a very important guarantee, so it has to be page size multiple. But
> > auto-increasing it to the closest power-of-2 of page size seems like a
> > pretty low-impact change. Hard to imagine breaking anything except
> > some carefully crafted tests for ENOSPC behavior.
> >
>
> Yes, i know the double map trick. What i tried to say is that:
> (1) remove the page-aligned restrain for max_entries
> (2) still allocate page-aligned memory for ringbuf
>
> instead of rounding max_entries up to closest power-of-2 page size
> directly, so max_entries from userspace is unchanged and double map trick
> still works.

I don't see how. Knowing the correct and exact size of the ringbuf
data area is mandatory for correctly consuming ringbuf data from
user-space. But if I'm missing something, feel free to give it a try
and see if it actually works.

>
> > [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89
>
> > > Regards,
> > > Tao
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size
  2022-02-02  6:45         ` Andrii Nakryiko
@ 2022-02-03 11:12           ` Hou Tao
  0 siblings, 0 replies; 7+ messages in thread
From: Hou Tao @ 2022-02-03 11:12 UTC (permalink / raw)
  To: andrii.nakryiko
  Cc: andrii, ast, bpf, daniel, davem, hotforest, houtao1, kafai, kuba,
	netdev, yhs

Hi,

> On Tue, Feb 1, 2022 at 6:36 PM Hou Tao <hotforest@gmail.com> wrote:
> >
> > Hi,
> >
> > > >
> > > > Hi Andrii,
> > > >
> > > > > >
> > > > > > 4096 is OK for x86-64, but for other archs with greater than 4KB
> > > > > > page size (e.g. 64KB under arm64), test_verifier for test case
> > > > > > "check valid spill/fill, ptr to mem" will fail, so just use
> > > > > > getpagesize() to initialize the ring buffer size. Do this for
> > > > > > test_progs as well.
> > > > > >
> > > > [...]
> > > >
> > > > > > diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
> > > > > > index 96060ff4ffc6..e192a9f16aea 100644
> > > > > > --- a/tools/testing/selftests/bpf/progs/ima.c
> > > > > > +++ b/tools/testing/selftests/bpf/progs/ima.c
> > > > > > @@ -13,7 +13,6 @@ u32 monitored_pid = 0;
> > > > > >
> > > > > >  struct {
> > > > > >         __uint(type, BPF_MAP_TYPE_RINGBUF);
> > > > > > -       __uint(max_entries, 1 << 12);
> > > > >
> > > > > Should we just bump it to 64/128/256KB instead? It's quite annoying to
> > > > > do a split open and then load just due to this...
> > > > >
> > > > Agreed.
> > > >
> > > > > I'm also wondering if we should either teach kernel to round up to
> > > > > closes power-of-2 of page_size internally, or teach libbpf to do this
> > > > > for RINGBUF maps. Thoughts?
> > > > >
[...]
> > >
> > > No, if you read BPF ringbuf code carefully you'll see that we map the
> > > entire ringbuf data twice in the memory (see [0] for lame ASCII
> > > diagram), so that records that are wrapped at the end of the ringbuf
> > > and go back to the start are still accessible as a linear array. It's
> > > a very important guarantee, so it has to be page size multiple. But
> > > auto-increasing it to the closest power-of-2 of page size seems like a
> > > pretty low-impact change. Hard to imagine breaking anything except
> > > some carefully crafted tests for ENOSPC behavior.
> > >
> >
> > Yes, i know the double map trick. What i tried to say is that:
> > (1) remove the page-aligned restrain for max_entries
> > (2) still allocate page-aligned memory for ringbuf
> >
> > instead of rounding max_entries up to closest power-of-2 page size
> > directly, so max_entries from userspace is unchanged and double map trick
> > still works.
> 
> I don't see how. Knowing the correct and exact size of the ringbuf
> data area is mandatory for correctly consuming ringbuf data from
> user-space. But if I'm missing something, feel free to give it a try
> and see if it actually works.
> 
You are right. Userspace needs max_entries to mmap() the data
area, so max_entries must be page-size aligned.

If we want to do the automatic round-up, I think libbpf would be a better
place, because if the round-up is done in the kernel, a userspace program
may use the old max_entries to call mmap(), so the consumer side will not
work, which leads to confusion. If we do the auto-round-up in libbpf,
the setup procedure is hidden from the libbpf user. I will add the auto
round-up and its tests in libbpf.

Regards
Tao
> 
> >
> > > [0] https://github.com/torvalds/linux/blob/master/kernel/bpf/ringbuf.c#L73-L89
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-02-03 11:12 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-27  2:49 [PATCH bpf-next] selftests/bpf: use getpagesize() to initialize ring buffer size Hou Tao
2022-02-01  0:02 ` Andrii Nakryiko
2022-02-01  8:43   ` Hou Tao
2022-02-02  1:29     ` Andrii Nakryiko
2022-02-02  2:36       ` Hou Tao
2022-02-02  6:45         ` Andrii Nakryiko
2022-02-03 11:12           ` Hou Tao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.