* [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390
@ 2019-08-16 10:53 Ilya Leoshkevich
2019-08-16 15:43 ` Yonghong Song
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Ilya Leoshkevich @ 2019-08-16 10:53 UTC (permalink / raw)
To: Daniel Borkmann, Alexei Starovoitov
Cc: bpf, Andrey Ignatov, Yonghong Song, Heiko Carstens,
Vasily Gorbik, Ilya Leoshkevich
"ctx:file_pos sysctl:read write ok" fails on s390 with "Read value !=
nux". This is because verifier rewrites a complete 32-bit
bpf_sysctl.file_pos update to a partial update of the first 32 bits of
64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
systems.
Fix by using an offset on big-endian systems.
Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
a problem there, since it expects to see 0, which it gets with high
probability in error cases, so change it to seek to offset 3 and expect
3 in bpf_sysctl.file_pos.
Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
v1->v2: Merge bpf_ctx_narrow_load_shift and
bpf_ctx_narrow_access_offset.
include/linux/filter.h | 8 ++++----
kernel/bpf/cgroup.c | 10 ++++++++--
kernel/bpf/verifier.c | 4 ++--
tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++-
4 files changed, 22 insertions(+), 9 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 92c6e31fb008..2ce57645f3cd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -749,14 +749,14 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
}
static inline u8
-bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
+bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default)
{
- u8 load_off = off & (size_default - 1);
+ u8 access_off = off & (size_default - 1);
#ifdef __LITTLE_ENDIAN
- return load_off * 8;
+ return access_off;
#else
- return (size_default - (load_off + size)) * 8;
+ return size_default - (access_off + size);
#endif
}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 0a00eaca6fae..00c4647ce92a 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1325,6 +1325,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ u32 read_size;
switch (si->off) {
case offsetof(struct bpf_sysctl, write):
@@ -1356,7 +1357,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, ppos));
*insn++ = BPF_STX_MEM(
- BPF_SIZEOF(u32), treg, si->src_reg, 0);
+ BPF_SIZEOF(u32), treg, si->src_reg,
+ bpf_ctx_narrow_access_offset(
+ 0, sizeof(u32), sizeof(loff_t)));
*insn++ = BPF_LDX_MEM(
BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
@@ -1365,8 +1368,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sysctl_kern, ppos));
+ read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
*insn++ = BPF_LDX_MEM(
- BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
+ BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
+ bpf_ctx_narrow_access_offset(
+ 0, read_size, sizeof(loff_t)));
}
*target_size = sizeof(u32);
break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c84d83f86141..d1d4c995a9eb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8616,8 +8616,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
if (is_narrower_load && size < target_size) {
- u8 shift = bpf_ctx_narrow_load_shift(off, size,
- size_default);
+ u8 shift = bpf_ctx_narrow_access_offset(
+ off, size, size_default) * 8;
if (ctx_field_size <= 4) {
if (shift)
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index a3bebd7c68dd..abc26248a7f1 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -31,6 +31,7 @@ struct sysctl_test {
enum bpf_attach_type attach_type;
const char *sysctl;
int open_flags;
+ int seek;
const char *newval;
const char *oldval;
enum {
@@ -139,7 +140,7 @@ static struct sysctl_test tests[] = {
/* If (file_pos == X) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -152,6 +153,7 @@ static struct sysctl_test tests[] = {
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
+ .seek = 3,
.result = SUCCESS,
},
{
@@ -1442,6 +1444,11 @@ static int access_sysctl(const char *sysctl_path,
if (fd < 0)
return fd;
+ if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) {
+ log_err("lseek(%d) failed", test->seek);
+ goto err;
+ }
+
if (test->open_flags == O_RDONLY) {
char buf[128];
--
2.21.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390
2019-08-16 10:53 [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390 Ilya Leoshkevich
@ 2019-08-16 15:43 ` Yonghong Song
2019-09-03 22:13 ` Daniel Borkmann
2019-09-16 10:11 ` Daniel Borkmann
2 siblings, 0 replies; 6+ messages in thread
From: Yonghong Song @ 2019-08-16 15:43 UTC (permalink / raw)
To: Ilya Leoshkevich, Daniel Borkmann, Alexei Starovoitov
Cc: bpf, Andrey Ignatov, Heiko Carstens, Vasily Gorbik
On 8/16/19 3:53 AM, Ilya Leoshkevich wrote:
> "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value !=
> nux". This is because verifier rewrites a complete 32-bit
> bpf_sysctl.file_pos update to a partial update of the first 32 bits of
> 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
> systems.
>
> Fix by using an offset on big-endian systems.
>
> Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
> a problem there, since it expects to see 0, which it gets with high
> probability in error cases, so change it to seek to offset 3 and expect
> 3 in bpf_sysctl.file_pos.
>
> Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Yonghong Song <yhs@fb.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390
2019-08-16 10:53 [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390 Ilya Leoshkevich
2019-08-16 15:43 ` Yonghong Song
@ 2019-09-03 22:13 ` Daniel Borkmann
2019-09-04 13:53 ` Ilya Leoshkevich
2019-09-08 3:37 ` Andrey Ignatov
2019-09-16 10:11 ` Daniel Borkmann
2 siblings, 2 replies; 6+ messages in thread
From: Daniel Borkmann @ 2019-09-03 22:13 UTC (permalink / raw)
To: Ilya Leoshkevich, Alexei Starovoitov
Cc: bpf, Andrey Ignatov, Yonghong Song, Heiko Carstens, Vasily Gorbik
On 8/16/19 12:53 PM, Ilya Leoshkevich wrote:
> "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value !=
> nux". This is because verifier rewrites a complete 32-bit
> bpf_sysctl.file_pos update to a partial update of the first 32 bits of
> 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
> systems.
>
> Fix by using an offset on big-endian systems.
>
> Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
> a problem there, since it expects to see 0, which it gets with high
> probability in error cases, so change it to seek to offset 3 and expect
> 3 in bpf_sysctl.file_pos.
>
> Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
> v1->v2: Merge bpf_ctx_narrow_load_shift and
> bpf_ctx_narrow_access_offset.
>
> include/linux/filter.h | 8 ++++----
> kernel/bpf/cgroup.c | 10 ++++++++--
> kernel/bpf/verifier.c | 4 ++--
> tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++-
> 4 files changed, 22 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 92c6e31fb008..2ce57645f3cd 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -749,14 +749,14 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
> }
>
> static inline u8
> -bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
> +bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default)
> {
> - u8 load_off = off & (size_default - 1);
> + u8 access_off = off & (size_default - 1);
>
> #ifdef __LITTLE_ENDIAN
> - return load_off * 8;
> + return access_off;
> #else
> - return (size_default - (load_off + size)) * 8;
> + return size_default - (access_off + size);
> #endif
> }
>
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 0a00eaca6fae..00c4647ce92a 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -1325,6 +1325,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
> struct bpf_prog *prog, u32 *target_size)
> {
> struct bpf_insn *insn = insn_buf;
> + u32 read_size;
>
> switch (si->off) {
> case offsetof(struct bpf_sysctl, write):
> @@ -1356,7 +1357,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
> treg, si->dst_reg,
> offsetof(struct bpf_sysctl_kern, ppos));
> *insn++ = BPF_STX_MEM(
> - BPF_SIZEOF(u32), treg, si->src_reg, 0);
> + BPF_SIZEOF(u32), treg, si->src_reg,
> + bpf_ctx_narrow_access_offset(
> + 0, sizeof(u32), sizeof(loff_t)));
> *insn++ = BPF_LDX_MEM(
> BPF_DW, treg, si->dst_reg,
> offsetof(struct bpf_sysctl_kern, tmp_reg));
> @@ -1365,8 +1368,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
> BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
> si->dst_reg, si->src_reg,
> offsetof(struct bpf_sysctl_kern, ppos));
> + read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
> *insn++ = BPF_LDX_MEM(
> - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
> + BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
> + bpf_ctx_narrow_access_offset(
> + 0, read_size, sizeof(loff_t)));
I see what you're doing, but generally I'm a bit puzzled on why we need these
partial store/loads and cannot access the full loff_t value internally with the
rewrite. Why was BPF_SIZEOF(u32) chosen in the first place? Looks like git history
doesn't have any useful insight here ... Andrey mind to put some clarifications
on this? Thx
> }
> *target_size = sizeof(u32);
> break;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index c84d83f86141..d1d4c995a9eb 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -8616,8 +8616,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> }
>
> if (is_narrower_load && size < target_size) {
> - u8 shift = bpf_ctx_narrow_load_shift(off, size,
> - size_default);
> + u8 shift = bpf_ctx_narrow_access_offset(
> + off, size, size_default) * 8;
> if (ctx_field_size <= 4) {
> if (shift)
> insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
> diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
> index a3bebd7c68dd..abc26248a7f1 100644
> --- a/tools/testing/selftests/bpf/test_sysctl.c
> +++ b/tools/testing/selftests/bpf/test_sysctl.c
> @@ -31,6 +31,7 @@ struct sysctl_test {
> enum bpf_attach_type attach_type;
> const char *sysctl;
> int open_flags;
> + int seek;
> const char *newval;
> const char *oldval;
> enum {
> @@ -139,7 +140,7 @@ static struct sysctl_test tests[] = {
> /* If (file_pos == X) */
> BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
> offsetof(struct bpf_sysctl, file_pos)),
> - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
> + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2),
>
> /* return ALLOW; */
> BPF_MOV64_IMM(BPF_REG_0, 1),
> @@ -152,6 +153,7 @@ static struct sysctl_test tests[] = {
> .attach_type = BPF_CGROUP_SYSCTL,
> .sysctl = "kernel/ostype",
> .open_flags = O_RDONLY,
> + .seek = 3,
> .result = SUCCESS,
> },
> {
> @@ -1442,6 +1444,11 @@ static int access_sysctl(const char *sysctl_path,
> if (fd < 0)
> return fd;
>
> + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) {
> + log_err("lseek(%d) failed", test->seek);
> + goto err;
> + }
> +
> if (test->open_flags == O_RDONLY) {
> char buf[128];
>
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390
2019-09-03 22:13 ` Daniel Borkmann
@ 2019-09-04 13:53 ` Ilya Leoshkevich
2019-09-08 3:37 ` Andrey Ignatov
1 sibling, 0 replies; 6+ messages in thread
From: Ilya Leoshkevich @ 2019-09-04 13:53 UTC (permalink / raw)
To: Daniel Borkmann
Cc: Alexei Starovoitov, bpf, Andrey Ignatov, Yonghong Song,
Heiko Carstens, Vasily Gorbik
> Am 04.09.2019 um 00:13 schrieb Daniel Borkmann <daniel@iogearbox.net>:
>
> On 8/16/19 12:53 PM, Ilya Leoshkevich wrote:
>> "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value !=
>> nux". This is because verifier rewrites a complete 32-bit
>> bpf_sysctl.file_pos update to a partial update of the first 32 bits of
>> 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
>> systems.
>> Fix by using an offset on big-endian systems.
>> Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
>> a problem there, since it expects to see 0, which it gets with high
>> probability in error cases, so change it to seek to offset 3 and expect
>> 3 in bpf_sysctl.file_pos.
>> Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
>> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
>> ---
>> v1->v2: Merge bpf_ctx_narrow_load_shift and
>> bpf_ctx_narrow_access_offset.
>> include/linux/filter.h | 8 ++++----
>> kernel/bpf/cgroup.c | 10 ++++++++--
>> kernel/bpf/verifier.c | 4 ++--
>> tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++-
>> 4 files changed, 22 insertions(+), 9 deletions(-)
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 92c6e31fb008..2ce57645f3cd 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -749,14 +749,14 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
>> }
>> static inline u8
>> -bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
>> +bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default)
>> {
>> - u8 load_off = off & (size_default - 1);
>> + u8 access_off = off & (size_default - 1);
>> #ifdef __LITTLE_ENDIAN
>> - return load_off * 8;
>> + return access_off;
>> #else
>> - return (size_default - (load_off + size)) * 8;
>> + return size_default - (access_off + size);
>> #endif
>> }
>> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
>> index 0a00eaca6fae..00c4647ce92a 100644
>> --- a/kernel/bpf/cgroup.c
>> +++ b/kernel/bpf/cgroup.c
>> @@ -1325,6 +1325,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
>> struct bpf_prog *prog, u32 *target_size)
>> {
>> struct bpf_insn *insn = insn_buf;
>> + u32 read_size;
>> switch (si->off) {
>> case offsetof(struct bpf_sysctl, write):
>> @@ -1356,7 +1357,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
>> treg, si->dst_reg,
>> offsetof(struct bpf_sysctl_kern, ppos));
>> *insn++ = BPF_STX_MEM(
>> - BPF_SIZEOF(u32), treg, si->src_reg, 0);
>> + BPF_SIZEOF(u32), treg, si->src_reg,
>> + bpf_ctx_narrow_access_offset(
>> + 0, sizeof(u32), sizeof(loff_t)));
>> *insn++ = BPF_LDX_MEM(
>> BPF_DW, treg, si->dst_reg,
>> offsetof(struct bpf_sysctl_kern, tmp_reg));
>> @@ -1365,8 +1368,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
>> BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
>> si->dst_reg, si->src_reg,
>> offsetof(struct bpf_sysctl_kern, ppos));
>> + read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
>> *insn++ = BPF_LDX_MEM(
>> - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
>> + BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
>> + bpf_ctx_narrow_access_offset(
>> + 0, read_size, sizeof(loff_t)));
>
> I see what you're doing, but generally I'm a bit puzzled on why we need these
> partial store/loads and cannot access the full loff_t value internally with the
> rewrite. Why was BPF_SIZEOF(u32) chosen in the first place? Looks like git history
> doesn't have any useful insight here ... Andrey mind to put some clarifications
> on this? Thx
Do you mean rewriting e.g. the following:
BPF_LDX_MEM(access_size, si->dst_reg, si->src_reg,
offsetof(struct bpf_sysctl, file_pos)));
as the following?
BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sysctl_kern, ppos));
BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 0); # full loff_t
while also doing
*target_size = sizeof(loff_t);
in order to let is_narrower_load logic in convert_ctx_accesses () do the
masking? That would work, but wouldn't the generated code be less
efficient due to explicit masking?
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390
2019-09-03 22:13 ` Daniel Borkmann
2019-09-04 13:53 ` Ilya Leoshkevich
@ 2019-09-08 3:37 ` Andrey Ignatov
1 sibling, 0 replies; 6+ messages in thread
From: Andrey Ignatov @ 2019-09-08 3:37 UTC (permalink / raw)
To: Daniel Borkmann
Cc: Ilya Leoshkevich, Alexei Starovoitov, bpf, Andrey Ignatov,
Yonghong Song, Heiko Carstens, Vasily Gorbik
Daniel Borkmann <daniel@iogearbox.net> [Wed, 4 Sep 2019 00:13:03 +0200]:
> On 8/16/19 12:53 PM, Ilya Leoshkevich wrote:
> > "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value !=
> > nux". This is because verifier rewrites a complete 32-bit
> > bpf_sysctl.file_pos update to a partial update of the first 32 bits of
> > 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
> > systems.
> >
> > Fix by using an offset on big-endian systems.
> >
> > Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
> > a problem there, since it expects to see 0, which it gets with high
> > probability in error cases, so change it to seek to offset 3 and expect
> > 3 in bpf_sysctl.file_pos.
> >
> > Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> > v1->v2: Merge bpf_ctx_narrow_load_shift and
> > bpf_ctx_narrow_access_offset.
> >
> > include/linux/filter.h | 8 ++++----
> > kernel/bpf/cgroup.c | 10 ++++++++--
> > kernel/bpf/verifier.c | 4 ++--
> > tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++-
> > 4 files changed, 22 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > index 92c6e31fb008..2ce57645f3cd 100644
> > --- a/include/linux/filter.h
> > +++ b/include/linux/filter.h
> > @@ -749,14 +749,14 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
> > }
> > static inline u8
> > -bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
> > +bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default)
> > {
> > - u8 load_off = off & (size_default - 1);
> > + u8 access_off = off & (size_default - 1);
> > #ifdef __LITTLE_ENDIAN
> > - return load_off * 8;
> > + return access_off;
> > #else
> > - return (size_default - (load_off + size)) * 8;
> > + return size_default - (access_off + size);
> > #endif
> > }
> > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> > index 0a00eaca6fae..00c4647ce92a 100644
> > --- a/kernel/bpf/cgroup.c
> > +++ b/kernel/bpf/cgroup.c
> > @@ -1325,6 +1325,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
> > struct bpf_prog *prog, u32 *target_size)
> > {
> > struct bpf_insn *insn = insn_buf;
> > + u32 read_size;
> > switch (si->off) {
> > case offsetof(struct bpf_sysctl, write):
> > @@ -1356,7 +1357,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
> > treg, si->dst_reg,
> > offsetof(struct bpf_sysctl_kern, ppos));
> > *insn++ = BPF_STX_MEM(
> > - BPF_SIZEOF(u32), treg, si->src_reg, 0);
> > + BPF_SIZEOF(u32), treg, si->src_reg,
> > + bpf_ctx_narrow_access_offset(
> > + 0, sizeof(u32), sizeof(loff_t)));
> > *insn++ = BPF_LDX_MEM(
> > BPF_DW, treg, si->dst_reg,
> > offsetof(struct bpf_sysctl_kern, tmp_reg));
> > @@ -1365,8 +1368,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
> > BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
> > si->dst_reg, si->src_reg,
> > offsetof(struct bpf_sysctl_kern, ppos));
> > + read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
> > *insn++ = BPF_LDX_MEM(
> > - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
> > + BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
> > + bpf_ctx_narrow_access_offset(
> > + 0, read_size, sizeof(loff_t)));
>
> I see what you're doing, but generally I'm a bit puzzled on why we need these
> partial store/loads and cannot access the full loff_t value internally with the
> rewrite. Why was BPF_SIZEOF(u32) chosen in the first place? Looks like git history
> doesn't have any useful insight here ... Andrey mind to put some clarifications
> on this? Thx
Hi Daniel,
Sorry for delay, I took a week of vacation w/ pretty limited Internet
access and w/o access to my work e-mail (so answering from personal
one).
loff_t is signed and can be negative, but file position used in
bpf_sysctl ctx can only be >= 0 since negative file position doesn't
make sense here. When picking up the unsigned int size I made an
assumption that writing >4GB to sysctl file in /proc/sys would never be
needed in practice so I chose u32. This also allowed not to care about
negative values passed to loff_t since MSB is never touched and stays
zero.
You're right the commit message could be more detailed on this part.
One thing I missed in the initial patch though is the endianness and,
yes, making the code play well with both LE and BE requires this
additional work.
> > }
> > *target_size = sizeof(u32);
> > break;
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index c84d83f86141..d1d4c995a9eb 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -8616,8 +8616,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> > }
> > if (is_narrower_load && size < target_size) {
> > - u8 shift = bpf_ctx_narrow_load_shift(off, size,
> > - size_default);
> > + u8 shift = bpf_ctx_narrow_access_offset(
> > + off, size, size_default) * 8;
> > if (ctx_field_size <= 4) {
> > if (shift)
> > insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
> > diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
> > index a3bebd7c68dd..abc26248a7f1 100644
> > --- a/tools/testing/selftests/bpf/test_sysctl.c
> > +++ b/tools/testing/selftests/bpf/test_sysctl.c
> > @@ -31,6 +31,7 @@ struct sysctl_test {
> > enum bpf_attach_type attach_type;
> > const char *sysctl;
> > int open_flags;
> > + int seek;
> > const char *newval;
> > const char *oldval;
> > enum {
> > @@ -139,7 +140,7 @@ static struct sysctl_test tests[] = {
> > /* If (file_pos == X) */
> > BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
> > offsetof(struct bpf_sysctl, file_pos)),
> > - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
> > + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2),
> > /* return ALLOW; */
> > BPF_MOV64_IMM(BPF_REG_0, 1),
> > @@ -152,6 +153,7 @@ static struct sysctl_test tests[] = {
> > .attach_type = BPF_CGROUP_SYSCTL,
> > .sysctl = "kernel/ostype",
> > .open_flags = O_RDONLY,
> > + .seek = 3,
> > .result = SUCCESS,
> > },
> > {
> > @@ -1442,6 +1444,11 @@ static int access_sysctl(const char *sysctl_path,
> > if (fd < 0)
> > return fd;
> > + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) {
> > + log_err("lseek(%d) failed", test->seek);
> > + goto err;
> > + }
> > +
> > if (test->open_flags == O_RDONLY) {
> > char buf[128];
> >
>
--
Andrey Ignatov
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390
2019-08-16 10:53 [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390 Ilya Leoshkevich
2019-08-16 15:43 ` Yonghong Song
2019-09-03 22:13 ` Daniel Borkmann
@ 2019-09-16 10:11 ` Daniel Borkmann
2 siblings, 0 replies; 6+ messages in thread
From: Daniel Borkmann @ 2019-09-16 10:11 UTC (permalink / raw)
To: Ilya Leoshkevich, Alexei Starovoitov
Cc: bpf, Andrey Ignatov, Yonghong Song, Heiko Carstens, Vasily Gorbik
On 8/16/19 12:53 PM, Ilya Leoshkevich wrote:
> "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value !=
> nux". This is because verifier rewrites a complete 32-bit
> bpf_sysctl.file_pos update to a partial update of the first 32 bits of
> 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
> systems.
>
> Fix by using an offset on big-endian systems.
>
> Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
> a problem there, since it expects to see 0, which it gets with high
> probability in error cases, so change it to seek to offset 3 and expect
> 3 in bpf_sysctl.file_pos.
>
> Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Applied, thanks!
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-09-16 10:44 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-16 10:53 [PATCH bpf v2] bpf: fix accessing bpf_sysctl.file_pos on s390 Ilya Leoshkevich
2019-08-16 15:43 ` Yonghong Song
2019-09-03 22:13 ` Daniel Borkmann
2019-09-04 13:53 ` Ilya Leoshkevich
2019-09-08 3:37 ` Andrey Ignatov
2019-09-16 10:11 ` Daniel Borkmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).