* [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
@ 2020-09-10 11:02 Lorenz Bauer
2020-09-10 12:21 ` Jesper Dangaard Brouer
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Lorenz Bauer @ 2020-09-10 11:02 UTC (permalink / raw)
To: ast, daniel, jakub; +Cc: bpf, kernel-team, Lorenz Bauer
As Alexei points out [1], struct bpf_sk_lookup_kern has two 4-byte holes.
This leads to suboptimal instructions being generated (IPv4, x86):
1372 struct bpf_sk_lookup_kern ctx = {
0xffffffff81b87f30 <+624>: xor %eax,%eax
0xffffffff81b87f32 <+626>: mov $0x6,%ecx
0xffffffff81b87f37 <+631>: lea 0x90(%rsp),%rdi
0xffffffff81b87f3f <+639>: movl $0x110002,0x88(%rsp)
0xffffffff81b87f4a <+650>: rep stos %rax,%es:(%rdi)
0xffffffff81b87f4d <+653>: mov 0x8(%rsp),%eax
0xffffffff81b87f51 <+657>: mov %r13d,0x90(%rsp)
0xffffffff81b87f59 <+665>: incl %gs:0x7e4970a0(%rip)
0xffffffff81b87f60 <+672>: mov %eax,0x8c(%rsp)
0xffffffff81b87f67 <+679>: movzwl 0x10(%rsp),%eax
0xffffffff81b87f6c <+684>: mov %ax,0xa8(%rsp)
0xffffffff81b87f74 <+692>: movzwl 0x38(%rsp),%eax
0xffffffff81b87f79 <+697>: mov %ax,0xaa(%rsp)
Fix this by moving around sport and dport. pahole confirms there
are no more holes:
struct bpf_sk_lookup_kern {
u16 family; /* 0 2 */
u16 protocol; /* 2 2 */
__be16 sport; /* 4 2 */
u16 dport; /* 6 2 */
struct {
__be32 saddr; /* 8 4 */
__be32 daddr; /* 12 4 */
} v4; /* 8 8 */
struct {
const struct in6_addr * saddr; /* 16 8 */
const struct in6_addr * daddr; /* 24 8 */
} v6; /* 16 16 */
struct sock * selected_sk; /* 32 8 */
bool no_reuseport; /* 40 1 */
/* size: 48, cachelines: 1, members: 8 */
/* padding: 7 */
/* last cacheline: 48 bytes */
};
The assembly also doesn't contain the pesky rep stos anymore:
1372 struct bpf_sk_lookup_kern ctx = {
0xffffffff81b87f60 <+624>: movzwl 0x10(%rsp),%eax
0xffffffff81b87f65 <+629>: movq $0x0,0xa8(%rsp)
0xffffffff81b87f71 <+641>: movq $0x0,0xb0(%rsp)
0xffffffff81b87f7d <+653>: mov %ax,0x9c(%rsp)
0xffffffff81b87f85 <+661>: movzwl 0x38(%rsp),%eax
0xffffffff81b87f8a <+666>: movq $0x0,0xb8(%rsp)
0xffffffff81b87f96 <+678>: mov %ax,0x9e(%rsp)
0xffffffff81b87f9e <+686>: mov 0x8(%rsp),%eax
0xffffffff81b87fa2 <+690>: movq $0x0,0xc0(%rsp)
0xffffffff81b87fae <+702>: movl $0x110002,0x98(%rsp)
0xffffffff81b87fb9 <+713>: mov %eax,0xa0(%rsp)
0xffffffff81b87fc0 <+720>: mov %r13d,0xa4(%rsp)
1: https://lore.kernel.org/bpf/CAADnVQKE6y9h2fwX6OS837v-Uf+aBXnT_JXiN_bbo2gitZQ3tA@mail.gmail.com/
Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
include/linux/filter.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 995625950cc1..e962dd8117d8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1287,6 +1287,8 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
struct bpf_sk_lookup_kern {
u16 family;
u16 protocol;
+ __be16 sport;
+ u16 dport;
struct {
__be32 saddr;
__be32 daddr;
@@ -1295,8 +1297,6 @@ struct bpf_sk_lookup_kern {
const struct in6_addr *saddr;
const struct in6_addr *daddr;
} v6;
- __be16 sport;
- u16 dport;
struct sock *selected_sk;
bool no_reuseport;
};
--
2.25.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
@ 2020-09-10 12:21 ` Jesper Dangaard Brouer
2020-09-10 15:19 ` Arnaldo Carvalho de Melo
2020-09-11 0:53 ` Alexei Starovoitov
2 siblings, 0 replies; 5+ messages in thread
From: Jesper Dangaard Brouer @ 2020-09-10 12:21 UTC (permalink / raw)
To: Lorenz Bauer
Cc: brouer, ast, daniel, jakub, bpf, kernel-team, Stefano Brivio,
Lorenzo Bianconi
On Thu, 10 Sep 2020 12:02:48 +0100
Lorenz Bauer <lmb@cloudflare.com> wrote:
> As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> This leads to suboptimal instructions being generated (IPv4, x86):
>
> 1372 struct bpf_sk_lookup_kern ctx = {
> 0xffffffff81b87f30 <+624>: xor %eax,%eax
> 0xffffffff81b87f32 <+626>: mov $0x6,%ecx
> 0xffffffff81b87f37 <+631>: lea 0x90(%rsp),%rdi
> 0xffffffff81b87f3f <+639>: movl $0x110002,0x88(%rsp)
> 0xffffffff81b87f4a <+650>: rep stos %rax,%es:(%rdi)
> 0xffffffff81b87f4d <+653>: mov 0x8(%rsp),%eax
> 0xffffffff81b87f51 <+657>: mov %r13d,0x90(%rsp)
> 0xffffffff81b87f59 <+665>: incl %gs:0x7e4970a0(%rip)
> 0xffffffff81b87f60 <+672>: mov %eax,0x8c(%rsp)
> 0xffffffff81b87f67 <+679>: movzwl 0x10(%rsp),%eax
> 0xffffffff81b87f6c <+684>: mov %ax,0xa8(%rsp)
> 0xffffffff81b87f74 <+692>: movzwl 0x38(%rsp),%eax
> 0xffffffff81b87f79 <+697>: mov %ax,0xaa(%rsp)
>
> Fix this by moving around sport and dport. pahole confirms there
> are no more holes:
>
> struct bpf_sk_lookup_kern {
> u16 family; /* 0 2 */
> u16 protocol; /* 2 2 */
> __be16 sport; /* 4 2 */
> u16 dport; /* 6 2 */
> struct {
> __be32 saddr; /* 8 4 */
> __be32 daddr; /* 12 4 */
> } v4; /* 8 8 */
> struct {
> const struct in6_addr * saddr; /* 16 8 */
> const struct in6_addr * daddr; /* 24 8 */
> } v6; /* 16 16 */
> struct sock * selected_sk; /* 32 8 */
> bool no_reuseport; /* 40 1 */
>
> /* size: 48, cachelines: 1, members: 8 */
> /* padding: 7 */
> /* last cacheline: 48 bytes */
> };
>
> The assembly also doesn't contain the pesky rep stos anymore:
>
> 1372 struct bpf_sk_lookup_kern ctx = {
> 0xffffffff81b87f60 <+624>: movzwl 0x10(%rsp),%eax
> 0xffffffff81b87f65 <+629>: movq $0x0,0xa8(%rsp)
> 0xffffffff81b87f71 <+641>: movq $0x0,0xb0(%rsp)
> 0xffffffff81b87f7d <+653>: mov %ax,0x9c(%rsp)
> 0xffffffff81b87f85 <+661>: movzwl 0x38(%rsp),%eax
> 0xffffffff81b87f8a <+666>: movq $0x0,0xb8(%rsp)
> 0xffffffff81b87f96 <+678>: mov %ax,0x9e(%rsp)
> 0xffffffff81b87f9e <+686>: mov 0x8(%rsp),%eax
> 0xffffffff81b87fa2 <+690>: movq $0x0,0xc0(%rsp)
> 0xffffffff81b87fae <+702>: movl $0x110002,0x98(%rsp)
> 0xffffffff81b87fb9 <+713>: mov %eax,0xa0(%rsp)
> 0xffffffff81b87fc0 <+720>: mov %r13d,0xa4(%rsp)
>
> 1: https://lore.kernel.org/bpf/CAADnVQKE6y9h2fwX6OS837v-Uf+aBXnT_JXiN_bbo2gitZQ3tA@mail.gmail.com/
>
> Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
I'm very happy to see others have also discovered the slowdown of 'rep stos',
as I've been hunting these for years. My understanding is that the
'rep-stos' slowdown comes from the CPU-instruction saving the CPU-flags
to allow it to be interrupted. That makes sense when memset is zeroing
large areas, but for small structs this is slower than
clearing them in other ways. I have a micro-benchmark as a
kernel-module here[2], where I explore different methods of memset.
[2] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/lib/time_bench_memset.c
As you have discovered, the GCC compiler will generate these rep stos
instructions for clearing a struct if not all members are initialized. If you want
to fix some more of these, then I remember there were some in the
net/core/flow_dissector.c code.
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
2020-09-10 12:21 ` Jesper Dangaard Brouer
@ 2020-09-10 15:19 ` Arnaldo Carvalho de Melo
2020-09-11 0:53 ` Alexei Starovoitov
2 siblings, 0 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-09-10 15:19 UTC (permalink / raw)
To: Lorenz Bauer; +Cc: ast, daniel, jakub, bpf, kernel-team
Em Thu, Sep 10, 2020 at 12:02:48PM +0100, Lorenz Bauer escreveu:
> As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> This leads to suboptimal instructions being generated (IPv4, x86):
>
> 1372 struct bpf_sk_lookup_kern ctx = {
> 0xffffffff81b87f30 <+624>: xor %eax,%eax
> 0xffffffff81b87f32 <+626>: mov $0x6,%ecx
> 0xffffffff81b87f37 <+631>: lea 0x90(%rsp),%rdi
> 0xffffffff81b87f3f <+639>: movl $0x110002,0x88(%rsp)
> 0xffffffff81b87f4a <+650>: rep stos %rax,%es:(%rdi)
> 0xffffffff81b87f4d <+653>: mov 0x8(%rsp),%eax
> 0xffffffff81b87f51 <+657>: mov %r13d,0x90(%rsp)
> 0xffffffff81b87f59 <+665>: incl %gs:0x7e4970a0(%rip)
> 0xffffffff81b87f60 <+672>: mov %eax,0x8c(%rsp)
> 0xffffffff81b87f67 <+679>: movzwl 0x10(%rsp),%eax
> 0xffffffff81b87f6c <+684>: mov %ax,0xa8(%rsp)
> 0xffffffff81b87f74 <+692>: movzwl 0x38(%rsp),%eax
> 0xffffffff81b87f79 <+697>: mov %ax,0xaa(%rsp)
>
> Fix this by moving around sport and dport. pahole confirms there
> are no more holes:
>
> struct bpf_sk_lookup_kern {
> u16 family; /* 0 2 */
> u16 protocol; /* 2 2 */
> __be16 sport; /* 4 2 */
> u16 dport; /* 6 2 */
> struct {
> __be32 saddr; /* 8 4 */
> __be32 daddr; /* 12 4 */
> } v4; /* 8 8 */
> struct {
> const struct in6_addr * saddr; /* 16 8 */
> const struct in6_addr * daddr; /* 24 8 */
> } v6; /* 16 16 */
> struct sock * selected_sk; /* 32 8 */
> bool no_reuseport; /* 40 1 */
>
> /* size: 48, cachelines: 1, members: 8 */
> /* padding: 7 */
> /* last cacheline: 48 bytes */
> };
Cool, looking at:
[root@five ~]# pahole --sizes | grep ^bpf | sort -nr -k2 | head
bpf_ringbuf 12288 4
bpf_ctx_convert 6960 0
bpf_verifier_env 4816 3
bpf_func_state 1360 1
bpf_trace_sample_data 1152 0
bpf_verifier_log 1048 1
bpf_dispatcher 1048 2
bpf_struct_ops 976 0
bpf_seq_printf_buf 768 0
bpf_htab 640 1
[root@five ~]# pahole --sizes | grep ^bpf | sort -nr -k3 | head
bpf_ringbuf 12288 4
bpf_verifier_env 4816 3
bpf_verifier_state 120 2
bpf_trampoline 376 2
bpf_sk_lookup_kern 56 2
bpf_reg_state 120 2
bpf_func_proto 64 2
bpf_dispatcher 1048 2
bpf_verifier_log 1048 1
bpf_struct_ops_value 64 1
[root@five ~]#
second column is size, third is number of holes (before your patch).
bpf_ringbuf is interesting (this is all using /sys/kernel/btf/vmlinux):
[root@five ~]# pahole bpf_ringbuf
struct bpf_ringbuf {
wait_queue_head_t waitq; /* 0 24 */
struct irq_work work; /* 24 24 */
u64 mask; /* 48 8 */
struct page * * pages; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
int nr_pages; /* 64 4 */
/* XXX 60 bytes hole, try to pack */
/* --- cacheline 2 boundary (128 bytes) --- */
spinlock_t spinlock; /* 128 4 */
/* XXX 3964 bytes hole, try to pack */
/* --- cacheline 64 boundary (4096 bytes) --- */
long unsigned int consumer_pos; /* 4096 8 */
/* XXX 4088 bytes hole, try to pack */
/* --- cacheline 128 boundary (8192 bytes) --- */
long unsigned int producer_pos; /* 8192 8 */
/* XXX 4088 bytes hole, try to pack */
/* --- cacheline 192 boundary (12288 bytes) --- */
char data[]; /* 12288 0 */
/* size: 12288, cachelines: 192, members: 9 */
/* sum members: 88, holes: 4, sum holes: 12200 */
};
[root@five ~]#
Which seems crazy, lemme see using DWARF:
[root@five ~]# pahole -F dwarf bpf_ringbuf
struct bpf_ringbuf {
wait_queue_head_t waitq; /* 0 24 */
struct irq_work work; /* 24 24 */
u64 mask; /* 48 8 */
struct page * * pages; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
int nr_pages; /* 64 4 */
/* XXX 60 bytes hole, try to pack */
/* --- cacheline 2 boundary (128 bytes) --- */
spinlock_t spinlock __attribute__((__aligned__(64))); /* 128 4 */
/* XXX 3964 bytes hole, try to pack */
/* --- cacheline 64 boundary (4096 bytes) --- */
long unsigned int consumer_pos __attribute__((__aligned__(4096))); /* 4096 8 */
/* XXX 4088 bytes hole, try to pack */
/* --- cacheline 128 boundary (8192 bytes) --- */
long unsigned int producer_pos __attribute__((__aligned__(4096))); /* 8192 8 */
/* XXX 4088 bytes hole, try to pack */
/* --- cacheline 192 boundary (12288 bytes) --- */
char data[] __attribute__((__aligned__(4096))); /* 12288 0 */
/* size: 12288, cachelines: 192, members: 9 */
/* sum members: 88, holes: 4, sum holes: 12200 */
/* forced alignments: 4, forced holes: 4, sum forced holes: 12200 */
} __attribute__((__aligned__(4096)));
[root@five ~]#
Yeah, matches the header files:
struct bpf_ringbuf {
wait_queue_head_t waitq;
struct irq_work work;
u64 mask;
struct page **pages;
int nr_pages;
spinlock_t spinlock ____cacheline_aligned_in_smp;
/* Consumer and producer counters are put into separate pages to allow
* mapping consumer page as r/w, but restrict producer page to r/o.
* This protects producer position from being modified by user-space
* application and ruining in-kernel position tracking.
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
char data[] __aligned(PAGE_SIZE);
};
But:
[root@five ~]# pahole bpf_verifier_env
struct bpf_verifier_env {
u32 insn_idx; /* 0 4 */
u32 prev_insn_idx; /* 4 4 */
struct bpf_prog * prog; /* 8 8 */
const struct bpf_verifier_ops * ops; /* 16 8 */
struct bpf_verifier_stack_elem * head; /* 24 8 */
int stack_size; /* 32 4 */
bool strict_alignment; /* 36 1 */
bool test_state_freq; /* 37 1 */
/* XXX 2 bytes hole, try to pack */
struct bpf_verifier_state * cur_state; /* 40 8 */
struct bpf_verifier_state_list * * explored_states; /* 48 8 */
struct bpf_verifier_state_list * free_list; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct bpf_map * used_maps[64]; /* 64 512 */
/* --- cacheline 9 boundary (576 bytes) --- */
u32 used_map_cnt; /* 576 4 */
u32 id_gen; /* 580 4 */
bool allow_ptr_leaks; /* 584 1 */
bool allow_ptr_to_map_access; /* 585 1 */
bool bpf_capable; /* 586 1 */
bool bypass_spec_v1; /* 587 1 */
bool bypass_spec_v4; /* 588 1 */
bool seen_direct_write; /* 589 1 */
/* XXX 2 bytes hole, try to pack */
struct bpf_insn_aux_data * insn_aux_data; /* 592 8 */
const struct bpf_line_info * prev_linfo; /* 600 8 */
struct bpf_verifier_log log; /* 608 1048 */
/* --- cacheline 25 boundary (1600 bytes) was 56 bytes ago --- */
struct bpf_subprog_info subprog_info[257]; /* 1656 3084 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 74 boundary (4736 bytes) was 8 bytes ago --- */
struct {
int * insn_state; /* 4744 8 */
int * insn_stack; /* 4752 8 */
int cur_stack; /* 4760 4 */
} cfg; /* 4744 24 */
/* XXX last struct has 4 bytes of padding */
u32 pass_cnt; /* 4768 4 */
u32 subprog_cnt; /* 4772 4 */
u32 prev_insn_processed; /* 4776 4 */
u32 insn_processed; /* 4780 4 */
u32 prev_jmps_processed; /* 4784 4 */
u32 jmps_processed; /* 4788 4 */
u64 verification_time; /* 4792 8 */
/* --- cacheline 75 boundary (4800 bytes) --- */
u32 max_states_per_insn; /* 4800 4 */
u32 total_states; /* 4804 4 */
u32 peak_states; /* 4808 4 */
u32 longest_mark_read_walk; /* 4812 4 */
/* size: 4816, cachelines: 76, members: 36 */
/* sum members: 4808, holes: 3, sum holes: 8 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 16 bytes */
};
[root@five ~]#
[root@five ~]# pahole --reorganize bpf_verifier_env | tail
u32 jmps_processed; /* 4788 4 */
u64 verification_time; /* 4792 8 */
/* --- cacheline 75 boundary (4800 bytes) --- */
u32 max_states_per_insn; /* 4800 4 */
u32 total_states; /* 4804 4 */
/* size: 4808, cachelines: 76, members: 36 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 8 bytes */
}; /* saved 8 bytes! */
[root@five ~]#
And headers have no explicit alignment in the headers. Maybe doing the
reorg will not help.
But looking at disasm for rep stos + pahole... humm...
:-)
One last comment:
[root@five ~]# pahole bpf_trampoline
struct bpf_trampoline {
struct hlist_node hlist; /* 0 16 */
struct mutex mutex; /* 16 32 */
refcount_t refcnt; /* 48 4 */
/* XXX 4 bytes hole, try to pack */
u64 key; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct {
struct btf_func_model model; /* 64 14 */
/* XXX 2 bytes hole, try to pack */
void * addr; /* 80 8 */
bool ftrace_managed; /* 88 1 */
} func; /* 64 32 */
/* XXX last struct has 7 bytes of padding */
struct bpf_prog * extension_prog; /* 96 8 */
struct hlist_head progs_hlist[3]; /* 104 24 */
/* --- cacheline 2 boundary (128 bytes) --- */
int progs_cnt[3]; /* 128 12 */
/* XXX 4 bytes hole, try to pack */
void * image; /* 144 8 */
u64 selector; /* 152 8 */
struct bpf_ksym ksym; /* 160 216 */
/* XXX last struct has 7 bytes of padding */
/* size: 376, cachelines: 6, members: 11 */
/* sum members: 368, holes: 2, sum holes: 8 */
/* paddings: 2, sum paddings: 14 */
/* last cacheline: 56 bytes */
};
[root@five ~]#
perhaps moving ftrace_managed to before addr in that anonymous struct on
the 'func' member may help?
- Arnaldo
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
2020-09-10 12:21 ` Jesper Dangaard Brouer
2020-09-10 15:19 ` Arnaldo Carvalho de Melo
@ 2020-09-11 0:53 ` Alexei Starovoitov
2020-09-11 8:05 ` Lorenz Bauer
2 siblings, 1 reply; 5+ messages in thread
From: Alexei Starovoitov @ 2020-09-11 0:53 UTC (permalink / raw)
To: Lorenz Bauer
Cc: Alexei Starovoitov, Daniel Borkmann, Jakub Sitnicki, bpf, kernel-team
On Thu, Sep 10, 2020 at 4:03 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> This leads to suboptimal instructions being generated (IPv4, x86):
>
> Fix this by moving around sport and dport. pahole confirms there
> are no more holes:
>
> Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Applied to bpf-next.
I feel it's a bit of a stretch to consider it a fix, but if you really
insist I can let it go as a fix
and reapply to a different tree. Just let me know.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
2020-09-11 0:53 ` Alexei Starovoitov
@ 2020-09-11 8:05 ` Lorenz Bauer
0 siblings, 0 replies; 5+ messages in thread
From: Lorenz Bauer @ 2020-09-11 8:05 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Alexei Starovoitov, Daniel Borkmann, Jakub Sitnicki, bpf, kernel-team
On Fri, 11 Sep 2020 at 01:53, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Sep 10, 2020 at 4:03 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
> >
> > As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> > This leads to suboptimal instructions being generated (IPv4, x86):
> >
> > Fix this by moving around sport and dport. pahole confirms there
> > are no more holes:
> >
> > Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
> > Suggested-by: Alexei Starovoitov <ast@kernel.org>
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
>
> Applied to bpf-next.
> I feel it's a bit of a stretch to consider it a fix, but if you really
> insist I can let it go as a fix
> and reapply to a different tree. Just let me know.
Thanks, bpf-next is fine by me.
--
Lorenz Bauer | Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
www.cloudflare.com
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-09-11 8:05 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
2020-09-10 12:21 ` Jesper Dangaard Brouer
2020-09-10 15:19 ` Arnaldo Carvalho de Melo
2020-09-11 0:53 ` Alexei Starovoitov
2020-09-11 8:05 ` Lorenz Bauer
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.