All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
@ 2020-09-10 11:02 Lorenz Bauer
  2020-09-10 12:21 ` Jesper Dangaard Brouer
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Lorenz Bauer @ 2020-09-10 11:02 UTC (permalink / raw)
  To: ast, daniel, jakub; +Cc: bpf, kernel-team, Lorenz Bauer

As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
This leads to suboptimal instructions being generated (IPv4, x86):

    1372                    struct bpf_sk_lookup_kern ctx = {
       0xffffffff81b87f30 <+624>:   xor    %eax,%eax
       0xffffffff81b87f32 <+626>:   mov    $0x6,%ecx
       0xffffffff81b87f37 <+631>:   lea    0x90(%rsp),%rdi
       0xffffffff81b87f3f <+639>:   movl   $0x110002,0x88(%rsp)
       0xffffffff81b87f4a <+650>:   rep stos %rax,%es:(%rdi)
       0xffffffff81b87f4d <+653>:   mov    0x8(%rsp),%eax
       0xffffffff81b87f51 <+657>:   mov    %r13d,0x90(%rsp)
       0xffffffff81b87f59 <+665>:   incl   %gs:0x7e4970a0(%rip)
       0xffffffff81b87f60 <+672>:   mov    %eax,0x8c(%rsp)
       0xffffffff81b87f67 <+679>:   movzwl 0x10(%rsp),%eax
       0xffffffff81b87f6c <+684>:   mov    %ax,0xa8(%rsp)
       0xffffffff81b87f74 <+692>:   movzwl 0x38(%rsp),%eax
       0xffffffff81b87f79 <+697>:   mov    %ax,0xaa(%rsp)

Fix this by moving around sport and dport. pahole confirms there
are no more holes:

    struct bpf_sk_lookup_kern {
        u16                        family;       /*     0     2 */
        u16                        protocol;     /*     2     2 */
        __be16                     sport;        /*     4     2 */
        u16                        dport;        /*     6     2 */
        struct {
                __be32             saddr;        /*     8     4 */
                __be32             daddr;        /*    12     4 */
        } v4;                                    /*     8     8 */
        struct {
                const struct in6_addr  * saddr;  /*    16     8 */
                const struct in6_addr  * daddr;  /*    24     8 */
        } v6;                                    /*    16    16 */
        struct sock *              selected_sk;  /*    32     8 */
        bool                       no_reuseport; /*    40     1 */

        /* size: 48, cachelines: 1, members: 8 */
        /* padding: 7 */
        /* last cacheline: 48 bytes */
    };

The assembly also doesn't contain the pesky rep stos anymore:

    1372                    struct bpf_sk_lookup_kern ctx = {
       0xffffffff81b87f60 <+624>:   movzwl 0x10(%rsp),%eax
       0xffffffff81b87f65 <+629>:   movq   $0x0,0xa8(%rsp)
       0xffffffff81b87f71 <+641>:   movq   $0x0,0xb0(%rsp)
       0xffffffff81b87f7d <+653>:   mov    %ax,0x9c(%rsp)
       0xffffffff81b87f85 <+661>:   movzwl 0x38(%rsp),%eax
       0xffffffff81b87f8a <+666>:   movq   $0x0,0xb8(%rsp)
       0xffffffff81b87f96 <+678>:   mov    %ax,0x9e(%rsp)
       0xffffffff81b87f9e <+686>:   mov    0x8(%rsp),%eax
       0xffffffff81b87fa2 <+690>:   movq   $0x0,0xc0(%rsp)
       0xffffffff81b87fae <+702>:   movl   $0x110002,0x98(%rsp)
       0xffffffff81b87fb9 <+713>:   mov    %eax,0xa0(%rsp)
       0xffffffff81b87fc0 <+720>:   mov    %r13d,0xa4(%rsp)

1: https://lore.kernel.org/bpf/CAADnVQKE6y9h2fwX6OS837v-Uf+aBXnT_JXiN_bbo2gitZQ3tA@mail.gmail.com/

Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/linux/filter.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 995625950cc1..e962dd8117d8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1287,6 +1287,8 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
 struct bpf_sk_lookup_kern {
 	u16		family;
 	u16		protocol;
+	__be16		sport;
+	u16		dport;
 	struct {
 		__be32 saddr;
 		__be32 daddr;
@@ -1295,8 +1297,6 @@ struct bpf_sk_lookup_kern {
 		const struct in6_addr *saddr;
 		const struct in6_addr *daddr;
 	} v6;
-	__be16		sport;
-	u16		dport;
 	struct sock	*selected_sk;
 	bool		no_reuseport;
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
  2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
@ 2020-09-10 12:21 ` Jesper Dangaard Brouer
  2020-09-10 15:19 ` Arnaldo Carvalho de Melo
  2020-09-11  0:53 ` Alexei Starovoitov
  2 siblings, 0 replies; 5+ messages in thread
From: Jesper Dangaard Brouer @ 2020-09-10 12:21 UTC (permalink / raw)
  To: Lorenz Bauer
  Cc: brouer, ast, daniel, jakub, bpf, kernel-team, Stefano Brivio,
	Lorenzo Bianconi

On Thu, 10 Sep 2020 12:02:48 +0100
Lorenz Bauer <lmb@cloudflare.com> wrote:

> As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> This leads to suboptimal instructions being generated (IPv4, x86):
> 
>     1372                    struct bpf_sk_lookup_kern ctx = {
>        0xffffffff81b87f30 <+624>:   xor    %eax,%eax
>        0xffffffff81b87f32 <+626>:   mov    $0x6,%ecx
>        0xffffffff81b87f37 <+631>:   lea    0x90(%rsp),%rdi
>        0xffffffff81b87f3f <+639>:   movl   $0x110002,0x88(%rsp)
>        0xffffffff81b87f4a <+650>:   rep stos %rax,%es:(%rdi)
>        0xffffffff81b87f4d <+653>:   mov    0x8(%rsp),%eax
>        0xffffffff81b87f51 <+657>:   mov    %r13d,0x90(%rsp)
>        0xffffffff81b87f59 <+665>:   incl   %gs:0x7e4970a0(%rip)
>        0xffffffff81b87f60 <+672>:   mov    %eax,0x8c(%rsp)
>        0xffffffff81b87f67 <+679>:   movzwl 0x10(%rsp),%eax
>        0xffffffff81b87f6c <+684>:   mov    %ax,0xa8(%rsp)
>        0xffffffff81b87f74 <+692>:   movzwl 0x38(%rsp),%eax
>        0xffffffff81b87f79 <+697>:   mov    %ax,0xaa(%rsp)
> 
> Fix this by moving around sport and dport. pahole confirms there
> are no more holes:
> 
>     struct bpf_sk_lookup_kern {
>         u16                        family;       /*     0     2 */
>         u16                        protocol;     /*     2     2 */
>         __be16                     sport;        /*     4     2 */
>         u16                        dport;        /*     6     2 */
>         struct {
>                 __be32             saddr;        /*     8     4 */
>                 __be32             daddr;        /*    12     4 */
>         } v4;                                    /*     8     8 */
>         struct {
>                 const struct in6_addr  * saddr;  /*    16     8 */
>                 const struct in6_addr  * daddr;  /*    24     8 */
>         } v6;                                    /*    16    16 */
>         struct sock *              selected_sk;  /*    32     8 */
>         bool                       no_reuseport; /*    40     1 */
> 
>         /* size: 48, cachelines: 1, members: 8 */
>         /* padding: 7 */
>         /* last cacheline: 48 bytes */
>     };
> 
> The assembly also doesn't contain the pesky rep stos anymore:
> 
>     1372                    struct bpf_sk_lookup_kern ctx = {
>        0xffffffff81b87f60 <+624>:   movzwl 0x10(%rsp),%eax
>        0xffffffff81b87f65 <+629>:   movq   $0x0,0xa8(%rsp)
>        0xffffffff81b87f71 <+641>:   movq   $0x0,0xb0(%rsp)
>        0xffffffff81b87f7d <+653>:   mov    %ax,0x9c(%rsp)
>        0xffffffff81b87f85 <+661>:   movzwl 0x38(%rsp),%eax
>        0xffffffff81b87f8a <+666>:   movq   $0x0,0xb8(%rsp)
>        0xffffffff81b87f96 <+678>:   mov    %ax,0x9e(%rsp)
>        0xffffffff81b87f9e <+686>:   mov    0x8(%rsp),%eax
>        0xffffffff81b87fa2 <+690>:   movq   $0x0,0xc0(%rsp)
>        0xffffffff81b87fae <+702>:   movl   $0x110002,0x98(%rsp)
>        0xffffffff81b87fb9 <+713>:   mov    %eax,0xa0(%rsp)
>        0xffffffff81b87fc0 <+720>:   mov    %r13d,0xa4(%rsp)
> 
> 1: https://lore.kernel.org/bpf/CAADnVQKE6y9h2fwX6OS837v-Uf+aBXnT_JXiN_bbo2gitZQ3tA@mail.gmail.com/
> 
> Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

I'm very happy to see others have also discovered the slowdown of 'rep stos',
as I've been hunting these for years.  My understanding is that the
'rep-stos' slowdown comes from the CPU-instruction saving the CPU-flags
to allow it to be interrupted.  That makes sense when memset zeroing
large areas, but for small mem size structs this is slower than
clearing them in other ways.  I have a micro-benchmark as a
kernel-module here[2], where I explore different methods of memset.

[2] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/lib/time_bench_memset.c

As you have discovered the GCC compiler will generate these rep stos
for clearing a struct if not all members are initialized. If you want
to fix some more of these, then I remember there were some in the
net/core/flow_dissector.c code.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
  2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
  2020-09-10 12:21 ` Jesper Dangaard Brouer
@ 2020-09-10 15:19 ` Arnaldo Carvalho de Melo
  2020-09-11  0:53 ` Alexei Starovoitov
  2 siblings, 0 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-09-10 15:19 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, jakub, bpf, kernel-team

Em Thu, Sep 10, 2020 at 12:02:48PM +0100, Lorenz Bauer escreveu:
> As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> This leads to suboptimal instructions being generated (IPv4, x86):
> 
>     1372                    struct bpf_sk_lookup_kern ctx = {
>        0xffffffff81b87f30 <+624>:   xor    %eax,%eax
>        0xffffffff81b87f32 <+626>:   mov    $0x6,%ecx
>        0xffffffff81b87f37 <+631>:   lea    0x90(%rsp),%rdi
>        0xffffffff81b87f3f <+639>:   movl   $0x110002,0x88(%rsp)
>        0xffffffff81b87f4a <+650>:   rep stos %rax,%es:(%rdi)
>        0xffffffff81b87f4d <+653>:   mov    0x8(%rsp),%eax
>        0xffffffff81b87f51 <+657>:   mov    %r13d,0x90(%rsp)
>        0xffffffff81b87f59 <+665>:   incl   %gs:0x7e4970a0(%rip)
>        0xffffffff81b87f60 <+672>:   mov    %eax,0x8c(%rsp)
>        0xffffffff81b87f67 <+679>:   movzwl 0x10(%rsp),%eax
>        0xffffffff81b87f6c <+684>:   mov    %ax,0xa8(%rsp)
>        0xffffffff81b87f74 <+692>:   movzwl 0x38(%rsp),%eax
>        0xffffffff81b87f79 <+697>:   mov    %ax,0xaa(%rsp)
> 
> Fix this by moving around sport and dport. pahole confirms there
> are no more holes:
> 
>     struct bpf_sk_lookup_kern {
>         u16                        family;       /*     0     2 */
>         u16                        protocol;     /*     2     2 */
>         __be16                     sport;        /*     4     2 */
>         u16                        dport;        /*     6     2 */
>         struct {
>                 __be32             saddr;        /*     8     4 */
>                 __be32             daddr;        /*    12     4 */
>         } v4;                                    /*     8     8 */
>         struct {
>                 const struct in6_addr  * saddr;  /*    16     8 */
>                 const struct in6_addr  * daddr;  /*    24     8 */
>         } v6;                                    /*    16    16 */
>         struct sock *              selected_sk;  /*    32     8 */
>         bool                       no_reuseport; /*    40     1 */
> 
>         /* size: 48, cachelines: 1, members: 8 */
>         /* padding: 7 */
>         /* last cacheline: 48 bytes */
>     };

Cool, looking at:

[root@five ~]# pahole --sizes | grep ^bpf | sort -nr -k2 | head
bpf_ringbuf	12288	4
bpf_ctx_convert	6960	0
bpf_verifier_env	4816	3
bpf_func_state	1360	1
bpf_trace_sample_data	1152	0
bpf_verifier_log	1048	1
bpf_dispatcher	1048	2
bpf_struct_ops	976	0
bpf_seq_printf_buf	768	0
bpf_htab	640	1
[root@five ~]# pahole --sizes | grep ^bpf | sort -nr -k3 | head
bpf_ringbuf	12288	4
bpf_verifier_env	4816	3
bpf_verifier_state	120	2
bpf_trampoline	376	2
bpf_sk_lookup_kern	56	2
bpf_reg_state	120	2
bpf_func_proto	64	2
bpf_dispatcher	1048	2
bpf_verifier_log	1048	1
bpf_struct_ops_value	64	1
[root@five ~]#

second column is size, third is number of holes (before your patch).

bpf_ringbuf is interesting (this is all using /sys/kernel/btf/vmlinux):

[root@five ~]# pahole bpf_ringbuf
struct bpf_ringbuf {
	wait_queue_head_t          waitq;                /*     0    24 */
	struct irq_work            work;                 /*    24    24 */
	u64                        mask;                 /*    48     8 */
	struct page * *            pages;                /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	int                        nr_pages;             /*    64     4 */

	/* XXX 60 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) --- */
	spinlock_t                 spinlock;             /*   128     4 */

	/* XXX 3964 bytes hole, try to pack */

	/* --- cacheline 64 boundary (4096 bytes) --- */
	long unsigned int          consumer_pos;         /*  4096     8 */

	/* XXX 4088 bytes hole, try to pack */

	/* --- cacheline 128 boundary (8192 bytes) --- */
	long unsigned int          producer_pos;         /*  8192     8 */

	/* XXX 4088 bytes hole, try to pack */

	/* --- cacheline 192 boundary (12288 bytes) --- */
	char                       data[];               /* 12288     0 */

	/* size: 12288, cachelines: 192, members: 9 */
	/* sum members: 88, holes: 4, sum holes: 12200 */
};
[root@five ~]#

Which seems crazy, lemme see using DWARF:

[root@five ~]# pahole -F dwarf bpf_ringbuf
struct bpf_ringbuf {
	wait_queue_head_t          waitq;                /*     0    24 */
	struct irq_work            work;                 /*    24    24 */
	u64                        mask;                 /*    48     8 */
	struct page * *            pages;                /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	int                        nr_pages;             /*    64     4 */

	/* XXX 60 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) --- */
	spinlock_t                 spinlock __attribute__((__aligned__(64))); /*   128     4 */

	/* XXX 3964 bytes hole, try to pack */

	/* --- cacheline 64 boundary (4096 bytes) --- */
	long unsigned int          consumer_pos __attribute__((__aligned__(4096))); /*  4096     8 */

	/* XXX 4088 bytes hole, try to pack */

	/* --- cacheline 128 boundary (8192 bytes) --- */
	long unsigned int          producer_pos __attribute__((__aligned__(4096))); /*  8192     8 */

	/* XXX 4088 bytes hole, try to pack */

	/* --- cacheline 192 boundary (12288 bytes) --- */
	char                       data[] __attribute__((__aligned__(4096))); /* 12288     0 */

	/* size: 12288, cachelines: 192, members: 9 */
	/* sum members: 88, holes: 4, sum holes: 12200 */
	/* forced alignments: 4, forced holes: 4, sum forced holes: 12200 */
} __attribute__((__aligned__(4096)));
[root@five ~]#

Yeah, matches the header files:

struct bpf_ringbuf {
        wait_queue_head_t waitq;
        struct irq_work work;
        u64 mask;
        struct page **pages;
        int nr_pages;
        spinlock_t spinlock ____cacheline_aligned_in_smp;
        /* Consumer and producer counters are put into separate pages to allow
         * mapping consumer page as r/w, but restrict producer page to r/o.
         * This protects producer position from being modified by user-space
         * application and ruining in-kernel position tracking.
         */
        unsigned long consumer_pos __aligned(PAGE_SIZE);
        unsigned long producer_pos __aligned(PAGE_SIZE);
        char data[] __aligned(PAGE_SIZE);
};


But:

[root@five ~]# pahole bpf_verifier_env
struct bpf_verifier_env {
	u32                        insn_idx;             /*     0     4 */
	u32                        prev_insn_idx;        /*     4     4 */
	struct bpf_prog *          prog;                 /*     8     8 */
	const struct bpf_verifier_ops  * ops;            /*    16     8 */
	struct bpf_verifier_stack_elem * head;           /*    24     8 */
	int                        stack_size;           /*    32     4 */
	bool                       strict_alignment;     /*    36     1 */
	bool                       test_state_freq;      /*    37     1 */

	/* XXX 2 bytes hole, try to pack */

	struct bpf_verifier_state * cur_state;           /*    40     8 */
	struct bpf_verifier_state_list * * explored_states; /*    48     8 */
	struct bpf_verifier_state_list * free_list;      /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct bpf_map *           used_maps[64];        /*    64   512 */
	/* --- cacheline 9 boundary (576 bytes) --- */
	u32                        used_map_cnt;         /*   576     4 */
	u32                        id_gen;               /*   580     4 */
	bool                       allow_ptr_leaks;      /*   584     1 */
	bool                       allow_ptr_to_map_access; /*   585     1 */
	bool                       bpf_capable;          /*   586     1 */
	bool                       bypass_spec_v1;       /*   587     1 */
	bool                       bypass_spec_v4;       /*   588     1 */
	bool                       seen_direct_write;    /*   589     1 */

	/* XXX 2 bytes hole, try to pack */

	struct bpf_insn_aux_data * insn_aux_data;        /*   592     8 */
	const struct bpf_line_info  * prev_linfo;        /*   600     8 */
	struct bpf_verifier_log    log;                  /*   608  1048 */
	/* --- cacheline 25 boundary (1600 bytes) was 56 bytes ago --- */
	struct bpf_subprog_info    subprog_info[257];    /*  1656  3084 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 74 boundary (4736 bytes) was 8 bytes ago --- */
	struct {
		int *              insn_state;           /*  4744     8 */
		int *              insn_stack;           /*  4752     8 */
		int                cur_stack;            /*  4760     4 */
	} cfg;                                           /*  4744    24 */

	/* XXX last struct has 4 bytes of padding */

	u32                        pass_cnt;             /*  4768     4 */
	u32                        subprog_cnt;          /*  4772     4 */
	u32                        prev_insn_processed;  /*  4776     4 */
	u32                        insn_processed;       /*  4780     4 */
	u32                        prev_jmps_processed;  /*  4784     4 */
	u32                        jmps_processed;       /*  4788     4 */
	u64                        verification_time;    /*  4792     8 */
	/* --- cacheline 75 boundary (4800 bytes) --- */
	u32                        max_states_per_insn;  /*  4800     4 */
	u32                        total_states;         /*  4804     4 */
	u32                        peak_states;          /*  4808     4 */
	u32                        longest_mark_read_walk; /*  4812     4 */

	/* size: 4816, cachelines: 76, members: 36 */
	/* sum members: 4808, holes: 3, sum holes: 8 */
	/* paddings: 1, sum paddings: 4 */
	/* last cacheline: 16 bytes */
};
[root@five ~]#

[root@five ~]# pahole --reorganize bpf_verifier_env | tail
	u32                        jmps_processed;       /*  4788     4 */
	u64                        verification_time;    /*  4792     8 */
	/* --- cacheline 75 boundary (4800 bytes) --- */
	u32                        max_states_per_insn;  /*  4800     4 */
	u32                        total_states;         /*  4804     4 */

	/* size: 4808, cachelines: 76, members: 36 */
	/* paddings: 1, sum paddings: 4 */
	/* last cacheline: 8 bytes */
};   /* saved 8 bytes! */
[root@five ~]#


And headers have no explicit alignment in the headers. Maybe doing the
reorg will not help.

But looking at disasm for rep stos + pahole... humm...

:-)

One last comment:

[root@five ~]# pahole bpf_trampoline
struct bpf_trampoline {
	struct hlist_node          hlist;                /*     0    16 */
	struct mutex               mutex;                /*    16    32 */
	refcount_t                 refcnt;               /*    48     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        key;                  /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct {
		struct btf_func_model model;             /*    64    14 */

		/* XXX 2 bytes hole, try to pack */

		void *             addr;                 /*    80     8 */
		bool               ftrace_managed;       /*    88     1 */
	} func;                                          /*    64    32 */

	/* XXX last struct has 7 bytes of padding */

	struct bpf_prog *          extension_prog;       /*    96     8 */
	struct hlist_head          progs_hlist[3];       /*   104    24 */
	/* --- cacheline 2 boundary (128 bytes) --- */
	int                        progs_cnt[3];         /*   128    12 */

	/* XXX 4 bytes hole, try to pack */

	void *                     image;                /*   144     8 */
	u64                        selector;             /*   152     8 */
	struct bpf_ksym            ksym;                 /*   160   216 */

	/* XXX last struct has 7 bytes of padding */

	/* size: 376, cachelines: 6, members: 11 */
	/* sum members: 368, holes: 2, sum holes: 8 */
	/* paddings: 2, sum paddings: 14 */
	/* last cacheline: 56 bytes */
};
[root@five ~]#

perhaps moving ftrace_managed to before addr in that anonymous struct on
the 'func' member may help?

- Arnaldo


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
  2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
  2020-09-10 12:21 ` Jesper Dangaard Brouer
  2020-09-10 15:19 ` Arnaldo Carvalho de Melo
@ 2020-09-11  0:53 ` Alexei Starovoitov
  2020-09-11  8:05   ` Lorenz Bauer
  2 siblings, 1 reply; 5+ messages in thread
From: Alexei Starovoitov @ 2020-09-11  0:53 UTC (permalink / raw)
  To: Lorenz Bauer
  Cc: Alexei Starovoitov, Daniel Borkmann, Jakub Sitnicki, bpf, kernel-team

On Thu, Sep 10, 2020 at 4:03 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> This leads to suboptimal instructions being generated (IPv4, x86):
>
> Fix this by moving around sport and dport. pahole confirms there
> are no more holes:
>
> Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>

Applied to bpf-next.
I feel it's a bit of a stretch to consider it a fix, but if you really
insist I can let it go as a fix
and reapply to a different tree. Just let me know.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern
  2020-09-11  0:53 ` Alexei Starovoitov
@ 2020-09-11  8:05   ` Lorenz Bauer
  0 siblings, 0 replies; 5+ messages in thread
From: Lorenz Bauer @ 2020-09-11  8:05 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, Daniel Borkmann, Jakub Sitnicki, bpf, kernel-team

On Fri, 11 Sep 2020 at 01:53, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Sep 10, 2020 at 4:03 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
> >
> > As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes.
> > This leads to suboptimal instructions being generated (IPv4, x86):
> >
> > Fix this by moving around sport and dport. pahole confirms there
> > are no more holes:
> >
> > Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point")
> > Suggested-by: Alexei Starovoitov <ast@kernel.org>
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
>
> Applied to bpf-next.
> I feel it's a bit of a stretch to consider it a fix, but if you really
> insist I can let it go as a fix
> and reapply to a different tree. Just let me know.

Thanks, bpf-next is fine by me.

-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-09-11  8:05 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-10 11:02 [PATCH bpf] bpf: plug hole in struct bpf_sk_lookup_kern Lorenz Bauer
2020-09-10 12:21 ` Jesper Dangaard Brouer
2020-09-10 15:19 ` Arnaldo Carvalho de Melo
2020-09-11  0:53 ` Alexei Starovoitov
2020-09-11  8:05   ` Lorenz Bauer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.