From: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
To: andrii@kernel.org, ast@kernel.org,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
Christophe Leroy <christophe.leroy@csgroup.eu>,
daniel@iogearbox.net, john.fastabend@gmail.com, kafai@fb.com,
kpsingh@chromium.org, Michael Ellerman <mpe@ellerman.id.au>,
Paul Mackerras <paulus@samba.org>,
sandipan@linux.ibm.com, songliubraving@fb.com, yhs@fb.com
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, netdev@vger.kernel.org
Subject: Re: [PATCH v2 8/8] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
Date: Fri, 07 Jan 2022 17:21:19 +0530 [thread overview]
Message-ID: <1641556157.ms6rd82ggh.naveen@linux.ibm.com> (raw)
In-Reply-To: <b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu>
Christophe Leroy wrote:
> When the BPF routine doesn't call any function, the non-volatile
> registers can be reallocated to volatile registers in order to
> avoid having to save and restore them on the stack.
>
> Before this patch, the test #359 ADD default X is:
>
> 0: 7c 64 1b 78 mr r4,r3
> 4: 38 60 00 00 li r3,0
> 8: 94 21 ff b0 stwu r1,-80(r1)
> c: 60 00 00 00 nop
> 10: 92 e1 00 2c stw r23,44(r1)
> 14: 93 01 00 30 stw r24,48(r1)
> 18: 93 21 00 34 stw r25,52(r1)
> 1c: 93 41 00 38 stw r26,56(r1)
> 20: 39 80 00 00 li r12,0
> 24: 39 60 00 00 li r11,0
> 28: 3b 40 00 00 li r26,0
> 2c: 3b 20 00 00 li r25,0
> 30: 7c 98 23 78 mr r24,r4
> 34: 7c 77 1b 78 mr r23,r3
> 38: 39 80 00 42 li r12,66
> 3c: 39 60 00 00 li r11,0
> 40: 7d 8c d2 14 add r12,r12,r26
> 44: 39 60 00 00 li r11,0
> 48: 7d 83 63 78 mr r3,r12
> 4c: 82 e1 00 2c lwz r23,44(r1)
> 50: 83 01 00 30 lwz r24,48(r1)
> 54: 83 21 00 34 lwz r25,52(r1)
> 58: 83 41 00 38 lwz r26,56(r1)
> 5c: 38 21 00 50 addi r1,r1,80
> 60: 4e 80 00 20 blr
>
> After this patch, the same test has become:
>
> 0: 7c 64 1b 78 mr r4,r3
> 4: 38 60 00 00 li r3,0
> 8: 94 21 ff b0 stwu r1,-80(r1)
> c: 60 00 00 00 nop
> 10: 39 80 00 00 li r12,0
> 14: 39 60 00 00 li r11,0
> 18: 39 00 00 00 li r8,0
> 1c: 38 e0 00 00 li r7,0
> 20: 7c 86 23 78 mr r6,r4
> 24: 7c 65 1b 78 mr r5,r3
> 28: 39 80 00 42 li r12,66
> 2c: 39 60 00 00 li r11,0
> 30: 7d 8c 42 14 add r12,r12,r8
> 34: 39 60 00 00 li r11,0
> 38: 7d 83 63 78 mr r3,r12
> 3c: 38 21 00 50 addi r1,r1,80
> 40: 4e 80 00 20 blr
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> arch/powerpc/net/bpf_jit.h | 16 ++++++++++++++++
> arch/powerpc/net/bpf_jit64.h | 2 +-
> arch/powerpc/net/bpf_jit_comp.c | 2 ++
> arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++--
> arch/powerpc/net/bpf_jit_comp64.c | 4 ++++
> 5 files changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index a45b8266355d..776abef4d2a0 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -116,6 +116,15 @@ static inline bool is_nearbranch(int offset)
> #define SEEN_STACK 0x40000000 /* uses BPF stack */
> #define SEEN_TAILCALL 0x80000000 /* uses tail calls */
>
> +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
> +#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
> +
> +#ifdef CONFIG_PPC64
> +extern const int b2p[MAX_BPF_JIT_REG + 2];
> +#else
> +extern const int b2p[MAX_BPF_JIT_REG + 1];
> +#endif
> +
> struct codegen_context {
> /*
> * This is used to track register usage as well
> @@ -129,6 +138,7 @@ struct codegen_context {
> unsigned int seen;
> unsigned int idx;
> unsigned int stack_size;
> + int b2p[ARRAY_SIZE(b2p)];
> };
>
> static inline void bpf_flush_icache(void *start, void *end)
> @@ -147,11 +157,17 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
> ctx->seen |= 1 << (31 - i);
> }
>
> +static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
> +{
> + ctx->seen &= ~(1 << (31 - i));
> +}
> +
> void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
> int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> u32 *addrs, bool extra_pass);
> void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
> void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
> +void bpf_jit_realloc_regs(struct codegen_context *ctx);
>
> #endif
>
> diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
> index b05f2e67bba1..7b713edfa7e2 100644
> --- a/arch/powerpc/net/bpf_jit64.h
> +++ b/arch/powerpc/net/bpf_jit64.h
> @@ -39,7 +39,7 @@
> #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
>
> /* BPF to ppc register mappings */
> -static const int b2p[] = {
> +const int b2p[MAX_BPF_JIT_REG + 2] = {
> /* function return value */
> [BPF_REG_0] = 8,
> /* function arguments */
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index efac89964873..798ac4350a82 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -143,6 +143,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> }
>
> memset(&cgctx, 0, sizeof(struct codegen_context));
> + memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
>
> /* Make sure that the stack is quadword aligned. */
> cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
> @@ -167,6 +168,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> }
> }
>
> + bpf_jit_realloc_regs(&cgctx);
> /*
> * Pretend to build prologue, given the features we've seen. This will
> * update ctgtx.idx as it pretends to output instructions, then we can
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 29ce802d7534..003843273b43 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -37,7 +37,7 @@
> #define TMP_REG (MAX_BPF_JIT_REG + 0)
>
> /* BPF to ppc register mappings */
> -static const int b2p[] = {
> +const int b2p[MAX_BPF_JIT_REG + 1] = {
> /* function return value */
> [BPF_REG_0] = 12,
> /* function arguments */
> @@ -60,7 +60,7 @@ static const int b2p[] = {
>
> static int bpf_to_ppc(struct codegen_context *ctx, int reg)
> {
> - return b2p[reg];
> + return ctx->b2p[reg];
> }
>
> /* PPC NVR range -- update this if we ever use NVRs below r17 */
> @@ -77,6 +77,32 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
> return BPF_PPC_STACKFRAME(ctx) - 4;
> }
>
> +void bpf_jit_realloc_regs(struct codegen_context *ctx)
> +{
> + if (ctx->seen & SEEN_FUNC)
> + return;
Can't you remap BPF_REG_5, BPF_REG_AX and TMP_REG regardless of
SEEN_FUNC?
- Naveen
next prev parent reply other threads:[~2022-01-07 11:52 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-22 16:37 [PATCH v2 0/8] Implement EBPF on powerpc32 Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 1/8] powerpc/bpf: Remove classical BPF support for PPC32 Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 2/8] powerpc/bpf: Change register numbering for bpf_set/is_seen_register() Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 3/8] powerpc/bpf: Move common helpers into bpf_jit.h Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 4/8] powerpc/bpf: Move common functions into bpf_jit_comp.c Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 5/8] powerpc/bpf: Change values of SEEN_ flags Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 6/8] powerpc/asm: Add some opcodes in asm/ppc-opcode.h for PPC32 eBPF Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 7/8] powerpc/bpf: Implement extended BPF on PPC32 Christophe Leroy
2021-03-22 16:37 ` [PATCH v2 8/8] powerpc/bpf: Reallocate BPF registers to volatile registers when possible " Christophe Leroy
2022-01-07 11:51 ` Naveen N. Rao [this message]
2022-01-10 12:13 ` Christophe Leroy
2021-03-22 17:53 ` [PATCH v2 0/8] Implement EBPF on powerpc32 Andrii Nakryiko
2021-03-26 14:41 ` Christophe Leroy
2021-03-26 18:09 ` Andrii Nakryiko
2021-04-10 14:28 ` Michael Ellerman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1641556157.ms6rd82ggh.naveen@linux.ibm.com \
--to=naveen.n.rao@linux.ibm.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=benh@kernel.crashing.org \
--cc=bpf@vger.kernel.org \
--cc=christophe.leroy@csgroup.eu \
--cc=daniel@iogearbox.net \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=kpsingh@chromium.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mpe@ellerman.id.au \
--cc=netdev@vger.kernel.org \
--cc=paulus@samba.org \
--cc=sandipan@linux.ibm.com \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).