From: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
To: andrii@kernel.org, ast@kernel.org,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	daniel@iogearbox.net, john.fastabend@gmail.com, kafai@fb.com,
	kpsingh@chromium.org, Michael Ellerman <mpe@ellerman.id.au>,
	Paul Mackerras <paulus@samba.org>,
	sandipan@linux.ibm.com, songliubraving@fb.com, yhs@fb.com
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, netdev@vger.kernel.org
Subject: Re: [PATCH v2 8/8] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
Date: Fri, 07 Jan 2022 17:21:19 +0530	[thread overview]
Message-ID: <1641556157.ms6rd82ggh.naveen@linux.ibm.com> (raw)
In-Reply-To: <b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu>

Christophe Leroy wrote:
> When the BPF routine doesn't call any function, the non-volatile
> registers can be reallocated to volatile registers in order to
> avoid having to save/restore them on the stack.
> 
> Before this patch, the test #359 ADD default X is:
> 
>    0:	7c 64 1b 78 	mr      r4,r3
>    4:	38 60 00 00 	li      r3,0
>    8:	94 21 ff b0 	stwu    r1,-80(r1)
>    c:	60 00 00 00 	nop
>   10:	92 e1 00 2c 	stw     r23,44(r1)
>   14:	93 01 00 30 	stw     r24,48(r1)
>   18:	93 21 00 34 	stw     r25,52(r1)
>   1c:	93 41 00 38 	stw     r26,56(r1)
>   20:	39 80 00 00 	li      r12,0
>   24:	39 60 00 00 	li      r11,0
>   28:	3b 40 00 00 	li      r26,0
>   2c:	3b 20 00 00 	li      r25,0
>   30:	7c 98 23 78 	mr      r24,r4
>   34:	7c 77 1b 78 	mr      r23,r3
>   38:	39 80 00 42 	li      r12,66
>   3c:	39 60 00 00 	li      r11,0
>   40:	7d 8c d2 14 	add     r12,r12,r26
>   44:	39 60 00 00 	li      r11,0
>   48:	7d 83 63 78 	mr      r3,r12
>   4c:	82 e1 00 2c 	lwz     r23,44(r1)
>   50:	83 01 00 30 	lwz     r24,48(r1)
>   54:	83 21 00 34 	lwz     r25,52(r1)
>   58:	83 41 00 38 	lwz     r26,56(r1)
>   5c:	38 21 00 50 	addi    r1,r1,80
>   60:	4e 80 00 20 	blr
> 
> After this patch, the same test has become:
> 
>    0:	7c 64 1b 78 	mr      r4,r3
>    4:	38 60 00 00 	li      r3,0
>    8:	94 21 ff b0 	stwu    r1,-80(r1)
>    c:	60 00 00 00 	nop
>   10:	39 80 00 00 	li      r12,0
>   14:	39 60 00 00 	li      r11,0
>   18:	39 00 00 00 	li      r8,0
>   1c:	38 e0 00 00 	li      r7,0
>   20:	7c 86 23 78 	mr      r6,r4
>   24:	7c 65 1b 78 	mr      r5,r3
>   28:	39 80 00 42 	li      r12,66
>   2c:	39 60 00 00 	li      r11,0
>   30:	7d 8c 42 14 	add     r12,r12,r8
>   34:	39 60 00 00 	li      r11,0
>   38:	7d 83 63 78 	mr      r3,r12
>   3c:	38 21 00 50 	addi    r1,r1,80
>   40:	4e 80 00 20 	blr
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/net/bpf_jit.h        | 16 ++++++++++++++++
>  arch/powerpc/net/bpf_jit64.h      |  2 +-
>  arch/powerpc/net/bpf_jit_comp.c   |  2 ++
>  arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++--
>  arch/powerpc/net/bpf_jit_comp64.c |  4 ++++
>  5 files changed, 51 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index a45b8266355d..776abef4d2a0 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -116,6 +116,15 @@ static inline bool is_nearbranch(int offset)
>  #define SEEN_STACK	0x40000000 /* uses BPF stack */
>  #define SEEN_TAILCALL	0x80000000 /* uses tail calls */
>  
> +#define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
> +#define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
> +
> +#ifdef CONFIG_PPC64
> +extern const int b2p[MAX_BPF_JIT_REG + 2];
> +#else
> +extern const int b2p[MAX_BPF_JIT_REG + 1];
> +#endif
> +
>  struct codegen_context {
>  	/*
>  	 * This is used to track register usage as well
> @@ -129,6 +138,7 @@ struct codegen_context {
>  	unsigned int seen;
>  	unsigned int idx;
>  	unsigned int stack_size;
> +	int b2p[ARRAY_SIZE(b2p)];
>  };
>  
>  static inline void bpf_flush_icache(void *start, void *end)
> @@ -147,11 +157,17 @@ static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
>  	ctx->seen |= 1 << (31 - i);
>  }
>  
> +static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
> +{
> +	ctx->seen &= ~(1 << (31 - i));
> +}
> +
>  void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
>  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
>  		       u32 *addrs, bool extra_pass);
>  void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
> +void bpf_jit_realloc_regs(struct codegen_context *ctx);
>  
>  #endif
>  
> diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
> index b05f2e67bba1..7b713edfa7e2 100644
> --- a/arch/powerpc/net/bpf_jit64.h
> +++ b/arch/powerpc/net/bpf_jit64.h
> @@ -39,7 +39,7 @@
>  #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
>  
>  /* BPF to ppc register mappings */
> -static const int b2p[] = {
> +const int b2p[MAX_BPF_JIT_REG + 2] = {
>  	/* function return value */
>  	[BPF_REG_0] = 8,
>  	/* function arguments */
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index efac89964873..798ac4350a82 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -143,6 +143,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>  	}
>  
>  	memset(&cgctx, 0, sizeof(struct codegen_context));
> +	memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
>  
>  	/* Make sure that the stack is quadword aligned. */
>  	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
> @@ -167,6 +168,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>  		}
>  	}
>  
> +	bpf_jit_realloc_regs(&cgctx);
>  	/*
>  	 * Pretend to build prologue, given the features we've seen.  This will
>  	 * update ctgtx.idx as it pretends to output instructions, then we can
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 29ce802d7534..003843273b43 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -37,7 +37,7 @@
>  #define TMP_REG	(MAX_BPF_JIT_REG + 0)
>  
>  /* BPF to ppc register mappings */
> -static const int b2p[] = {
> +const int b2p[MAX_BPF_JIT_REG + 1] = {
>  	/* function return value */
>  	[BPF_REG_0] = 12,
>  	/* function arguments */
> @@ -60,7 +60,7 @@ static const int b2p[] = {
>  
>  static int bpf_to_ppc(struct codegen_context *ctx, int reg)
>  {
> -	return b2p[reg];
> +	return ctx->b2p[reg];
>  }
>  
>  /* PPC NVR range -- update this if we ever use NVRs below r17 */
> @@ -77,6 +77,32 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
>  	return BPF_PPC_STACKFRAME(ctx) - 4;
>  }
>  
> +void bpf_jit_realloc_regs(struct codegen_context *ctx)
> +{
> +	if (ctx->seen & SEEN_FUNC)
> +		return;

Can't you remap BPF_REG_5, BPF_REG_AX and TMP_REG regardless of 
SEEN_FUNC?

- Naveen
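
For readers less familiar with the PPC32 JIT internals, here is a minimal,
illustrative sketch of the kind of remapping pass under discussion. It reuses
the ctx->b2p[] table and the bpf_set/clear_seen_register() helpers from the
quoted patch, but the loop itself is an assumption: the body of
bpf_jit_realloc_regs() is trimmed from the quote above, and the real code also
has to cope with 64-bit BPF registers occupying pairs of 32-bit ppc registers,
which this sketch ignores. (For reference, the "seen" bit for ppc register i
is 1 << (31 - i), which is why SEEN_VREG_MASK covers bits 19-28 for the
volatile r3-r12 and SEEN_NVREG_MASK covers bits 0-17 for the non-volatile
r14-r31.)

/*
 * Illustrative sketch only -- not the trimmed implementation.
 * Move every in-use non-volatile register to a free volatile one
 * when the program makes no helper calls.
 */
void bpf_jit_realloc_regs(struct codegen_context *ctx)
{
	int i, new_reg;

	if (ctx->seen & SEEN_FUNC)
		return;

	for (i = 0; i < ARRAY_SIZE(ctx->b2p); i++) {
		int old_reg = ctx->b2p[i];

		/* Only remap non-volatile registers that are in use. */
		if (old_reg < 14 || !(ctx->seen & (1 << (31 - old_reg))))
			continue;

		/* Find a volatile register (r3-r12) not used yet. */
		for (new_reg = 3; new_reg <= 12; new_reg++) {
			if (ctx->seen & (1 << (31 - new_reg)))
				continue;

			ctx->b2p[i] = new_reg;
			bpf_clear_seen_register(ctx, old_reg);
			bpf_set_seen_register(ctx, new_reg);
			break;
		}
	}
}

With such a pass, a program that never calls a helper ends up touching only
r3-r12, so the prologue/epilogue no longer needs the stw/lwz save/restore
sequence visible in the "before" disassembly.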

