netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ARM: net: bpf: improve prologue code sequence
@ 2019-12-09 11:17 Russell King
  2019-12-11 13:56 ` Daniel Borkmann
  0 siblings, 1 reply; 2+ messages in thread
From: Russell King @ 2019-12-09 11:17 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Shubham Bansal, Alexei Starovoitov, Martin KaFai Lau, Song Liu,
	Yonghong Song, Andrii Nakryiko, netdev, bpf, linux-arm-kernel

Improve the prologue code sequence to be able to take advantage of
64-bit stores, changing the code from:

  push    {r4, r5, r6, r7, r8, r9, fp, lr}
  mov     fp, sp
  sub     ip, sp, #80     ; 0x50
  sub     sp, sp, #600    ; 0x258
  str     ip, [fp, #-100] ; 0xffffff9c
  mov     r6, #0
  str     r6, [fp, #-96]  ; 0xffffffa0
  mov     r4, #0
  mov     r3, r4
  mov     r2, r0
  str     r4, [fp, #-104] ; 0xffffff98
  str     r4, [fp, #-108] ; 0xffffff94

to the tighter:

  push    {r4, r5, r6, r7, r8, r9, fp, lr}
  mov     fp, sp
  mov     r3, #0
  sub     r2, sp, #80     ; 0x50
  sub     sp, sp, #600    ; 0x258
  strd    r2, [fp, #-100] ; 0xffffff9c
  mov     r2, #0
  strd    r2, [fp, #-108] ; 0xffffff94
  mov     r2, r0

resulting in a saving of three instructions.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/net/bpf_jit_32.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 97dc386e3cb8..cc29869d12a3 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1260,12 +1260,9 @@ static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
 
 static void build_prologue(struct jit_ctx *ctx)
 {
-	const s8 r0 = bpf2a32[BPF_REG_0][1];
-	const s8 r2 = bpf2a32[BPF_REG_1][1];
-	const s8 r3 = bpf2a32[BPF_REG_1][0];
-	const s8 r4 = bpf2a32[BPF_REG_6][1];
-	const s8 fplo = bpf2a32[BPF_REG_FP][1];
-	const s8 fphi = bpf2a32[BPF_REG_FP][0];
+	const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
+	const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
+	const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
 	const s8 *tcc = bpf2a32[TCALL_CNT];
 
 	/* Save callee saved registers. */
@@ -1278,8 +1275,10 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
 	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
 #endif
-	/* Save frame pointer for later */
-	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+	/* mov r3, #0 */
+	/* sub r2, sp, #SCRATCH_SIZE */
+	emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
+	emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
 
 	ctx->stack_size = imm8m(STACK_SIZE);
 
@@ -1287,18 +1286,15 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
 
 	/* Set up BPF prog stack base register */
-	emit_a32_mov_r(fplo, ARM_IP, ctx);
-	emit_a32_mov_i(fphi, 0, ctx);
+	emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
 
-	/* mov r4, 0 */
-	emit(ARM_MOV_I(r4, 0), ctx);
+	/* Initialize Tail Count */
+	emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
+	emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
 
 	/* Move BPF_CTX to BPF_R1 */
-	emit(ARM_MOV_R(r3, r4), ctx);
-	emit(ARM_MOV_R(r2, r0), ctx);
-	/* Initialize Tail Count */
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
+	emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
+
 	/* end of prologue */
 }
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] ARM: net: bpf: improve prologue code sequence
  2019-12-09 11:17 [PATCH] ARM: net: bpf: improve prologue code sequence Russell King
@ 2019-12-11 13:56 ` Daniel Borkmann
  0 siblings, 0 replies; 2+ messages in thread
From: Daniel Borkmann @ 2019-12-11 13:56 UTC (permalink / raw)
  To: Russell King
  Cc: Shubham Bansal, Alexei Starovoitov, Martin KaFai Lau, Song Liu,
	Yonghong Song, Andrii Nakryiko, netdev, bpf, linux-arm-kernel

On Mon, Dec 09, 2019 at 11:17:30AM +0000, Russell King wrote:
> Improve the prologue code sequence to be able to take advantage of
> 64-bit stores, changing the code from:
> 
>   push    {r4, r5, r6, r7, r8, r9, fp, lr}
>   mov     fp, sp
>   sub     ip, sp, #80     ; 0x50
>   sub     sp, sp, #600    ; 0x258
>   str     ip, [fp, #-100] ; 0xffffff9c
>   mov     r6, #0
>   str     r6, [fp, #-96]  ; 0xffffffa0
>   mov     r4, #0
>   mov     r3, r4
>   mov     r2, r0
>   str     r4, [fp, #-104] ; 0xffffff98
>   str     r4, [fp, #-108] ; 0xffffff94
> 
> to the tighter:
> 
>   push    {r4, r5, r6, r7, r8, r9, fp, lr}
>   mov     fp, sp
>   mov     r3, #0
>   sub     r2, sp, #80     ; 0x50
>   sub     sp, sp, #600    ; 0x258
>   strd    r2, [fp, #-100] ; 0xffffff9c
>   mov     r2, #0
>   strd    r2, [fp, #-108] ; 0xffffff94
>   mov     r2, r0
> 
> resulting in a saving of three instructions.
> 
> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>

Applied, thanks!

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-12-11 13:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-09 11:17 [PATCH] ARM: net: bpf: improve prologue code sequence Russell King
2019-12-11 13:56 ` Daniel Borkmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).