* [PATCH 2/3] powerpc/ebpf32: Rework 64 bits shifts to avoid tests and branches
2021-04-12 11:44 [PATCH 1/3] powerpc/ebpf32: Fix comment on BPF_ALU{64} | BPF_LSH | BPF_K Christophe Leroy
@ 2021-04-12 11:44 ` Christophe Leroy
2021-04-12 11:44 ` [PATCH 3/3] powerpc/ebpf32: Use standard function call for functions within 32M distance Christophe Leroy
2021-04-21 13:08 ` [PATCH 1/3] powerpc/ebpf32: Fix comment on BPF_ALU{64} | BPF_LSH | BPF_K Michael Ellerman
2 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2021-04-12 11:44 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ast,
daniel, andrii, kafai, songliubraving, yhs, john.fastabend,
kpsingh, naveen.n.rao, sandipan
Cc: linux-kernel, linuxppc-dev, netdev, bpf
Re-implement BPF_ALU64 | BPF_{LSH/RSH/ARSH} | BPF_X with branchless
implementation copied from misc_32.S.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/net/bpf_jit_comp32.c | 39 +++++++++++++++----------------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index ca6fe1583460..ef21b09df76e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -548,16 +548,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
- EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32));
- PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4);
- EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg, __REG_R0));
- EMIT(PPC_RAW_LI(dst_reg, 0));
- PPC_JMP((ctx->idx + 6) * 4);
+ bpf_set_seen_register(ctx, tmp_reg);
EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0));
- EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg));
EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
break;
case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
if (!imm)
@@ -585,16 +584,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
- EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32));
- PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4);
- EMIT(PPC_RAW_SRW(dst_reg, dst_reg_h, __REG_R0));
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
- PPC_JMP((ctx->idx + 6) * 4);
- EMIT(PPC_RAW_SUBFIC(0, src_reg, 32));
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
- EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
break;
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
if (!imm)
@@ -622,16 +620,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg));
break;
case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
- EMIT(PPC_RAW_ADDIC_DOT(__REG_R0, src_reg, -32));
- PPC_BCC_SHORT(COND_LT, (ctx->idx + 4) * 4);
- EMIT(PPC_RAW_SRAW(dst_reg, dst_reg_h, __REG_R0));
- EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31));
- PPC_JMP((ctx->idx + 6) * 4);
- EMIT(PPC_RAW_SUBFIC(0, src_reg, 32));
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
- EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26));
+ EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg));
+ EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
break;
case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
if (!imm)
--
2.25.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] powerpc/ebpf32: Use standard function call for functions within 32M distance
2021-04-12 11:44 [PATCH 1/3] powerpc/ebpf32: Fix comment on BPF_ALU{64} | BPF_LSH | BPF_K Christophe Leroy
2021-04-12 11:44 ` [PATCH 2/3] powerpc/ebpf32: Rework 64 bits shifts to avoid tests and branches Christophe Leroy
@ 2021-04-12 11:44 ` Christophe Leroy
2021-04-21 13:08 ` [PATCH 1/3] powerpc/ebpf32: Fix comment on BPF_ALU{64} | BPF_LSH | BPF_K Michael Ellerman
2 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2021-04-12 11:44 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, ast,
daniel, andrii, kafai, songliubraving, yhs, john.fastabend,
kpsingh, naveen.n.rao, sandipan
Cc: linux-kernel, linuxppc-dev, netdev, bpf
If the target of a function call is within 32 Mbytes distance, use a
standard function call with 'bl' of the 'lis/ori/mtlr/blrl' sequence.
In the first pass, no memory has been allocated yet and the code
position is not known yet (image pointer is NULL). This pass is there
to calculate the amount of memory to allocate for the EBPF code, so
assume the 4 instructions sequence is required, so that enough memory
is allocated.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/ppc-opcode.h | 1 +
arch/powerpc/net/bpf_jit.h | 3 +++
arch/powerpc/net/bpf_jit_comp32.c | 16 +++++++++++-----
3 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 5b60020dc1f4..ac41776661e9 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -265,6 +265,7 @@
#define PPC_INST_ORI 0x60000000
#define PPC_INST_ORIS 0x64000000
#define PPC_INST_BRANCH 0x48000000
+#define PPC_INST_BL 0x48000001
#define PPC_INST_BRANCH_COND 0x40800000
/* Prefixes */
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 776abef4d2a0..99fad093f43e 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -26,6 +26,9 @@
/* Long jump; (unconditional 'branch') */
#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
(((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* blr; (unconditional 'branch' with link) to absolute address */
+#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \
+ (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
/* "cond" here covers BO:BI fields. */
#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \
(((cond) & 0x3ff) << 16) | \
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index ef21b09df76e..bbb16099e8c7 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -187,11 +187,17 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
{
- /* Load function address into r0 */
- EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
- EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
- EMIT(PPC_RAW_MTLR(__REG_R0));
- EMIT(PPC_RAW_BLRL());
+ s32 rel = (s32)func - (s32)(image + ctx->idx);
+
+ if (image && rel < 0x2000000 && rel >= -0x2000000) {
+ PPC_BL_ABS(func);
+ } else {
+ /* Load function address into r0 */
+ EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
+ EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
+ EMIT(PPC_RAW_MTLR(__REG_R0));
+ EMIT(PPC_RAW_BLRL());
+ }
}
static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
--
2.25.0
^ permalink raw reply related [flat|nested] 4+ messages in thread