linuxppc-dev.lists.ozlabs.org archive mirror
* [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping
@ 2021-07-27  6:55 Jordan Niethe
  2021-07-27  6:55 ` [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64 Jordan Niethe
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Jordan Niethe @ 2021-07-27  6:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: naveen.n.rao, Jordan Niethe

In bpf_jit_build_body(), the mapping of TMP_REG_1 and TMP_REG_2's bpf
register to ppc register is evaluated at every use despite not
changing. Instead, determine the ppc register once and store the result.
This will be more useful when a later patch introduces a more complex
mapping from bpf registers to ppc registers.
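
As a purely illustrative sketch (the actual change is the diff below,
with "..." standing for elided code), the rework just hoists the array
lookup out of each EMIT():

    /* Before: the mapping is looked up at every use. */
    EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));

    /* After: look it up once per instruction and reuse the result. */
    u32 tmp1_reg = b2p[TMP_REG_1];
    ...
    EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));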

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/net/bpf_jit_comp64.c | 163 +++++++++++++-----------------
 1 file changed, 69 insertions(+), 94 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index de8595880fee..1dfec85bb03b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -285,6 +285,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		u32 code = insn[i].code;
 		u32 dst_reg = b2p[insn[i].dst_reg];
 		u32 src_reg = b2p[insn[i].src_reg];
+		u32 tmp1_reg = b2p[TMP_REG_1];
+		u32 tmp2_reg = b2p[TMP_REG_2];
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
 		bool func_addr_fixed;
@@ -337,8 +339,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				if (imm >= -32768 && imm < 32768)
 					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
 				else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
-					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));
+					PPC_LI32(tmp1_reg, imm);
+					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
 				}
 			}
 			goto bpf_alu32_trunc;
@@ -354,32 +356,28 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			if (imm >= -32768 && imm < 32768)
 				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
-				PPC_LI32(b2p[TMP_REG_1], imm);
+				PPC_LI32(tmp1_reg, imm);
 				if (BPF_CLASS(code) == BPF_ALU)
-					EMIT(PPC_RAW_MULW(dst_reg, dst_reg,
-							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
 				else
-					EMIT(PPC_RAW_MULD(dst_reg, dst_reg,
-							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg));
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
 		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
 			if (BPF_OP(code) == BPF_MOD) {
-				EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg));
-				EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg,
-						b2p[TMP_REG_1]));
-				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
+				EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
 			} else
 				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
 		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
 			if (BPF_OP(code) == BPF_MOD) {
-				EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg));
-				EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg,
-						b2p[TMP_REG_1]));
-				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
+				EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
 			} else
 				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
 			break;
@@ -392,35 +390,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			else if (imm == 1)
 				goto bpf_alu32_trunc;
 
-			PPC_LI32(b2p[TMP_REG_1], imm);
+			PPC_LI32(tmp1_reg, imm);
 			switch (BPF_CLASS(code)) {
 			case BPF_ALU:
 				if (BPF_OP(code) == BPF_MOD) {
-					EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2],
-							dst_reg,
-							b2p[TMP_REG_1]));
-					EMIT(PPC_RAW_MULW(b2p[TMP_REG_1],
-							b2p[TMP_REG_1],
-							b2p[TMP_REG_2]));
-					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
-							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
+					EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
+					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
 				} else
-					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg,
-							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg));
 				break;
 			case BPF_ALU64:
 				if (BPF_OP(code) == BPF_MOD) {
-					EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2],
-							dst_reg,
-							b2p[TMP_REG_1]));
-					EMIT(PPC_RAW_MULD(b2p[TMP_REG_1],
-							b2p[TMP_REG_1],
-							b2p[TMP_REG_2]));
-					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
-							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
+					EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
+					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
 				} else
-					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg,
-							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg));
 				break;
 			}
 			goto bpf_alu32_trunc;
@@ -442,8 +428,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
 				/* Sign-extended */
-				PPC_LI32(b2p[TMP_REG_1], imm);
-				EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1]));
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
@@ -454,8 +440,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
 			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 				/* Sign-extended */
-				PPC_LI32(b2p[TMP_REG_1], imm);
-				EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1]));
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
 			} else {
 				if (IMM_L(imm))
 					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
@@ -471,8 +457,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
 			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 				/* Sign-extended */
-				PPC_LI32(b2p[TMP_REG_1], imm);
-				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]));
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
 			} else {
 				if (IMM_L(imm))
 					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
@@ -573,11 +559,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			switch (imm) {
 			case 16:
 				/* Rotate 8 bits left & mask with 0x0000ff00 */
-				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23));
+				EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23));
 				/* Rotate 8 bits right & insert LSB to reg */
-				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31));
+				EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31));
 				/* Move result back to dst_reg */
-				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
 				break;
 			case 32:
 				/*
@@ -585,12 +571,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				 * 2 bytes are already in their final position
 				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
 				 */
-				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31));
+				EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31));
 				/* Rotate 24 bits and insert byte 1 */
-				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7));
+				EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7));
 				/* Rotate 24 bits and insert byte 3 */
-				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23));
-				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
 				break;
 			case 64:
 				/*
@@ -602,8 +588,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				 * same across all passes
 				 */
 				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
-				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
-				EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_ADDI(tmp1_reg, 1, bpf_jit_stack_local(ctx)));
+				EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg));
 				break;
 			}
 			break;
@@ -633,32 +619,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
-				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
-				src_reg = b2p[TMP_REG_1];
+				EMIT(PPC_RAW_LI(tmp1_reg, imm));
+				src_reg = tmp1_reg;
 			}
 			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
 			break;
 		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
-				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
-				src_reg = b2p[TMP_REG_1];
+				EMIT(PPC_RAW_LI(tmp1_reg, imm));
+				src_reg = tmp1_reg;
 			}
 			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
 			break;
 		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
-				PPC_LI32(b2p[TMP_REG_1], imm);
-				src_reg = b2p[TMP_REG_1];
+				PPC_LI32(tmp1_reg, imm);
+				src_reg = tmp1_reg;
 			}
 			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
 			break;
 		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
-				PPC_LI32(b2p[TMP_REG_1], imm);
-				src_reg = b2p[TMP_REG_1];
+				PPC_LI32(tmp1_reg, imm);
+				src_reg = tmp1_reg;
 			}
 			PPC_BPF_STL(src_reg, dst_reg, off);
 			break;
@@ -677,14 +663,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			/* *(u32 *)(dst + off) += src */
 
 			/* Get EA into TMP_REG_1 */
-			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
+			EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
 			tmp_idx = ctx->idx * 4;
 			/* load value from memory into TMP_REG_2 */
-			EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
+			EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0));
 			/* add value from src_reg into this */
-			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
+			EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg));
 			/* store result back */
-			EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
+			EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg));
 			/* we're done if this succeeded */
 			PPC_BCC_SHORT(COND_NE, tmp_idx);
 			break;
@@ -697,11 +683,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			}
 			/* *(u64 *)(dst + off) += src */
 
-			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
+			EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
 			tmp_idx = ctx->idx * 4;
-			EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
-			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
-			EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
+			EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0));
+			EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg));
+			EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg));
 			PPC_BCC_SHORT(COND_NE, tmp_idx);
 			break;
 
@@ -879,14 +865,10 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			case BPF_JMP | BPF_JSET | BPF_X:
 			case BPF_JMP32 | BPF_JSET | BPF_X:
 				if (BPF_CLASS(code) == BPF_JMP) {
-					EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg,
-						    src_reg));
+					EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
 				} else {
-					int tmp_reg = b2p[TMP_REG_1];
-
-					EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg));
-					EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
-						       31));
+					EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg));
+					EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31));
 				}
 				break;
 			case BPF_JMP | BPF_JNE | BPF_K:
@@ -915,14 +897,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
 				} else {
 					/* sign-extending load */
-					PPC_LI32(b2p[TMP_REG_1], imm);
+					PPC_LI32(tmp1_reg, imm);
 					/* ... but unsigned comparison */
 					if (is_jmp32)
-						EMIT(PPC_RAW_CMPLW(dst_reg,
-							  b2p[TMP_REG_1]));
+						EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg));
 					else
-						EMIT(PPC_RAW_CMPLD(dst_reg,
-							  b2p[TMP_REG_1]));
+						EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg));
 				}
 				break;
 			}
@@ -947,13 +927,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 					else
 						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
 				} else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
+					PPC_LI32(tmp1_reg, imm);
 					if (is_jmp32)
-						EMIT(PPC_RAW_CMPW(dst_reg,
-							 b2p[TMP_REG_1]));
+						EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
 					else
-						EMIT(PPC_RAW_CMPD(dst_reg,
-							 b2p[TMP_REG_1]));
+						EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg));
 				}
 				break;
 			}
@@ -962,19 +940,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				/* andi does not sign-extend the immediate */
 				if (imm >= 0 && imm < 32768)
 					/* PPC_ANDI is _only/always_ dot-form */
-					EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm));
+					EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));
 				else {
-					int tmp_reg = b2p[TMP_REG_1];
-
-					PPC_LI32(tmp_reg, imm);
+					PPC_LI32(tmp1_reg, imm);
 					if (BPF_CLASS(code) == BPF_JMP) {
-						EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg,
-							    tmp_reg));
+						EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg,
+								     tmp1_reg));
 					} else {
-						EMIT(PPC_RAW_AND(tmp_reg, dst_reg,
-							tmp_reg));
-						EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg,
-							       0, 0, 31));
+						EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg));
+						EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg,
+									0, 0, 31));
 					}
 				}
 				break;
-- 
2.25.1



* [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64
  2021-07-27  6:55 [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Jordan Niethe
@ 2021-07-27  6:55 ` Jordan Niethe
  2022-01-07 17:25   ` Naveen N. Rao
  2021-07-27  6:55 ` [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible " Jordan Niethe
  2022-01-07 17:13 ` [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Naveen N. Rao
  2 siblings, 1 reply; 9+ messages in thread
From: Jordan Niethe @ 2021-07-27  6:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: naveen.n.rao, Jordan Niethe

Prepare for doing commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF
registers to volatile registers when possible on PPC32") on PPC64 in a
later patch. Instead of directly accessing the const b2p[] array to
map bpf to ppc registers, use bpf_to_ppc(), which allows the mapping
to be kept per struct codegen_context.
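
For illustration only (the helper itself is in the diff below), the
accessor is a trivial wrapper around the per-context copy of the table,
so a later pass can rewrite ctx->b2p[] without touching any call site:

    static inline int bpf_to_ppc(struct codegen_context *ctx, int reg)
    {
            return ctx->b2p[reg];
    }

    /* e.g. in bpf_jit_build_body(): */
    u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);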

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/net/bpf_jit.h        |  5 ++
 arch/powerpc/net/bpf_jit64.h      | 30 +++++-----
 arch/powerpc/net/bpf_jit_comp32.c |  5 --
 arch/powerpc/net/bpf_jit_comp64.c | 96 ++++++++++++++++---------------
 4 files changed, 71 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 99fad093f43e..db86fa37f1dd 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -172,6 +172,11 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 
+static inline int bpf_to_ppc(struct codegen_context *ctx, int reg)
+{
+	return ctx->b2p[reg];
+}
+
 #endif
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 7b713edfa7e2..89b625d9342b 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -68,23 +68,23 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
  * WARNING: These can use TMP_REG_2 if the offset is not at word boundary,
  * so ensure that it isn't in use already.
  */
-#define PPC_BPF_LL(r, base, i) do {					      \
-				if ((i) % 4) {				      \
-					EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\
-					EMIT(PPC_RAW_LDX(r, base,	      \
-							b2p[TMP_REG_2]));     \
-				} else					      \
-					EMIT(PPC_RAW_LD(r, base, i));	      \
+#define PPC_BPF_LL(ctx, r, base, i) do {						  \
+				if ((i) % 4) {						  \
+					EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
+					EMIT(PPC_RAW_LDX(r, base,			  \
+							bpf_to_ppc(ctx, TMP_REG_2)));	  \
+				} else							  \
+					EMIT(PPC_RAW_LD(r, base, i));			  \
 				} while(0)
-#define PPC_BPF_STL(r, base, i) do {					      \
-				if ((i) % 4) {				      \
-					EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\
-					EMIT(PPC_RAW_STDX(r, base,	      \
-							b2p[TMP_REG_2]));     \
-				} else					      \
-					EMIT(PPC_RAW_STD(r, base, i));	      \
+#define PPC_BPF_STL(ctx, r, base, i) do {						  \
+				if ((i) % 4) {						  \
+					EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
+					EMIT(PPC_RAW_STDX(r, base,			  \
+							bpf_to_ppc(ctx, TMP_REG_2)));	  \
+				} else							  \
+					EMIT(PPC_RAW_STD(r, base, i));			  \
 				} while(0)
-#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
+#define PPC_BPF_STLU(ctx, r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 34bb1583fc0c..eaf942075719 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -58,11 +58,6 @@ const int b2p[MAX_BPF_JIT_REG + 1] = {
 	[TMP_REG] = 31,		/* 32 bits */
 };
 
-static int bpf_to_ppc(struct codegen_context *ctx, int reg)
-{
-	return ctx->b2p[reg];
-}
-
 /* PPC NVR range -- update this if we ever use NVRs below r17 */
 #define BPF_PPC_NVR_MIN		17
 #define BPF_PPC_TC		16
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1dfec85bb03b..f7a668c1e364 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -26,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 	 * - the bpf program uses its stack area
 	 * The latter condition is deduced from the usage of BPF_REG_FP
 	 */
-	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]);
+	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP));
 }
 
 /*
@@ -78,9 +78,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 	 * invoked through a tail call.
 	 */
 	if (ctx->seen & SEEN_TAILCALL) {
-		EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0));
+		EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_1), 0));
 		/* this goes in the redzone */
-		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
+		PPC_BPF_STL(ctx, bpf_to_ppc(ctx, TMP_REG_1), 1, -(BPF_PPC_STACK_SAVE + 8));
 	} else {
 		EMIT(PPC_RAW_NOP());
 		EMIT(PPC_RAW_NOP());
@@ -95,10 +95,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 		 */
 		if (ctx->seen & SEEN_FUNC) {
 			EMIT(PPC_RAW_MFLR(_R0));
-			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
+			PPC_BPF_STL(ctx, 0, 1, PPC_LR_STKOFF);
 		}
 
-		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
+		PPC_BPF_STLU(ctx, 1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
 	}
 
 	/*
@@ -107,13 +107,14 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 	 * in the protected zone below the previous stack frame
 	 */
 	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, b2p[i]))
-			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
+		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
+			PPC_BPF_STL(ctx, bpf_to_ppc(ctx, i), 1,
+				    bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
 
 	/* Setup frame pointer to point to the bpf stack area */
-	if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
-		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
-				STACK_FRAME_MIN_SIZE + ctx->stack_size));
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP)))
+		EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), 1,
+				  STACK_FRAME_MIN_SIZE + ctx->stack_size));
 }
 
 static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -122,14 +123,15 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 
 	/* Restore NVRs */
 	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, b2p[i]))
-			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
+		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
+			PPC_BPF_LL(ctx, bpf_to_ppc(ctx, i), 1,
+				   bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
 
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
 		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size));
 		if (ctx->seen & SEEN_FUNC) {
-			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
+			PPC_BPF_LL(ctx, 0, 1, PPC_LR_STKOFF);
 			EMIT(PPC_RAW_MTLR(0));
 		}
 	}
@@ -140,7 +142,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	bpf_jit_emit_common_epilogue(image, ctx);
 
 	/* Move result to r3 */
-	EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0]));
+	EMIT(PPC_RAW_MR(3, bpf_to_ppc(ctx, BPF_REG_0)));
 
 	EMIT(PPC_RAW_BLR());
 }
@@ -150,18 +152,18 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
 {
 #ifdef PPC64_ELF_ABI_v1
 	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
+	PPC_LI64(bpf_to_ppc(ctx, TMP_REG_2), func);
 	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+	PPC_BPF_LL(ctx, bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_2), 0);
 	/* ... and move it to CTR */
-	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));
+	EMIT(PPC_RAW_MTCTR(bpf_to_ppc(ctx, TMP_REG_1)));
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+	PPC_BPF_LL(ctx, 2, bpf_to_ppc(ctx, TMP_REG_2), 8);
 #else
 	/* We can clobber r12 */
 	PPC_FUNC_ADDR(12, func);
@@ -197,9 +199,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, 12, 8);
+	PPC_BPF_LL(ctx, 2, 12, 8);
 	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(12, 12, 0);
+	PPC_BPF_LL(ctx, 12, 12, 0);
 #endif
 
 	EMIT(PPC_RAW_MTCTR(12));
@@ -214,54 +216,58 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 	 * r4/BPF_REG_2 - pointer to bpf_array
 	 * r5/BPF_REG_3 - index in bpf_array
 	 */
-	int b2p_bpf_array = b2p[BPF_REG_2];
-	int b2p_index = b2p[BPF_REG_3];
+	int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2);
+	int b2p_index = bpf_to_ppc(ctx, BPF_REG_3);
 
 	/*
 	 * if (index >= array->map.max_entries)
 	 *   goto out;
 	 */
-	EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+	EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, TMP_REG_1), b2p_bpf_array,
+			 offsetof(struct bpf_array, map.max_entries)));
 	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
-	EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1]));
+	EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(ctx, TMP_REG_1)));
 	PPC_BCC(COND_GE, out);
 
 	/*
 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
 	 *   goto out;
 	 */
-	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
-	EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT));
+	PPC_BPF_LL(ctx, bpf_to_ppc(ctx, TMP_REG_1), 1, bpf_jit_stack_tailcallcnt(ctx));
+	EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(ctx, TMP_REG_1), MAX_TAIL_CALL_CNT));
 	PPC_BCC(COND_GT, out);
 
 	/*
 	 * tail_call_cnt++;
 	 */
-	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1));
-	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+	EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_1), 1));
+	PPC_BPF_STL(ctx, bpf_to_ppc(ctx, TMP_REG_1), 1, bpf_jit_stack_tailcallcnt(ctx));
 
 	/* prog = array->ptrs[index]; */
-	EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8));
-	EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array));
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+	EMIT(PPC_RAW_MULI(bpf_to_ppc(ctx, TMP_REG_1), b2p_index, 8));
+	EMIT(PPC_RAW_ADD(bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_1), b2p_bpf_array));
+	PPC_BPF_LL(ctx, bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_1),
+		   offsetof(struct bpf_array, ptrs));
 
 	/*
 	 * if (prog == NULL)
 	 *   goto out;
 	 */
-	EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0));
+	EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(ctx, TMP_REG_1), 0));
 	PPC_BCC(COND_EQ, out);
 
 	/* goto *(prog->bpf_func + prologue_size); */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
+	PPC_BPF_LL(ctx, bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_1),
+		   offsetof(struct bpf_prog, bpf_func));
 #ifdef PPC64_ELF_ABI_v1
 	/* skip past the function descriptor */
-	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
-			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
+	EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_1),
+			  FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
 #else
-	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE));
+	EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, TMP_REG_1), bpf_to_ppc(ctx, TMP_REG_1),
+			  BPF_TAILCALL_PROLOGUE_SIZE));
 #endif
-	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));
+	EMIT(PPC_RAW_MTCTR(bpf_to_ppc(ctx, TMP_REG_1)));
 
 	/* tear down stack, restore NVRs, ... */
 	bpf_jit_emit_common_epilogue(image, ctx);
@@ -283,10 +289,10 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 
 	for (i = 0; i < flen; i++) {
 		u32 code = insn[i].code;
-		u32 dst_reg = b2p[insn[i].dst_reg];
-		u32 src_reg = b2p[insn[i].src_reg];
-		u32 tmp1_reg = b2p[TMP_REG_1];
-		u32 tmp2_reg = b2p[TMP_REG_2];
+		u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
+		u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
+		u32 tmp1_reg = bpf_to_ppc(ctx, TMP_REG_1);
+		u32 tmp2_reg = bpf_to_ppc(ctx, TMP_REG_2);
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
 		bool func_addr_fixed;
@@ -587,7 +593,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				 * the instructions generated will remain the
 				 * same across all passes
 				 */
-				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
+				PPC_BPF_STL(ctx, dst_reg, 1, bpf_jit_stack_local(ctx));
 				EMIT(PPC_RAW_ADDI(tmp1_reg, 1, bpf_jit_stack_local(ctx)));
 				EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg));
 				break;
@@ -646,7 +652,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				PPC_LI32(tmp1_reg, imm);
 				src_reg = tmp1_reg;
 			}
-			PPC_BPF_STL(src_reg, dst_reg, off);
+			PPC_BPF_STL(ctx, src_reg, dst_reg, off);
 			break;
 
 		/*
@@ -714,7 +720,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			break;
 		/* dst = *(u64 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_DW:
-			PPC_BPF_LL(dst_reg, src_reg, off);
+			PPC_BPF_LL(ctx, dst_reg, src_reg, off);
 			break;
 
 		/*
@@ -759,7 +765,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			else
 				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
 			/* move return value from r3 to BPF_REG_0 */
-			EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3));
+			EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), 3));
 			break;
 
 		/*
-- 
2.25.1



* [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC64
  2021-07-27  6:55 [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Jordan Niethe
  2021-07-27  6:55 ` [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64 Jordan Niethe
@ 2021-07-27  6:55 ` Jordan Niethe
  2021-08-05  8:21   ` Christophe Leroy
  2022-02-22 14:23   ` Christophe Leroy
  2022-01-07 17:13 ` [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Naveen N. Rao
  2 siblings, 2 replies; 9+ messages in thread
From: Jordan Niethe @ 2021-07-27  6:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: naveen.n.rao, Jordan Niethe

Implement commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF registers to
volatile registers when possible on PPC32") for PPC64.

When the BPF routine doesn't call any function, the non-volatile
registers can be reallocated to volatile registers in order to avoid
having to save/restore them on the stack. To know which registers can
be reallocated, mark each register as seen whenever it is used.

Before this patch, the test #359 ADD default X is:
   0:   nop
   4:   nop
   8:   std     r27,-40(r1)
   c:   std     r28,-32(r1)
  10:   xor     r8,r8,r8
  14:   rotlwi  r8,r8,0
  18:   xor     r28,r28,r28
  1c:   rotlwi  r28,r28,0
  20:   mr      r27,r3
  24:   li      r8,66
  28:   add     r8,r8,r28
  2c:   rotlwi  r8,r8,0
  30:   ld      r27,-40(r1)
  34:   ld      r28,-32(r1)
  38:   mr      r3,r8
  3c:   blr

After this patch, the same test has become:
   0:   nop
   4:   nop
   8:   xor     r8,r8,r8
   c:   rotlwi  r8,r8,0
  10:   xor     r5,r5,r5
  14:   rotlwi  r5,r5,0
  18:   mr      r4,r3
  1c:   li      r8,66
  20:   add     r8,r8,r5
  24:   rotlwi  r8,r8,0
  28:   mr      r3,r8
  2c:   blr

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/net/bpf_jit64.h      |  2 ++
 arch/powerpc/net/bpf_jit_comp64.c | 60 +++++++++++++++++++++++++------
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 89b625d9342b..e20521bf77bf 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -70,6 +70,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
  */
 #define PPC_BPF_LL(ctx, r, base, i) do {						  \
 				if ((i) % 4) {						  \
+					bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_2));\
 					EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
 					EMIT(PPC_RAW_LDX(r, base,			  \
 							bpf_to_ppc(ctx, TMP_REG_2)));	  \
@@ -78,6 +79,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
 				} while(0)
 #define PPC_BPF_STL(ctx, r, base, i) do {						  \
 				if ((i) % 4) {						  \
+					bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_2));\
 					EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
 					EMIT(PPC_RAW_STDX(r, base,			  \
 							bpf_to_ppc(ctx, TMP_REG_2)));	  \
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index f7a668c1e364..287e0322bbf3 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -66,6 +66,24 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 
 void bpf_jit_realloc_regs(struct codegen_context *ctx)
 {
+	if (ctx->seen & SEEN_FUNC)
+		return;
+
+	while (ctx->seen & SEEN_NVREG_MASK &&
+	       (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & SEEN_NVREG_MASK);
+		int new = 32 - fls(~ctx->seen & SEEN_VREG_MASK);
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG_2; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			break;
+		}
+	}
 }
 
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
@@ -106,10 +124,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 	 * If we haven't created our own stack frame, we save these
 	 * in the protected zone below the previous stack frame
 	 */
-	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
-			PPC_BPF_STL(ctx, bpf_to_ppc(ctx, i), 1,
-				    bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			PPC_BPF_STL(ctx, i, 1, bpf_jit_stack_offsetof(ctx, i));
 
 	/* Setup frame pointer to point to the bpf stack area */
 	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP)))
@@ -122,10 +139,9 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 	int i;
 
 	/* Restore NVRs */
-	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
-			PPC_BPF_LL(ctx, bpf_to_ppc(ctx, i), 1,
-				   bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			PPC_BPF_LL(ctx, i, 1, bpf_jit_stack_offsetof(ctx, i));
 
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
@@ -223,6 +239,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 	 * if (index >= array->map.max_entries)
 	 *   goto out;
 	 */
+	bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_1));
 	EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, TMP_REG_1), b2p_bpf_array,
 			 offsetof(struct bpf_array, map.max_entries)));
 	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
@@ -318,9 +335,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 * optimization but everything else should work without
 		 * any issues.
 		 */
-		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
+		if (dst_reg >= 3 && dst_reg < 32)
 			bpf_set_seen_register(ctx, dst_reg);
-		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
+		if (src_reg >= 3 && src_reg < 32)
 			bpf_set_seen_register(ctx, src_reg);
 
 		switch (code) {
@@ -345,6 +362,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				if (imm >= -32768 && imm < 32768)
 					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
 				else {
+					bpf_set_seen_register(ctx, tmp1_reg);
 					PPC_LI32(tmp1_reg, imm);
 					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
 				}
@@ -362,6 +380,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			if (imm >= -32768 && imm < 32768)
 				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				if (BPF_CLASS(code) == BPF_ALU)
 					EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
@@ -372,6 +391,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
 		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
 			if (BPF_OP(code) == BPF_MOD) {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
 				EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
 				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -381,6 +401,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
 		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
 			if (BPF_OP(code) == BPF_MOD) {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
 				EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
 				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -396,10 +417,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			else if (imm == 1)
 				goto bpf_alu32_trunc;
 
+			bpf_set_seen_register(ctx, tmp1_reg);
 			PPC_LI32(tmp1_reg, imm);
 			switch (BPF_CLASS(code)) {
 			case BPF_ALU:
 				if (BPF_OP(code) == BPF_MOD) {
+					bpf_set_seen_register(ctx, tmp2_reg);
 					EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
 					EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
 					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -408,6 +431,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				break;
 			case BPF_ALU64:
 				if (BPF_OP(code) == BPF_MOD) {
+					bpf_set_seen_register(ctx, tmp2_reg);
 					EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
 					EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
 					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
@@ -434,6 +458,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
 				/* Sign-extended */
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
 			}
@@ -446,6 +471,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
 			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 				/* Sign-extended */
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
 			} else {
@@ -463,6 +489,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
 			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 				/* Sign-extended */
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
 			} else {
@@ -562,6 +589,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			if (BPF_SRC(code) == BPF_FROM_LE)
 				goto emit_clear;
 #endif
+			bpf_set_seen_register(ctx, tmp1_reg);
 			switch (imm) {
 			case 16:
 				/* Rotate 8 bits left & mask with 0x0000ff00 */
@@ -625,6 +653,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				EMIT(PPC_RAW_LI(tmp1_reg, imm));
 				src_reg = tmp1_reg;
 			}
@@ -633,6 +662,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				EMIT(PPC_RAW_LI(tmp1_reg, imm));
 				src_reg = tmp1_reg;
 			}
@@ -641,6 +671,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				src_reg = tmp1_reg;
 			}
@@ -649,6 +680,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
+				bpf_set_seen_register(ctx, tmp1_reg);
 				PPC_LI32(tmp1_reg, imm);
 				src_reg = tmp1_reg;
 			}
@@ -669,6 +701,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			/* *(u32 *)(dst + off) += src */
 
 			/* Get EA into TMP_REG_1 */
+			bpf_set_seen_register(ctx, tmp1_reg);
+			bpf_set_seen_register(ctx, tmp2_reg);
 			EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
 			tmp_idx = ctx->idx * 4;
 			/* load value from memory into TMP_REG_2 */
@@ -689,6 +723,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			}
 			/* *(u64 *)(dst + off) += src */
 
+			bpf_set_seen_register(ctx, tmp1_reg);
+			bpf_set_seen_register(ctx, tmp2_reg);
 			EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
 			tmp_idx = ctx->idx * 4;
 			EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0));
@@ -870,6 +906,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				break;
 			case BPF_JMP | BPF_JSET | BPF_X:
 			case BPF_JMP32 | BPF_JSET | BPF_X:
+				bpf_set_seen_register(ctx, tmp1_reg);
 				if (BPF_CLASS(code) == BPF_JMP) {
 					EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
 				} else {
@@ -903,6 +940,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
 				} else {
 					/* sign-extending load */
+					bpf_set_seen_register(ctx, tmp1_reg);
 					PPC_LI32(tmp1_reg, imm);
 					/* ... but unsigned comparison */
 					if (is_jmp32)
@@ -933,6 +971,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 					else
 						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
 				} else {
+					bpf_set_seen_register(ctx, tmp1_reg);
 					PPC_LI32(tmp1_reg, imm);
 					if (is_jmp32)
 						EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
@@ -944,6 +983,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			case BPF_JMP | BPF_JSET | BPF_K:
 			case BPF_JMP32 | BPF_JSET | BPF_K:
 				/* andi does not sign-extend the immediate */
+				bpf_set_seen_register(ctx, tmp1_reg);
 				if (imm >= 0 && imm < 32768)
 					/* PPC_ANDI is _only/always_ dot-form */
 					EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));
-- 
2.25.1



* Re: [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC64
  2021-07-27  6:55 ` [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible " Jordan Niethe
@ 2021-08-05  8:21   ` Christophe Leroy
  2022-01-07 17:58     ` Naveen N. Rao
  2022-02-22 14:23   ` Christophe Leroy
  1 sibling, 1 reply; 9+ messages in thread
From: Christophe Leroy @ 2021-08-05  8:21 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev; +Cc: naveen.n.rao



On 27/07/2021 at 08:55, Jordan Niethe wrote:
> Implement commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF registers to
> volatile registers when possible on PPC32") for PPC64.
> 
> When the BPF routine doesn't call any function, the non volatile
> registers can be reallocated to volatile registers in order to avoid
> having to save them/restore on the stack. To keep track of which
> registers can be reallocated to make sure registers are set seen when
> used.

Maybe you could do as on PPC32 and try to use r0 as much as possible instead of the TMP regs.
r0 needs to be used carefully because for some instructions (ex: addi, lwz, etc.) r0 means 0 instead
of register 0, but it would help free one more register in several cases.
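
Something along these lines, perhaps (entirely hypothetical sketch, the
helper and its name are made up, not existing JIT code):

    /*
     * Pick a scratch GPR, preferring r0 when it is safe. r0 cannot be
     * used where it is the RA/base operand (addi, lwz, ...), since it
     * is then read as the literal value 0 rather than GPR 0.
     */
    static int pick_scratch_reg(struct codegen_context *ctx, bool needs_base)
    {
            if (!needs_base)
                    return 0;
            return bpf_to_ppc(ctx, TMP_REG_1);
    }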

> 
> Before this patch, the test #359 ADD default X is:
>     0:   nop
>     4:   nop
>     8:   std     r27,-40(r1)
>     c:   std     r28,-32(r1)
>    10:   xor     r8,r8,r8
>    14:   rotlwi  r8,r8,0
>    18:   xor     r28,r28,r28
>    1c:   rotlwi  r28,r28,0
>    20:   mr      r27,r3
>    24:   li      r8,66
>    28:   add     r8,r8,r28
>    2c:   rotlwi  r8,r8,0
>    30:   ld      r27,-40(r1)
>    34:   ld      r28,-32(r1)
>    38:   mr      r3,r8
>    3c:   blr
> 
> After this patch, the same test has become:
>     0:   nop
>     4:   nop
>     8:   xor     r8,r8,r8
>     c:   rotlwi  r8,r8,0
>    10:   xor     r5,r5,r5
>    14:   rotlwi  r5,r5,0
>    18:   mr      r4,r3
>    1c:   li      r8,66
>    20:   add     r8,r8,r5
>    24:   rotlwi  r8,r8,0
>    28:   mr      r3,r8
>    2c:   blr
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>   arch/powerpc/net/bpf_jit64.h      |  2 ++
>   arch/powerpc/net/bpf_jit_comp64.c | 60 +++++++++++++++++++++++++------
>   2 files changed, 52 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
> index 89b625d9342b..e20521bf77bf 100644
> --- a/arch/powerpc/net/bpf_jit64.h
> +++ b/arch/powerpc/net/bpf_jit64.h
> @@ -70,6 +70,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
>    */
>   #define PPC_BPF_LL(ctx, r, base, i) do {						  \
>   				if ((i) % 4) {						  \
> +					bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_2));\
>   					EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
>   					EMIT(PPC_RAW_LDX(r, base,			  \
>   							bpf_to_ppc(ctx, TMP_REG_2)));	  \
> @@ -78,6 +79,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = {
>   				} while(0)
>   #define PPC_BPF_STL(ctx, r, base, i) do {						  \
>   				if ((i) % 4) {						  \
> +					bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_2));\
>   					EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, TMP_REG_2), (i)));\
>   					EMIT(PPC_RAW_STDX(r, base,			  \
>   							bpf_to_ppc(ctx, TMP_REG_2)));	  \
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index f7a668c1e364..287e0322bbf3 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -66,6 +66,24 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
>   
>   void bpf_jit_realloc_regs(struct codegen_context *ctx)
>   {
> +	if (ctx->seen & SEEN_FUNC)
> +		return;
> +
> +	while (ctx->seen & SEEN_NVREG_MASK &&
> +	       (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
> +		int old = 32 - fls(ctx->seen & SEEN_NVREG_MASK);
> +		int new = 32 - fls(~ctx->seen & SEEN_VREG_MASK);
> +		int i;
> +
> +		for (i = BPF_REG_0; i <= TMP_REG_2; i++) {
> +			if (ctx->b2p[i] != old)
> +				continue;
> +			ctx->b2p[i] = new;
> +			bpf_set_seen_register(ctx, new);
> +			bpf_clear_seen_register(ctx, old);
> +			break;
> +		}
> +	}

This function is not very different from the one for PPC32. Maybe we could cook a common function.
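
Rough sketch of what could be shared (parameter names are made up here,
and the PPC32 masks/bounds would need checking):

    static void bpf_jit_realloc_regs_common(struct codegen_context *ctx,
                                            u32 nvreg_mask, u32 vreg_mask,
                                            int last_reg)
    {
            if (ctx->seen & SEEN_FUNC)
                    return;

            while (ctx->seen & nvreg_mask &&
                   (ctx->seen & vreg_mask) != vreg_mask) {
                    int old = 32 - fls(ctx->seen & nvreg_mask);
                    int new = 32 - fls(~ctx->seen & vreg_mask);
                    int i;

                    for (i = BPF_REG_0; i <= last_reg; i++) {
                            if (ctx->b2p[i] != old)
                                    continue;
                            ctx->b2p[i] = new;
                            bpf_set_seen_register(ctx, new);
                            bpf_clear_seen_register(ctx, old);
                            break;
                    }
            }
    }

Each JIT would then call it with its own SEEN_*_MASK values and its last
remappable register.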


>   }
>   
>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> @@ -106,10 +124,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
>   	 * If we haven't created our own stack frame, we save these
>   	 * in the protected zone below the previous stack frame
>   	 */
> -	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
> -		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
> -			PPC_BPF_STL(ctx, bpf_to_ppc(ctx, i), 1,
> -				    bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
> +	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
> +		if (bpf_is_seen_register(ctx, i))
> +			PPC_BPF_STL(ctx, i, 1, bpf_jit_stack_offsetof(ctx, i));
>   
>   	/* Setup frame pointer to point to the bpf stack area */
>   	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP)))
> @@ -122,10 +139,9 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
>   	int i;
>   
>   	/* Restore NVRs */
> -	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
> -		if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, i)))
> -			PPC_BPF_LL(ctx, bpf_to_ppc(ctx, i), 1,
> -				   bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ctx, i)));
> +	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
> +		if (bpf_is_seen_register(ctx, i))
> +			PPC_BPF_LL(ctx, i, 1, bpf_jit_stack_offsetof(ctx, i));
>   
>   	/* Tear down our stack frame */
>   	if (bpf_has_stack_frame(ctx)) {
> @@ -223,6 +239,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
>   	 * if (index >= array->map.max_entries)
>   	 *   goto out;
>   	 */
> +	bpf_set_seen_register(ctx, bpf_to_ppc(ctx, TMP_REG_1));
>   	EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, TMP_REG_1), b2p_bpf_array,
>   			 offsetof(struct bpf_array, map.max_entries)));
>   	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
> @@ -318,9 +335,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		 * optimization but everything else should work without
>   		 * any issues.
>   		 */
> -		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
> +		if (dst_reg >= 3 && dst_reg < 32)
>   			bpf_set_seen_register(ctx, dst_reg);
> -		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
> +		if (src_reg >= 3 && src_reg < 32)
>   			bpf_set_seen_register(ctx, src_reg);
>   
>   		switch (code) {
> @@ -345,6 +362,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   				if (imm >= -32768 && imm < 32768)
>   					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
>   				else {
> +					bpf_set_seen_register(ctx, tmp1_reg);
>   					PPC_LI32(tmp1_reg, imm);
>   					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
>   				}
> @@ -362,6 +380,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			if (imm >= -32768 && imm < 32768)
>   				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
>   			else {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				PPC_LI32(tmp1_reg, imm);
>   				if (BPF_CLASS(code) == BPF_ALU)
>   					EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
> @@ -372,6 +391,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
>   		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
>   			if (BPF_OP(code) == BPF_MOD) {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
>   				EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
>   				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
> @@ -381,6 +401,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
>   		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
>   			if (BPF_OP(code) == BPF_MOD) {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
>   				EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
>   				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
> @@ -396,10 +417,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			else if (imm == 1)
>   				goto bpf_alu32_trunc;
>   
> +			bpf_set_seen_register(ctx, tmp1_reg);
>   			PPC_LI32(tmp1_reg, imm);
>   			switch (BPF_CLASS(code)) {
>   			case BPF_ALU:
>   				if (BPF_OP(code) == BPF_MOD) {
> +					bpf_set_seen_register(ctx, tmp2_reg);
>   					EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
>   					EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
>   					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
> @@ -408,6 +431,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   				break;
>   			case BPF_ALU64:
>   				if (BPF_OP(code) == BPF_MOD) {
> +					bpf_set_seen_register(ctx, tmp2_reg);
>   					EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
>   					EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
>   					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
> @@ -434,6 +458,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
>   			else {
>   				/* Sign-extended */
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				PPC_LI32(tmp1_reg, imm);
>   				EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
>   			}
> @@ -446,6 +471,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
>   			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
>   				/* Sign-extended */
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				PPC_LI32(tmp1_reg, imm);
>   				EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
>   			} else {
> @@ -463,6 +489,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
>   			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
>   				/* Sign-extended */
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				PPC_LI32(tmp1_reg, imm);
>   				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
>   			} else {
> @@ -562,6 +589,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			if (BPF_SRC(code) == BPF_FROM_LE)
>   				goto emit_clear;
>   #endif
> +			bpf_set_seen_register(ctx, tmp1_reg);
>   			switch (imm) {
>   			case 16:
>   				/* Rotate 8 bits left & mask with 0x0000ff00 */
> @@ -625,6 +653,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
>   		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
>   			if (BPF_CLASS(code) == BPF_ST) {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				EMIT(PPC_RAW_LI(tmp1_reg, imm));
>   				src_reg = tmp1_reg;
>   			}
> @@ -633,6 +662,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
>   		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
>   			if (BPF_CLASS(code) == BPF_ST) {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				EMIT(PPC_RAW_LI(tmp1_reg, imm));
>   				src_reg = tmp1_reg;
>   			}
> @@ -641,6 +671,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
>   		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
>   			if (BPF_CLASS(code) == BPF_ST) {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				PPC_LI32(tmp1_reg, imm);
>   				src_reg = tmp1_reg;
>   			}
> @@ -649,6 +680,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
>   		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
>   			if (BPF_CLASS(code) == BPF_ST) {
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				PPC_LI32(tmp1_reg, imm);
>   				src_reg = tmp1_reg;
>   			}
> @@ -669,6 +701,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			/* *(u32 *)(dst + off) += src */
>   
>   			/* Get EA into TMP_REG_1 */
> +			bpf_set_seen_register(ctx, tmp1_reg);
> +			bpf_set_seen_register(ctx, tmp2_reg);
>   			EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
>   			tmp_idx = ctx->idx * 4;
>   			/* load value from memory into TMP_REG_2 */
> @@ -689,6 +723,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			}
>   			/* *(u64 *)(dst + off) += src */
>   
> +			bpf_set_seen_register(ctx, tmp1_reg);
> +			bpf_set_seen_register(ctx, tmp2_reg);
>   			EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off));
>   			tmp_idx = ctx->idx * 4;
>   			EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0));
> @@ -870,6 +906,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   				break;
>   			case BPF_JMP | BPF_JSET | BPF_X:
>   			case BPF_JMP32 | BPF_JSET | BPF_X:
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				if (BPF_CLASS(code) == BPF_JMP) {
>   					EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
>   				} else {
> @@ -903,6 +940,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
>   				} else {
>   					/* sign-extending load */
> +					bpf_set_seen_register(ctx, tmp1_reg);
>   					PPC_LI32(tmp1_reg, imm);
>   					/* ... but unsigned comparison */
>   					if (is_jmp32)
> @@ -933,6 +971,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   					else
>   						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
>   				} else {
> +					bpf_set_seen_register(ctx, tmp1_reg);
>   					PPC_LI32(tmp1_reg, imm);
>   					if (is_jmp32)
>   						EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
> @@ -944,6 +983,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			case BPF_JMP | BPF_JSET | BPF_K:
>   			case BPF_JMP32 | BPF_JSET | BPF_K:
>   				/* andi does not sign-extend the immediate */
> +				bpf_set_seen_register(ctx, tmp1_reg);
>   				if (imm >= 0 && imm < 32768)
>   					/* PPC_ANDI is _only/always_ dot-form */
>   					EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));
> 


* Re: [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping
  2021-07-27  6:55 [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Jordan Niethe
  2021-07-27  6:55 ` [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64 Jordan Niethe
  2021-07-27  6:55 ` [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible " Jordan Niethe
@ 2022-01-07 17:13 ` Naveen N. Rao
  2 siblings, 0 replies; 9+ messages in thread
From: Naveen N. Rao @ 2022-01-07 17:13 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev

Jordan Niethe wrote:
> In bpf_jit_build_body(), the mapping of TMP_REG_1 and TMP_REG_2's bpf
> register to ppc register is evaluated at every use despite not
> changing. Instead, determine the ppc register once and store the result.
> This will be more useful when a later patch introduces a more complex
> mapping from bpf registers to ppc registers.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/net/bpf_jit_comp64.c | 163 +++++++++++++-----------------
>  1 file changed, 69 insertions(+), 94 deletions(-)

I know this is eons ago and this patch will need updating, but if you 
intend to revive this:
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>


Thanks,
Naveen


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64
  2021-07-27  6:55 ` [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64 Jordan Niethe
@ 2022-01-07 17:25   ` Naveen N. Rao
  0 siblings, 0 replies; 9+ messages in thread
From: Naveen N. Rao @ 2022-01-07 17:25 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev

Jordan Niethe wrote:
> Prepare for implementing commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF
> registers to volatile registers when possible on PPC32") on PPC64 in a
> later patch. Instead of directly accessing the const b2p[] array for
> mapping bpf to ppc registers, use bpf_to_ppc(), which allows
> per-codegen_context mappings.
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/net/bpf_jit.h        |  5 ++
>  arch/powerpc/net/bpf_jit64.h      | 30 +++++-----
>  arch/powerpc/net/bpf_jit_comp32.c |  5 --
>  arch/powerpc/net/bpf_jit_comp64.c | 96 ++++++++++++++++---------------
>  4 files changed, 71 insertions(+), 65 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 99fad093f43e..db86fa37f1dd 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -172,6 +172,11 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_realloc_regs(struct codegen_context *ctx);
> 
> +static inline int bpf_to_ppc(struct codegen_context *ctx, int reg)
> +{
> +	return ctx->b2p[reg];
> +}
> +
>  #endif

You are following what has been done on ppc32 here, but since ctx is 
almost always available where b2p[] is used, I'm thinking it might be 
simpler to convert it into a macro:

#define b2p(i)	ctx->cb2p[i]

We will just need to rename the global b2p array, as well as the one in 
codegen_context. Everywhere else, it will be a simple b2p[] -> b2p() 
change.
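
A minimal sketch of how that conversion might look (cb2p is just the name
used in the macro above; the final naming is still open):

	/* bpf_jit.h -- the macro suggested above */
	#define b2p(i)		(ctx->cb2p[i])

	/* call sites in bpf_jit_comp64.c: b2p[...] simply becomes b2p(...) */
	u32 dst_reg  = b2p(insn[i].dst_reg);
	u32 src_reg  = b2p(insn[i].src_reg);
	u32 tmp1_reg = b2p(TMP_REG_1);
	u32 tmp2_reg = b2p(TMP_REG_2);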


- Naveen


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC64
  2021-08-05  8:21   ` Christophe Leroy
@ 2022-01-07 17:58     ` Naveen N. Rao
  0 siblings, 0 replies; 9+ messages in thread
From: Naveen N. Rao @ 2022-01-07 17:58 UTC (permalink / raw)
  To: Christophe Leroy, Jordan Niethe, linuxppc-dev

Christophe Leroy wrote:
> 
> 
> On 27/07/2021 at 08:55, Jordan Niethe wrote:
>> Implement commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF registers to
>> volatile registers when possible on PPC32") for PPC64.
>> 
>> When the BPF routine doesn't call any function, the non-volatile
>> registers can be reallocated to volatile registers in order to avoid
>> having to save/restore them on the stack. To keep track of which
>> registers can be reallocated, registers are marked as seen when they
>> are used.
> 
> Maybe you could do as on PPC32 and try to use r0 as much as possible instead of the TMP regs.
> r0 needs to be used carefully because for some instructions (e.g. addi, lwz, etc.) r0 means 0 instead
> of register 0, but it would help free one more register in several cases.
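
To illustrate the quirk Christophe describes, in terms of the JIT's
EMIT()/PPC_RAW_*() helpers (a sketch only; the operands are chosen for the
example):

	/* D-form instructions such as addi and lwz treat RA = 0 as the
	 * constant 0 rather than as GPR r0 ... */
	EMIT(PPC_RAW_ADDI(dst_reg, 0, imm));    /* encodes "li dst,imm", not dst = r0 + imm */
	/* ... so r0 cannot serve as the RA (base/addend) operand there.
	 * X-form arithmetic has no such special case and can read r0: */
	EMIT(PPC_RAW_ADD(dst_reg, 0, src_reg)); /* dst = r0 + src */

In practice r0 is fine as a destination or as a second source operand; the
care is needed only where it would end up in the RA field of a D-form
instruction or as a load/store base register.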

Yes, but I think the utility of register re-mapping is debatable on 
ppc64 since we only use NVRs for the BPF registers that are themselves 
non-volatile. Unlike the test case shown in the commit description (and 
other test programs in test_bpf), where the savings are visible, most 
real-world BPF programs will be generated by llvm, which will only use 
the NVRs when necessary. I also suspect that most BPF programs will end 
up making at least one helper call.

On ppc32 though, there is value in re-mapping registers, especially 
BPF_REG_AX and TMP_REG, and to a lesser extent, BPF_REG_5, since those 
are volatile BPF registers and can be remapped regardless of a helper 
call.
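
As a self-contained toy of the policy being discussed here (not the
kernel's implementation), the remap decision boils down to something like:

	/* Toy model only -- not the kernel's code. GPR usage is recorded as
	 * instructions are emitted, and a BPF register held in a non-volatile
	 * GPR is worth remapping to a volatile GPR only if its value never has
	 * to survive a helper call: either the program emits no calls at all,
	 * or the BPF register is itself volatile (BPF_REG_AX, the TMP regs,
	 * BPF_REG_5). */
	#include <stdbool.h>
	#include <stdint.h>

	struct toy_ctx {
		uint32_t seen_gprs;	/* bit n set => GPR n used by the JITed code */
		bool     seen_func;	/* at least one helper call was emitted */
	};

	static void toy_set_seen(struct toy_ctx *ctx, int gpr)
	{
		ctx->seen_gprs |= 1u << gpr;
	}

	static bool toy_should_remap(const struct toy_ctx *ctx, int gpr,
				     bool bpf_reg_is_volatile)
	{
		if (!(ctx->seen_gprs & (1u << gpr)))
			return false;	/* never used: nothing to gain */
		return bpf_reg_is_volatile || !ctx->seen_func;
	}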


- Naveen


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC64
  2021-07-27  6:55 ` [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible " Jordan Niethe
  2021-08-05  8:21   ` Christophe Leroy
@ 2022-02-22 14:23   ` Christophe Leroy
  2022-03-02 16:40     ` Naveen N. Rao
  1 sibling, 1 reply; 9+ messages in thread
From: Christophe Leroy @ 2022-02-22 14:23 UTC (permalink / raw)
  To: Jordan Niethe, linuxppc-dev; +Cc: naveen.n.rao



> On 27/07/2021 at 08:55, Jordan Niethe wrote:
> Implement commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF registers to
> volatile registers when possible on PPC32") for PPC64.
> 
> When the BPF routine doesn't call any function, the non-volatile
> registers can be reallocated to volatile registers in order to avoid
> having to save/restore them on the stack. To keep track of which
> registers can be reallocated, registers are marked as seen when they
> are used.
> 
> Before this patch, the test #359 ADD default X is:
>     0:   nop
>     4:   nop
>     8:   std     r27,-40(r1)
>     c:   std     r28,-32(r1)
>    10:   xor     r8,r8,r8
>    14:   rotlwi  r8,r8,0
>    18:   xor     r28,r28,r28
>    1c:   rotlwi  r28,r28,0
>    20:   mr      r27,r3
>    24:   li      r8,66
>    28:   add     r8,r8,r28
>    2c:   rotlwi  r8,r8,0
>    30:   ld      r27,-40(r1)
>    34:   ld      r28,-32(r1)
>    38:   mr      r3,r8
>    3c:   blr
> 
> After this patch, the same test has become:
>     0:   nop
>     4:   nop
>     8:   xor     r8,r8,r8
>     c:   rotlwi  r8,r8,0
>    10:   xor     r5,r5,r5
>    14:   rotlwi  r5,r5,0
>    18:   mr      r4,r3
>    1c:   li      r8,66
>    20:   add     r8,r8,r5
>    24:   rotlwi  r8,r8,0
>    28:   mr      r3,r8
>    2c:   blr
> 
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>

If this series is still applicable, it needs to be rebased on top of Naveen's 
series https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=286000

Christophe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC64
  2022-02-22 14:23   ` Christophe Leroy
@ 2022-03-02 16:40     ` Naveen N. Rao
  0 siblings, 0 replies; 9+ messages in thread
From: Naveen N. Rao @ 2022-03-02 16:40 UTC (permalink / raw)
  To: Christophe Leroy, Jordan Niethe, linuxppc-dev

Christophe Leroy wrote:
> 
> 
>> On 27/07/2021 at 08:55, Jordan Niethe wrote:
>> Implement commit 40272035e1d0 ("powerpc/bpf: Reallocate BPF registers to
>> volatile registers when possible on PPC32") for PPC64.
>> 
>> When the BPF routine doesn't call any function, the non-volatile
>> registers can be reallocated to volatile registers in order to avoid
>> having to save/restore them on the stack. To keep track of which
>> registers can be reallocated, registers are marked as seen when they
>> are used.
>> 
>> Before this patch, the test #359 ADD default X is:
>>     0:   nop
>>     4:   nop
>>     8:   std     r27,-40(r1)
>>     c:   std     r28,-32(r1)
>>    10:   xor     r8,r8,r8
>>    14:   rotlwi  r8,r8,0
>>    18:   xor     r28,r28,r28
>>    1c:   rotlwi  r28,r28,0
>>    20:   mr      r27,r3
>>    24:   li      r8,66
>>    28:   add     r8,r8,r28
>>    2c:   rotlwi  r8,r8,0
>>    30:   ld      r27,-40(r1)
>>    34:   ld      r28,-32(r1)
>>    38:   mr      r3,r8
>>    3c:   blr
>> 
>> After this patch, the same test has become:
>>     0:   nop
>>     4:   nop
>>     8:   xor     r8,r8,r8
>>     c:   rotlwi  r8,r8,0
>>    10:   xor     r5,r5,r5
>>    14:   rotlwi  r5,r5,0
>>    18:   mr      r4,r3
>>    1c:   li      r8,66
>>    20:   add     r8,r8,r5
>>    24:   rotlwi  r8,r8,0
>>    28:   mr      r3,r8
>>    2c:   blr
>> 
>> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
> 
> If this series is still applicable, it needs to be rebased on top of Naveen's 
> series https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=286000

Thanks for bringing this up. My apologies - I missed copying you and 
Jordan on the new series.

I have included the first patch of this series, and a variant of the 
second patch, in the new series I posted. For patch 3/3, it might be 
simpler not to track temp register usage on ppc64.


Thanks,
Naveen


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-03-02 16:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
2021-07-27  6:55 [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Jordan Niethe
2021-07-27  6:55 ` [PATCH 2/3] powerpc/bpf: Use helper for mapping bpf to ppc registers on PPC64 Jordan Niethe
2022-01-07 17:25   ` Naveen N. Rao
2021-07-27  6:55 ` [PATCH 3/3] powerpc/bpf: Reallocate BPF registers to volatile registers when possible " Jordan Niethe
2021-08-05  8:21   ` Christophe Leroy
2022-01-07 17:58     ` Naveen N. Rao
2022-02-22 14:23   ` Christophe Leroy
2022-03-02 16:40     ` Naveen N. Rao
2022-01-07 17:13 ` [PATCH 1/3] powerpc64/bpf: Store temp registers' bpf to ppc mapping Naveen N. Rao
