* [PATCH bpf-next] bpf,x64: Save bytes for DIV by reducing reg copies
@ 2021-09-29 23:47 Jie Meng
From: Jie Meng @ 2021-09-29 23:47 UTC
  To: bpf, ast, andrii, daniel; +Cc: Jie Meng

Instead of unconditionally pushing/popping rax/rdx around division/modulo,
we can save a few bytes when the destination register is either BPF r0
(rax) or r3 (rdx), since the result is written there anyway.

Also, src does not need to be copied to r11 unless it is rax, rdx or an
immediate.

Signed-off-by: Jie Meng <jmeng@fb.com>
---
 arch/x86/net/bpf_jit_comp.c                | 71 +++++++++++++---------
 tools/testing/selftests/bpf/verifier/jit.c | 28 +++++++++
 2 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 20d2d6a1f9de..346b4131d496 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU64 | BPF_MOD | BPF_X:
 		case BPF_ALU64 | BPF_DIV | BPF_X:
 		case BPF_ALU64 | BPF_MOD | BPF_K:
-		case BPF_ALU64 | BPF_DIV | BPF_K:
-			EMIT1(0x50); /* push rax */
-			EMIT1(0x52); /* push rdx */
-
-			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov r11, src_reg */
-				EMIT_mov(AUX_REG, src_reg);
-			else
+		case BPF_ALU64 | BPF_DIV | BPF_K: {
+			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+			if (dst_reg != BPF_REG_0)
+				EMIT1(0x50); /* push rax */
+			if (dst_reg != BPF_REG_3)
+				EMIT1(0x52); /* push rdx */
+
+			if (BPF_SRC(insn->code) == BPF_X) {
+				if (src_reg == BPF_REG_0 ||
+				    src_reg == BPF_REG_3) {
+					/* mov r11, src_reg */
+					EMIT_mov(AUX_REG, src_reg);
+					src_reg = AUX_REG;
+				}
+			} else {
 				/* mov r11, imm32 */
 				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
+				src_reg = AUX_REG;
+			}
 
-			/* mov rax, dst_reg */
-			EMIT_mov(BPF_REG_0, dst_reg);
+			if (dst_reg != BPF_REG_0)
+				/* mov rax, dst_reg */
+				emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
 
 			/*
 			 * xor edx, edx
@@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			 */
 			EMIT2(0x31, 0xd2);
 
-			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				/* div r11 */
-				EMIT3(0x49, 0xF7, 0xF3);
-			else
-				/* div r11d */
-				EMIT3(0x41, 0xF7, 0xF3);
-
-			if (BPF_OP(insn->code) == BPF_MOD)
-				/* mov r11, rdx */
-				EMIT3(0x49, 0x89, 0xD3);
-			else
-				/* mov r11, rax */
-				EMIT3(0x49, 0x89, 0xC3);
-
-			EMIT1(0x5A); /* pop rdx */
-			EMIT1(0x58); /* pop rax */
-
-			/* mov dst_reg, r11 */
-			EMIT_mov(dst_reg, AUX_REG);
+			if (is64)
+				EMIT1(add_1mod(0x48, src_reg));
+			else if (is_ereg(src_reg))
+				EMIT1(add_1mod(0x40, src_reg));
+			/* div src_reg */
+			EMIT2(0xF7, add_1reg(0xF0, src_reg));
+
+			if (BPF_OP(insn->code) == BPF_MOD &&
+			    dst_reg != BPF_REG_3)
+				/* mov dst_reg, rdx */
+				emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
+			else if (BPF_OP(insn->code) == BPF_DIV &&
+				 dst_reg != BPF_REG_0)
+				/* mov dst_reg, rax */
+				emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
+
+			if (dst_reg != BPF_REG_3)
+				EMIT1(0x5A); /* pop rdx */
+			if (dst_reg != BPF_REG_0)
+				EMIT1(0x58); /* pop rax */
 			break;
+		}
 
 		case BPF_ALU | BPF_MUL | BPF_K:
 		case BPF_ALU64 | BPF_MUL | BPF_K:
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index eedcb752bf70..0f2583f0685a 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -102,6 +102,34 @@
 	.result = ACCEPT,
 	.retval = 2,
 },
+{
+	"jit: various div tests",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+	BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+	BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+	BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 2,
+},
 {
 	"jit: jsgt, jslt",
 	.insns = {
-- 
2.30.2



* Re: [PATCH bpf-next] bpf,x64: Save bytes for DIV by reducing reg copies
@ 2021-09-30 22:50 Daniel Borkmann
From: Daniel Borkmann @ 2021-09-30 22:50 UTC
  To: Jie Meng, bpf, ast, andrii

On 9/30/21 1:47 AM, Jie Meng wrote:
> Instead of unconditionally pushing/popping rax/rdx around division/modulo,
> we can save a few bytes when the destination register is either BPF r0
> (rax) or r3 (rdx), since the result is written there anyway.
> 
> Also, src does not need to be copied to r11 unless it is rax, rdx or an
> immediate.
> 
> Signed-off-by: Jie Meng <jmeng@fb.com>

Thanks for looking into this!

Diff looks correct to me, but it would be nice to add some more detail to the commit
description so that it visualizes the before/after; that would have helped review as
well, see [0] for an example.

   [0] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ced185824c89b60e65b5a2606954c098320cdfb8
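
For instance, for a plain r0 /= r1 (BPF_ALU64 | BPF_DIV | BPF_X), if I read the diff
right, the JITed code should go from roughly this (assuming the usual mapping
r0 -> rax, r1 -> rdi, r3 -> rdx):

    push rax
    push rdx
    mov  r11, rdi
    xor  edx, edx
    div  r11
    mov  r11, rax
    pop  rdx
    pop  rax
    mov  rax, r11

... down to something like:

    push rdx
    xor  edx, edx
    div  rdi
    pop  rdx

Having that kind of before/after snippet in the changelog makes the saving obvious
at a glance.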

>   arch/x86/net/bpf_jit_comp.c                | 71 +++++++++++++---------
>   tools/testing/selftests/bpf/verifier/jit.c | 28 +++++++++
>   2 files changed, 70 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 20d2d6a1f9de..346b4131d496 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>   		case BPF_ALU64 | BPF_MOD | BPF_X:
>   		case BPF_ALU64 | BPF_DIV | BPF_X:
>   		case BPF_ALU64 | BPF_MOD | BPF_K:
> -		case BPF_ALU64 | BPF_DIV | BPF_K:
> -			EMIT1(0x50); /* push rax */
> -			EMIT1(0x52); /* push rdx */
> -
> -			if (BPF_SRC(insn->code) == BPF_X)
> -				/* mov r11, src_reg */
> -				EMIT_mov(AUX_REG, src_reg);
> -			else
> +		case BPF_ALU64 | BPF_DIV | BPF_K: {
> +			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
> +
> +			if (dst_reg != BPF_REG_0)
> +				EMIT1(0x50); /* push rax */
> +			if (dst_reg != BPF_REG_3)
> +				EMIT1(0x52); /* push rdx */
> +
> +			if (BPF_SRC(insn->code) == BPF_X) {
> +				if (src_reg == BPF_REG_0 ||
> +				    src_reg == BPF_REG_3) {
> +					/* mov r11, src_reg */
> +					EMIT_mov(AUX_REG, src_reg);
> +					src_reg = AUX_REG;
> +				}
> +			} else {
>   				/* mov r11, imm32 */
>   				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
> +				src_reg = AUX_REG;
> +			}
>   
> -			/* mov rax, dst_reg */
> -			EMIT_mov(BPF_REG_0, dst_reg);
> +			if (dst_reg != BPF_REG_0)
> +				/* mov rax, dst_reg */
> +				emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
>   
>   			/*
>   			 * xor edx, edx
> @@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>   			 */
>   			EMIT2(0x31, 0xd2);
>   
> -			if (BPF_CLASS(insn->code) == BPF_ALU64)
> -				/* div r11 */
> -				EMIT3(0x49, 0xF7, 0xF3);
> -			else
> -				/* div r11d */
> -				EMIT3(0x41, 0xF7, 0xF3);
> -
> -			if (BPF_OP(insn->code) == BPF_MOD)
> -				/* mov r11, rdx */
> -				EMIT3(0x49, 0x89, 0xD3);
> -			else
> -				/* mov r11, rax */
> -				EMIT3(0x49, 0x89, 0xC3);
> -
> -			EMIT1(0x5A); /* pop rdx */
> -			EMIT1(0x58); /* pop rax */
> -
> -			/* mov dst_reg, r11 */
> -			EMIT_mov(dst_reg, AUX_REG);
> +			if (is64)
> +				EMIT1(add_1mod(0x48, src_reg));
> +			else if (is_ereg(src_reg))
> +				EMIT1(add_1mod(0x40, src_reg));
> +			/* div src_reg */
> +			EMIT2(0xF7, add_1reg(0xF0, src_reg));
> +
> +			if (BPF_OP(insn->code) == BPF_MOD &&
> +			    dst_reg != BPF_REG_3)
> +				/* mov dst_reg, rdx */
> +				emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
> +			else if (BPF_OP(insn->code) == BPF_DIV &&
> +				 dst_reg != BPF_REG_0)
> +				/* mov dst_reg, rax */
> +				emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
> +
> +			if (dst_reg != BPF_REG_3)
> +				EMIT1(0x5A); /* pop rdx */
> +			if (dst_reg != BPF_REG_0)
> +				EMIT1(0x58); /* pop rax */
>   			break;
> +		}
>   
>   		case BPF_ALU | BPF_MUL | BPF_K:
>   		case BPF_ALU64 | BPF_MUL | BPF_K:
> diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
> index eedcb752bf70..0f2583f0685a 100644
> --- a/tools/testing/selftests/bpf/verifier/jit.c
> +++ b/tools/testing/selftests/bpf/verifier/jit.c
> @@ -102,6 +102,34 @@
>   	.result = ACCEPT,
>   	.retval = 2,
>   },
> +{
> +	"jit: various div tests",
> +	.insns = {
> +	BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
> +	BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
> +	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
> +	BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
> +	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
> +	BPF_MOV64_IMM(BPF_REG_0, 1),
> +	BPF_EXIT_INSN(),
> +	BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
> +	BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
> +	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
> +	BPF_MOV64_IMM(BPF_REG_0, 1),
> +	BPF_EXIT_INSN(),
> +	BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
> +	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
> +	BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
> +	BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),

Could you add some more coverage? This only has BPF_DIV + BPF_X, but let's also
add ...

  - BPF_DIV + BPF_K
  - BPF_MOD + BPF_X
  - BPF_MOD + BPF_K

... and corner cases where src == dst reg, for combinations with R0/R3/R<other>.
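
Something roughly along these lines perhaps (untested, values picked arbitrarily,
following the same jump-over-two pattern as the existing block):

	/* BPF_MOD + BPF_K with dst_reg being r0, which maps to rax */
	BPF_LD_IMM64(BPF_REG_0, 0xbef0ULL),
	BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, 0xbeef),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 2),
	BPF_MOV64_IMM(BPF_REG_0, 1),
	BPF_EXIT_INSN(),
	/* BPF_DIV + BPF_X with src == dst == r3, which maps to rdx */
	BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
	BPF_ALU64_REG(BPF_DIV, BPF_REG_3, BPF_REG_3),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 1, 2),
	BPF_MOV64_IMM(BPF_REG_0, 1),
	BPF_EXIT_INSN(),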

> +	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
> +	BPF_MOV64_IMM(BPF_REG_0, 1),
> +	BPF_EXIT_INSN(),
> +	BPF_MOV64_IMM(BPF_REG_0, 2),
> +	BPF_EXIT_INSN(),
> +	},
> +	.result = ACCEPT,
> +	.retval = 2,
> +},
>   {
>   	"jit: jsgt, jslt",
>   	.insns = {
> 


