bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next] bpf,x64: Save bytes for DIV by reducing reg copies
@ 2021-10-02  3:56 Jie Meng
  2021-10-06  2:56 ` Alexei Starovoitov
  0 siblings, 1 reply; 2+ messages in thread
From: Jie Meng @ 2021-10-02  3:56 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel; +Cc: Jie Meng

Instead of unconditionally performing push/pop on rax/rdx in case of
division/modulo, we can save a few bytes in case of dest register
being either BPF r0 (rax) or r3 (rdx) since the result is written in
there anyway.

Also, we do not need to copy src to r11 unless src is either rax, rdx
or an immediate.

For example, before the patch:
  22:   push   %rax
  23:   push   %rdx
  24:   mov    %rsi,%r11
  27:   xor    %edx,%edx
  29:   div    %r11
  2c:   mov    %rax,%r11
  2f:   pop    %rdx
  30:   pop    %rax
  31:   mov    %r11,%rax
  34:   leaveq
  35:   retq

After:
  22:   push   %rdx
  23:   xor    %edx,%edx
  25:   div    %rsi
  28:   pop    %rdx
  29:   leaveq
  2a:   retq

Signed-off-by: Jie Meng <jmeng@fb.com>
---
 arch/x86/net/bpf_jit_comp.c                | 71 +++++++++++++---------
 tools/testing/selftests/bpf/verifier/jit.c | 47 ++++++++++++++
 2 files changed, 89 insertions(+), 29 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 20d2d6a1f9de..346b4131d496 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU64 | BPF_MOD | BPF_X:
 		case BPF_ALU64 | BPF_DIV | BPF_X:
 		case BPF_ALU64 | BPF_MOD | BPF_K:
-		case BPF_ALU64 | BPF_DIV | BPF_K:
-			EMIT1(0x50); /* push rax */
-			EMIT1(0x52); /* push rdx */
-
-			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov r11, src_reg */
-				EMIT_mov(AUX_REG, src_reg);
-			else
+		case BPF_ALU64 | BPF_DIV | BPF_K: {
+			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+			if (dst_reg != BPF_REG_0)
+				EMIT1(0x50); /* push rax */
+			if (dst_reg != BPF_REG_3)
+				EMIT1(0x52); /* push rdx */
+
+			if (BPF_SRC(insn->code) == BPF_X) {
+				if (src_reg == BPF_REG_0 ||
+				    src_reg == BPF_REG_3) {
+					/* mov r11, src_reg */
+					EMIT_mov(AUX_REG, src_reg);
+					src_reg = AUX_REG;
+				}
+			} else {
 				/* mov r11, imm32 */
 				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
+				src_reg = AUX_REG;
+			}
 
-			/* mov rax, dst_reg */
-			EMIT_mov(BPF_REG_0, dst_reg);
+			if (dst_reg != BPF_REG_0)
+				/* mov rax, dst_reg */
+				emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
 
 			/*
 			 * xor edx, edx
@@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			 */
 			EMIT2(0x31, 0xd2);
 
-			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				/* div r11 */
-				EMIT3(0x49, 0xF7, 0xF3);
-			else
-				/* div r11d */
-				EMIT3(0x41, 0xF7, 0xF3);
-
-			if (BPF_OP(insn->code) == BPF_MOD)
-				/* mov r11, rdx */
-				EMIT3(0x49, 0x89, 0xD3);
-			else
-				/* mov r11, rax */
-				EMIT3(0x49, 0x89, 0xC3);
-
-			EMIT1(0x5A); /* pop rdx */
-			EMIT1(0x58); /* pop rax */
-
-			/* mov dst_reg, r11 */
-			EMIT_mov(dst_reg, AUX_REG);
+			if (is64)
+				EMIT1(add_1mod(0x48, src_reg));
+			else if (is_ereg(src_reg))
+				EMIT1(add_1mod(0x40, src_reg));
+			/* div src_reg */
+			EMIT2(0xF7, add_1reg(0xF0, src_reg));
+
+			if (BPF_OP(insn->code) == BPF_MOD &&
+			    dst_reg != BPF_REG_3)
+				/* mov dst_reg, rdx */
+				emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
+			else if (BPF_OP(insn->code) == BPF_DIV &&
+				 dst_reg != BPF_REG_0)
+				/* mov dst_reg, rax */
+				emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
+
+			if (dst_reg != BPF_REG_3)
+				EMIT1(0x5A); /* pop rdx */
+			if (dst_reg != BPF_REG_0)
+				EMIT1(0x58); /* pop rax */
 			break;
+		}
 
 		case BPF_ALU | BPF_MUL | BPF_K:
 		case BPF_ALU64 | BPF_MUL | BPF_K:
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index eedcb752bf70..79021c30e51e 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -102,6 +102,53 @@
 	.result = ACCEPT,
 	.retval = 2,
 },
+{
+	"jit: various div tests",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+	BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
+	BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+	BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
+	BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+	BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 2,
+},
 {
 	"jit: jsgt, jslt",
 	.insns = {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2 bpf-next] bpf,x64: Save bytes for DIV by reducing reg copies
  2021-10-02  3:56 [PATCH v2 bpf-next] bpf,x64: Save bytes for DIV by reducing reg copies Jie Meng
@ 2021-10-06  2:56 ` Alexei Starovoitov
  0 siblings, 0 replies; 2+ messages in thread
From: Alexei Starovoitov @ 2021-10-06  2:56 UTC (permalink / raw)
  To: Jie Meng; +Cc: bpf, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann

On Fri, Oct 1, 2021 at 8:56 PM Jie Meng <jmeng@fb.com> wrote:
>
> Instead of unconditionally performing push/pop on rax/rdx in case of
> division/modulo, we can save a few bytes in case of dest register
> being either BPF r0 (rax) or r3 (rdx) since the result is written in
> there anyway.
>
> Also, we do not need to copy src to r11 unless src is either rax, rdx
> or an immediate.
>
> For example, before the patch:
>   22:   push   %rax
>   23:   push   %rdx
>   24:   mov    %rsi,%r11
>   27:   xor    %edx,%edx
>   29:   div    %r11
>   2c:   mov    %rax,%r11
>   2f:   pop    %rdx
>   30:   pop    %rax
>   31:   mov    %r11,%rax
>   34:   leaveq
>   35:   retq
>
> After:
>   22:   push   %rdx
>   23:   xor    %edx,%edx
>   25:   div    %rsi
>   28:   pop    %rdx
>   29:   leaveq
>   2a:   retq
>
> Signed-off-by: Jie Meng <jmeng@fb.com>

Applied. Thanks

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-10-06  2:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-02  3:56 [PATCH v2 bpf-next] bpf,x64: Save bytes for DIV by reducing reg copies Jie Meng
2021-10-06  2:56 ` Alexei Starovoitov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).