* [PATCH v2 0/6] tcg: Restrict tcg_out_op() to arrays of TCG_MAX_OP_ARGS elements
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Aurelien Jarno, Cornelia Huck, qemu-arm, Alistair Francis,
Miroslav Rezanina, Thomas Huth, qemu-riscv, Huacai Chen,
Stefan Weil, Aleksandar Rikalo, Richard Henderson,
Andrzej Zaborowski, Palmer Dabbelt, qemu-s390x,
Philippe Mathieu-Daudé,
Jiaxun Yang
Since v1:
- Redo the whole change, only hoisting a variable when it is used 10+ times
  (see the condensed example below)
- Remove goto statement/label
- Take care of the following pattern:
    case INDEX_op_bswap64_i64:
        a2 = TCG_REG_R0;
        if (a0 == a1) {
            a0 = TCG_REG_R0;
            a2 = a1;
        }
        ...
        if (a0 == 0) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
                                         ^^^^^^^ take original args[0]
        }
Attempt to fix the warning reported by Miroslav using GCC 10:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg771520.html
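For context, here is a condensed before/after excerpt of the tcg/arm changes
in patch 1/6 — only a sketch of the shape of the rewrite, not a complete
function; c2 is the new local holding const_args[2]:

    /* Before: each case indexes args[] / const_args[] directly. */
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
                        args[0], args[1], args[2], const_args[2]);
        break;

    /* After: the most common arguments are loaded once, before the switch. */
    a0 = args[0];
    a1 = args[1];
    a2 = args[2];
    a3 = args[3];
    c2 = const_args[2];
    ...
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, a0, a1, a2, c2);
        break;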
Diff with v1:
Key:
[----] : patches are identical
[####] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively
001/6:[0063] [FC] 'tcg/arm: Hoist common argument loads in tcg_out_op()'
002/6:[down] 'tcg/arm: Replace goto statement by fall through comment'
003/6:[0190] [FC] 'tcg/ppc: Hoist common argument loads in tcg_out_op()'
004/6:[0136] [FC] 'tcg/s390: Hoist common argument loads in tcg_out_op()'
005/6:[----] [--] 'tcg: Restrict tcg_out_op() to arrays of TCG_MAX_OP_ARGS elements'
006/6:[----] [--] 'tcg: Restrict tcg_out_vec_op() to arrays of TCG_MAX_OP_ARGS elements'
Philippe Mathieu-Daudé (6):
tcg/arm: Hoist common argument loads in tcg_out_op()
tcg/arm: Replace goto statement by fall through comment
tcg/ppc: Hoist common argument loads in tcg_out_op()
tcg/s390: Hoist common argument loads in tcg_out_op()
tcg: Restrict tcg_out_op() to arrays of TCG_MAX_OP_ARGS elements
tcg: Restrict tcg_out_vec_op() to arrays of TCG_MAX_OP_ARGS elements
tcg/tcg.c | 19 +--
tcg/aarch64/tcg-target.c.inc | 3 +-
tcg/arm/tcg-target.c.inc | 196 +++++++++++++++----------------
tcg/i386/tcg-target.c.inc | 6 +-
tcg/mips/tcg-target.c.inc | 3 +-
tcg/ppc/tcg-target.c.inc | 191 ++++++++++++++----------------
tcg/riscv/tcg-target.c.inc | 3 +-
tcg/s390/tcg-target.c.inc | 222 +++++++++++++++++------------------
tcg/tci/tcg-target.c.inc | 5 +-
9 files changed, 311 insertions(+), 337 deletions(-)
-- 
2.26.2
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 1/6] tcg/arm: Hoist common argument loads in tcg_out_op()
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Richard Henderson, Aleksandar Rikalo, Philippe Mathieu-Daudé,
qemu-s390x, qemu-arm, Alistair Francis, Palmer Dabbelt,
Miroslav Rezanina, Aurelien Jarno
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/arm/tcg-target.c.inc | 192 +++++++++++++++++++--------------------
1 file changed, 92 insertions(+), 100 deletions(-)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 0fd11264544..59bd196994f 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1747,15 +1747,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
static void tcg_out_epilogue(TCGContext *s);
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
TCGArg a0, a1, a2, a3, a4, a5;
- int c;
+ int c, c2;
+
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ a3 = args[3];
+ c2 = const_args[2];
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, a0);
tcg_out_epilogue(s);
break;
case INDEX_op_goto_tb:
@@ -1765,7 +1773,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
TCGReg base = TCG_REG_PC;
tcg_debug_assert(s->tb_jmp_insn_offset == 0);
- ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
+ ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0);
dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
dil = sextract32(dif, 0, 12);
if (dif != dil) {
@@ -1778,74 +1786,68 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_movi32(s, COND_AL, base, ptr - dil);
}
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
- set_jmp_reset_offset(s, args[0]);
+ set_jmp_reset_offset(s, a0);
}
break;
case INDEX_op_goto_ptr:
- tcg_out_bx(s, COND_AL, args[0]);
+ tcg_out_bx(s, COND_AL, a0);
break;
case INDEX_op_br:
- tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
+ tcg_out_goto_label(s, COND_AL, arg_label(a0));
break;
case INDEX_op_ld8u_i32:
- tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld8u(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld8s_i32:
- tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld8s(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld16u_i32:
- tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld16u(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld16s_i32:
- tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld16s(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld_i32:
- tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld32u(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_st8_i32:
- tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_st8(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_st16_i32:
- tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_st16(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_st_i32:
- tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_st32(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_movcond_i32:
/* Constraints mean that v2 is always in the same register as dest,
* so we only need to do "if condition passed, move v1 to dest".
*/
- tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[1], args[2], const_args[2]);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a1, a2, c2);
tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
- ARITH_MVN, args[0], 0, args[3], const_args[3]);
+ ARITH_MVN, a0, 0, a3, const_args[3]);
break;
case INDEX_op_add_i32:
- tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, a0, a1, a2, c2);
break;
case INDEX_op_sub_i32:
if (const_args[1]) {
- if (const_args[2]) {
- tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
+ if (c2) {
+ tcg_out_movi32(s, COND_AL, a0, a1 - a2);
} else {
- tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
- args[0], args[2], args[1], 1);
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB, a0, a2, a1, 1);
}
} else {
- tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, a0, a1, a2, c2);
}
break;
case INDEX_op_and_i32:
- tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, a0, a1, a2, c2);
break;
case INDEX_op_andc_i32:
- tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, a0, a1, a2, c2);
break;
case INDEX_op_or_i32:
c = ARITH_ORR;
@@ -1854,11 +1856,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = ARITH_EOR;
/* Fall through. */
gen_arith:
- tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rI(s, COND_AL, c, a0, a1, a2, c2);
break;
case INDEX_op_add2_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
- a3 = args[3], a4 = args[4], a5 = args[5];
+ a4 = args[4], a5 = args[5];
if (a0 == a3 || (a0 == a5 && !const_args[5])) {
a0 = TCG_REG_TMP;
}
@@ -1866,15 +1867,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
a0, a2, a4, const_args[4]);
tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
a1, a3, a5, const_args[5]);
- tcg_out_mov_reg(s, COND_AL, args[0], a0);
+ tcg_out_mov_reg(s, COND_AL, a0, a0);
break;
case INDEX_op_sub2_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
- a3 = args[3], a4 = args[4], a5 = args[5];
+ a4 = args[4], a5 = args[5];
if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
a0 = TCG_REG_TMP;
}
- if (const_args[2]) {
+ if (c2) {
if (const_args[4]) {
tcg_out_movi32(s, COND_AL, a0, a4);
a4 = a0;
@@ -1884,7 +1884,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
}
- if (const_args[3]) {
+ if (const_a3) {
if (const_args[5]) {
tcg_out_movi32(s, COND_AL, a1, a5);
a5 = a1;
@@ -1894,69 +1894,64 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
a1, a3, a5, const_args[5]);
}
- tcg_out_mov_reg(s, COND_AL, args[0], a0);
+ tcg_out_mov_reg(s, COND_AL, a0, a0);
break;
case INDEX_op_neg_i32:
- tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, a0, a1, 0);
break;
case INDEX_op_not_i32:
- tcg_out_dat_reg(s, COND_AL,
- ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
+ tcg_out_dat_reg(s, COND_AL, ARITH_MVN, a0, 0, a1, SHIFT_IMM_LSL(0));
break;
case INDEX_op_mul_i32:
- tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_mul32(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_mulu2_i32:
- tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_umull32(s, COND_AL, a0, a1, a2, a3);
break;
case INDEX_op_muls2_i32:
- tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_smull32(s, COND_AL, a0, a1, a2, a3);
break;
- /* XXX: Perhaps args[2] & 0x1f is wrong */
+ /* XXX: Perhaps a2 & 0x1f is wrong */
case INDEX_op_shl_i32:
- c = const_args[2] ?
- SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
+ c = c2 ? SHIFT_IMM_LSL(a2 & 0x1f) : SHIFT_REG_LSL(a2);
goto gen_shift32;
case INDEX_op_shr_i32:
- c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
- SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
+ c = c2 ? (a2 & 0x1f) ? SHIFT_IMM_LSR(a2 & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(a2);
goto gen_shift32;
case INDEX_op_sar_i32:
- c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
- SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
+ c = c2 ? (a2 & 0x1f) ? SHIFT_IMM_ASR(a2 & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(a2);
goto gen_shift32;
case INDEX_op_rotr_i32:
- c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
- SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
+ c = c2 ? (a2 & 0x1f) ? SHIFT_IMM_ROR(a2 & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(a2);
/* Fall through. */
gen_shift32:
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, c);
break;
case INDEX_op_rotl_i32:
- if (const_args[2]) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
- ((0x20 - args[2]) & 0x1f) ?
- SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
+ if (c2) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1,
+ ((0x20 - a2) & 0x1f) ?
+ SHIFT_IMM_ROR((0x20 - a2) & 0x1f) :
SHIFT_IMM_LSL(0));
} else {
- tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, a2, 0x20);
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1,
SHIFT_REG_ROR(TCG_REG_TMP));
}
break;
case INDEX_op_ctz_i32:
- tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
+ tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0);
a1 = TCG_REG_TMP;
goto do_clz;
case INDEX_op_clz_i32:
- a1 = args[1];
do_clz:
- a0 = args[0];
- a2 = args[2];
- c = const_args[2];
+ c = c2;
if (c && a2 == 32) {
tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
break;
@@ -1970,17 +1965,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_brcond_i32:
tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[0], args[1], const_args[1]);
- tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
- arg_label(args[3]));
+ a0, a1, const_args[1]);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[a2], arg_label(a3));
break;
case INDEX_op_setcond_i32:
- tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[1], args[2], const_args[2]);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
- ARITH_MOV, args[0], 0, 1);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
- ARITH_MOV, args[0], 0, 0);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a1, a2, c2);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[a3],
+ ARITH_MOV, a0, 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(a3)],
+ ARITH_MOV, a0, 0, 0);
break;
case INDEX_op_brcond2_i32:
@@ -1989,9 +1982,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_setcond2_i32:
c = tcg_out_cmp2(s, args + 1, const_args + 1);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, a0, 0, 1);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
- ARITH_MOV, args[0], 0, 0);
+ ARITH_MOV, a0, 0, 0);
break;
case INDEX_op_qemu_ld_i32:
@@ -2008,63 +2001,62 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_bswap16_i32:
- tcg_out_bswap16(s, COND_AL, args[0], args[1]);
+ tcg_out_bswap16(s, COND_AL, a0, a1);
break;
case INDEX_op_bswap32_i32:
- tcg_out_bswap32(s, COND_AL, args[0], args[1]);
+ tcg_out_bswap32(s, COND_AL, a0, a1);
break;
case INDEX_op_ext8s_i32:
- tcg_out_ext8s(s, COND_AL, args[0], args[1]);
+ tcg_out_ext8s(s, COND_AL, a0, a1);
break;
case INDEX_op_ext16s_i32:
- tcg_out_ext16s(s, COND_AL, args[0], args[1]);
+ tcg_out_ext16s(s, COND_AL, a0, a1);
break;
case INDEX_op_ext16u_i32:
- tcg_out_ext16u(s, COND_AL, args[0], args[1]);
+ tcg_out_ext16u(s, COND_AL, a0, a1);
break;
case INDEX_op_deposit_i32:
- tcg_out_deposit(s, COND_AL, args[0], args[2],
- args[3], args[4], const_args[2]);
+ tcg_out_deposit(s, COND_AL, a0, a2, a3, args[4], c2);
break;
case INDEX_op_extract_i32:
- tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_extract(s, COND_AL, a0, a1, a2, a3);
break;
case INDEX_op_sextract_i32:
- tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_sextract(s, COND_AL, a0, a1, a2, a3);
break;
case INDEX_op_extract2_i32:
/* ??? These optimization vs zero should be generic. */
/* ??? But we can't substitute 2 for 1 in the opcode stream yet. */
if (const_args[1]) {
- if (const_args[2]) {
- tcg_out_movi(s, TCG_TYPE_REG, args[0], 0);
+ if (c2) {
+ tcg_out_movi(s, TCG_TYPE_REG, a0, 0);
} else {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
- args[2], SHIFT_IMM_LSL(32 - args[3]));
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0,
+ a2, SHIFT_IMM_LSL(32 - a3));
}
- } else if (const_args[2]) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
- args[1], SHIFT_IMM_LSR(args[3]));
+ } else if (c2) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0,
+ a1, SHIFT_IMM_LSR(a3));
} else {
/* We can do extract2 in 2 insns, vs the 3 required otherwise. */
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0,
- args[2], SHIFT_IMM_LSL(32 - args[3]));
- tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP,
- args[1], SHIFT_IMM_LSR(args[3]));
+ a2, SHIFT_IMM_LSL(32 - a3));
+ tcg_out_dat_reg(s, COND_AL, ARITH_ORR, a0, TCG_REG_TMP,
+ a1, SHIFT_IMM_LSR(a3));
}
break;
case INDEX_op_div_i32:
- tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_sdiv(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_divu_i32:
- tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_udiv(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_mb:
- tcg_out_mb(s, args[0]);
+ tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 2/6] tcg/arm: Replace goto statement by fall through comment
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Richard Henderson, Aleksandar Rikalo, Philippe Mathieu-Daudé,
qemu-s390x, qemu-arm, Alistair Francis, Palmer Dabbelt,
Miroslav Rezanina, Aurelien Jarno
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/arm/tcg-target.c.inc | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 59bd196994f..0ffb2b13d14 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1947,10 +1947,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_ctz_i32:
tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0);
a1 = TCG_REG_TMP;
- goto do_clz;
-
+ /* Fall through. */
case INDEX_op_clz_i32:
- do_clz:
c = c2;
if (c && a2 == 32) {
tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 3/6] tcg/ppc: Hoist common argument loads in tcg_out_op()
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Richard Henderson, Aleksandar Rikalo, Philippe Mathieu-Daudé,
qemu-s390x, qemu-arm, Alistair Francis, Palmer Dabbelt,
Miroslav Rezanina, Aurelien Jarno
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/ppc/tcg-target.c.inc | 188 ++++++++++++++++++---------------------
1 file changed, 85 insertions(+), 103 deletions(-)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 19a4a12f155..70b747a8a30 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2357,15 +2357,22 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out32(s, BCLR | BO_ALWAYS);
}
-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
- const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
TCGArg a0, a1, a2;
- int c;
+ int c, c2;
+
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ c2 = const_args[2];
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, a0);
tcg_out_b(s, 0, tcg_code_gen_epilogue);
break;
case INDEX_op_goto_tb:
@@ -2389,11 +2396,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
/* Indirect jump. */
tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
- (intptr_t)(s->tb_jmp_insn_offset + args[0]));
+ (intptr_t)(s->tb_jmp_insn_offset + a0));
}
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
tcg_out32(s, BCCTR | BO_ALWAYS);
- set_jmp_reset_offset(s, args[0]);
+ set_jmp_reset_offset(s, a0);
if (USE_REG_TB) {
/* For the unlinked case, need to reset TCG_REG_TB. */
tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
@@ -2403,7 +2410,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_goto_ptr:
tcg_out32(s, MTSPR | RS(args[0]) | CTR);
if (USE_REG_TB) {
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
}
tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
tcg_out32(s, BCCTR | BO_ALWAYS);
@@ -2424,49 +2431,48 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
- tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
break;
case INDEX_op_ld8s_i32:
case INDEX_op_ld8s_i64:
- tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
break;
case INDEX_op_ld16u_i32:
case INDEX_op_ld16u_i64:
- tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LHZ, LHZX, a0, a1, a2);
break;
case INDEX_op_ld16s_i32:
case INDEX_op_ld16s_i64:
- tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LHA, LHAX, a0, a1, a2);
break;
case INDEX_op_ld_i32:
case INDEX_op_ld32u_i64:
- tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LWZ, LWZX, a0, a1, a2);
break;
case INDEX_op_ld32s_i64:
- tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LWA, LWAX, a0, a1, a2);
break;
case INDEX_op_ld_i64:
- tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, LD, LDX, a0, a1, a2);
break;
case INDEX_op_st8_i32:
case INDEX_op_st8_i64:
- tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, STB, STBX, a0, a1, a2);
break;
case INDEX_op_st16_i32:
case INDEX_op_st16_i64:
- tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, STH, STHX, a0, a1, a2);
break;
case INDEX_op_st_i32:
case INDEX_op_st32_i64:
- tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, STW, STWX, a0, a1, a2);
break;
case INDEX_op_st_i64:
- tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
+ tcg_out_mem_long(s, STD, STDX, a0, a1, a2);
break;
case INDEX_op_add_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
do_addi_32:
tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
@@ -2475,7 +2481,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_sub_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[1]) {
if (const_args[2]) {
tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
@@ -2491,7 +2496,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_and_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_andi32(s, a0, a1, a2);
} else {
@@ -2499,7 +2503,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_and_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_andi64(s, a0, a1, a2);
} else {
@@ -2508,7 +2511,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_or_i64:
case INDEX_op_or_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_ori32(s, a0, a1, a2);
} else {
@@ -2517,7 +2519,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_xori32(s, a0, a1, a2);
} else {
@@ -2525,7 +2526,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_andc_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_andi32(s, a0, a1, ~a2);
} else {
@@ -2533,7 +2533,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_andc_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_andi64(s, a0, a1, ~a2);
} else {
@@ -2542,57 +2541,52 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_orc_i32:
if (const_args[2]) {
- tcg_out_ori32(s, args[0], args[1], ~args[2]);
+ tcg_out_ori32(s, a0, a1, ~args[2]);
break;
}
/* FALLTHRU */
case INDEX_op_orc_i64:
- tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, ORC | SAB(args[1], a0, a2));
break;
case INDEX_op_eqv_i32:
if (const_args[2]) {
- tcg_out_xori32(s, args[0], args[1], ~args[2]);
+ tcg_out_xori32(s, a0, a1, ~args[2]);
break;
}
/* FALLTHRU */
case INDEX_op_eqv_i64:
- tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, EQV | SAB(args[1], a0, a2));
break;
case INDEX_op_nand_i32:
case INDEX_op_nand_i64:
- tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, NAND | SAB(args[1], a0, a2));
break;
case INDEX_op_nor_i32:
case INDEX_op_nor_i64:
- tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, NOR | SAB(args[1], a0, a2));
break;
case INDEX_op_clz_i32:
- tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
- args[2], const_args[2]);
+ tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, a0, a1, a2, const_args[2]);
break;
case INDEX_op_ctz_i32:
- tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
- args[2], const_args[2]);
+ tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, a0, a1, a2, const_args[2]);
break;
case INDEX_op_ctpop_i32:
- tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
+ tcg_out32(s, CNTPOPW | SAB(args[1], a0, 0));
break;
case INDEX_op_clz_i64:
- tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
- args[2], const_args[2]);
+ tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, a0, a1, a2, const_args[2]);
break;
case INDEX_op_ctz_i64:
- tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
- args[2], const_args[2]);
+ tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, a0, a1, a2, const_args[2]);
break;
case INDEX_op_ctpop_i64:
- tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
+ tcg_out32(s, CNTPOPD | SAB(args[1], a0, 0));
break;
case INDEX_op_mul_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out32(s, MULLI | TAI(a0, a1, a2));
} else {
@@ -2601,27 +2595,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_div_i32:
- tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, DIVW | TAB(args[0], a1, a2));
break;
case INDEX_op_divu_i32:
- tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, DIVWU | TAB(args[0], a1, a2));
break;
case INDEX_op_shl_i32:
if (const_args[2]) {
/* Limit immediate shift count lest we create an illegal insn. */
- tcg_out_shli32(s, args[0], args[1], args[2] & 31);
+ tcg_out_shli32(s, a0, a1, a2 & 31);
} else {
- tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SLW | SAB(args[1], a0, a2));
}
break;
case INDEX_op_shr_i32:
if (const_args[2]) {
/* Limit immediate shift count lest we create an illegal insn. */
- tcg_out_shri32(s, args[0], args[1], args[2] & 31);
+ tcg_out_shri32(s, a0, a1, a2 & 31);
} else {
- tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRW | SAB(args[1], a0, a2));
}
break;
case INDEX_op_sar_i32:
@@ -2629,33 +2623,32 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
/* Limit immediate shift count lest we create an illegal insn. */
tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31));
} else {
- tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRAW | SAB(args[1], a0, a2));
}
break;
case INDEX_op_rotl_i32:
if (const_args[2]) {
- tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
+ tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
} else {
- tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
+ tcg_out32(s, RLWNM | SAB(args[1], a0, a2)
| MB(0) | ME(31));
}
break;
case INDEX_op_rotr_i32:
if (const_args[2]) {
- tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
+ tcg_out_rlw(s, RLWINM, a0, a1, 32 - a2, 0, 31);
} else {
- tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
- tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
- | MB(0) | ME(31));
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, a2, 32));
+ tcg_out32(s, RLWNM | SAB(args[1], a0, TCG_REG_R0) | MB(0) | ME(31));
}
break;
case INDEX_op_brcond_i32:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ tcg_out_brcond(s, a2, a0, a1, const_args[1],
arg_label(args[3]), TCG_TYPE_I32);
break;
case INDEX_op_brcond_i64:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ tcg_out_brcond(s, a2, a0, a1, const_args[1],
arg_label(args[3]), TCG_TYPE_I64);
break;
case INDEX_op_brcond2_i32:
@@ -2669,11 +2662,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_not_i32:
case INDEX_op_not_i64:
- tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
+ tcg_out32(s, NOR | SAB(args[1], a0, a1));
break;
case INDEX_op_add_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
do_addi_64:
tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
@@ -2682,7 +2674,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_sub_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[1]) {
if (const_args[2]) {
tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
@@ -2700,17 +2691,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_shl_i64:
if (const_args[2]) {
/* Limit immediate shift count lest we create an illegal insn. */
- tcg_out_shli64(s, args[0], args[1], args[2] & 63);
+ tcg_out_shli64(s, a0, a1, a2 & 63);
} else {
- tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SLD | SAB(args[1], a0, a2));
}
break;
case INDEX_op_shr_i64:
if (const_args[2]) {
/* Limit immediate shift count lest we create an illegal insn. */
- tcg_out_shri64(s, args[0], args[1], args[2] & 63);
+ tcg_out_shri64(s, a0, a1, a2 & 63);
} else {
- tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRD | SAB(args[1], a0, a2));
}
break;
case INDEX_op_sar_i64:
@@ -2718,27 +2709,26 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
} else {
- tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRAD | SAB(args[1], a0, a2));
}
break;
case INDEX_op_rotl_i64:
if (const_args[2]) {
- tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
+ tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
} else {
- tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
+ tcg_out32(s, RLDCL | SAB(args[1], a0, a2) | MB64(0));
}
break;
case INDEX_op_rotr_i64:
if (const_args[2]) {
- tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
+ tcg_out_rld(s, RLDICL, a0, a1, 64 - a2, 0);
} else {
- tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
- tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, a2, 64));
+ tcg_out32(s, RLDCL | SAB(args[1], a0, TCG_REG_R0) | MB64(0));
}
break;
case INDEX_op_mul_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out32(s, MULLI | TAI(a0, a1, a2));
} else {
@@ -2746,10 +2736,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_div_i64:
- tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, DIVD | TAB(args[0], a1, a2));
break;
case INDEX_op_divu_i64:
- tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, DIVDU | TAB(args[0], a1, a2));
break;
case INDEX_op_qemu_ld_i32:
@@ -2781,16 +2771,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out32(s, c | RS(args[1]) | RA(args[0]));
break;
case INDEX_op_extu_i32_i64:
- tcg_out_ext32u(s, args[0], args[1]);
+ tcg_out_ext32u(s, a0, a1);
break;
case INDEX_op_setcond_i32:
- tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
- const_args[2]);
+ tcg_out_setcond(s, TCG_TYPE_I32, args[3], a0, a1, a2, const_args[2]);
break;
case INDEX_op_setcond_i64:
- tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
- const_args[2]);
+ tcg_out_setcond(s, TCG_TYPE_I64, args[3], a0, a1, a2, const_args[2]);
break;
case INDEX_op_setcond2_i32:
tcg_out_setcond2(s, args, const_args);
@@ -2798,7 +2786,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_bswap16_i32:
case INDEX_op_bswap16_i64:
- a0 = args[0], a1 = args[1];
/* a1 = abcd */
if (a0 != a1) {
/* a0 = (a1 r<< 24) & 0xff # 000c */
@@ -2818,7 +2805,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_bswap32_i32:
case INDEX_op_bswap32_i64:
/* Stolen from gcc's builtin_bswap32 */
- a1 = args[1];
a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
/* a1 = args[1] # abcd */
@@ -2835,7 +2821,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_bswap64_i64:
- a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
+ a2 = TCG_REG_R0;
if (a0 == a1) {
a0 = TCG_REG_R0;
a2 = a1;
@@ -2869,36 +2855,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_deposit_i32:
if (const_args[2]) {
uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
- tcg_out_andi32(s, args[0], args[0], ~mask);
+ tcg_out_andi32(s, a0, a0, ~mask);
} else {
- tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
+ tcg_out_rlw(s, RLWIMI, a0, a2, args[3],
32 - args[3] - args[4], 31 - args[3]);
}
break;
case INDEX_op_deposit_i64:
if (const_args[2]) {
uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
- tcg_out_andi64(s, args[0], args[0], ~mask);
+ tcg_out_andi64(s, a0, a0, ~mask);
} else {
- tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
- 64 - args[3] - args[4]);
+ tcg_out_rld(s, RLDIMI, a0, a2, args[3], 64 - args[3] - args[4]);
}
break;
case INDEX_op_extract_i32:
- tcg_out_rlw(s, RLWINM, args[0], args[1],
- 32 - args[2], 32 - args[3], 31);
+ tcg_out_rlw(s, RLWINM, a0, a1, 32 - a2, 32 - args[3], 31);
break;
case INDEX_op_extract_i64:
- tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
+ tcg_out_rld(s, RLDICL, a0, a1, 64 - a2, 64 - args[3]);
break;
case INDEX_op_movcond_i32:
- tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
+ tcg_out_movcond(s, TCG_TYPE_I32, args[5], a0, a1, a2,
args[3], args[4], const_args[2]);
break;
case INDEX_op_movcond_i64:
- tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
+ tcg_out_movcond(s, TCG_TYPE_I64, args[5], a0, a1, a2,
args[3], args[4], const_args[2]);
break;
@@ -2910,14 +2894,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
/* Note that the CA bit is defined based on the word size of the
environment. So in 64-bit mode it's always carry-out of bit 63.
The fallback code using deposit works just as well for 32-bit. */
- a0 = args[0], a1 = args[1];
if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
a0 = TCG_REG_R0;
}
if (const_args[4]) {
- tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
+ tcg_out32(s, ADDIC | TAI(a0, a2, args[4]));
} else {
- tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
+ tcg_out32(s, ADDC | TAB(a0, a2, args[4]));
}
if (const_args[5]) {
tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
@@ -2934,14 +2917,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
#else
case INDEX_op_sub2_i32:
#endif
- a0 = args[0], a1 = args[1];
if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
a0 = TCG_REG_R0;
}
if (const_args[2]) {
- tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
+ tcg_out32(s, SUBFIC | TAI(a0, args[4], a2));
} else {
- tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
+ tcg_out32(s, SUBFC | TAB(a0, args[4], a2));
}
if (const_args[3]) {
tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
@@ -2954,20 +2936,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_muluh_i32:
- tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHWU | TAB(args[0], a1, a2));
break;
case INDEX_op_mulsh_i32:
- tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHW | TAB(args[0], a1, a2));
break;
case INDEX_op_muluh_i64:
- tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHDU | TAB(args[0], a1, a2));
break;
case INDEX_op_mulsh_i64:
- tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHD | TAB(args[0], a1, a2));
break;
case INDEX_op_mb:
- tcg_out_mb(s, args[0]);
+ tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread
+ tcg_out_shri32(s, a0, a1, a2 & 31);
} else {
- tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRW | SAB(args[1], a0, a2));
}
break;
case INDEX_op_sar_i32:
@@ -2629,33 +2623,32 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
/* Limit immediate shift count lest we create an illegal insn. */
tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31));
} else {
- tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRAW | SAB(args[1], a0, a2));
}
break;
case INDEX_op_rotl_i32:
if (const_args[2]) {
- tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
+ tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
} else {
- tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
+ tcg_out32(s, RLWNM | SAB(args[1], a0, a2)
| MB(0) | ME(31));
}
break;
case INDEX_op_rotr_i32:
if (const_args[2]) {
- tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
+ tcg_out_rlw(s, RLWINM, a0, a1, 32 - a2, 0, 31);
} else {
- tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
- tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
- | MB(0) | ME(31));
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, a2, 32));
+ tcg_out32(s, RLWNM | SAB(args[1], a0, TCG_REG_R0) | MB(0) | ME(31));
}
break;
case INDEX_op_brcond_i32:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ tcg_out_brcond(s, a2, a0, a1, const_args[1],
arg_label(args[3]), TCG_TYPE_I32);
break;
case INDEX_op_brcond_i64:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ tcg_out_brcond(s, a2, a0, a1, const_args[1],
arg_label(args[3]), TCG_TYPE_I64);
break;
case INDEX_op_brcond2_i32:
@@ -2669,11 +2662,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_not_i32:
case INDEX_op_not_i64:
- tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
+ tcg_out32(s, NOR | SAB(args[1], a0, a1));
break;
case INDEX_op_add_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
do_addi_64:
tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
@@ -2682,7 +2674,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_sub_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[1]) {
if (const_args[2]) {
tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
@@ -2700,17 +2691,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_shl_i64:
if (const_args[2]) {
/* Limit immediate shift count lest we create an illegal insn. */
- tcg_out_shli64(s, args[0], args[1], args[2] & 63);
+ tcg_out_shli64(s, a0, a1, a2 & 63);
} else {
- tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SLD | SAB(args[1], a0, a2));
}
break;
case INDEX_op_shr_i64:
if (const_args[2]) {
/* Limit immediate shift count lest we create an illegal insn. */
- tcg_out_shri64(s, args[0], args[1], args[2] & 63);
+ tcg_out_shri64(s, a0, a1, a2 & 63);
} else {
- tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRD | SAB(args[1], a0, a2));
}
break;
case INDEX_op_sar_i64:
@@ -2718,27 +2709,26 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
} else {
- tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, SRAD | SAB(args[1], a0, a2));
}
break;
case INDEX_op_rotl_i64:
if (const_args[2]) {
- tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
+ tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
} else {
- tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
+ tcg_out32(s, RLDCL | SAB(args[1], a0, a2) | MB64(0));
}
break;
case INDEX_op_rotr_i64:
if (const_args[2]) {
- tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
+ tcg_out_rld(s, RLDICL, a0, a1, 64 - a2, 0);
} else {
- tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
- tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, a2, 64));
+ tcg_out32(s, RLDCL | SAB(args[1], a0, TCG_REG_R0) | MB64(0));
}
break;
case INDEX_op_mul_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out32(s, MULLI | TAI(a0, a1, a2));
} else {
@@ -2746,10 +2736,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_div_i64:
- tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, DIVD | TAB(args[0], a1, a2));
break;
case INDEX_op_divu_i64:
- tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, DIVDU | TAB(args[0], a1, a2));
break;
case INDEX_op_qemu_ld_i32:
@@ -2781,16 +2771,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out32(s, c | RS(args[1]) | RA(args[0]));
break;
case INDEX_op_extu_i32_i64:
- tcg_out_ext32u(s, args[0], args[1]);
+ tcg_out_ext32u(s, a0, a1);
break;
case INDEX_op_setcond_i32:
- tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
- const_args[2]);
+ tcg_out_setcond(s, TCG_TYPE_I32, args[3], a0, a1, a2, const_args[2]);
break;
case INDEX_op_setcond_i64:
- tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
- const_args[2]);
+ tcg_out_setcond(s, TCG_TYPE_I64, args[3], a0, a1, a2, const_args[2]);
break;
case INDEX_op_setcond2_i32:
tcg_out_setcond2(s, args, const_args);
@@ -2798,7 +2786,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_bswap16_i32:
case INDEX_op_bswap16_i64:
- a0 = args[0], a1 = args[1];
/* a1 = abcd */
if (a0 != a1) {
/* a0 = (a1 r<< 24) & 0xff # 000c */
@@ -2818,7 +2805,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_bswap32_i32:
case INDEX_op_bswap32_i64:
/* Stolen from gcc's builtin_bswap32 */
- a1 = args[1];
a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
/* a1 = args[1] # abcd */
@@ -2835,7 +2821,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_bswap64_i64:
- a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
+ a2 = TCG_REG_R0;
if (a0 == a1) {
a0 = TCG_REG_R0;
a2 = a1;
@@ -2869,36 +2855,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_deposit_i32:
if (const_args[2]) {
uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
- tcg_out_andi32(s, args[0], args[0], ~mask);
+ tcg_out_andi32(s, a0, a0, ~mask);
} else {
- tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
+ tcg_out_rlw(s, RLWIMI, a0, a2, args[3],
32 - args[3] - args[4], 31 - args[3]);
}
break;
case INDEX_op_deposit_i64:
if (const_args[2]) {
uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
- tcg_out_andi64(s, args[0], args[0], ~mask);
+ tcg_out_andi64(s, a0, a0, ~mask);
} else {
- tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
- 64 - args[3] - args[4]);
+ tcg_out_rld(s, RLDIMI, a0, a2, args[3], 64 - args[3] - args[4]);
}
break;
case INDEX_op_extract_i32:
- tcg_out_rlw(s, RLWINM, args[0], args[1],
- 32 - args[2], 32 - args[3], 31);
+ tcg_out_rlw(s, RLWINM, a0, a1, 32 - a2, 32 - args[3], 31);
break;
case INDEX_op_extract_i64:
- tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
+ tcg_out_rld(s, RLDICL, a0, a1, 64 - a2, 64 - args[3]);
break;
case INDEX_op_movcond_i32:
- tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
+ tcg_out_movcond(s, TCG_TYPE_I32, args[5], a0, a1, a2,
args[3], args[4], const_args[2]);
break;
case INDEX_op_movcond_i64:
- tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
+ tcg_out_movcond(s, TCG_TYPE_I64, args[5], a0, a1, a2,
args[3], args[4], const_args[2]);
break;
@@ -2910,14 +2894,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
/* Note that the CA bit is defined based on the word size of the
environment. So in 64-bit mode it's always carry-out of bit 63.
The fallback code using deposit works just as well for 32-bit. */
- a0 = args[0], a1 = args[1];
if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
a0 = TCG_REG_R0;
}
if (const_args[4]) {
- tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
+ tcg_out32(s, ADDIC | TAI(a0, a2, args[4]));
} else {
- tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
+ tcg_out32(s, ADDC | TAB(a0, a2, args[4]));
}
if (const_args[5]) {
tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
@@ -2934,14 +2917,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
#else
case INDEX_op_sub2_i32:
#endif
- a0 = args[0], a1 = args[1];
if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
a0 = TCG_REG_R0;
}
if (const_args[2]) {
- tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
+ tcg_out32(s, SUBFIC | TAI(a0, args[4], a2));
} else {
- tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
+ tcg_out32(s, SUBFC | TAB(a0, args[4], a2));
}
if (const_args[3]) {
tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
@@ -2954,20 +2936,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_muluh_i32:
- tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHWU | TAB(args[0], a1, a2));
break;
case INDEX_op_mulsh_i32:
- tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHW | TAB(args[0], a1, a2));
break;
case INDEX_op_muluh_i64:
- tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHDU | TAB(args[0], a1, a2));
break;
case INDEX_op_mulsh_i64:
- tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
+ tcg_out32(s, MULHD | TAB(args[0], a1, a2));
break;
case INDEX_op_mb:
- tcg_out_mb(s, args[0]);
+ tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/6] tcg/ppc: Hoist common argument loads in tcg_out_op()
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-14 0:28 ` Richard Henderson
-1 siblings, 0 replies; 18+ messages in thread
From: Richard Henderson @ 2021-01-14 0:28 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Aleksandar Rikalo, qemu-s390x, qemu-arm, Alistair Francis,
Palmer Dabbelt, Miroslav Rezanina, Aurelien Jarno
On 1/13/21 7:24 AM, Philippe Mathieu-Daudé wrote:
> case INDEX_op_ld8s_i32:
> case INDEX_op_ld8s_i64:
> - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
> + tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
> tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
Missed replacements.
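That is, the fully hoisted form of the quoted case would presumably read

    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
        tcg_out32(s, EXTSB | RS(a0) | RA(a0));
        break;

i.e. with a0 used in the EXTSB line as well (a0 and args[0] are still equal
at this point, so this is only a consistency sketch, not a behavioural fix).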
> - a0 = args[0], a1 = args[1], a2 = args[2];
> if (const_args[2]) {
Missed replacement.
> do_addi_32:
> tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
> @@ -2475,7 +2481,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
> }
> break;
> case INDEX_op_sub_i32:
> - a0 = args[0], a1 = args[1], a2 = args[2];
> if (const_args[1]) {
> if (const_args[2]) {
And again.
Let's just drop the hoisting parts and only do the signature parts for now.
I'd rather think of a way to split up this large function than waste time
optimizing it.
r~
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/6] tcg/ppc: Hoist common argument loads in tcg_out_op()
2021-01-14 0:28 ` Richard Henderson
@ 2021-01-14 9:59 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-14 9:59 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
Cc: Thomas Huth, Cornelia Huck, qemu-riscv, Stefan Weil, Huacai Chen,
Aleksandar Rikalo, qemu-s390x, qemu-arm, Alistair Francis,
Palmer Dabbelt, Miroslav Rezanina, Aurelien Jarno
On 1/14/21 1:28 AM, Richard Henderson wrote:
>
> Let's just drop the hoisting parts and only do the signature parts for now.
> I'd rather think of a way to split up this large function than waste time
> optimizing it.
Agreed :) Thanks!
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 4/6] tcg/s390: Hoist common argument loads in tcg_out_op()
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Richard Henderson, Aleksandar Rikalo, Philippe Mathieu-Daudé,
qemu-s390x, qemu-arm, Alistair Francis, Palmer Dabbelt,
Miroslav Rezanina, Aurelien Jarno
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/s390/tcg-target.c.inc | 222 ++++++++++++++++++--------------------
1 file changed, 107 insertions(+), 115 deletions(-)
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index d7ef0790556..ec202e79cfc 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -1732,15 +1732,22 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
case glue(glue(INDEX_op_,x),_i64)
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
S390Opcode op, op2;
- TCGArg a0, a1, a2;
+ TCGArg a0, a1, a2, a4;
+ int c2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ a4 = args[4];
+ c2 = const_args[2];
switch (opc) {
case INDEX_op_exit_tb:
/* Reuse the zeroing that exists for goto_ptr. */
- a0 = args[0];
if (a0 == 0) {
tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
} else {
@@ -1750,7 +1757,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_tb:
- a0 = args[0];
if (s->tb_jmp_insn_offset) {
/*
* branch displacement must be aligned for atomic patching;
@@ -1784,7 +1790,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_ptr:
- a0 = args[0];
if (USE_REG_TB) {
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
}
@@ -1794,44 +1799,42 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
OP_32_64(ld8u):
/* ??? LLC (RXY format) is only present with the extended-immediate
facility, whereas LLGC is always present. */
- tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, 0, RXY_LLGC, a0, a1, TCG_REG_NONE, a2);
break;
OP_32_64(ld8s):
/* ??? LB is no smaller than LGB, so no point to using it. */
- tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, 0, RXY_LGB, a0, a1, TCG_REG_NONE, a2);
break;
OP_32_64(ld16u):
/* ??? LLH (RXY format) is only present with the extended-immediate
facility, whereas LLGH is always present. */
- tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, 0, RXY_LLGH, a0, a1, TCG_REG_NONE, a2);
break;
case INDEX_op_ld16s_i32:
- tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, RX_LH, RXY_LHY, a0, a1, TCG_REG_NONE, a2);
break;
case INDEX_op_ld_i32:
- tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
break;
OP_32_64(st8):
- tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
- TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, RX_STC, RXY_STCY, a0, a1, TCG_REG_NONE, a2);
break;
OP_32_64(st16):
- tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
- TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, RX_STH, RXY_STHY, a0, a1, TCG_REG_NONE, a2);
break;
case INDEX_op_st_i32:
- tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
break;
case INDEX_op_add_i32:
- a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ a2 = (int32_t)args[2];
if (const_args[2]) {
do_addi_32:
if (a0 == a1) {
@@ -1852,9 +1855,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
case INDEX_op_sub_i32:
- a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ a2 = (int32_t)args[2];
if (const_args[2]) {
- a2 = -a2;
+ a2 = -args[2];
goto do_addi_32;
} else if (a0 == a1) {
tcg_out_insn(s, RR, SR, a0, a2);
@@ -1864,7 +1867,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_and_i32:
- a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ a2 = (uint32_t)args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
tgen_andi(s, TCG_TYPE_I32, a0, a2);
@@ -1875,7 +1878,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
case INDEX_op_or_i32:
- a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ a2 = (uint32_t)args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
tgen_ori(s, TCG_TYPE_I32, a0, a2);
@@ -1886,45 +1889,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
case INDEX_op_xor_i32:
- a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ a2 = (uint32_t)args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
tgen_xori(s, TCG_TYPE_I32, a0, a2);
} else if (a0 == a1) {
- tcg_out_insn(s, RR, XR, args[0], args[2]);
+ tcg_out_insn(s, RR, XR, a0, a2);
} else {
tcg_out_insn(s, RRF, XRK, a0, a1, a2);
}
break;
case INDEX_op_neg_i32:
- tcg_out_insn(s, RR, LCR, args[0], args[1]);
+ tcg_out_insn(s, RR, LCR, a0, a1);
break;
case INDEX_op_mul_i32:
if (const_args[2]) {
if ((int32_t)args[2] == (int16_t)args[2]) {
- tcg_out_insn(s, RI, MHI, args[0], args[2]);
+ tcg_out_insn(s, RI, MHI, a0, a2);
} else {
- tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
+ tcg_out_insn(s, RIL, MSFI, a0, a2);
}
} else {
- tcg_out_insn(s, RRE, MSR, args[0], args[2]);
+ tcg_out_insn(s, RRE, MSR, a0, a2);
}
break;
case INDEX_op_div2_i32:
- tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
+ tcg_out_insn(s, RR, DR, TCG_REG_R2, a4);
break;
case INDEX_op_divu2_i32:
- tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
+ tcg_out_insn(s, RRE, DLR, TCG_REG_R2, a4);
break;
case INDEX_op_shl_i32:
op = RS_SLL;
op2 = RSY_SLLK;
do_shift32:
- a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ a2 = (int32_t)args[2];
if (a0 == a1) {
if (const_args[2]) {
tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
@@ -1952,110 +1955,107 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_rotl_i32:
/* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
if (const_args[2]) {
- tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_sh64(s, RSY_RLL, a0, a1, TCG_REG_NONE, a2);
} else {
- tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
+ tcg_out_sh64(s, RSY_RLL, a0, a1, a2, 0);
}
break;
case INDEX_op_rotr_i32:
if (const_args[2]) {
- tcg_out_sh64(s, RSY_RLL, args[0], args[1],
- TCG_REG_NONE, (32 - args[2]) & 31);
+ tcg_out_sh64(s, RSY_RLL, a0, a1, TCG_REG_NONE, (32 - a2) & 31);
} else {
- tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
- tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
+ tcg_out_insn(s, RR, LCR, TCG_TMP0, a2);
+ tcg_out_sh64(s, RSY_RLL, a0, a1, TCG_TMP0, 0);
}
break;
case INDEX_op_ext8s_i32:
- tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
+ tgen_ext8s(s, TCG_TYPE_I32, a0, a1);
break;
case INDEX_op_ext16s_i32:
- tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
+ tgen_ext16s(s, TCG_TYPE_I32, a0, a1);
break;
case INDEX_op_ext8u_i32:
- tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
+ tgen_ext8u(s, TCG_TYPE_I32, a0, a1);
break;
case INDEX_op_ext16u_i32:
- tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
+ tgen_ext16u(s, TCG_TYPE_I32, a0, a1);
break;
OP_32_64(bswap16):
/* The TCG bswap definition requires bits 0-47 already be zero.
Thus we don't need the G-type insns to implement bswap16_i64. */
- tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
- tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16);
+ tcg_out_insn(s, RRE, LRVR, a0, a1);
+ tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
break;
OP_32_64(bswap32):
- tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
+ tcg_out_insn(s, RRE, LRVR, a0, a1);
break;
case INDEX_op_add2_i32:
if (const_args[4]) {
- tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
+ tcg_out_insn(s, RIL, ALFI, a0, a4);
} else {
- tcg_out_insn(s, RR, ALR, args[0], args[4]);
+ tcg_out_insn(s, RR, ALR, a0, a4);
}
- tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+ tcg_out_insn(s, RRE, ALCR, a1, args[5]);
break;
case INDEX_op_sub2_i32:
if (const_args[4]) {
- tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
+ tcg_out_insn(s, RIL, SLFI, a0, a4);
} else {
- tcg_out_insn(s, RR, SLR, args[0], args[4]);
+ tcg_out_insn(s, RR, SLR, a0, a4);
}
- tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+ tcg_out_insn(s, RRE, SLBR, a1, args[5]);
break;
case INDEX_op_br:
- tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
+ tgen_branch(s, S390_CC_ALWAYS, arg_label(a0));
break;
case INDEX_op_brcond_i32:
- tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
- args[1], const_args[1], arg_label(args[3]));
+ tgen_brcond(s, TCG_TYPE_I32, a2, a0,
+ a1, const_args[1], arg_label(args[3]));
break;
case INDEX_op_setcond_i32:
- tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
- args[2], const_args[2]);
+ tgen_setcond(s, TCG_TYPE_I32, args[3], a0, a1, a2, const_args[2]);
break;
case INDEX_op_movcond_i32:
- tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
- args[2], const_args[2], args[3], const_args[3]);
+ tgen_movcond(s, TCG_TYPE_I32, args[5], a0, a1,
+ a2, const_args[2], args[3], const_args[3]);
break;
case INDEX_op_qemu_ld_i32:
/* ??? Technically we can use a non-extending instruction. */
case INDEX_op_qemu_ld_i64:
- tcg_out_qemu_ld(s, args[0], args[1], args[2]);
+ tcg_out_qemu_ld(s, a0, a1, a2);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
- tcg_out_qemu_st(s, args[0], args[1], args[2]);
+ tcg_out_qemu_st(s, a0, a1, a2);
break;
case INDEX_op_ld16s_i64:
- tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, 0, RXY_LGH, a0, a1, TCG_REG_NONE, a2);
break;
case INDEX_op_ld32u_i64:
- tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, 0, RXY_LLGF, a0, a1, TCG_REG_NONE, a2);
break;
case INDEX_op_ld32s_i64:
- tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_mem(s, 0, RXY_LGF, a0, a1, TCG_REG_NONE, a2);
break;
case INDEX_op_ld_i64:
- tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
break;
case INDEX_op_st32_i64:
- tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
break;
case INDEX_op_st_i64:
- tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
break;
case INDEX_op_add_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
do_addi_64:
if (a0 == a1) {
@@ -2084,7 +2084,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
case INDEX_op_sub_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
a2 = -a2;
goto do_addi_64;
@@ -2096,18 +2095,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_and_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
- tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
+ tgen_andi(s, TCG_TYPE_I64, a0, a2);
} else if (a0 == a1) {
- tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+ tcg_out_insn(s, RRE, NGR, a0, a2);
} else {
tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
}
break;
case INDEX_op_or_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen_ori(s, TCG_TYPE_I64, a0, a2);
@@ -2118,7 +2115,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
case INDEX_op_xor_i64:
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen_xori(s, TCG_TYPE_I64, a0, a2);
@@ -2130,21 +2126,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_neg_i64:
- tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
+ tcg_out_insn(s, RRE, LCGR, a0, a1);
break;
case INDEX_op_bswap64_i64:
- tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
+ tcg_out_insn(s, RRE, LRVGR, a0, a1);
break;
case INDEX_op_mul_i64:
if (const_args[2]) {
- if (args[2] == (int16_t)args[2]) {
- tcg_out_insn(s, RI, MGHI, args[0], args[2]);
+ if (a2 == (int16_t)args[2]) {
+ tcg_out_insn(s, RI, MGHI, a0, a2);
} else {
- tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
+ tcg_out_insn(s, RIL, MSGFI, a0, a2);
}
} else {
- tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
+ tcg_out_insn(s, RRE, MSGR, a0, a2);
}
break;
@@ -2153,10 +2149,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
into R3 with this definition, but as we do in fact always
produce both quotient and remainder using INDEX_op_div_i64
instead requires jumping through even more hoops. */
- tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
+ tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, a4);
break;
case INDEX_op_divu2_i64:
- tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
+ tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, a4);
break;
case INDEX_op_mulu2_i64:
tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
@@ -2166,9 +2162,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
op = RSY_SLLG;
do_shift64:
if (const_args[2]) {
- tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
+ tcg_out_sh64(s, op, a0, a1, TCG_REG_NONE, a2);
} else {
- tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
+ tcg_out_sh64(s, op, a0, a1, a2, 0);
}
break;
case INDEX_op_shr_i64:
@@ -2180,87 +2176,83 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_rotl_i64:
if (const_args[2]) {
- tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
- TCG_REG_NONE, args[2]);
+ tcg_out_sh64(s, RSY_RLLG, a0, a1, TCG_REG_NONE, a2);
} else {
- tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
+ tcg_out_sh64(s, RSY_RLLG, a0, a1, a2, 0);
}
break;
case INDEX_op_rotr_i64:
if (const_args[2]) {
- tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
- TCG_REG_NONE, (64 - args[2]) & 63);
+ tcg_out_sh64(s, RSY_RLLG, a0, a1, TCG_REG_NONE, (64 - a2) & 63);
} else {
/* We can use the smaller 32-bit negate because only the
low 6 bits are examined for the rotate. */
- tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
- tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
+ tcg_out_insn(s, RR, LCR, TCG_TMP0, a2);
+ tcg_out_sh64(s, RSY_RLLG, a0, a1, TCG_TMP0, 0);
}
break;
case INDEX_op_ext8s_i64:
- tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
+ tgen_ext8s(s, TCG_TYPE_I64, a0, a1);
break;
case INDEX_op_ext16s_i64:
- tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
+ tgen_ext16s(s, TCG_TYPE_I64, a0, a1);
break;
case INDEX_op_ext_i32_i64:
case INDEX_op_ext32s_i64:
- tgen_ext32s(s, args[0], args[1]);
+ tgen_ext32s(s, a0, a1);
break;
case INDEX_op_ext8u_i64:
- tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
+ tgen_ext8u(s, TCG_TYPE_I64, a0, a1);
break;
case INDEX_op_ext16u_i64:
- tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
+ tgen_ext16u(s, TCG_TYPE_I64, a0, a1);
break;
case INDEX_op_extu_i32_i64:
case INDEX_op_ext32u_i64:
- tgen_ext32u(s, args[0], args[1]);
+ tgen_ext32u(s, a0, a1);
break;
case INDEX_op_add2_i64:
if (const_args[4]) {
- if ((int64_t)args[4] >= 0) {
- tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
+ if ((int64_t)a4 >= 0) {
+ tcg_out_insn(s, RIL, ALGFI, a0, a4);
} else {
- tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
+ tcg_out_insn(s, RIL, SLGFI, a0, -a4);
}
} else {
- tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+ tcg_out_insn(s, RRE, ALGR, a0, a4);
}
- tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+ tcg_out_insn(s, RRE, ALCGR, a1, args[5]);
break;
case INDEX_op_sub2_i64:
if (const_args[4]) {
- if ((int64_t)args[4] >= 0) {
- tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
+ if ((int64_t)a4 >= 0) {
+ tcg_out_insn(s, RIL, SLGFI, a0, a4);
} else {
- tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
+ tcg_out_insn(s, RIL, ALGFI, a0, -a4);
}
} else {
- tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+ tcg_out_insn(s, RRE, SLGR, a0, a4);
}
- tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+ tcg_out_insn(s, RRE, SLBGR, a1, args[5]);
break;
case INDEX_op_brcond_i64:
- tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
- args[1], const_args[1], arg_label(args[3]));
+ tgen_brcond(s, TCG_TYPE_I64, a2, a0,
+ a1, const_args[1], arg_label(args[3]));
break;
case INDEX_op_setcond_i64:
- tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
- args[2], const_args[2]);
+ tgen_setcond(s, TCG_TYPE_I64, args[3], a0, a1, a2, const_args[2]);
break;
case INDEX_op_movcond_i64:
- tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
- args[2], const_args[2], args[3], const_args[3]);
+ tgen_movcond(s, TCG_TYPE_I64, args[5], a0, a1,
+ a2, const_args[2], args[3], const_args[3]);
break;
OP_32_64(deposit):
- a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[1]) {
- tgen_deposit(s, a0, a2, args[3], args[4], 1);
+ tgen_deposit(s, a0, a2, args[3], a4, 1);
} else {
/* Since we can't support "0Z" as a constraint, we allow a1 in
any register. Fix things up as if a matching constraint. */
@@ -2272,22 +2264,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
tcg_out_mov(s, type, a0, a1);
}
- tgen_deposit(s, a0, a2, args[3], args[4], 0);
+ tgen_deposit(s, a0, a2, args[3], a4, 0);
}
break;
OP_32_64(extract):
- tgen_extract(s, args[0], args[1], args[2], args[3]);
+ tgen_extract(s, a0, a1, a2, args[3]);
break;
case INDEX_op_clz_i64:
- tgen_clz(s, args[0], args[1], args[2], const_args[2]);
+ tgen_clz(s, a0, a1, a2, const_args[2]);
break;
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
- if (args[0] & TCG_MO_ST_LD) {
+ if (a0 & TCG_MO_ST_LD) {
tcg_out_insn(s, RR, BCR,
s390_facilities & FACILITY_FAST_BCR_SER ? 14 : 15, 0);
}
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 5/6] tcg: Restrict tcg_out_op() to arrays of TCG_MAX_OP_ARGS elements
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Richard Henderson, Aleksandar Rikalo, Philippe Mathieu-Daudé,
qemu-s390x, qemu-arm, Alistair Francis, Palmer Dabbelt,
Miroslav Rezanina, Aurelien Jarno
tcg_reg_alloc_op() allocates arrays of TCG_MAX_OP_ARGS elements.
The Aarch64 target already does this since commit 8d8db193f25
("tcg-aarch64: Hoist common argument loads in tcg_out_op"),
SPARC since commit b357f902bff ("tcg-sparc: Hoist common argument
loads in tcg_out_op").
RISCV missed it upon introduction in commit bdf503819ee
("tcg/riscv: Add the out op decoder"), MIPS since commit
22ee3a987d5 ("tcg-mips: Hoist args loads") and i386 since
commit 42d5b514928 ("tcg/i386: Hoist common arguments in
tcg_out_op").
Provide this information as a hint to the compiler in the function
prototype, and update the function definitions.
This fixes these warnings (using GCC 11):
tcg/aarch64/tcg-target.c.inc:1855:37: error: argument 3 of type 'const TCGArg[16]' {aka 'const long unsigned int[16]'} with mismatched bound [-Werror=array-parameter=]
tcg/aarch64/tcg-target.c.inc:1856:34: error: argument 4 of type 'const int[16]' with mismatched bound [-Werror=array-parameter=]
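As a rough stand-alone illustration (a minimal sketch, not QEMU code) of what
-Warray-parameter= checks: GCC 11 warns when one declaration of a function
spells a parameter as a plain pointer (or with a different bound) while
another declaration of the same function gives it an explicit array bound:
    /* warn.c -- compile with: gcc-11 -Wall -c warn.c */
    void emit(const unsigned long *args);       /* earlier prototype: no bound */
    void emit(const unsigned long args[16])     /* definition: bound of 16 -> warning */
    {
        (void)args;                             /* body irrelevant to the diagnostic */
    }
Spelling both declarations with args[TCG_MAX_OP_ARGS] keeps them consistent
and documents the real array size to both the compiler and the reader.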
Reported-by: Miroslav Rezanina <mrezanin@redhat.com>
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/tcg.c | 5 +++--
tcg/i386/tcg-target.c.inc | 3 ++-
tcg/mips/tcg-target.c.inc | 3 ++-
tcg/riscv/tcg-target.c.inc | 3 ++-
tcg/tci/tcg-target.c.inc | 5 +++--
5 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 472bf1755bf..97d074d8fab 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -110,8 +110,9 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long arg);
-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
- const int *const_args);
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src);
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 46e856f4421..d121dca8789 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2215,7 +2215,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
}
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
TCGArg a0, a1, a2;
int c, const_a2, vexop, rexw = 0;
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index add157f6c32..b9bb54f0ecc 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1691,7 +1691,8 @@ static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6,
}
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
MIPSInsn i1, i2;
TCGArg a0, a1, a2;
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index c60b91ba58f..5bf0d069532 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -1238,7 +1238,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
TCGArg a0 = args[0];
TCGArg a1 = args[1];
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index d5a4d9d37cf..60464524f3d 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -553,8 +553,9 @@ static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
old_code_ptr[1] = s->code_ptr - old_code_ptr;
}
-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
- const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
uint8_t *old_code_ptr = s->code_ptr;
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 6/6] tcg: Restrict tcg_out_vec_op() to arrays of TCG_MAX_OP_ARGS elements
2021-01-13 17:24 ` Philippe Mathieu-Daudé
@ 2021-01-13 17:24 ` Philippe Mathieu-Daudé
-1 siblings, 0 replies; 18+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-01-13 17:24 UTC (permalink / raw)
To: qemu-devel
Cc: Thomas Huth, Huacai Chen, qemu-riscv, Stefan Weil, Cornelia Huck,
Richard Henderson, Aleksandar Rikalo, Philippe Mathieu-Daudé,
qemu-s390x, qemu-arm, Alistair Francis, Palmer Dabbelt,
Miroslav Rezanina, Aurelien Jarno
tcg_reg_alloc_op() allocates arrays of TCG_MAX_OP_ARGS elements.
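As a reminder of where the bound comes from, here is a self-contained
sketch (stand-in types and names, not the actual QEMU code) of the
caller/callee relationship:
/* The caller, tcg_reg_alloc_op(), hands the backend arrays of exactly
 * TCG_MAX_OP_ARGS elements, so the vector hook can advertise the same
 * bound as tcg_out_op(). */
#define TCG_MAX_OP_ARGS 16
typedef unsigned long TCGArg;
static void demo_out_vec_op(const TCGArg args[TCG_MAX_OP_ARGS],
                            const int const_args[TCG_MAX_OP_ARGS])
{
    (void)args;
    (void)const_args;
}
void demo_reg_alloc_op(void)
{
    TCGArg new_args[TCG_MAX_OP_ARGS] = { 0 };
    int const_args[TCG_MAX_OP_ARGS] = { 0 };
    /* ...register allocation would fill the arrays here... */
    demo_out_vec_op(new_args, const_args);
}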
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/tcg.c | 14 ++++++++------
tcg/aarch64/tcg-target.c.inc | 3 ++-
tcg/i386/tcg-target.c.inc | 3 ++-
tcg/ppc/tcg-target.c.inc | 3 ++-
4 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 97d074d8fab..3a20327f9cb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -120,9 +120,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg dst, tcg_target_long arg);
-static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
- unsigned vece, const TCGArg *args,
- const int *const_args);
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src)
@@ -139,9 +140,10 @@ static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
{
g_assert_not_reached();
}
-static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
- unsigned vece, const TCGArg *args,
- const int *const_args)
+static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
g_assert_not_reached();
}
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index ab199b143f3..32811976e78 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -2276,7 +2276,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
static const AArch64Insn cmp_insn[16] = {
[TCG_COND_EQ] = I3616_CMEQ,
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index d121dca8789..87bf75735a1 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2654,7 +2654,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
static int const add_insn[4] = {
OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 70b747a8a30..b8f5f8a53e1 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -3137,7 +3137,8 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
static const uint32_t
add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
--
2.26.2
^ permalink raw reply related [flat|nested] 18+ messages in thread