From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:54212) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aTB3n-0004LO-KI for qemu-devel@nongnu.org; Tue, 09 Feb 2016 11:22:41 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1aTB3k-00008W-Bf for qemu-devel@nongnu.org; Tue, 09 Feb 2016 11:22:39 -0500 Received: from mailapp01.imgtec.com ([195.59.15.196]:14052 helo=imgpgp01.kl.imgtec.org) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aTB3j-000085-UF for qemu-devel@nongnu.org; Tue, 09 Feb 2016 11:22:36 -0500 Date: Tue, 9 Feb 2016 16:22:34 +0000 From: James Hogan Message-ID: <20160209162234.GB3678@jhogan-linux.le.imgtec.org> References: <1455014403-10742-1-git-send-email-rth@twiddle.net> <1455014403-10742-15-git-send-email-rth@twiddle.net> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha256; protocol="application/pgp-signature"; boundary="PmA2V3Z32TCmWXqI" Content-Disposition: inline In-Reply-To: <1455014403-10742-15-git-send-email-rth@twiddle.net> Subject: Re: [Qemu-devel] [PATCH 14/15] tcg-mips: Use mipsr6 instructions in branches List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: Leon Alrae , qemu-devel@nongnu.org, aurelien@aurel32.net --PmA2V3Z32TCmWXqI Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: quoted-printable Hi Richard, On Tue, Feb 09, 2016 at 09:40:02PM +1100, Richard Henderson wrote: > Using compact branches, when possible, avoids a delay slot nop. 
>=20 > Signed-off-by: Richard Henderson > --- > include/elf.h | 4 + > tcg/mips/tcg-target.c | 216 +++++++++++++++++++++++++++++++++++++++-----= ------ > 2 files changed, 172 insertions(+), 48 deletions(-) >=20 > diff --git a/include/elf.h b/include/elf.h > index 1098d21..6e52ba0 100644 > --- a/include/elf.h > +++ b/include/elf.h > @@ -352,6 +352,10 @@ typedef struct { > #define R_MIPS_CALLHI16 30 > #define R_MIPS_CALLLO16 31 > /* > + * Incomplete list of MIPS R6 relocation types. > + */ > +#define R_MIPS_PC26_S2 61 > +/* > * This range is reserved for vendor specific relocations. > */ > #define R_MIPS_LOVENDOR 100 > diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c > index e0972ba..06e15d4 100644 > --- a/tcg/mips/tcg-target.c > +++ b/tcg/mips/tcg-target.c > @@ -152,6 +152,19 @@ static inline void reloc_pc16(tcg_insn_unit *pc, tcg= _insn_unit *target) > *pc =3D deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); > } > =20 > +static inline uint32_t reloc_pc26_val(tcg_insn_unit *pc, tcg_insn_unit *= target) > +{ > + /* Let the compiler perform the right-shift as part of the arithmeti= c. 
*/ > + ptrdiff_t disp =3D target - (pc + 1); > + assert(disp =3D=3D sextract32(disp, 0, 26)); > + return disp & 0x1ffffff; > +} > + > +static inline void reloc_pc26(tcg_insn_unit *pc, tcg_insn_unit *target) > +{ > + *pc =3D deposit32(*pc, 0, 26, reloc_pc16_val(pc, target)); > +} > + > static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *ta= rget) > { > assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) =3D=3D 0); > @@ -166,9 +179,17 @@ static inline void reloc_26(tcg_insn_unit *pc, tcg_i= nsn_unit *target) > static void patch_reloc(tcg_insn_unit *code_ptr, int type, > intptr_t value, intptr_t addend) > { > - assert(type =3D=3D R_MIPS_PC16); > assert(addend =3D=3D 0); > - reloc_pc16(code_ptr, (tcg_insn_unit *)value); > + switch (type) { > + case R_MIPS_PC16: > + reloc_pc16(code_ptr, (tcg_insn_unit *)value); > + break; > + case R_MIPS_PC26_S2: > + reloc_pc26(code_ptr, (tcg_insn_unit *)value); > + break; > + default: > + tcg_abort(); > + } > } > =20 > #define TCG_CT_CONST_ZERO 0x100 > @@ -309,7 +330,10 @@ typedef enum { > OPC_BEQ =3D 004 << 26, > OPC_BNE =3D 005 << 26, > OPC_BLEZ =3D 006 << 26, > + OPC_BGEUC =3D OPC_BLEZ, /* R6: rs !=3D 0, rt !=3D 0, rs !=3D r= t */ > OPC_BGTZ =3D 007 << 26, > + OPC_BLTUC =3D OPC_BGTZ, /* R6: rs !=3D 0, rt !=3D 0, rs !=3D r= t */ > + OPC_BEQC =3D 010 << 26, /* R6: rs > rt */ > OPC_ADDIU =3D 011 << 26, > OPC_SLTI =3D 012 << 26, > OPC_SLTIU =3D 013 << 26, > @@ -318,6 +342,9 @@ typedef enum { > OPC_XORI =3D 016 << 26, > OPC_LUI =3D 017 << 26, > OPC_AUI =3D OPC_LUI, > + OPC_BGEC =3D 026 << 26, > + OPC_BLTC =3D 027 << 26, > + OPC_BNEC =3D 030 << 26, /* R6: rs > rt */ > OPC_DADDIU =3D 031 << 26, > OPC_DAUI =3D 035 << 26, > OPC_LB =3D 040 << 26, > @@ -329,6 +356,7 @@ typedef enum { > OPC_SB =3D 050 << 26, > OPC_SH =3D 051 << 26, > OPC_SW =3D 053 << 26, > + OPC_BC =3D 062 << 26, > OPC_LD =3D 067 << 26, > OPC_SD =3D 077 << 26, > =20 > @@ -527,6 +555,17 @@ static inline void tcg_out_opc_br(TCGContext *s, MIP= SInsn opc, 
> tcg_out_opc_imm(s, opc, rt, rs, offset); > } > =20 > +static void tcg_out_opc_br_pc16(TCGContext *s, MIPSInsn opc, > + TCGReg arg1, TCGReg arg2, TCGLabel *l) > +{ > + tcg_out_opc_br(s, opc, arg1, arg2); > + if (l->has_value) { > + reloc_pc16(s->code_ptr - 1, l->u.value_ptr); > + } else { > + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); > + } > +} > + > /* > * Type sa > */ > @@ -1002,59 +1041,129 @@ static void tcg_out_brcond(TCGContext *s, TCGCon= d cond, TCGReg arg1, > [TCG_COND_GE] =3D OPC_BGEZ, > }; > =20 > - MIPSInsn s_opc =3D OPC_SLTU; > - MIPSInsn b_opc; > - int cmp_map; > + MIPSInsn b_opc =3D 0; > + bool compact =3D false; > + int cmp_map, t; > + > + /* We shouldn't expect to have arg1 =3D=3D arg2, as the TCG optimizer > + should have eliminated all such. However, the R6 encodings do > + not allow this situation, so e.g. if the optimizer is disabled > + we must fall back to normal compares. */ > + if (use_mips32r6_instructions && arg1 !=3D arg2) { > + switch (cond) { > + case TCG_COND_EQ: > + case TCG_COND_NE: > + if (arg1 < arg2) { > + t =3D arg1, arg1 =3D arg2, arg2 =3D t; > + } > + b_opc =3D cond =3D=3D TCG_COND_EQ ? OPC_BEQC : OPC_BNEC; > + compact =3D true; > + break; > =20 > - switch (cond) { > - case TCG_COND_EQ: > - b_opc =3D OPC_BEQ; > - break; > - case TCG_COND_NE: > - b_opc =3D OPC_BNE; > - break; > + case TCG_COND_LE: > + case TCG_COND_GT: > + if (arg1 =3D=3D TCG_REG_ZERO) { > + break; > + } > + /* Swap arguments to turn LE to GE or GT to LT. > + This also produces BLEZC/BGTZC when arg2 =3D 0. */ > + t =3D arg1, arg1 =3D arg2, arg2 =3D t; > + b_opc =3D cond =3D=3D TCG_COND_LE ? 
OPC_BGEC : OPC_BLTC; > + compact =3D true; > + break; > =20 > - case TCG_COND_LT: > - case TCG_COND_GT: > - case TCG_COND_LE: > - case TCG_COND_GE: > - if (arg2 =3D=3D 0) { > - b_opc =3D b_zero[cond]; > - arg2 =3D arg1; > - arg1 =3D 0; > + case TCG_COND_GE: > + case TCG_COND_LT: > + if (arg1 =3D=3D TCG_REG_ZERO) { > + break; > + } > + /* The encoding of BGEZC/BLTZC requires rs =3D rt. */ > + if (arg2 =3D=3D TCG_REG_ZERO) { > + arg2 =3D arg1; > + } > + b_opc =3D cond =3D=3D TCG_COND_GE ? OPC_BGEC : OPC_BLTC; > + compact =3D true; > break; > - } > - s_opc =3D OPC_SLT; > - /* FALLTHRU */ > =20 > - case TCG_COND_LTU: > - case TCG_COND_GTU: > - case TCG_COND_LEU: > - case TCG_COND_GEU: > - cmp_map =3D mips_cmp_map[cond]; > - if (cmp_map & MIPS_CMP_SWAP) { > - TCGReg t =3D arg1; > - arg1 =3D arg2; > - arg2 =3D t; > + case TCG_COND_LEU: > + /* Swap arguments to turn LE to GE. */ > + t =3D arg1, arg1 =3D arg2, arg2 =3D t; > + /* FALLTHRU */ > + case TCG_COND_GEU: > + b_opc =3D OPC_BGEUC; > + compact =3D true; > + break; > + > + case TCG_COND_GTU: > + /* Swap arguments to turn GT to LT. */ > + t =3D arg1, arg1 =3D arg2, arg2 =3D t; > + /* FALLTHRU */ > + case TCG_COND_LTU: > + b_opc =3D OPC_BLTUC; > + compact =3D true; > + break; > + > + default: > + tcg_abort(); > + break; > } > - tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); > - b_opc =3D (cmp_map & MIPS_CMP_INV ? 
OPC_BEQ : OPC_BNE); > - arg1 =3D TCG_TMP0; > - arg2 =3D TCG_REG_ZERO; > - break; > + } > =20 > - default: > - tcg_abort(); > - break; > + if (b_opc =3D=3D 0) { > + MIPSInsn s_opc =3D OPC_SLTU; > + > + switch (cond) { > + case TCG_COND_EQ: > + b_opc =3D OPC_BEQ; > + break; > + case TCG_COND_NE: > + b_opc =3D OPC_BNE; > + break; > + > + case TCG_COND_LT: > + case TCG_COND_GT: > + case TCG_COND_LE: > + case TCG_COND_GE: > + if (arg2 =3D=3D 0) { > + b_opc =3D b_zero[cond]; > + arg2 =3D arg1; > + arg1 =3D 0; > + break; > + } > + s_opc =3D OPC_SLT; > + /* FALLTHRU */ > + > + case TCG_COND_LTU: > + case TCG_COND_GTU: > + case TCG_COND_LEU: > + case TCG_COND_GEU: > + cmp_map =3D mips_cmp_map[cond]; > + if (cmp_map & MIPS_CMP_SWAP) { > + TCGReg t =3D arg1; > + arg1 =3D arg2; > + arg2 =3D t; > + } > + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); > + if (use_mips32r6_instructions) { > + b_opc =3D (cmp_map & MIPS_CMP_INV ? OPC_BEQC : OPC_BNEC); > + compact =3D true; > + } else { > + b_opc =3D (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); > + } > + arg1 =3D TCG_TMP0; > + arg2 =3D TCG_REG_ZERO; > + break; > + > + default: > + tcg_abort(); > + break; > + } > } > =20 > - tcg_out_opc_br(s, b_opc, arg1, arg2); > - if (l->has_value) { > - reloc_pc16(s->code_ptr - 1, l->u.value_ptr); > - } else { > - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); > + tcg_out_opc_br_pc16(s, b_opc, arg1, arg2, l); > + if (!compact) { > + tcg_out_nop(s); Unfortunately this isn't quite right. As far as I understand them, conditional compact branches have a forbidden slot after them which isn't permitted to contain a control transfer instruction (CTI). Executing a conditional compact branch with a CTI in the forbidden slot is required to signal a Reserved Instruction exception, but only if the branch is not taken (which gives the user process a SIGILL). For example: Program received signal SIGILL, Illegal instruction. 
[Switching to Thread 0xfff1c32e00 (LWP 204)] 0x000000fff30b0068 in code_gen_buffer () (gdb) disas/r Dump of assembler code for function code_gen_buffer: 0x000000fff30b0064 <+0>: f8 ff 11 8e lw s1,-8(s0) =3D> 0x000000fff30b0068 <+4>: 08 00 11 60 bnezalc s1,0xfff30b008c <code_gen_buffer+40> 0x000000fff30b006c <+8>: 1d c0 c2 08 j 0xfff30b0074 0x000000fff30b0070 <+12>: 00 00 00 00 nop (gdb) set *0x000000fff30b006c=3D0 (gdb) disas/r Dump of assembler code for function code_gen_buffer: 0x000000fff30b0064 <+0>: f8 ff 11 8e lw s1,-8(s0) =3D> 0x000000fff30b0068 <+4>: 08 00 11 60 bnezalc s1,0xfff30b008c <code_gen_buffer+40> 0x000000fff30b006c <+8>: 00 00 00 00 nop 0x000000fff30b0070 <+12>: 00 00 00 00 nop (gdb) stepi 0x000000fff30b0070 in code_gen_buffer () (gdb) disas/r Dump of assembler code for function code_gen_buffer: 0x000000fff30b0064 <+0>: f8 ff 11 8e lw s1,-8(s0) 0x000000fff30b0068 <+4>: 08 00 11 60 bnezalc s1,0xfff30b008c 0x000000fff30b006c <+8>: 00 00 00 00 nop =3D> 0x000000fff30b0070 <+12>: 00 00 00 00 nop So to be both correct and efficient, the nop should only be emitted after a conditional compact branch when the next generated instruction is a CTI. I imagine that would be a bit messy / fragile, but maybe doable? I haven't looked too deeply. 
Cheers James > } > - tcg_out_nop(s); > } > =20 > static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, > @@ -1826,8 +1935,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOp= code opc, > s->tb_next_offset[a0] =3D tcg_current_code_size(s); > break; > case INDEX_op_br: > - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, > - arg_label(a0)); > + { > + TCGLabel *l =3D arg_label(a0); > + if (use_mips32r6_instructions) { > + tcg_out32(s, OPC_BC); > + if (l->has_value) { > + reloc_pc26(s->code_ptr - 1, l->u.value_ptr); > + } else { > + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC26_S2, l,= 0); > + } > + } else { > + tcg_out_opc_br_pc16(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZE= RO, l); > + } > + } > break; > =20 > case INDEX_op_ld8u_i32: > --=20 > 2.5.0 >=20 --PmA2V3Z32TCmWXqI Content-Type: application/pgp-signature; name="signature.asc" Content-Description: Digital signature -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAEBCAAGBQJWuhJKAAoJEGwLaZPeOHZ68f8P/0N+I0MaIkTwvF4e2YO71zAR /rGnbxoewDSIdjzzkL88eTO0JsGerl+zX2RX7ATQ4KHX/yXmI4ASRZqolo6VdPQx 0ANOrVahbmtIfB+wwpCNcg8TBv5yaN6eRpFXZaDK9JnQyP4S3VsR3JZl0he7Na8e UyVjhSCljIX96Xzn7sUKIJgCMTIxlhzOQe/tv0KWxhmmYUi96xO1+WTmnH1PSaO9 BLd9uKPwWAKW676fqBx4LrHD7+Tya7HpI6U+MTim5Fmcsiucstn/suOYG/R/OH4u QNsy0s66V7V/RCs9otAW5HGQuJP5UrQroKYOy8KxvnAnN08oU0d8UehH0hJmbpTc g4NJRrgE9MvnC/EFrn33ewfSJ0lmDjAOcv39e52PCaB7CMqVhWBo6NpZvtVMNryr GhdtckyT4CgVwwcOy6CqM3vCrcyXkTXXA/z4f2HAmcR1w6LbUPvpzqCuvTm4Q1af bVi36Ibd1faqkWaJEVSacjAt9kyq4NTSweD8Tu/X9xICma9EGQBlnrSxpnxSDWEe PidN8THhJ5QQfK6XMsvnsDpQjV2ai+riK5nDmPBzkojty5rgzk6occIbx2YXC072 aR/K0slGfZhB1LKrk2XTi0Idp4n1uSoQQm4/PWB1Y01vbxSD1wBnroswuJSVp0fN 6ySrQ0vAStTESEySNMRF =zLQS -----END PGP SIGNATURE----- --PmA2V3Z32TCmWXqI--