All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes
@ 2018-01-16  3:45 Richard Henderson
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson
  0 siblings, 2 replies; 4+ messages in thread
From: Richard Henderson @ 2018-01-16  3:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

Changes since v1:

Patch 1:
  * Use rsb/rsc for gt/le.  Now we always use "rI" as a constraint,
    so we never have to load immediates into registers by hand.

Patch 2:
  * We can't use add_ofs in the while condition and cmp_ofs in the
    reduction and converge; use cmp_ofs always.  Fixes the sparc64
    regression that Peter noticed.


r~


Richard Henderson (4):
  tcg/arm: Fix double-word comparisons
  tcg/arm: Support tlb offsets larger than 64k

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons
  2018-01-16  3:45 [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes Richard Henderson
@ 2018-01-16  3:45 ` Richard Henderson
  2018-01-16 11:33   ` Peter Maydell
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson
  1 sibling, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2018-01-16  3:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Richard Henderson

The code sequence we were generating was only good for unsigned
comparisons.  For signed comparisions, use the sequence from gcc.

Fixes booting of ppc64 firmware, with a patch changing the code
sequence for ppc comparisons.

Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.inc.c | 97 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 65 insertions(+), 32 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 98a12535a5..0ff283d84f 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -239,10 +239,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
     }
 }
 
-#define TCG_CT_CONST_ARM  0x100
-#define TCG_CT_CONST_INV  0x200
-#define TCG_CT_CONST_NEG  0x400
-#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ARM     0x0100
+#define TCG_CT_CONST_INV     0x0200
+#define TCG_CT_CONST_NEG     0x0400
+#define TCG_CT_CONST_ZERO    0x1000
 
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
@@ -351,8 +351,7 @@ static inline int check_fit_imm(uint32_t imm)
 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                          const TCGArgConstraint *arg_ct)
 {
-    int ct;
-    ct = arg_ct->ct;
+    int ct = arg_ct->ct;
     if (ct & TCG_CT_CONST) {
         return 1;
     } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
@@ -1103,6 +1102,56 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
     }
 }
 
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    TCGReg al = args[0];
+    TCGReg ah = args[1];
+    TCGArg bl = args[2];
+    TCGArg bh = args[3];
+    TCGCond cond = args[4];
+    int const_bl = const_args[2];
+    int const_bh = const_args[3];
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+    case TCG_COND_GTU:
+    case TCG_COND_GEU:
+        /* We perform a conditional comparision.  If the high half is
+           equal, then overwrite the flags with the comparison of the
+           low half.  The resulting flags cover the whole.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
+        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
+        return cond;
+
+    case TCG_COND_LT:
+    case TCG_COND_GE:
+        /* We perform a double-word subtraction and examine the result.
+           We do not actually need the result of the subtract, so the
+           low part "subtract" is a compare.  For the high half we have
+           no choice but to compute into a temporary.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
+        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
+                       TCG_REG_TMP, ah, bh, const_bh);
+        return cond;
+
+    case TCG_COND_LE:
+    case TCG_COND_GT:
+        /* Similar, but with swapped arguments, via reversed subtract.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
+                       TCG_REG_TMP, al, bl, const_bl);
+        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
+                        TCG_REG_TMP, ah, bh, const_bh);
+        return tcg_swap_cond(cond);
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #ifdef CONFIG_SOFTMMU
 #include "tcg-ldst.inc.c"
 
@@ -1964,22 +2013,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                            arg_label(args[3]));
         break;
-    case INDEX_op_brcond2_i32:
-        /* The resulting conditions are:
-         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
-         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
-         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
-         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
-         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
-         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
-         */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[0], args[2], const_args[2]);
-        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
-                           arg_label(args[5]));
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                         args[1], args[2], const_args[2]);
@@ -1988,15 +2021,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                         ARITH_MOV, args[0], 0, 0);
         break;
+
+    case INDEX_op_brcond2_i32:
+        c = tcg_out_cmp2(s, args, const_args);
+        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
+        break;
     case INDEX_op_setcond2_i32:
-        /* See brcond2_i32 comment */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[2], args[4], const_args[4]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
-                        ARITH_MOV, args[0], 0, 1);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+        c = tcg_out_cmp2(s, args + 1, const_args + 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
                         ARITH_MOV, args[0], 0, 0);
         break;
 
@@ -2093,9 +2126,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     static const TCGTargetOpDef sub2
         = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
     static const TCGTargetOpDef br2
-        = { .args_ct_str = { "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "rI", "rI" } };
     static const TCGTargetOpDef setc2
-        = { .args_ct_str = { "r", "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
 
     switch (op) {
     case INDEX_op_goto_ptr:
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k
  2018-01-16  3:45 [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes Richard Henderson
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
@ 2018-01-16  3:45 ` Richard Henderson
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2018-01-16  3:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

AArch64 with SVE has an offset of 80k to the 8th TLB.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.inc.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 0ff283d84f..8f5d4f208d 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1246,12 +1246,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 /* We're expecting to use an 8-bit immediate and to mask.  */
 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 
-/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
-   Using the offset of the second entry in the last tlb table ensures
-   that we can index all of the elements of the first entry.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
-                  > 0xffff);
-
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
@@ -1264,6 +1258,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
          ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
          : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    int mask_off;
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
@@ -1295,16 +1290,25 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                         0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
     }
 
-    /* We checked that the offset is contained within 16 bits above.  */
-    if (add_off > 0xfff
-        || (use_armv6_instructions && TARGET_LONG_BITS == 64
-            && cmp_off > 0xff)) {
+    /* Add portions of the offset until the memory access is in range.
+     * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
+     * we can use a 12-bit offset.  */
+    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+        mask_off = 0xff;
+    } else {
+        mask_off = 0xfff;
+    }
+    while (cmp_off > mask_off) {
+        int shift = ctz32(cmp_off & ~mask_off) & ~1;
+        int rot = ((32 - shift) << 7) & 0xf00;
+        int addend = cmp_off & (0xff << shift);
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        (24 << 7) | (cmp_off >> 8));
+                        rot | ((cmp_off >> shift) & 0xff));
         base = TCG_REG_R2;
-        add_off -= cmp_off & 0xff00;
-        cmp_off &= 0xff;
+        add_off -= addend;
+        cmp_off -= addend;
     }
+
     if (!use_armv7_instructions) {
         tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                         TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
@ 2018-01-16 11:33   ` Peter Maydell
  0 siblings, 0 replies; 4+ messages in thread
From: Peter Maydell @ 2018-01-16 11:33 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers, Richard Henderson

On 16 January 2018 at 03:45, Richard Henderson
<richard.henderson@linaro.org> wrote:
> The code sequence we were generating was only good for unsigned
> comparisons.  For signed comparisions, use the sequence from gcc.
>
> Fixes booting of ppc64 firmware, with a patch changing the code
> sequence for ppc comparisons.
>
> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/arm/tcg-target.inc.c | 97 ++++++++++++++++++++++++++++++++----------------
>  1 file changed, 65 insertions(+), 32 deletions(-)
>
> diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
> index 98a12535a5..0ff283d84f 100644
> --- a/tcg/arm/tcg-target.inc.c
> +++ b/tcg/arm/tcg-target.inc.c
> @@ -239,10 +239,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
>      }
>  }
>
> -#define TCG_CT_CONST_ARM  0x100
> -#define TCG_CT_CONST_INV  0x200
> -#define TCG_CT_CONST_NEG  0x400
> -#define TCG_CT_CONST_ZERO 0x800
> +#define TCG_CT_CONST_ARM     0x0100
> +#define TCG_CT_CONST_INV     0x0200
> +#define TCG_CT_CONST_NEG     0x0400
> +#define TCG_CT_CONST_ZERO    0x1000

You might as well just drop this hunk now you're not adding a new entry.

>  /* parse target specific constraints */
>  static const char *target_parse_constraint(TCGArgConstraint *ct,
> @@ -351,8 +351,7 @@ static inline int check_fit_imm(uint32_t imm)
>  static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
>                                           const TCGArgConstraint *arg_ct)
>  {
> -    int ct;
> -    ct = arg_ct->ct;
> +    int ct = arg_ct->ct;
>      if (ct & TCG_CT_CONST) {
>          return 1;
>      } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {

Ditto.


Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-01-16 11:33 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-16  3:45 [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes Richard Henderson
2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
2018-01-16 11:33   ` Peter Maydell
2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.