All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions
@ 2017-07-11 21:21 Richard Henderson
  2017-07-11 21:21 ` [Qemu-devel] [PATCH 1/2] target/i386: Decode AMD XOP prefix Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Richard Henderson @ 2017-07-11 21:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, ricardo.ribalda

These are general purpose bit manipulation instructions akin
to the BMI1 and BMI2 instructions.  This is an AMD extension
and uses the XOP instruction prefix.

I am in the process of trying to run the gcc testsuite with -mtbm,
with and without the patchset, to see (1) if the new insns get used
and (2) that they run ok.

Please review.


r~


Richard Henderson (2):
  target/i386: Decode AMD XOP prefix
  target/i386: Implement all TBM instructions

 target/i386/cc_helper_template.h |  18 +++++
 target/i386/cpu.h                |   7 +-
 target/i386/cc_helper.c          |  28 ++++++-
 target/i386/cpu.c                |   3 +-
 target/i386/translate.c          | 163 +++++++++++++++++++++++++++++++++++----
 5 files changed, 199 insertions(+), 20 deletions(-)

-- 
2.9.4

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Qemu-devel] [PATCH 1/2] target/i386: Decode AMD XOP prefix
  2017-07-11 21:21 [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions Richard Henderson
@ 2017-07-11 21:21 ` Richard Henderson
  2017-07-11 21:21 ` [Qemu-devel] [PATCH 2/2] target/i386: Implement all TBM instructions Richard Henderson
  2017-07-12  4:04 ` [Qemu-devel] [PATCH 0/2] " Richard Henderson
  2 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2017-07-11 21:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, ricardo.ribalda

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/translate.c | 46 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index ed3b896..6082db2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4500,8 +4500,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #endif
     case 0xc5: /* 2-byte VEX */
     case 0xc4: /* 3-byte VEX */
+    case 0x8f: /* 3-byte XOP */
         /* VEX prefixes cannot be used except in 32-bit mode.
-           Otherwise the instruction is LES or LDS.  */
+           Otherwise the instruction is LES, LDS, or POP.  */
         if (s->code32 && !s->vm86) {
             static const int pp_prefix[4] = {
                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
@@ -4510,7 +4511,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
-                   otherwise the instruction is LES or LDS.  */
+                   otherwise the instruction is LES, LDS, or POP.  */
+                break;
+            }
+            if (b == 0x8f && (vex2 & 0x1f) < 8) {
+                /* If the value of the XOP.map_select field is less than 8,
+                   the first two bytes of the three-byte XOP are interpreted
+                   as a form of the POP instruction.  */
                 break;
             }
             s->pc++;
@@ -4536,18 +4543,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #endif
                 vex3 = cpu_ldub_code(env, s->pc++);
                 rex_w = (vex3 >> 7) & 1;
-                switch (vex2 & 0x1f) {
-                case 0x01: /* Implied 0f leading opcode bytes.  */
-                    b = cpu_ldub_code(env, s->pc++) | 0x100;
-                    break;
-                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
-                    b = 0x138;
-                    break;
-                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
-                    b = 0x13a;
-                    break;
-                default:   /* Reserved for future use.  */
-                    goto unknown_op;
+                if (b == 0xc4) {
+                    switch (vex2 & 0x1f) {
+                    case 0x01: /* Implied 0f leading opcode bytes.  */
+                        b = cpu_ldub_code(env, s->pc++) | 0x100;
+                        break;
+                    case 0x02: /* Implied 0f 38 leading opcode bytes.  */
+                        b = 0x138;
+                        break;
+                    case 0x03: /* Implied 0f 3a leading opcode bytes.  */
+                        b = 0x13a;
+                        break;
+                    default:   /* Reserved for future use.  */
+                        goto unknown_op;
+                    }
+                } else {
+                    /* Unlike VEX, XOP.map_select does not overlap the
+                       base instruction set.  Prepend the map_select to
+                       the next opcode byte.  */
+                    b = cpu_ldub_code(env, s->pc++) + (vex2 & 0x1f) * 0x100;
                 }
             }
             s->vex_v = (~vex3 >> 3) & 0xf;
@@ -8276,6 +8290,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1d0 ... 0x1fe:
         gen_sse(env, s, b, pc_start, rex_r);
         break;
+
+    case 0x800 ... 0x8ff: /* XOP opcode map 8 */
+    case 0x900 ... 0x9ff: /* XOP opcode map 9 */
+    case 0xa00 ... 0xaff: /* XOP opcode map 10 */
     default:
         goto unknown_op;
     }
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Qemu-devel] [PATCH 2/2] target/i386: Implement all TBM instructions
  2017-07-11 21:21 [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions Richard Henderson
  2017-07-11 21:21 ` [Qemu-devel] [PATCH 1/2] target/i386: Decode AMD XOP prefix Richard Henderson
@ 2017-07-11 21:21 ` Richard Henderson
  2017-07-12  4:04 ` [Qemu-devel] [PATCH 0/2] " Richard Henderson
  2 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2017-07-11 21:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, ricardo.ribalda

Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/cc_helper_template.h |  18 ++++++
 target/i386/cpu.h                |   7 ++-
 target/i386/cc_helper.c          |  28 +++++++--
 target/i386/cpu.c                |   3 +-
 target/i386/translate.c          | 123 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 170 insertions(+), 9 deletions(-)

diff --git a/target/i386/cc_helper_template.h b/target/i386/cc_helper_template.h
index 607311f..6ce63b7 100644
--- a/target/i386/cc_helper_template.h
+++ b/target/i386/cc_helper_template.h
@@ -235,6 +235,24 @@ static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return src1 == 0;
 }
 
+static int glue(compute_all_tbmadd, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    int cf, pf, af, zf, sf, of;
+
+    cf = (src1 == (DATA_TYPE)-1);
+    pf = 0; /* undefined */
+    af = 0; /* undefined */
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = 0;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_tbmadd, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    return src1 == (DATA_TYPE)-1;
+}
+
 #undef DATA_BITS
 #undef SIGN_MASK
 #undef DATA_TYPE
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7a228af..537f592 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -774,11 +774,16 @@ typedef enum {
     CC_OP_SARL,
     CC_OP_SARQ,
 
-    CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */
+    CC_OP_BMILGB, /* Z,S via DST, C = SRC==0; O=0; P,A undefined */
     CC_OP_BMILGW,
     CC_OP_BMILGL,
     CC_OP_BMILGQ,
 
+    CC_OP_TBMADDB, /* Z,S via DST; C = SRC==-1; O=0; P,A undefined */
+    CC_OP_TBMADDW,
+    CC_OP_TBMADDL,
+    CC_OP_TBMADDQ,
+
     CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
     CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest.  */
     CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
diff --git a/target/i386/cc_helper.c b/target/i386/cc_helper.c
index c9c90e1..2f12c3b 100644
--- a/target/i386/cc_helper.c
+++ b/target/i386/cc_helper.c
@@ -98,9 +98,6 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
                                    target_ulong src2, int op)
 {
     switch (op) {
-    default: /* should never happen */
-        return 0;
-
     case CC_OP_EFLAGS:
         return src1;
     case CC_OP_CLR:
@@ -185,6 +182,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_all_bmilgl(dst, src1);
 
+    case CC_OP_TBMADDB:
+        return compute_all_tbmaddb(dst, src1);
+    case CC_OP_TBMADDW:
+        return compute_all_tbmaddw(dst, src1);
+    case CC_OP_TBMADDL:
+        return compute_all_tbmaddl(dst, src1);
+
     case CC_OP_ADCX:
         return compute_all_adcx(dst, src1, src2);
     case CC_OP_ADOX:
@@ -215,7 +219,12 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
         return compute_all_sarq(dst, src1);
     case CC_OP_BMILGQ:
         return compute_all_bmilgq(dst, src1);
+    case CC_OP_TBMADDQ:
+        return compute_all_tbmaddq(dst, src1);
 #endif
+
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -228,7 +237,6 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
                                  target_ulong src2, int op)
 {
     switch (op) {
-    default: /* should never happen */
     case CC_OP_LOGICB:
     case CC_OP_LOGICW:
     case CC_OP_LOGICL:
@@ -307,6 +315,13 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_c_bmilgl(dst, src1);
 
+    case CC_OP_TBMADDB:
+        return compute_c_tbmaddb(dst, src1);
+    case CC_OP_TBMADDW:
+        return compute_c_tbmaddw(dst, src1);
+    case CC_OP_TBMADDL:
+        return compute_c_tbmaddl(dst, src1);
+
 #ifdef TARGET_X86_64
     case CC_OP_ADDQ:
         return compute_c_addq(dst, src1);
@@ -320,7 +335,12 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
         return compute_c_shlq(dst, src1);
     case CC_OP_BMILGQ:
         return compute_c_bmilgq(dst, src1);
+    case CC_OP_TBMADDQ:
+        return compute_c_tbmaddq(dst, src1);
 #endif
+
+    default:
+        g_assert_not_reached();
     }
 }
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c571772..34ab828 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -227,7 +227,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB | \
           TCG_EXT2_X86_64_FEATURES)
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
-          CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
+          CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \
+          CPUID_EXT3_TBM)
 #define TCG_EXT4_FEATURES 0
 #define TCG_SVM_FEATURES 0
 #define TCG_KVM_FEATURES 0
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 6082db2..2c64d2b 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -219,6 +219,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_TBMADDB ... CC_OP_TBMADDQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
@@ -783,6 +784,12 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
 
+    case CC_OP_TBMADDB ... CC_OP_TBMADDQ:
+        size = s->cc_op - CC_OP_TBMADDB;
+        t0 = gen_ext_tl(reg, cpu_cc_src, size, true);
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0,
+                             .mask = -1, .imm = -1 };
+
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
@@ -8291,9 +8298,119 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_sse(env, s, b, pc_start, rex_r);
         break;
 
-    case 0x800 ... 0x8ff: /* XOP opcode map 8 */
-    case 0x900 ... 0x9ff: /* XOP opcode map 9 */
-    case 0xa00 ... 0xaff: /* XOP opcode map 10 */
+    case 0x901:
+    case 0x902: /* most tbm insns */
+        if (!(s->cpuid_ext3_features & CPUID_EXT3_TBM)
+            || s->vex_l != 0) {
+            goto illegal_op;
+        }
+        modrm = cpu_ldub_code(env, s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        ot = rex_w > 0 ? MO_64 : MO_32;
+        if (mod != 3) {
+            gen_lea_modrm(env, s, modrm);
+            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        } else {
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
+        }
+
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+        switch ((b & 2) * 4 + ((modrm >> 3) & 7)) {
+        case 1: /* blcfill */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 2: /* blsfill */
+            op = CC_OP_BMILGB;
+            tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 3: /* blcs */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 4: /* tzmsk */
+            op = CC_OP_BMILGB;
+            tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_andc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 5: /* blcic */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_andc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 6: /* blsic */
+            op = CC_OP_BMILGB;
+            tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_orc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 7: /* t1mskc */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_orc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 8 + 1: /* blcmsk */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 8 + 6: /* blci */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_orc_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        default:
+            goto illegal_op;
+        }
+        gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+        set_cc_op(s, op + ot);
+        break;
+
+    case 0xa10: /* bextr Gy, Ey, imm4 */
+        {
+            int ofs, len, max;
+
+            if (!(s->cpuid_ext3_features & CPUID_EXT3_TBM)
+                || s->vex_l != 0) {
+                goto illegal_op;
+            }
+
+            s->rip_offset = 4;
+            modrm = cpu_ldub_code(env, s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+            ot = rex_w > 0 ? MO_64 : MO_32;
+            if (mod != 3) {
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+            } else {
+                gen_op_mov_v_reg(ot, cpu_T0, rm);
+            }
+            val = cpu_ldl_code(env, s->pc);
+            s->pc += 4;
+
+            ofs = extract32(val, 0, 8);
+            len = extract32(val, 8, 8);
+            max = 8 << ot;
+            if (len == 0 || ofs >= max) {
+                tcg_gen_movi_tl(cpu_T0, 0);
+            } else {
+                len = MIN(len, max - ofs);
+                tcg_gen_extract_tl(cpu_T0, cpu_T0, ofs, len);
+            }
+            tcg_gen_mov_tl(cpu_regs[reg], cpu_T0);
+            gen_op_update1_cc();
+            /* Z is set as per result, C/O = 0, S/A/P = undefined.
+               Which is less strict than LOGIC, but accurate.  */
+            set_cc_op(s, CC_OP_LOGICB + ot);
+        }
+        break;
+
     default:
         goto unknown_op;
     }
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions
  2017-07-11 21:21 [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions Richard Henderson
  2017-07-11 21:21 ` [Qemu-devel] [PATCH 1/2] target/i386: Decode AMD XOP prefix Richard Henderson
  2017-07-11 21:21 ` [Qemu-devel] [PATCH 2/2] target/i386: Implement all TBM instructions Richard Henderson
@ 2017-07-12  4:04 ` Richard Henderson
  2017-07-12 13:28   ` Ricardo Ribalda Delgado
  2 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2017-07-12  4:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, ricardo.ribalda

On 07/11/2017 11:21 AM, Richard Henderson wrote:
> I am in the process of trying to run the gcc testsuite with -mtbm,
> with and without the patchset, to see (1) if the new insns get used
> and (2) that they run ok.

FWIW, make check-gcc RUNTESTFLAGS='--target_board=unix/-mtbm execute.exp' shows
204 failures on a host that does not support TBM, so the extension is being
used.  A look through exactly one of these showed that it used only bextr.
Running the same tests with dejagnu using qemu-x86_64 -cpu qemu64,+tbm shows
zero failures.


r~

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions
  2017-07-12  4:04 ` [Qemu-devel] [PATCH 0/2] " Richard Henderson
@ 2017-07-12 13:28   ` Ricardo Ribalda Delgado
  2017-07-12 18:43     ` Richard Henderson
                       ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2017-07-12 13:28 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, pbonzini, ehabkost

Hi Richard

Thanks for your patch! I have applied it to my tree, but I still get
a SIGSEGV. I think that I might have found the problem. It seems to be
related to the BMI instruction blsr, which does not seem to be properly
implemented.

On this example:

#include <stdio.h>

int test_blsr(int val){

return (val & (val - 1));
}


int main(int argc, char *argv) {
volatile int val = 4096;

fprintf(stdout, "%d\n", test_blsr(val));

return 0;
}

When it is compiled with -march=bdver4 -static -O3 test_blsr , the
compiler produces:

0000000000400af0 <test_blsr>:
  400af0:       c4 e2 78 f3 cf          blsr   %edi,%eax
  400af5:       c3                      retq
  400af6:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  400afd:       00 00 00

If I run the emulator: /tmp/qemu/x86_64-linux-user/qemu-x86_64 -cpu
Haswell ./a.out

The function prints 4096

A quick look at the code shows that
https://github.com/qemu/qemu/blob/master/target/i386/translate.c#L4028
does not really match
https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29

It appears that case 1 and case 3 are swapped. I tried to fix it, but
without success :(. Then again, the wiki could also be wrong.

What is certain is that the code produces different results on qemu than
on the target, which is not good.


Thanks again for your help!


On Wed, Jul 12, 2017 at 6:04 AM, Richard Henderson <rth@twiddle.net> wrote:
> On 07/11/2017 11:21 AM, Richard Henderson wrote:
>>
>> I am in the process of trying to run the gcc testsuite with -mtbm,
>> with and without the patchset, to see (1) if the new insns get used
>> and (2) that they run ok.
>
>
> FWIW, make check-gcc RUNTESTFLAGS='--target_board=unix/-mtbm execute.exp'
> shows 204 failures on a host that does not support TBM, so the extension is
> being used.  A browse through exactly one of these used only bextr.  Running
> the same tests with dejagnu using qemu-x86_64 -cpu qemu64,+tbm shows zero
> failures.
>
>
> r~



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions
  2017-07-12 13:28   ` Ricardo Ribalda Delgado
@ 2017-07-12 18:43     ` Richard Henderson
  2017-07-12 18:45     ` [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI Richard Henderson
  2017-07-12 19:29     ` [Qemu-devel] [PATCH v2] " Richard Henderson
  2 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2017-07-12 18:43 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado; +Cc: qemu-devel, pbonzini, ehabkost

On 07/12/2017 03:28 AM, Ricardo Ribalda Delgado wrote:
> Hi Richard
> 
> Thanks for your patch! I have applied it to my tree, but i still get
> SIGSEGV. I think that I might have found the problem. It seems to be
> related to the bmi instruction blsr, which seems to be not properly
> implemented.

You're absolutely right.


r~

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI
  2017-07-12 13:28   ` Ricardo Ribalda Delgado
  2017-07-12 18:43     ` Richard Henderson
@ 2017-07-12 18:45     ` Richard Henderson
  2017-07-12 18:58       ` Ricardo Ribalda Delgado
  2018-06-06  9:13       ` Ricardo Ribalda Delgado
  2017-07-12 19:29     ` [Qemu-devel] [PATCH v2] " Richard Henderson
  2 siblings, 2 replies; 16+ messages in thread
From: Richard Henderson @ 2017-07-12 18:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: ricardo.ribalda, ehabkost, pbonzini

The implementation of these two instructions was swapped.
At the same time, unify the setup of eflags for the insn group.

Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/translate.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 8365a6d..087a2e6 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4029,36 +4029,27 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP1, 0);
 
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
                     tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                     gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
-                    gen_op_update2_cc();
-                    set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
-
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
+                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 case 3: /* blsi By, Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_neg_tl(cpu_T0, cpu_T1);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 default:
                     goto unknown_op;
                 }
+                gen_op_update2_cc();
+                set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
             default:
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI
  2017-07-12 18:45     ` [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI Richard Henderson
@ 2017-07-12 18:58       ` Ricardo Ribalda Delgado
  2017-07-12 19:12         ` Richard Henderson
  2018-06-06  9:13       ` Ricardo Ribalda Delgado
  1 sibling, 1 reply; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2017-07-12 18:58 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, ehabkost, pbonzini

Hi Richard

Thanks again! When I apply this patch, I get the following error:

/tmp/qemu/tcg/tcg.c:2042: tcg fatal error

Regards!

On Wed, Jul 12, 2017 at 8:45 PM, Richard Henderson <rth@twiddle.net> wrote:
> The implementation of these two instructions was swapped.
> At the same time, unify the setup of eflags for the insn group.
>
> Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/i386/translate.c | 25 ++++++++-----------------
>  1 file changed, 8 insertions(+), 17 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 8365a6d..087a2e6 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -4029,36 +4029,27 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP1, 0);
>
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
> +                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>                      gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> -                    gen_op_update2_cc();
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>                      break;
> -
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
> +                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
> +                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>                      break;
> -
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
> +                    tcg_gen_neg_tl(cpu_T0, cpu_T1);
> +                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>                      break;
> -
>                  default:
>                      goto unknown_op;
>                  }
> +                gen_op_update2_cc();
> +                set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
>              default:
> --
> 2.9.4
>



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI
  2017-07-12 18:58       ` Ricardo Ribalda Delgado
@ 2017-07-12 19:12         ` Richard Henderson
  2017-07-12 19:25           ` Ricardo Ribalda Delgado
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2017-07-12 19:12 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado; +Cc: qemu-devel, ehabkost, pbonzini

On 07/12/2017 08:58 AM, Ricardo Ribalda Delgado wrote:
> Hi Richard
> 
> Thanks again!, When I apply this patch I get the following error:
> 
> /tmp/qemu/tcg/tcg.c:2042: tcg fatal error

Bah.  I misremembered that OR_TMP1 is unusable in this context.


r~

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI
  2017-07-12 19:12         ` Richard Henderson
@ 2017-07-12 19:25           ` Ricardo Ribalda Delgado
  0 siblings, 0 replies; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2017-07-12 19:25 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, ehabkost, pbonzini

This seems to work fine with the example, but my app still throws SIGSEGV :(

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 2c64d2b71ec4..564b9c6057c2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4033,32 +4033,23 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                     tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                     gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
-                    gen_op_update2_cc();
-                    set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
-
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 case 3: /* blsi By, Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 default:
                     goto unknown_op;
                 }
+                gen_op_update2_cc();
+                set_cc_op(s, CC_OP_BMILGB + ot);
                 break;

             default:

On Wed, Jul 12, 2017 at 9:12 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 07/12/2017 08:58 AM, Ricardo Ribalda Delgado wrote:
>>
>> Hi Richard
>>
>> Thanks again!, When I apply this patch I get the following error:
>>
>> /tmp/qemu/tcg/tcg.c:2042: tcg fatal error
>
>
> Bah.  I misremembered that OR_TMP1 is unusable in this context.
>
>
> r~



-- 
Ricardo Ribalda

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI
  2017-07-12 13:28   ` Ricardo Ribalda Delgado
  2017-07-12 18:43     ` Richard Henderson
  2017-07-12 18:45     ` [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI Richard Henderson
@ 2017-07-12 19:29     ` Richard Henderson
  2017-07-13 20:42       ` Ricardo Ribalda Delgado
  2 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2017-07-12 19:29 UTC (permalink / raw)
  To: qemu-devel; +Cc: ricardo.ribalda, ehabkost, pbonzini

The implementation of these two instructions was swapped.
At the same time, unify the setup of eflags for the insn group.

Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/translate.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9d5f1c3..69d3787 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                     tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
-                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
-                    gen_op_update2_cc();
-                    set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
-
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 case 3: /* blsi By, Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 default:
                     goto unknown_op;
                 }
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+                set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
             default:
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI
  2017-07-12 19:29     ` [Qemu-devel] [PATCH v2] " Richard Henderson
@ 2017-07-13 20:42       ` Ricardo Ribalda Delgado
  2017-07-13 21:55         ` Ricardo Ribalda Delgado
  0 siblings, 1 reply; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2017-07-13 20:42 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, ehabkost, pbonzini

Hi Richard

The simple example works as expected, but my big application
(gobject-introspection) still crashes with sigsegv :(.

it seems to be something related to the bmi and tbm instructions. If I
disable them in gcc ( -mno-bmi -mno-tbm), the application
runs ok.

A look at qemu's code does not show anything obvious, but I am not
that familiar with qemu source yet to find something like this through
static analysis.

My plan (as soon as I have some time) is to create a small set of apps
to validate bmi/tbm/ (Are you aware of something already existing for
this?)
My stupid guess is that maybe the ops are switched, or the flags are
not properly modified.

If you want, I can share the application that crashes with you, just
be aware that the number of dependencies is considerable.

BTW I can only run the gdb stub on version 2.8.0. On git HEAD I am getting only:

Quit
(gdb) c
Continuing.
warning: Remote failure reply: E22

Program stopped.
0x00000040017bac07 in ?? ()
(gdb) c
Continuing.



Thanks for your help, it is greatly appreciated!

On Wed, Jul 12, 2017 at 9:29 PM, Richard Henderson <rth@twiddle.net> wrote:
> The implementation of these two instructions was swapped.
> At the same time, unify the setup of eflags for the insn group.
>
> Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/i386/translate.c | 26 +++++++++-----------------
>  1 file changed, 9 insertions(+), 17 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 9d5f1c3..69d3787 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>
> +                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
> +                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> -                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> -                    gen_op_update2_cc();
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>                      break;
> -
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
> +                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
> +                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>                      break;
> -
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
> +                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
> +                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>                      break;
> -
>                  default:
>                      goto unknown_op;
>                  }
> +                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> +                set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
>              default:
> --
> 2.9.4
>



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI
  2017-07-13 20:42       ` Ricardo Ribalda Delgado
@ 2017-07-13 21:55         ` Ricardo Ribalda Delgado
  2017-07-13 22:49           ` Ricardo Ribalda Delgado
  0 siblings, 1 reply; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2017-07-13 21:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, ehabkost, pbonzini

Hi again

Some progress here, I think that I have found a bug in andn, I have
already sent a patch.

I have made a rudimentary testcase for bmi. I will try tomorrow to
build something similar for tbm.

For reference, I am using this script:

for a in $(seq 0 255); do for b in $(seq 0 255); do for c in $(seq 0
255); do /tmp/qemu/x86_64-linux-user/qemu-x86_64 -cpu qemu64,+bmi1
./a.out $a $b $c >/tmp/res.qemu; ./pc $a $b $c > /tmp/res.pc; if !
diff  /tmp/res.pc /tmp/res.qemu; then echo $a $b $c; fi ; done ; done
; done

with these build options
gcc kk.c -mbmi -O3 -Wall
gcc kk.c -march=native -O3 -Wall -o pc

and this code:

#include <stdio.h>
#include <stdlib.h>
#include <x86intrin.h>


/* Clears the lowest set bit of val (val AND val-1); 0 stays 0. */
long test_blsr(long val)
{
    long below = val - 1;

    return val & below;
}

/* Isolates the lowest set bit of val (val AND -val); 0 stays 0. */
long test_blsi(long val)
{
    long negated = -val;

    return val & negated;
}

/*
 * Builds a mask covering every bit up to and including the lowest set bit
 * of val (val XOR val-1). For val == 0 the result is all ones.
 */
long test_blmsk(long val)
{
    long below = val - 1;

    return val ^ below;
}

/* Returns the bits of v2 that are not set in v1 (NOT v1 AND v2). */
long test_andn(long v1, long v2)
{
    long inverted = ~v1;

    return inverted & v2;
}

/*
 * Counts trailing zero bits in the low 32 bits of val; yields 32 when those
 * bits are all zero.
 *
 * With PC defined a portable reference is used, otherwise the __tzcnt_u32
 * intrinsic. The reference narrows val to 32 bits *before* the zero test:
 * the intrinsic only sees the low 32 bits, and testing the full long would
 * let a value such as 1L << 32 reach __builtin_ctz(0), which is undefined.
 */
long test_tzcnt(long val) {
#ifdef PC
    unsigned int low = (unsigned int)val;

    return low ? __builtin_ctz(low) : 32;
#else
    return __tzcnt_u32(val);
#endif
}

/*
 * Extracts a bit field of `len` bits starting at bit `start` from the low
 * 32 bits of src.
 *
 * With PC defined a portable reference is used, otherwise the __bextr_u32
 * intrinsic with the control word start | (len << 8).
 *
 * The reference works on 32-bit unsigned values and saturates explicitly:
 * start and len range over 0..255, and shifting by a count greater than or
 * equal to the operand width is undefined in C. A start of 32 or more yields
 * 0; a len of 32 or more keeps every remaining bit.
 * NOTE(review): saturation rules are modelled on the documented BEXTR
 * behaviour -- confirm against the instruction reference.
 */
long test_bextr(long src, unsigned char start, unsigned char len) {
#ifdef PC
    unsigned int value = (unsigned int)src;
    unsigned int shifted = start < 32 ? value >> start : 0u;
    unsigned int mask = len < 32 ? (1u << len) - 1u : ~0u;

    return (long)(shifted & mask);
#else
    return __bextr_u32(src, start | len <<8);
#endif
}

/*
 * Driver for the BMI checks: reads three operands from the command line,
 * echoes the first two, then prints the result of every test_* helper in
 * decimal and hexadecimal. Prints a usage line and returns -1 when fewer
 * than three operands are supplied.
 */
int main(int argc, char *argv[])
{
    long first, second, third;
    long result;

    if (argc < 4) {
        fprintf(stderr, "use %s op1 op2 op3\n", argv[0]);
        return -1;
    }

    /* Base 0 lets strtoul accept decimal, octal and hex operands. */
    first = strtoul(argv[1], NULL, 0);
    second = strtoul(argv[2], NULL, 0);
    third = strtoul(argv[3], NULL, 0);

    printf("op 1 %ld (0x%lx)\n", first, first);
    printf("op 2 %ld (0x%lx)\n", second, second);

    result = test_blsr(first);
    printf("blsr %ld (0x%lx)\n", result, result);

    result = test_blsi(first);
    printf("blsi %ld (0x%lx)\n", result, result);

    result = test_blmsk(first);
    printf("blmsk %ld (0x%lx)\n", result, result);

    result = test_andn(first, second);
    printf("andn %ld (0x%lx)\n", result, result);

    result = test_tzcnt(first);
    printf("tzcnt %ld (0x%lx)\n", result, result);

    result = test_bextr(first, second, third);
    printf("bextr %ld (0x%lx)\n", result, result);

    return 0;
}

On Thu, Jul 13, 2017 at 10:42 PM, Ricardo Ribalda Delgado
<ricardo.ribalda@gmail.com> wrote:
> Hi Richard
>
> The simple example works as expected, but my big application
> (gobject-introspection) still crashes with sigsegv :(.
>
> it seems to be something related to the bmi and tbm instructions. If I
> disable them in gcc ( -mno-bmi -mno-tbm), the application
> runs ok.
>
> A look at qemu's code does not show anything obvious, but I am not
> that familiar with qemu source yet to find something like this through
> static analysis.
>
> My plan (as soon as I have some time) is to create a small set of apps
> to validate bmi/tbm/ (Are you aware of something already existing for
> this?)
> My stupid guess is that maybe the ops are switched, or the flags are
> not properly modified.
>
> If you want, I can share the application that crashes with you, just
> be aware that the number of dependencies is considerable.
>
> BTW I can only run the gdb stub on version 2.8.0. On git HEAD I am getting only:
>
> Quit
> (gdb) c
> Continuing.
> warning: Remote failure reply: E22
>
> Program stopped.
> 0x00000040017bac07 in ?? ()
> (gdb) c
> Continuing.
>
>
>
> Thanks for your help, it is greatly appreciated!
>
> On Wed, Jul 12, 2017 at 9:29 PM, Richard Henderson <rth@twiddle.net> wrote:
>> The implementation of these two instructions was swapped.
>> At the same time, unify the setup of eflags for the insn group.
>>
>> Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>> ---
>>  target/i386/translate.c | 26 +++++++++-----------------
>>  1 file changed, 9 insertions(+), 17 deletions(-)
>>
>> diff --git a/target/i386/translate.c b/target/i386/translate.c
>> index 9d5f1c3..69d3787 100644
>> --- a/target/i386/translate.c
>> +++ b/target/i386/translate.c
>> @@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>>                  ot = mo_64_32(s->dflag);
>>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>>
>> +                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>>                  switch (reg & 7) {
>>                  case 1: /* blsr By,Ey */
>> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
>> +                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>> -                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
>> -                    gen_op_update2_cc();
>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>>                      break;
>> -
>>                  case 2: /* blsmsk By,Ey */
>> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
>> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>> +                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>> +                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>>                      break;
>> -
>>                  case 3: /* blsi By, Ey */
>> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
>> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>> +                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
>> +                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>>                      break;
>> -
>>                  default:
>>                      goto unknown_op;
>>                  }
>> +                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>> +                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
>> +                set_cc_op(s, CC_OP_BMILGB + ot);
>>                  break;
>>
>>              default:
>> --
>> 2.9.4
>>
>
>
>
> --
> Ricardo Ribalda



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target/i386: Fix BLSR and BLSI
  2017-07-13 21:55         ` Ricardo Ribalda Delgado
@ 2017-07-13 22:49           ` Ricardo Ribalda Delgado
  0 siblings, 0 replies; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2017-07-13 22:49 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, ehabkost, pbonzini

Hi

For completeness, this is my poor man's tbm test. It has run for 5 minutes
with no errors

gcc tbm.c -O3 -march=native  -o pc
gcc tbm.c -mtbm -O3

 for a in $(seq 0 65535); do /tmp/qemu/x86_64-linux-user/qemu-x86_64
-cpu qemu64,+tbm ./a.out $a >/tmp/res.qemu ; ./pc $a  > /tmp/res.pc ;
if !  diff  /tmp/res.pc /tmp/res.qemu; then echo $a ; fi ; done



#include <stdio.h>
#include <stdlib.h>
#include <x86intrin.h>


/* Extracts the 3-bit field of src that starts at bit 1. */
long test_bextr(long src)
{
    const unsigned char start = 1;
    const unsigned char len = 3;
    long shifted = src >> start;

    return shifted & ((1 << len) - 1);
}

/* Clears the run of trailing one bits in val (val AND val+1). */
long test_blcfill(long val)
{
    long above = val + 1;

    return val & above;
}

/*
 * Produces all ones except at the position of the lowest clear bit of val
 * (val OR NOT(val+1)).
 */
long test_blci(long val)
{
    long above = val + 1;

    return val | ~above;
}

/* Isolates the lowest clear bit of val as a single set bit (NOT val AND val+1). */
long test_blcic(long val)
{
    long inverted = ~val;
    long above = val + 1;

    return inverted & above;
}

/*
 * Builds a mask covering every bit up to and including the lowest clear bit
 * of val (val XOR val+1).
 */
long test_blcmsk(long val)
{
    long above = val + 1;

    return val ^ above;
}

/* Sets the lowest clear bit of val (val OR val+1). */
long test_blcs(long val)
{
    long above = val + 1;

    return val | above;
}

/*
 * Sets every bit below the lowest set bit of val (val OR val-1).
 * For val == 0 the result is all ones.
 */
long test_blsfill(long val)
{
    long below = val - 1;

    return val | below;
}

/*
 * Produces all ones except at the position of the lowest set bit of val
 * (NOT val OR val-1). For val == 0 the result is all ones.
 */
long test_blsic(long val)
{
    long inverted = ~val;
    long below = val - 1;

    return inverted | below;
}

/*
 * Clears the run of trailing one bits of val and sets every other bit
 * (NOT val OR val+1).
 */
long test_t1mskc(long val)
{
    long inverted = ~val;
    long above = val + 1;

    return inverted | above;
}

/*
 * Builds a mask of the trailing zero bits of val (NOT val AND val-1).
 * For val == 0 the result is all ones.
 */
long test_tzmsk(long val)
{
    long inverted = ~val;
    long below = val - 1;

    return inverted & below;
}

/*
 * Driver for the TBM checks: reads one operand from the command line,
 * echoes it, then prints the result of every test_* helper in decimal and
 * hexadecimal. Prints a usage line and returns -1 when no operand is given.
 */
int main(int argc, char *argv[])
{
    long operand;
    long result;

    if (argc < 2) {
        fprintf(stderr, "use %s op1 \n", argv[0]);
        return -1;
    }

    /* Base 0 lets strtoul accept decimal, octal and hex operands. */
    operand = strtoul(argv[1], NULL, 0);

    printf("op 1 %ld (0x%lx)\n", operand, operand);

    result = test_bextr(operand);
    printf("bextr %ld (0x%lx)\n", result, result);

    result = test_blcfill(operand);
    printf("blcfill %ld (0x%lx)\n", result, result);

    result = test_blci(operand);
    printf("blci %ld (0x%lx)\n", result, result);

    result = test_blcic(operand);
    printf("blcic %ld (0x%lx)\n", result, result);

    result = test_blcmsk(operand);
    printf("blcmsk %ld (0x%lx)\n", result, result);

    result = test_blcs(operand);
    printf("blcs %ld (0x%lx)\n", result, result);

    result = test_blsfill(operand);
    printf("blsfill %ld (0x%lx)\n", result, result);

    result = test_blsic(operand);
    printf("blsic %ld (0x%lx)\n", result, result);

    result = test_t1mskc(operand);
    printf("t1mskc %ld (0x%lx)\n", result, result);

    result = test_tzmsk(operand);
    printf("tzmsk %ld (0x%lx)\n", result, result);

    return 0;
}

On Thu, Jul 13, 2017 at 11:55 PM, Ricardo Ribalda Delgado
<ricardo.ribalda@gmail.com> wrote:
> Hi again
>
> Some progress here, I think that I have found a bug in andn, I have
> already send a patch.
>
> I have made a rudimentary testcase for bmi. I will try tomorrow o
> build something similar for tbm.
>
> For reference, I am using this script:
>
> for a in $(seq 0 255); do for b in $(seq 0 255); do for c in $(seq 0
> 255); do /tmp/qemu/x86_64-linux-user/qemu-x86_64 -cpu qemu64,+bmi1
> ./a.out $a $b $c >/tmp/res.qemu; ./pc $a $b $c > /tmp/res.pc; if !
> diff  /tmp/res.pc /tmp/res.qemu; then echo $a $b $c; fi ; done ; done
> ; done
>
> with this build options
> gcc kk.c -mbmi -O3 -Wall
> gcc kk.c -march=native -O3 -Wall -o pc
>
> and this code:
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <x86intrin.h>
>
>
> long test_blsr(long val){
>
> return (val & (val - 1));
> }
>
> long test_blsi(long val){
>
> return (val & (-val));
> }
>
> long test_blmsk(long val){
>
> return (val ^ (val -1));
> }
>
> long test_andn(long v1, long v2){
>
> return (~v1 & v2);
> }
>
> long test_tzcnt(long val) {
> #ifdef PC
> return val ? __builtin_ctz(val) : 32;
> #else
> return __tzcnt_u32(val);
> #endif
> }
>
> long test_bextr(long src, unsigned char start, unsigned char len) {
> #ifdef PC
> return (src >> start) & ((1 << len)-1);
> #else
> return __bextr_u32(src, start | len <<8);
> #endif
> }
>
> int main(int argc, char *argv[]) {
> long op1, op2, op3;
> long ret;
>
> if (argc < 4) {
> fprintf(stderr, "use %s op1 op2 op3\n", argv[0]);
> return -1;
> }
> op1 = strtoul(argv[1], NULL, 0);
> op2 = strtoul(argv[2], NULL, 0);
> op3 = strtoul(argv[3], NULL, 0);
>
> fprintf(stdout, "op 1 %ld (0x%lx)\n", op1, op1);
> fprintf(stdout, "op 2 %ld (0x%lx)\n", op2, op2);
>
> ret = test_blsr(op1);
> fprintf(stdout, "blsr %ld (0x%lx)\n",ret, ret);
>
> ret = test_blsi(op1);
> fprintf(stdout, "blsi %ld (0x%lx)\n",ret, ret);
>
> ret = test_blmsk(op1);
> fprintf(stdout, "blmsk %ld (0x%lx)\n",ret, ret);
>
> ret = test_andn(op1,op2);
> fprintf(stdout, "andn %ld (0x%lx)\n",ret, ret);
>
> ret = test_tzcnt(op1);
> fprintf(stdout, "tzcnt %ld (0x%lx)\n",ret, ret);
>
> ret = test_bextr(op1, op2, op3);
> fprintf(stdout, "bextr %ld (0x%lx)\n",ret, ret);
>
> return 0;
> }
>
> On Thu, Jul 13, 2017 at 10:42 PM, Ricardo Ribalda Delgado
> <ricardo.ribalda@gmail.com> wrote:
>> Hi Richard
>>
>> The simple example works as expected, but my big application
>> (gobject-introspection) still crashes with sigsegv :(.
>>
>> it seems to be something related to the bmi and tbm instructions. If I
>> disable them in gcc ( -mno-bmi -mno-tbm), the application
>> runs ok.
>>
>> A look at qemu's code does not show anything obvious, but I am not
>> that familiar with qemu source yet to find something like this through
>> static analysis.
>>
>> My plan (as soon as I have some time) is to create a small set of apps
>> to validate bmi/tbm/ (Are you aware of something already existing for
>> this?)
>> My stupid guess is that maybe the ops are switched, or the flags are
>> not properly modified.
>>
>> If you want, I can share the application that crashes with you, just
>> be aware that the number of dependencies is considerable.
>>
>> BTW I can only run the gdb stub on version 2.8.0. On git HEAD I am getting only:
>>
>> Quit
>> (gdb) c
>> Continuing.
>> warning: Remote failure reply: E22
>>
>> Program stopped.
>> 0x00000040017bac07 in ?? ()
>> (gdb) c
>> Continuing.
>>
>>
>>
>> Thanks for your help, it is greatly appreciated!
>>
>> On Wed, Jul 12, 2017 at 9:29 PM, Richard Henderson <rth@twiddle.net> wrote:
>>> The implementation of these two instructions was swapped.
>>> At the same time, unify the setup of eflags for the insn group.
>>>
>>> Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>>> ---
>>>  target/i386/translate.c | 26 +++++++++-----------------
>>>  1 file changed, 9 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/target/i386/translate.c b/target/i386/translate.c
>>> index 9d5f1c3..69d3787 100644
>>> --- a/target/i386/translate.c
>>> +++ b/target/i386/translate.c
>>> @@ -4031,34 +4031,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>>>                  ot = mo_64_32(s->dflag);
>>>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>>>
>>> +                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>>>                  switch (reg & 7) {
>>>                  case 1: /* blsr By,Ey */
>>> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
>>> +                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>>>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>>> -                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
>>> -                    gen_op_update2_cc();
>>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>>>                      break;
>>> -
>>>                  case 2: /* blsmsk By,Ey */
>>> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>>> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>>> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
>>> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>>> +                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
>>> +                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>>>                      break;
>>> -
>>>                  case 3: /* blsi By, Ey */
>>> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>>> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>>> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
>>> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>>> +                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
>>> +                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>>>                      break;
>>> -
>>>                  default:
>>>                      goto unknown_op;
>>>                  }
>>> +                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>>> +                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
>>> +                set_cc_op(s, CC_OP_BMILGB + ot);
>>>                  break;
>>>
>>>              default:
>>> --
>>> 2.9.4
>>>
>>
>>
>>
>> --
>> Ricardo Ribalda
>
>
>
> --
> Ricardo Ribalda



-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI
  2017-07-12 18:45     ` [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI Richard Henderson
  2017-07-12 18:58       ` Ricardo Ribalda Delgado
@ 2018-06-06  9:13       ` Ricardo Ribalda Delgado
  2018-06-06 12:32         ` Paolo Bonzini
  1 sibling, 1 reply; 16+ messages in thread
From: Ricardo Ribalda Delgado @ 2018-06-06  9:13 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, ehabkost, pbonzini

Hi Richard,

I cannot find this patch on qemu master branch. Do you need any help
to get this done?

Thanks!
On Wed, Jul 12, 2017 at 8:45 PM Richard Henderson <rth@twiddle.net> wrote:
>
> The implementation of these two instructions was swapped.
> At the same time, unify the setup of eflags for the insn group.
>
> Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/i386/translate.c | 25 ++++++++-----------------
>  1 file changed, 8 insertions(+), 17 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 8365a6d..087a2e6 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -4029,36 +4029,27 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP1, 0);
>
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
> +                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>                      gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> -                    gen_op_update2_cc();
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>                      break;
> -
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
> +                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
> +                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>                      break;
> -
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                    set_cc_op(s, CC_OP_BMILGB + ot);
> +                    tcg_gen_neg_tl(cpu_T0, cpu_T1);
> +                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>                      break;
> -
>                  default:
>                      goto unknown_op;
>                  }
> +                gen_op_update2_cc();
> +                set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
>              default:
> --
> 2.9.4
>


-- 
Ricardo Ribalda

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI
  2018-06-06  9:13       ` Ricardo Ribalda Delgado
@ 2018-06-06 12:32         ` Paolo Bonzini
  0 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2018-06-06 12:32 UTC (permalink / raw)
  To: Ricardo Ribalda Delgado, Richard Henderson; +Cc: qemu-devel, ehabkost

On 06/06/2018 11:13, Ricardo Ribalda Delgado wrote:
> Hi Richard,
> 
> I cannot find this patch on qemu master branch. Do you need any help
> to get this done?

I queued it now, thanks for the reminder!

Paolo


> Thanks!
> On Wed, Jul 12, 2017 at 8:45 PM Richard Henderson <rth@twiddle.net> wrote:
>>
>> The implementation of these two instructions was swapped.
>> At the same time, unify the setup of eflags for the insn group.
>>
>> Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>> ---
>>  target/i386/translate.c | 25 ++++++++-----------------
>>  1 file changed, 8 insertions(+), 17 deletions(-)
>>
>> diff --git a/target/i386/translate.c b/target/i386/translate.c
>> index 8365a6d..087a2e6 100644
>> --- a/target/i386/translate.c
>> +++ b/target/i386/translate.c
>> @@ -4029,36 +4029,27 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>>                      goto illegal_op;
>>                  }
>>                  ot = mo_64_32(s->dflag);
>> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP1, 0);
>>
>>                  switch (reg & 7) {
>>                  case 1: /* blsr By,Ey */
>> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
>> +                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
>>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>>                      gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
>> -                    gen_op_update2_cc();
>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>>                      break;
>> -
>>                  case 2: /* blsmsk By,Ey */
>> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
>> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>> +                    tcg_gen_subi_tl(cpu_T0, cpu_T1, 1);
>> +                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
>>                      break;
>> -
>>                  case 3: /* blsi By, Ey */
>> -                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
>> -                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
>> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
>> -                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>> -                    set_cc_op(s, CC_OP_BMILGB + ot);
>> +                    tcg_gen_neg_tl(cpu_T0, cpu_T1);
>> +                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>>                      break;
>> -
>>                  default:
>>                      goto unknown_op;
>>                  }
>> +                gen_op_update2_cc();
>> +                set_cc_op(s, CC_OP_BMILGB + ot);
>>                  break;
>>
>>              default:
>> --
>> 2.9.4
>>
> 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-06-06 12:33 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-11 21:21 [Qemu-devel] [PATCH 0/2] target/i386: Implement all TBM instructions Richard Henderson
2017-07-11 21:21 ` [Qemu-devel] [PATCH 1/2] target/i386: Decode AMD XOP prefix Richard Henderson
2017-07-11 21:21 ` [Qemu-devel] [PATCH 2/2] target/i386: Implement all TBM instructions Richard Henderson
2017-07-12  4:04 ` [Qemu-devel] [PATCH 0/2] " Richard Henderson
2017-07-12 13:28   ` Ricardo Ribalda Delgado
2017-07-12 18:43     ` Richard Henderson
2017-07-12 18:45     ` [Qemu-devel] [PATCH] target/i386: Fix BLSR and BLSI Richard Henderson
2017-07-12 18:58       ` Ricardo Ribalda Delgado
2017-07-12 19:12         ` Richard Henderson
2017-07-12 19:25           ` Ricardo Ribalda Delgado
2018-06-06  9:13       ` Ricardo Ribalda Delgado
2018-06-06 12:32         ` Paolo Bonzini
2017-07-12 19:29     ` [Qemu-devel] [PATCH v2] " Richard Henderson
2017-07-13 20:42       ` Ricardo Ribalda Delgado
2017-07-13 21:55         ` Ricardo Ribalda Delgado
2017-07-13 22:49           ` Ricardo Ribalda Delgado

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.