All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/4] target/s390x Implement EXECUTE via TranslationBlock
@ 2017-05-24 22:08 Richard Henderson
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation Richard Henderson
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Richard Henderson @ 2017-05-24 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, aurelien

This is the rewrite of EX that I posted last week, fixed with
Aurelien's help, and adjusted to be applied on top of my v2 unwind
patch set.

It also splits the patch into more pieces to make it easier to debug,
and keeps the direct implementation of the most common target insns.
Which are in fact so common I don't see any other usage while booting
the debian installer.


r~


Richard Henderson (4):
  target/s390x: Save current ilen during translation
  target/s390x: End the TB after EXECUTE
  target/s390x: Implement EXECUTE via new TranslationBlock
  target/s390x: Re-implement a few EXECUTE target insns directly

 target/s390x/cpu.h        |   4 +-
 target/s390x/helper.c     |   5 ++
 target/s390x/machine.c    |  19 ++++++
 target/s390x/mem_helper.c | 156 ++++++++++++++++------------------------------
 target/s390x/translate.c  |  92 ++++++++++++++++-----------
 5 files changed, 136 insertions(+), 140 deletions(-)

-- 
2.9.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation
  2017-05-24 22:08 [Qemu-devel] [PATCH 0/4] target/s390x Implement EXECUTE via TranslationBlock Richard Henderson
@ 2017-05-24 22:08 ` Richard Henderson
  2017-05-25 22:57   ` Aurelien Jarno
  2017-06-01  8:30   ` David Hildenbrand
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 2/4] target/s390x: End the TB after EXECUTE Richard Henderson
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Richard Henderson @ 2017-05-24 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, aurelien

Use this saved value instead of recomputing from next_pc difference.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/s390x/translate.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 4bd16d9..5b8333f 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -58,6 +58,7 @@ struct DisasContext {
     const DisasInsn *insn;
     DisasFields *fields;
     uint64_t pc, next_pc;
+    uint32_t ilen;
     enum cc_op cc_op;
     bool singlestep_enabled;
 };
@@ -349,7 +350,7 @@ static void gen_program_exception(DisasContext *s, int code)
     tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
     tcg_temp_free_i32(tmp);
 
-    tmp = tcg_const_i32(s->next_pc - s->pc);
+    tmp = tcg_const_i32(s->ilen);
     tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_ilen));
     tcg_temp_free_i32(tmp);
 
@@ -2207,7 +2208,7 @@ static ExitStatus op_ex(DisasContext *s, DisasOps *o)
         v1 = regs[r1];
     }
 
-    ilen = tcg_const_i32(s->next_pc - s->pc);
+    ilen = tcg_const_i32(s->ilen);
     gen_helper_ex(cpu_env, ilen, v1, o->in2);
     tcg_temp_free_i32(ilen);
 
@@ -4052,7 +4053,7 @@ static ExitStatus op_svc(DisasContext *s, DisasOps *o)
     tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_code));
     tcg_temp_free_i32(t);
 
-    t = tcg_const_i32(s->next_pc - s->pc);
+    t = tcg_const_i32(s->ilen);
     tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_ilen));
     tcg_temp_free_i32(t);
 
@@ -5191,6 +5192,7 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s,
     op = (insn >> 8) & 0xff;
     ilen = get_ilen(op);
     s->next_pc = s->pc + ilen;
+    s->ilen = ilen;
 
     switch (ilen) {
     case 2:
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 2/4] target/s390x: End the TB after EXECUTE
  2017-05-24 22:08 [Qemu-devel] [PATCH 0/4] target/s390x Implement EXECUTE via TranslationBlock Richard Henderson
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation Richard Henderson
@ 2017-05-24 22:08 ` Richard Henderson
  2017-05-25 22:58   ` Aurelien Jarno
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 3/4] target/s390x: Implement EXECUTE via new TranslationBlock Richard Henderson
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly Richard Henderson
  3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2017-05-24 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, aurelien

This split will be required for implementing EXECUTE properly.
Do this now as a separate step to aid comparison of before and
after TB listings.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/s390x/mem_helper.c | 54 ++++++++++++++++++++++++++++-------------------
 target/s390x/translate.c  |  6 +++++-
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 4b96c27..d57d5b1 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1234,6 +1234,7 @@ void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
     S390CPU *cpu = s390_env_get_cpu(env);
     uint64_t insn = cpu_lduw_code(env, addr);
     uint8_t opc = insn >> 8;
+    uint32_t cc;
 
     /* Or in the contents of R1[56:63].  */
     insn |= r1 & 0xff;
@@ -1263,42 +1264,46 @@ void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
         b2 = extract64(insn, 28, 4);
         d1 = extract64(insn, 32, 12);
         d2 = extract64(insn, 16, 12);
+
+        cc = env->cc_op;
         switch (opc & 0xf) {
         case 0x2:
             do_helper_mvc(env, l, get_address(env, 0, b1, d1),
                           get_address(env, 0, b2, d2), 0);
-            return;
+            break;
         case 0x4:
-            env->cc_op = do_helper_nc(env, l, get_address(env, 0, b1, d1),
-                                      get_address(env, 0, b2, d2), 0);
-            return;
+            cc = do_helper_nc(env, l, get_address(env, 0, b1, d1),
+                              get_address(env, 0, b2, d2), 0);
+            break;
         case 0x5:
-            env->cc_op = do_helper_clc(env, l, get_address(env, 0, b1, d1),
-                                       get_address(env, 0, b2, d2), 0);
-            return;
+            cc = do_helper_clc(env, l, get_address(env, 0, b1, d1),
+                               get_address(env, 0, b2, d2), 0);
+            break;
         case 0x6:
-            env->cc_op = do_helper_oc(env, l, get_address(env, 0, b1, d1),
-                                      get_address(env, 0, b2, d2), 0);
-            return;
+            cc = do_helper_oc(env, l, get_address(env, 0, b1, d1),
+                              get_address(env, 0, b2, d2), 0);
+            break;
         case 0x7:
-            env->cc_op = do_helper_xc(env, l, get_address(env, 0, b1, d1),
-                                      get_address(env, 0, b2, d2), 0);
-            return;
+            cc = do_helper_xc(env, l, get_address(env, 0, b1, d1),
+                              get_address(env, 0, b2, d2), 0);
+            break;
         case 0xc:
             do_helper_tr(env, l, get_address(env, 0, b1, d1),
                          get_address(env, 0, b2, d2), 0);
-            return;
+            break;
         case 0xd:
-            env->cc_op = do_helper_trt(env, l, get_address(env, 0, b1, d1),
-                                       get_address(env, 0, b2, d2), 0);
-            return;
+            cc = do_helper_trt(env, l, get_address(env, 0, b1, d1),
+                               get_address(env, 0, b2, d2), 0);
+            break;
+        default:
+            goto abort;
         }
     } else if (opc == 0x0a) {
         /* supervisor call */
         env->int_svc_code = extract64(insn, 48, 8);
         env->int_svc_ilen = ilen;
         helper_exception(env, EXCP_SVC);
-        return;
+        g_assert_not_reached();
     } else if (opc == 0xbf) {
         uint32_t r1, r3, b2, d2;
 
@@ -1306,10 +1311,15 @@ void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
         r3 = extract64(insn, 48, 4);
         b2 = extract64(insn, 44, 4);
         d2 = extract64(insn, 32, 12);
-        env->cc_op = helper_icm(env, r1, get_address(env, 0, b2, d2), r3);
-        return;
+        cc = helper_icm(env, r1, get_address(env, 0, b2, d2), r3);
+    } else {
+ abort:
+        cpu_abort(CPU(cpu),
+                  "EXECUTE on instruction prefix 0x%x not implemented\n",
+                  opc);
+        g_assert_not_reached();
     }
 
-    cpu_abort(CPU(cpu), "EXECUTE on instruction prefix 0x%x not implemented\n",
-              opc);
+    env->cc_op = cc;
+    env->psw.addr += ilen;
 }
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 5b8333f..70212c8 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -1163,6 +1163,8 @@ typedef enum {
        the PC (for whatever reason), so there's no need to do it again on
        exiting the TB.  */
     EXIT_PC_UPDATED,
+    /* We have updated the PC and CC values.  */
+    EXIT_PC_CC_UPDATED,
     /* We are exiting the TB, but have neither emitted a goto_tb, nor
        updated the PC for the next instruction to be executed.  */
     EXIT_PC_STALE,
@@ -2216,7 +2218,7 @@ static ExitStatus op_ex(DisasContext *s, DisasOps *o)
         tcg_temp_free_i64(v1);
     }
 
-    return NO_EXIT;
+    return EXIT_PC_CC_UPDATED;
 }
 
 static ExitStatus op_fieb(DisasContext *s, DisasOps *o)
@@ -5489,6 +5491,8 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
         /* Next TB starts off with CC_OP_DYNAMIC, so make sure the
            cc op type is in env */
         update_cc_op(&dc);
+        /* FALLTHRU */
+    case EXIT_PC_CC_UPDATED:
         /* Exit the TB, either by raising a debug exception or by return.  */
         if (do_debug) {
             gen_exception(EXCP_DEBUG);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 3/4] target/s390x: Implement EXECUTE via new TranslationBlock
  2017-05-24 22:08 [Qemu-devel] [PATCH 0/4] target/s390x Implement EXECUTE via TranslationBlock Richard Henderson
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation Richard Henderson
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 2/4] target/s390x: End the TB after EXECUTE Richard Henderson
@ 2017-05-24 22:08 ` Richard Henderson
  2017-05-25 22:58   ` Aurelien Jarno
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly Richard Henderson
  3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2017-05-24 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, aurelien

Previously, helper_ex would construct the insn and then implement
the insn via direct calls other helpers.  This was sufficient to
boot Linux but that is all.

It is easy enough to go the whole nine yards by stashing state for
EXECUTE within the cpu, and then rely on a new TB to be created
that properly and completely interprets the insn.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/s390x/cpu.h        |   4 +-
 target/s390x/helper.c     |   5 ++
 target/s390x/machine.c    |  19 ++++++++
 target/s390x/mem_helper.c | 118 +++++-----------------------------------------
 target/s390x/translate.c  |  80 ++++++++++++++++++-------------
 5 files changed, 85 insertions(+), 141 deletions(-)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 4f38ba0..79235cf 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -103,6 +103,8 @@ typedef struct CPUS390XState {
     uint64_t cc_dst;
     uint64_t cc_vr;
 
+    uint64_t ex_value;
+
     uint64_t __excp_addr;
     uint64_t psa;
 
@@ -391,7 +393,7 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
 {
     *pc = env->psw.addr;
-    *cs_base = 0;
+    *cs_base = env->ex_value;
     *flags = ((env->psw.mask >> 32) & ~FLAG_MASK_CC) |
              ((env->psw.mask & PSW_MASK_32) ? FLAG_MASK_32 : 0);
 }
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 9978490..f01811f 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -642,6 +642,11 @@ bool s390_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
         S390CPU *cpu = S390_CPU(cs);
         CPUS390XState *env = &cpu->env;
 
+        if (env->ex_value) {
+            /* Execution of the target insn is indivisible from
+               the parent EXECUTE insn.  */
+            return false;
+        }
         if (env->psw.mask & PSW_MASK_EXT) {
             s390_cpu_do_interrupt(cs);
             return true;
diff --git a/target/s390x/machine.c b/target/s390x/machine.c
index 8503fa1..8f908bb 100644
--- a/target/s390x/machine.c
+++ b/target/s390x/machine.c
@@ -34,6 +34,7 @@ static int cpu_post_load(void *opaque, int version_id)
 
     return 0;
 }
+
 static void cpu_pre_save(void *opaque)
 {
     S390CPU *cpu = opaque;
@@ -156,6 +157,23 @@ const VMStateDescription vmstate_riccb = {
     }
 };
 
+static bool exval_needed(void *opaque)
+{
+    S390CPU *cpu = opaque;
+    return cpu->env.ex_value != 0;
+}
+
+const VMStateDescription vmstate_exval = {
+    .name = "cpu/exval",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = exval_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.ex_value, S390CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_s390_cpu = {
     .name = "cpu",
     .post_load = cpu_post_load,
@@ -188,6 +206,7 @@ const VMStateDescription vmstate_s390_cpu = {
         &vmstate_fpu,
         &vmstate_vregs,
         &vmstate_riccb,
+        &vmstate_exval,
         NULL
     },
 };
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index d57d5b1..3a77edc 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -435,37 +435,6 @@ uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s)
     return d + len;
 }
 
-static uint32_t helper_icm(CPUS390XState *env, uint32_t r1, uint64_t address,
-                           uint32_t mask)
-{
-    int pos = 24; /* top of the lower half of r1 */
-    uint64_t rmask = 0xff000000ULL;
-    uint8_t val = 0;
-    int ccd = 0;
-    uint32_t cc = 0;
-
-    while (mask) {
-        if (mask & 8) {
-            env->regs[r1] &= ~rmask;
-            val = cpu_ldub_data(env, address);
-            if ((val & 0x80) && !ccd) {
-                cc = 1;
-            }
-            ccd = 1;
-            if (val && cc == 0) {
-                cc = 2;
-            }
-            env->regs[r1] |= (uint64_t)val << pos;
-            address++;
-        }
-        mask = (mask << 1) & 0xf;
-        pos -= 8;
-        rmask >>= 8;
-    }
-
-    return cc;
-}
-
 /* load access registers r1 to r3 from memory at a2 */
 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 {
@@ -1222,19 +1191,17 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
 }
 #endif
 
-/* execute instruction
-   this instruction executes an insn modified with the contents of r1
-   it does not change the executed instruction in memory
-   it does not change the program counter
-   in other words: tricky...
-   currently implemented by interpreting the cases it is most commonly used.
+/* Execute instruction.  This instruction executes an insn modified with
+   the contents of r1.  It does not change the executed instruction in memory;
+   it does not change the program counter.
+
+   Perform this by recording the modified instruction in env->ex_value.
+   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
 */
 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
 {
-    S390CPU *cpu = s390_env_get_cpu(env);
     uint64_t insn = cpu_lduw_code(env, addr);
     uint8_t opc = insn >> 8;
-    uint32_t cc;
 
     /* Or in the contents of R1[56:63].  */
     insn |= r1 & 0xff;
@@ -1254,72 +1221,9 @@ void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
         g_assert_not_reached();
     }
 
-    HELPER_LOG("%s: addr 0x%lx insn 0x%" PRIx64 "\n", __func__, addr, insn);
-
-    if ((opc & 0xf0) == 0xd0) {
-        uint32_t l, b1, b2, d1, d2;
-
-        l = extract64(insn, 48, 8);
-        b1 = extract64(insn, 44, 4);
-        b2 = extract64(insn, 28, 4);
-        d1 = extract64(insn, 32, 12);
-        d2 = extract64(insn, 16, 12);
-
-        cc = env->cc_op;
-        switch (opc & 0xf) {
-        case 0x2:
-            do_helper_mvc(env, l, get_address(env, 0, b1, d1),
-                          get_address(env, 0, b2, d2), 0);
-            break;
-        case 0x4:
-            cc = do_helper_nc(env, l, get_address(env, 0, b1, d1),
-                              get_address(env, 0, b2, d2), 0);
-            break;
-        case 0x5:
-            cc = do_helper_clc(env, l, get_address(env, 0, b1, d1),
-                               get_address(env, 0, b2, d2), 0);
-            break;
-        case 0x6:
-            cc = do_helper_oc(env, l, get_address(env, 0, b1, d1),
-                              get_address(env, 0, b2, d2), 0);
-            break;
-        case 0x7:
-            cc = do_helper_xc(env, l, get_address(env, 0, b1, d1),
-                              get_address(env, 0, b2, d2), 0);
-            break;
-        case 0xc:
-            do_helper_tr(env, l, get_address(env, 0, b1, d1),
-                         get_address(env, 0, b2, d2), 0);
-            break;
-        case 0xd:
-            cc = do_helper_trt(env, l, get_address(env, 0, b1, d1),
-                               get_address(env, 0, b2, d2), 0);
-            break;
-        default:
-            goto abort;
-        }
-    } else if (opc == 0x0a) {
-        /* supervisor call */
-        env->int_svc_code = extract64(insn, 48, 8);
-        env->int_svc_ilen = ilen;
-        helper_exception(env, EXCP_SVC);
-        g_assert_not_reached();
-    } else if (opc == 0xbf) {
-        uint32_t r1, r3, b2, d2;
-
-        r1 = extract64(insn, 52, 4);
-        r3 = extract64(insn, 48, 4);
-        b2 = extract64(insn, 44, 4);
-        d2 = extract64(insn, 32, 12);
-        cc = helper_icm(env, r1, get_address(env, 0, b2, d2), r3);
-    } else {
- abort:
-        cpu_abort(CPU(cpu),
-                  "EXECUTE on instruction prefix 0x%x not implemented\n",
-                  opc);
-        g_assert_not_reached();
-    }
-
-    env->cc_op = cc;
-    env->psw.addr += ilen;
+    /* Record the insn we want to execute as well as the ilen to use
+       during the execution of the target insn.  This will also ensure
+       that ex_value is non-zero, which flags that we are in a state
+       that requires such execution.  */
+    env->ex_value = insn | ilen;
 }
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 70212c8..97ca639 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -57,6 +57,7 @@ struct DisasContext {
     struct TranslationBlock *tb;
     const DisasInsn *insn;
     DisasFields *fields;
+    uint64_t ex_value;
     uint64_t pc, next_pc;
     uint32_t ilen;
     enum cc_op cc_op;
@@ -2186,23 +2187,18 @@ static ExitStatus op_epsw(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_ex(DisasContext *s, DisasOps *o)
 {
-    /* ??? Perhaps a better way to implement EXECUTE is to set a bit in
-       tb->flags, (ab)use the tb->cs_base field as the address of
-       the template in memory, and grab 8 bits of tb->flags/cflags for
-       the contents of the register.  We would then recognize all this
-       in gen_intermediate_code_internal, generating code for exactly
-       one instruction.  This new TB then gets executed normally.
-
-       On the other hand, this seems to be mostly used for modifying
-       MVC inside of memcpy, which needs a helper call anyway.  So
-       perhaps this doesn't bear thinking about any further.  */
-
     int r1 = get_field(s->fields, r1);
     TCGv_i32 ilen;
     TCGv_i64 v1;
 
+    /* Nested EXECUTE is not allowed.  */
+    if (unlikely(s->ex_value)) {
+        gen_program_exception(s, PGM_EXECUTE);
+        return EXIT_NORETURN;
+    }
+
     update_psw_addr(s);
-    gen_op_calc_cc(s);
+    update_cc_op(s);
 
     if (r1 == 0) {
         v1 = tcg_const_i64(0);
@@ -5190,25 +5186,36 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s,
     int op, op2, ilen;
     const DisasInsn *info;
 
-    insn = ld_code2(env, pc);
-    op = (insn >> 8) & 0xff;
-    ilen = get_ilen(op);
-    s->next_pc = s->pc + ilen;
-    s->ilen = ilen;
+    if (unlikely(s->ex_value)) {
+        /* Drop the EX data now, so that it's clear on exception paths.  */
+        TCGv_i64 zero = tcg_const_i64(0);
+        tcg_gen_st_i64(zero, cpu_env, offsetof(CPUS390XState, ex_value));
+        tcg_temp_free_i64(zero);
 
-    switch (ilen) {
-    case 2:
-        insn = insn << 48;
-        break;
-    case 4:
-        insn = ld_code4(env, pc) << 32;
-        break;
-    case 6:
-        insn = (insn << 48) | (ld_code4(env, pc + 2) << 16);
-        break;
-    default:
-        abort();
+        /* Extract the values saved by EXECUTE.  */
+        insn = s->ex_value & 0xffffffffffff0000ull;
+        ilen = s->ex_value & 0xf;
+        op = insn >> 56;
+    } else {
+        insn = ld_code2(env, pc);
+        op = (insn >> 8) & 0xff;
+        ilen = get_ilen(op);
+        switch (ilen) {
+        case 2:
+            insn = insn << 48;
+            break;
+        case 4:
+            insn = ld_code4(env, pc) << 32;
+            break;
+        case 6:
+            insn = (insn << 48) | (ld_code4(env, pc + 2) << 16);
+            break;
+        default:
+            g_assert_not_reached();
+        }
     }
+    s->next_pc = s->pc + ilen;
+    s->ilen = ilen;
 
     /* We can't actually determine the insn format until we've looked up
        the full insn opcode.  Which we can't do without locating the
@@ -5425,6 +5432,7 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
     dc.tb = tb;
     dc.pc = pc_start;
     dc.cc_op = CC_OP_DYNAMIC;
+    dc.ex_value = tb->cs_base;
     do_debug = dc.singlestep_enabled = cs->singlestep_enabled;
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
@@ -5471,7 +5479,8 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
                 || tcg_op_buf_full()
                 || num_insns >= max_insns
                 || singlestep
-                || cs->singlestep_enabled)) {
+                || cs->singlestep_enabled
+                || dc.ex_value)) {
             status = EXIT_PC_STALE;
         }
     } while (status == NO_EXIT);
@@ -5513,9 +5522,14 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
         && qemu_log_in_addr_range(pc_start)) {
         qemu_log_lock();
-        qemu_log("IN: %s\n", lookup_symbol(pc_start));
-        log_target_disas(cs, pc_start, dc.pc - pc_start, 1);
-        qemu_log("\n");
+        if (unlikely(dc.ex_value)) {
+            /* ??? Unfortunately log_target_disas can't use host memory.  */
+            qemu_log("IN: EXECUTE %016" PRIx64 "\n", dc.ex_value);
+        } else {
+            qemu_log("IN: %s\n", lookup_symbol(pc_start));
+            log_target_disas(cs, pc_start, dc.pc - pc_start, 1);
+            qemu_log("\n");
+        }
         qemu_log_unlock();
     }
 #endif
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly
  2017-05-24 22:08 [Qemu-devel] [PATCH 0/4] target/s390x Implement EXECUTE via TranslationBlock Richard Henderson
                   ` (2 preceding siblings ...)
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 3/4] target/s390x: Implement EXECUTE via new TranslationBlock Richard Henderson
@ 2017-05-24 22:08 ` Richard Henderson
  2017-05-25 23:12   ` Aurelien Jarno
  3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2017-05-24 22:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, aurelien

While the previous patch is required for proper conformance,
the vast majority of target insns are MVC and XC for implementing
memmove and memset respectively.  The next most common are CLC,
TR, and SVC.

Implementing these (and a few others for which we already have
an implementation) directly is faster than going through full
translation to a TB.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/s390x/mem_helper.c | 66 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 15 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 3a77edc..e35571e 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -200,31 +200,30 @@ uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 }
 
 /* memmove */
-static void do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
-                          uint64_t src, uintptr_t ra)
+static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
+                              uint64_t src, uintptr_t ra)
 {
     uint32_t i;
 
     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
                __func__, l, dest, src);
 
+    /* mvc and memmove do not behave the same when areas overlap! */
     /* mvc with source pointing to the byte after the destination is the
        same as memset with the first source byte */
     if (dest == src + 1) {
         fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l + 1, ra);
-        return;
-    }
-
-    /* mvc and memmove do not behave the same when areas overlap! */
-    if (dest < src || src + l < dest) {
+    } else if (dest < src || src + l < dest) {
         fast_memmove(env, dest, src, l + 1, ra);
-        return;
+    } else {
+        /* slow version with byte accesses which always work */
+        for (i = 0; i <= l; i++) {
+            uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
+            cpu_stb_data_ra(env, dest + i, x, ra);
+        }
     }
 
-    /* slow version with byte accesses which always work */
-    for (i = 0; i <= l; i++) {
-        cpu_stb_data_ra(env, dest + i, cpu_ldub_data_ra(env, src + i, ra), ra);
-    }
+    return env->cc_op;
 }
 
 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
@@ -692,8 +691,8 @@ void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
     }
 }
 
-static void do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
-                         uint64_t trans, uintptr_t ra)
+static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
+                             uint64_t trans, uintptr_t ra)
 {
     uint32_t i;
 
@@ -702,12 +701,14 @@ static void do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
         cpu_stb_data_ra(env, array + i, new_byte, ra);
     }
+
+    return env->cc_op;
 }
 
 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                 uint64_t trans)
 {
-    return do_helper_tr(env, len, array, trans, GETPC());
+    do_helper_tr(env, len, array, trans, GETPC());
 }
 
 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
@@ -1221,6 +1222,41 @@ void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
         g_assert_not_reached();
     }
 
+    /* The very most common cases can be sped up by avoiding a new TB.  */
+    if ((opc & 0xf0) == 0xd0) {
+        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
+                                      uint64_t, uintptr_t);
+        static const dx_helper dx[16] = {
+            [0x2] = do_helper_mvc,
+            [0x4] = do_helper_nc,
+            [0x5] = do_helper_clc,
+            [0x6] = do_helper_oc,
+            [0x7] = do_helper_xc,
+            [0xc] = do_helper_tr,
+            [0xd] = do_helper_trt,
+        };
+        dx_helper helper = dx[opc & 0xf];
+
+        if (helper) {
+            uint32_t l = extract64(insn, 48, 8);
+            uint32_t b1 = extract64(insn, 44, 4);
+            uint32_t d1 = extract64(insn, 32, 12);
+            uint32_t b2 = extract64(insn, 28, 4);
+            uint32_t d2 = extract64(insn, 16, 12);
+            uint64_t a1 = get_address(env, 0, b1, d1);
+            uint64_t a2 = get_address(env, 0, b2, d2);
+
+            env->cc_op = helper(env, l, a1, a2, 0);
+            env->psw.addr += ilen;
+            return;
+        }
+    } else if (opc == 0x0a) {
+        env->int_svc_code = extract64(insn, 48, 8);
+        env->int_svc_ilen = ilen;
+        helper_exception(env, EXCP_SVC);
+        g_assert_not_reached();
+    }
+
     /* Record the insn we want to execute as well as the ilen to use
        during the execution of the target insn.  This will also ensure
        that ex_value is non-zero, which flags that we are in a state
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation Richard Henderson
@ 2017-05-25 22:57   ` Aurelien Jarno
  2017-06-01  8:30   ` David Hildenbrand
  1 sibling, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2017-05-25 22:57 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, thuth

On 2017-05-24 15:08, Richard Henderson wrote:
> Use this saved value instead of recomputing from next_pc difference.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/s390x/translate.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 2/4] target/s390x: End the TB after EXECUTE
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 2/4] target/s390x: End the TB after EXECUTE Richard Henderson
@ 2017-05-25 22:58   ` Aurelien Jarno
  0 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2017-05-25 22:58 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, thuth

On 2017-05-24 15:08, Richard Henderson wrote:
> This split will be required for implementing EXECUTE properly.
> Do this now as a separate step to aid comparison of before and
> after TB listings.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/s390x/mem_helper.c | 54 ++++++++++++++++++++++++++++-------------------
>  target/s390x/translate.c  |  6 +++++-
>  2 files changed, 37 insertions(+), 23 deletions(-)

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 3/4] target/s390x: Implement EXECUTE via new TranslationBlock
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 3/4] target/s390x: Implement EXECUTE via new TranslationBlock Richard Henderson
@ 2017-05-25 22:58   ` Aurelien Jarno
  0 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2017-05-25 22:58 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, thuth

On 2017-05-24 15:08, Richard Henderson wrote:
> Previously, helper_ex would construct the insn and then implement
> the insn via direct calls other helpers.  This was sufficient to
> boot Linux but that is all.
> 
> It is easy enough to go the whole nine yards by stashing state for
> EXECUTE within the cpu, and then rely on a new TB to be created
> that properly and completely interprets the insn.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/s390x/cpu.h        |   4 +-
>  target/s390x/helper.c     |   5 ++
>  target/s390x/machine.c    |  19 ++++++++
>  target/s390x/mem_helper.c | 118 +++++-----------------------------------------
>  target/s390x/translate.c  |  80 ++++++++++++++++++-------------
>  5 files changed, 85 insertions(+), 141 deletions(-)

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly Richard Henderson
@ 2017-05-25 23:12   ` Aurelien Jarno
  2017-05-26 21:10     ` Richard Henderson
  0 siblings, 1 reply; 11+ messages in thread
From: Aurelien Jarno @ 2017-05-25 23:12 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, thuth

On 2017-05-24 15:08, Richard Henderson wrote:
> While the previous patch is required for proper conformance,
> the vast majority of target insns are MVC and XC for implementing
> memmove and memset respectively.  The next most common are CLC,
> TR, and SVC.
> 
> Implementing these (and a few others for which we already have
> an implementation) directly is faster than going through full
> translation to a TB.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/s390x/mem_helper.c | 66 ++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 51 insertions(+), 15 deletions(-)

I have mixed feelings about this patch. On one side it is correct. On
the other side, I don't know if it really worth it. With the goto_ptr
optimization, it can be executed quite fast once it has been translated
once.

So in short, I leave you decide:

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly
  2017-05-25 23:12   ` Aurelien Jarno
@ 2017-05-26 21:10     ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2017-05-26 21:10 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: thuth, qemu-devel

On 05/25/2017 04:12 PM, Aurelien Jarno wrote:
> On 2017-05-24 15:08, Richard Henderson wrote:
>> While the previous patch is required for proper conformance,
>> the vast majority of target insns are MVC and XC for implementing
>> memmove and memset respectively.  The next most common are CLC,
>> TR, and SVC.
>>
>> Implementing these (and a few others for which we already have
>> an implementation) directly is faster than going through full
>> translation to a TB.
>>
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>> ---
>>   target/s390x/mem_helper.c | 66 ++++++++++++++++++++++++++++++++++++-----------
>>   1 file changed, 51 insertions(+), 15 deletions(-)
> 
> I have mixed feelings about this patch. On one side it is correct. On
> the other side, I don't know if it really worth it. With the goto_ptr
> optimization, it can be executed quite fast once it has been translated
> once.

The thing is, I can't identify these being reused at all.  The only case for 
which that would even seem to make sense is memcpy/memset that happens to use 
the same size.  But even then doing the hashing to look up the block is more 
than the decoding required to run the helper directly.


r~

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation
  2017-05-24 22:08 ` [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation Richard Henderson
  2017-05-25 22:57   ` Aurelien Jarno
@ 2017-06-01  8:30   ` David Hildenbrand
  1 sibling, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2017-06-01  8:30 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: thuth, aurelien

On 25.05.2017 00:08, Richard Henderson wrote:
> Use this saved value instead of recomputing from next_pc difference.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target/s390x/translate.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 4bd16d9..5b8333f 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -58,6 +58,7 @@ struct DisasContext {
>      const DisasInsn *insn;
>      DisasFields *fields;
>      uint64_t pc, next_pc;
> +    uint32_t ilen;
>      enum cc_op cc_op;
>      bool singlestep_enabled;
>  };
> @@ -349,7 +350,7 @@ static void gen_program_exception(DisasContext *s, int code)
>      tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
>      tcg_temp_free_i32(tmp);
>  
> -    tmp = tcg_const_i32(s->next_pc - s->pc);
> +    tmp = tcg_const_i32(s->ilen);
>      tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_ilen));
>      tcg_temp_free_i32(tmp);
>  
> @@ -2207,7 +2208,7 @@ static ExitStatus op_ex(DisasContext *s, DisasOps *o)
>          v1 = regs[r1];
>      }
>  
> -    ilen = tcg_const_i32(s->next_pc - s->pc);
> +    ilen = tcg_const_i32(s->ilen);
>      gen_helper_ex(cpu_env, ilen, v1, o->in2);
>      tcg_temp_free_i32(ilen);
>  
> @@ -4052,7 +4053,7 @@ static ExitStatus op_svc(DisasContext *s, DisasOps *o)
>      tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_code));
>      tcg_temp_free_i32(t);
>  
> -    t = tcg_const_i32(s->next_pc - s->pc);
> +    t = tcg_const_i32(s->ilen);
>      tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_ilen));
>      tcg_temp_free_i32(t);
>  
> @@ -5191,6 +5192,7 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s,
>      op = (insn >> 8) & 0xff;
>      ilen = get_ilen(op);
>      s->next_pc = s->pc + ilen;
> +    s->ilen = ilen;
>  
>      switch (ilen) {
>      case 2:
> 

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 

Thanks,

David

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-06-01  8:30 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-24 22:08 [Qemu-devel] [PATCH 0/4] target/s390x Implement EXECUTE via TranslationBlock Richard Henderson
2017-05-24 22:08 ` [Qemu-devel] [PATCH 1/4] target/s390x: Save current ilen during translation Richard Henderson
2017-05-25 22:57   ` Aurelien Jarno
2017-06-01  8:30   ` David Hildenbrand
2017-05-24 22:08 ` [Qemu-devel] [PATCH 2/4] target/s390x: End the TB after EXECUTE Richard Henderson
2017-05-25 22:58   ` Aurelien Jarno
2017-05-24 22:08 ` [Qemu-devel] [PATCH 3/4] target/s390x: Implement EXECUTE via new TranslationBlock Richard Henderson
2017-05-25 22:58   ` Aurelien Jarno
2017-05-24 22:08 ` [Qemu-devel] [PATCH 4/4] target/s390x: Re-implement a few EXECUTE target insns directly Richard Henderson
2017-05-25 23:12   ` Aurelien Jarno
2017-05-26 21:10     ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.