All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: qemu-ppc@nongnu.org
Cc: qemu-devel@nongnu.org, david@gibson.dropbear.id.au,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>
Subject: [Qemu-devel] [PATCH 28/32] ppc: Avoid double translation for lvx/lvxl/stvx/stvxl
Date: Wed, 27 Jul 2016 08:21:22 +1000	[thread overview]
Message-ID: <1469571686-7284-28-git-send-email-benh@kernel.crashing.org> (raw)
In-Reply-To: <1469571686-7284-1-git-send-email-benh@kernel.crashing.org>

These accesses are always naturally aligned, so they cannot cross a page
boundary. Instead of generating two 8-byte loads/stores with a translation
on each (and a double swap for LE on LE), use a helper that does a single
translation and memcpy()s the data over (or does the appropriate swapping
if needed).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/helper.h             |  2 ++
 target-ppc/mem_helper.c         | 60 +++++++++++++++++++++++++++++++++++++++++
 target-ppc/translate/vmx-impl.c | 38 ++++++++------------------
 target-ppc/translate/vmx-ops.c  |  4 +--
 4 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 1f5cfd0..64f7d2c 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -269,9 +269,11 @@ DEF_HELPER_5(vmsumshm, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmsumshs, void, env, avr, avr, avr, avr)
 DEF_HELPER_4(vmladduhm, void, avr, avr, avr, avr)
 DEF_HELPER_2(mtvscr, void, env, avr)
+DEF_HELPER_3(lvx, void, env, i32, tl)
 DEF_HELPER_3(lvebx, void, env, avr, tl)
 DEF_HELPER_3(lvehx, void, env, avr, tl)
 DEF_HELPER_3(lvewx, void, env, avr, tl)
+DEF_HELPER_3(stvx, void, env, i32, tl)
 DEF_HELPER_3(stvebx, void, env, avr, tl)
 DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 6548715..da3f973 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -225,6 +225,66 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
 #define LO_IDX 0
 #endif
 
+/* lvx/lvxl: load the 16 bytes at (addr & ~0xf) into env->avr[vr], using a
+ * single translation when tlb_vaddr_to_host() yields a host pointer. */
+void helper_lvx(CPUPPCState *env, uint32_t vr, target_ulong addr)
+{
+    uintptr_t raddr = GETPC();
+    ppc_avr_t *haddr;
+
+    /* The low four EA bits are ignored, so the access stays in one page */
+    addr &= ~(target_ulong)0xf;
+    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, env->dmmu_idx);
+    if (haddr) {
+        /* Guest and host byte order agree (LE on LE or BE on BE; assumes
+         * HI_IDX is nonzero only on LE hosts): plain copy, else reverse. */
+        if (!msr_le == !HI_IDX) {
+            memcpy(&env->avr[vr], haddr, 16);
+        } else {
+            env->avr[vr].u64[LO_IDX] = bswap64(haddr->u64[HI_IDX]);
+            env->avr[vr].u64[HI_IDX] = bswap64(haddr->u64[LO_IDX]);
+        }
+    } else if (needs_byteswap(env)) {
+        env->avr[vr].u64[LO_IDX] =
+            bswap64(cpu_ldq_data_ra(env, addr, raddr));
+        env->avr[vr].u64[HI_IDX] =
+            bswap64(cpu_ldq_data_ra(env, addr + 8, raddr));
+    } else {
+        env->avr[vr].u64[HI_IDX] = cpu_ldq_data_ra(env, addr, raddr);
+        env->avr[vr].u64[LO_IDX] = cpu_ldq_data_ra(env, addr + 8, raddr);
+    }
+}
+
+/* stvx/stvxl: store env->avr[vr] to the 16 bytes at (addr & ~0xf), using a
+ * single translation when tlb_vaddr_to_host() yields a host pointer. */
+void helper_stvx(CPUPPCState *env, uint32_t vr, target_ulong addr)
+{
+    uintptr_t raddr = GETPC();
+    ppc_avr_t *haddr;
+
+    /* The low four EA bits are ignored, so the access stays in one page */
+    addr &= ~(target_ulong)0xf;
+    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx);
+    if (haddr) {
+        /* Guest and host byte order agree (LE on LE or BE on BE; assumes
+         * HI_IDX is nonzero only on LE hosts): plain copy, else reverse. */
+        if (!msr_le == !HI_IDX) {
+            memcpy(haddr, &env->avr[vr], 16);
+        } else {
+            haddr->u64[LO_IDX] = bswap64(env->avr[vr].u64[HI_IDX]);
+            haddr->u64[HI_IDX] = bswap64(env->avr[vr].u64[LO_IDX]);
+        }
+    } else if (needs_byteswap(env)) {
+        cpu_stq_data_ra(env, addr,
+                        bswap64(env->avr[vr].u64[LO_IDX]), raddr);
+        cpu_stq_data_ra(env, addr + 8,
+                        bswap64(env->avr[vr].u64[HI_IDX]), raddr);
+    } else {
+        cpu_stq_data_ra(env, addr, env->avr[vr].u64[HI_IDX], raddr);
+        cpu_stq_data_ra(env, addr + 8, env->avr[vr].u64[LO_IDX], raddr);
+    }
+}
+
 /* We use msr_le to determine index ordering in a vector.  However,
    byteswapping is not simply controlled by msr_le.  We also need to take
    into account endianness of the target.  This is done for the little-endian
diff --git a/target-ppc/translate/vmx-impl.c b/target-ppc/translate/vmx-impl.c
index 110e19c..a58aa0c 100644
--- a/target-ppc/translate/vmx-impl.c
+++ b/target-ppc/translate/vmx-impl.c
@@ -15,55 +15,39 @@ static inline TCGv_ptr gen_avr_ptr(int reg)
 }
 
 #define GEN_VR_LDX(name, opc2, opc3)                                          \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
+static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
     TCGv EA;                                                                  \
+    TCGv_i32 t0;                                                              \
     if (unlikely(!ctx->altivec_enabled)) {                                    \
         gen_exception(ctx, POWERPC_EXCP_VPU);                                 \
         return;                                                               \
     }                                                                         \
     gen_set_access_type(ctx, ACCESS_INT);                                     \
     EA = tcg_temp_new();                                                      \
+    t0 = tcg_const_i32(rD(ctx->opcode));                                      \
     gen_addr_reg_index(ctx, EA);                                              \
-    tcg_gen_andi_tl(EA, EA, ~0xf);                                            \
-    /* We only need to swap high and low halves. gen_qemu_ld64 does necessary \
-       64-bit byteswap already. */                                            \
-    if (ctx->le_mode) {                                                       \
-        gen_qemu_ld64(ctx, cpu_avrl[rD(ctx->opcode)], EA);                    \
-        tcg_gen_addi_tl(EA, EA, 8);                                           \
-        gen_qemu_ld64(ctx, cpu_avrh[rD(ctx->opcode)], EA);                    \
-    } else {                                                                  \
-        gen_qemu_ld64(ctx, cpu_avrh[rD(ctx->opcode)], EA);                    \
-        tcg_gen_addi_tl(EA, EA, 8);                                           \
-        gen_qemu_ld64(ctx, cpu_avrl[rD(ctx->opcode)], EA);                    \
-    }                                                                         \
+    gen_helper_lvx(cpu_env, t0, EA);                                          \
     tcg_temp_free(EA);                                                        \
+    tcg_temp_free_i32(t0);                                                    \
 }
 
 #define GEN_VR_STX(name, opc2, opc3)                                          \
 static void gen_st##name(DisasContext *ctx)                                   \
 {                                                                             \
     TCGv EA;                                                                  \
+    TCGv_i32 t0;                                                              \
     if (unlikely(!ctx->altivec_enabled)) {                                    \
         gen_exception(ctx, POWERPC_EXCP_VPU);                                 \
         return;                                                               \
     }                                                                         \
     gen_set_access_type(ctx, ACCESS_INT);                                     \
     EA = tcg_temp_new();                                                      \
+    t0 = tcg_const_i32(rD(ctx->opcode));                                      \
     gen_addr_reg_index(ctx, EA);                                              \
-    tcg_gen_andi_tl(EA, EA, ~0xf);                                            \
-    /* We only need to swap high and low halves. gen_qemu_st64 does necessary \
-       64-bit byteswap already. */                                            \
-    if (ctx->le_mode) {                                                       \
-        gen_qemu_st64(ctx, cpu_avrl[rD(ctx->opcode)], EA);                    \
-        tcg_gen_addi_tl(EA, EA, 8);                                           \
-        gen_qemu_st64(ctx, cpu_avrh[rD(ctx->opcode)], EA);                    \
-    } else {                                                                  \
-        gen_qemu_st64(ctx, cpu_avrh[rD(ctx->opcode)], EA);                    \
-        tcg_gen_addi_tl(EA, EA, 8);                                           \
-        gen_qemu_st64(ctx, cpu_avrl[rD(ctx->opcode)], EA);                    \
-    }                                                                         \
+    gen_helper_stvx(cpu_env, t0, EA);                                         \
     tcg_temp_free(EA);                                                        \
+    tcg_temp_free_i32(t0);                                                    \
 }
 
 #define GEN_VR_LVE(name, opc2, opc3, size)                              \
@@ -116,9 +100,9 @@ GEN_VR_LVE(bx, 0x07, 0x00, 1);
 GEN_VR_LVE(hx, 0x07, 0x01, 2);
 GEN_VR_LVE(wx, 0x07, 0x02, 4);
 
-GEN_VR_STX(svx, 0x07, 0x07);
+GEN_VR_STX(vx, 0x07, 0x07);
 /* As we don't emulate the cache, stvxl is stricly equivalent to stvx */
-GEN_VR_STX(svxl, 0x07, 0x0F);
+GEN_VR_STX(vxl, 0x07, 0x0F);
 
 GEN_VR_STVE(bx, 0x07, 0x04, 1);
 GEN_VR_STVE(hx, 0x07, 0x05, 2);
diff --git a/target-ppc/translate/vmx-ops.c b/target-ppc/translate/vmx-ops.c
index b9c982a..6c7d150 100644
--- a/target-ppc/translate/vmx-ops.c
+++ b/target-ppc/translate/vmx-ops.c
@@ -11,8 +11,8 @@ GEN_VR_LDX(lvxl, 0x07, 0x0B),
 GEN_VR_LVE(bx, 0x07, 0x00),
 GEN_VR_LVE(hx, 0x07, 0x01),
 GEN_VR_LVE(wx, 0x07, 0x02),
-GEN_VR_STX(svx, 0x07, 0x07),
-GEN_VR_STX(svxl, 0x07, 0x0F),
+GEN_VR_STX(vx, 0x07, 0x07),
+GEN_VR_STX(vxl, 0x07, 0x0F),
 GEN_VR_STVE(bx, 0x07, 0x04),
 GEN_VR_STVE(hx, 0x07, 0x05),
 GEN_VR_STVE(wx, 0x07, 0x06),
-- 
2.7.4

  parent reply	other threads:[~2016-07-26 22:23 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-26 22:20 [Qemu-devel] [PATCH 01/32] ppc: Fix fault PC reporting for lve*/stve* VMX instructions Benjamin Herrenschmidt
2016-07-26 22:20 ` [Qemu-devel] [PATCH 02/32] ppc: Provide basic raise_exception_* functions Benjamin Herrenschmidt
2016-07-27  1:50   ` David Gibson
2016-07-27  3:46     ` Benjamin Herrenschmidt
2016-07-26 22:20 ` [Qemu-devel] [PATCH 03/32] ppc: Move classic fp ops out of translate.c Benjamin Herrenschmidt
2016-07-28 16:02   ` Richard Henderson
2016-07-28 21:56     ` Benjamin Herrenschmidt
2016-07-26 22:20 ` [Qemu-devel] [PATCH 04/32] ppc: Move embedded spe " Benjamin Herrenschmidt
2016-07-26 22:20 ` [Qemu-devel] [PATCH 05/32] ppc: Move DFP " Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 06/32] ppc: Move VMX " Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 07/32] ppc: Move VSX " Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 08/32] ppc: Rename fload_invalid_op_excp to float_invalid_op_excp Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 09/32] ppc: Make float_invalid_op_excp() pass the return address Benjamin Herrenschmidt
2016-07-28 16:06   ` Richard Henderson
2016-07-28 21:57     ` Benjamin Herrenschmidt
2016-07-28 22:10       ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 10/32] ppc: Make float_check_status() " Benjamin Herrenschmidt
2016-07-27  1:57   ` David Gibson
2016-07-27  3:47     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 11/32] ppc: Don't update the NIP in floating point generated code Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 12/32] ppc: FP exceptions are always precise Benjamin Herrenschmidt
2016-07-27  2:00   ` David Gibson
2016-07-27  3:50     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 13/32] ppc: Don't update NIP in lswi/lswx/stswi/stswx Benjamin Herrenschmidt
2016-07-27  2:04   ` David Gibson
2016-07-27  3:51     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 14/32] ppc: Don't update NIP in lmw/stmw/icbi Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 15/32] ppc: Make tlb_fill() use new exception helper Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 16/32] ppc: Rework NIP updates vs. exception generation Benjamin Herrenschmidt
2016-07-27  2:19   ` David Gibson
2016-07-27  3:54     ` Benjamin Herrenschmidt
2016-07-27  4:35     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 17/32] ppc: Fix source NIP on SLB related interrupts Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 18/32] ppc: Don't update NIP in DCR access routines Benjamin Herrenschmidt
2016-07-27  2:21   ` David Gibson
2016-07-27  3:55     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 19/32] ppc: Don't update NIP in facility unavailable interrupts Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 20/32] ppc: Don't update NIP BookE 2.06 tlbwe Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 21/32] ppc: Don't update NIP on conditional trap instructions Benjamin Herrenschmidt
2016-07-27  2:26   ` David Gibson
2016-07-27  3:56     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 22/32] ppc: Don't update NIP if not taking alignment exceptions Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 23/32] ppc: Don't update NIP in dcbz and lscbx Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 24/32] ppc: Make alignment exceptions suck less Benjamin Herrenschmidt
2016-07-27  2:30   ` David Gibson
2016-07-27  3:59     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 25/32] ppc: Handle unconditional (always/never) traps at translation time Benjamin Herrenschmidt
2016-07-27  2:33   ` David Gibson
2016-07-27  4:00     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 26/32] ppc: Speed up dcbz Benjamin Herrenschmidt
2016-07-27  2:36   ` David Gibson
2016-07-27  4:02     ` Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 27/32] ppc: Fix CFAR updates Benjamin Herrenschmidt
2016-07-26 22:21 ` Benjamin Herrenschmidt [this message]
2016-07-29  0:49   ` [Qemu-devel] [PATCH 28/32] ppc: Avoid double translation for lvx/lvxl/stvx/stvxl Richard Henderson
2016-07-29  2:13     ` Benjamin Herrenschmidt
2016-07-29  3:34       ` David Gibson
2016-07-29  4:40         ` Benjamin Herrenschmidt
2016-07-29  4:58           ` Benjamin Herrenschmidt
2016-07-29  5:42             ` David Gibson
2016-07-29  9:00     ` Benjamin Herrenschmidt
2016-07-29 12:43       ` Richard Henderson
2016-07-26 22:21 ` [Qemu-devel] [PATCH 29/32] ppc: Don't set access_type on all load/stores on hash64 Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 30/32] ppc: Use a helper to generate "LE unsupported" alignment interrupts Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 31/32] ppc: load/store multiple and string insns don't do LE Benjamin Herrenschmidt
2016-07-26 22:21 ` [Qemu-devel] [PATCH 32/32] ppc: Speed up load/store multiple Benjamin Herrenschmidt
2016-07-27  2:47   ` David Gibson
2016-07-27  4:04     ` Benjamin Herrenschmidt
2016-07-27  1:06 ` [Qemu-devel] [PATCH 01/32] ppc: Fix fault PC reporting for lve*/stve* VMX instructions David Gibson
     [not found] <579ad8bd.8481620a.89e78.f1ce@mx.google.com>
2016-07-29  4:44 ` [Qemu-devel] [PATCH 28/32] ppc: Avoid double translation for lvx/lvxl/stvx/stvxl Benjamin Herrenschmidt
2016-07-29  6:42 ` Benjamin Herrenschmidt
2016-07-29  6:56   ` Benjamin Herrenschmidt
2016-07-29 12:30     ` Richard Henderson
2016-07-29 12:37     ` Richard Henderson
2016-07-29  6:58   ` Benjamin Herrenschmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1469571686-7284-28-git-send-email-benh@kernel.crashing.org \
    --to=benh@kernel.crashing.org \
    --cc=david@gibson.dropbear.id.au \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.