All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Cc: "Rob Herring" <rob.herring@linaro.org>,
	"Peter Crosthwaite" <peter.crosthwaite@xilinx.com>,
	patches@linaro.org, "Michael Matz" <matz@suse.de>,
	"Claudio Fontana" <claudio.fontana@huawei.com>,
	"Alexander Graf" <agraf@suse.de>,
	"Will Newton" <will.newton@linaro.org>,
	"Dirk Mueller" <dmueller@suse.de>,
	"Laurent Desnogues" <laurent.desnogues@gmail.com>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	kvmarm@lists.cs.columbia.edu,
	"Christoffer Dall" <christoffer.dall@linaro.org>,
	"Richard Henderson" <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH v4 12/21] target-arm: A64: Implement DC ZVA
Date: Thu,  6 Mar 2014 19:32:56 +0000	[thread overview]
Message-ID: <1394134385-1727-13-git-send-email-peter.maydell@linaro.org> (raw)
In-Reply-To: <1394134385-1727-1-git-send-email-peter.maydell@linaro.org>

Implement the DC ZVA instruction, which clears a block of memory.
The fast path obtains a pointer to the underlying RAM via the TCG TLB
data structure so we can do a direct memset(), with fallback to a
simple byte-store loop in the slow path.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 include/exec/softmmu_exec.h |  36 +++++++++++++
 target-arm/cpu-qom.h        |   2 +
 target-arm/cpu.h            |   3 +-
 target-arm/cpu64.c          |   1 +
 target-arm/helper.c         | 122 ++++++++++++++++++++++++++++++++++++++++++--
 target-arm/helper.h         |   1 +
 target-arm/translate-a64.c  |   5 ++
 7 files changed, 164 insertions(+), 6 deletions(-)

diff --git a/include/exec/softmmu_exec.h b/include/exec/softmmu_exec.h
index 6fde154..66a1ba8 100644
--- a/include/exec/softmmu_exec.h
+++ b/include/exec/softmmu_exec.h
@@ -162,3 +162,39 @@
 #define stw(p, v) stw_data(p, v)
 #define stl(p, v) stl_data(p, v)
 #define stq(p, v) stq_data(p, v)
+
+/**
+ * tlb_vaddr_to_host:
+ * @env: CPUArchState
+ * @addr: guest virtual address to look up
+ * @mmu_idx: MMU index to use for lookup
+ *
+ * Look up the specified guest virtual index in the TCG softmmu TLB.
+ * If the TLB contains a host virtual address suitable for direct RAM
+ * access, then return it. Otherwise (TLB miss, TLB entry is for an
+ * I/O access, etc) return NULL.
+ *
+ * This is the equivalent of the initial fast-path code used by
+ * TCG backends for guest load and store accesses.
+ */
+static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
+                                      int mmu_idx)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t haddr;
+
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        /* TLB entry is for a different page */
+        return NULL;
+    }
+
+    if (tlb_addr & ~TARGET_PAGE_MASK) {
+        /* IO access */
+        return NULL;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    return (void *)haddr;
+}
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 00234e1..41caa6c 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -150,6 +150,8 @@ typedef struct ARMCPU {
     uint32_t reset_cbar;
     uint32_t reset_auxcr;
     bool reset_hivecs;
+    /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
+    uint32_t dcz_blocksize;
 } ARMCPU;
 
 #define TYPE_AARCH64_CPU "aarch64-cpu"
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3a0c38d..fa826c4 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -755,7 +755,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
 #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
+#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8426bf1..fccecc2 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -46,6 +46,7 @@ static void aarch64_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
+    cpu->dcz_blocksize = 7; /*  512 bytes */
 }
 #endif
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e1672aa..45e6910 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -10,6 +10,8 @@
 #include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
+#include "exec/softmmu_exec.h"
+
 static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
@@ -1644,6 +1646,29 @@ static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
     tlb_flush(env, asid == 0);
 }
 
+static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* We don't implement EL2, so the only control on DC ZVA is the
+     * bit in the SCTLR which can prohibit access for EL0.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int dzp_bit = 1 << 4;
+
+    /* DZP indicates whether DC ZVA access is allowed */
+    if (aa64_zva_access(env, NULL) != CP_ACCESS_OK) {
+        dzp_bit = 0;
+    }
+    return cpu->dcz_blocksize | dzp_bit;
+}
+
 static const ARMCPRegInfo v8_cp_reginfo[] = {
     /* Minimal set of EL0-visible registers. This will need to be expanded
      * significantly for system emulation of AArch64 CPUs.
@@ -1663,13 +1688,18 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     { .name = "FPSR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
-    /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
-     * For system mode the DZP bit here will need to be computed, not constant.
-     */
     { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
-      .access = PL0_R, .type = ARM_CP_CONST,
-      .resetvalue = 0x10 },
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE,
+      .readfn = aa64_dczid_read },
+    { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_DC_ZVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
     { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
       .access = PL1_R, .type = ARM_CP_CURRENTEL },
@@ -3820,6 +3850,88 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
 
 #endif
 
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /* Implement DC ZVA, which zeroes a fixed-length block of memory.
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory
+     * (which matches the usual QEMU behaviour of not implementing either
+     * alignment faults or any memory attribute handling).
+     */
+
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+    {
+        /* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+         * the block size so we might have to do more than one TLB lookup.
+         * We know that in fact for any v8 CPU the page size is at least 4K
+         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+         * 1K as an artefact of legacy v5 subpage support being present in the
+         * same QEMU executable.
+         */
+        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+        void *hostaddr[maxidx];
+        int try, i;
+
+        for (try = 0; try < 2; try++) {
+
+            for (i = 0; i < maxidx; i++) {
+                hostaddr[i] = tlb_vaddr_to_host(env,
+                                                vaddr + TARGET_PAGE_SIZE * i,
+                                                cpu_mmu_index(env));
+                if (!hostaddr[i]) {
+                    break;
+                }
+            }
+            if (i == maxidx) {
+                /* If it's all in the TLB it's fair game for just writing to;
+                 * we know we don't need to update dirty status, etc.
+                 */
+                for (i = 0; i < maxidx - 1; i++) {
+                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+                }
+                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+                return;
+            }
+            /* OK, try a store and see if we can populate the tlb. This
+             * might cause an exception if the memory isn't writable,
+             * in which case we will longjmp out of here. We must for
+             * this purpose use the actual register value passed to us
+             * so that we get the fault address right.
+             */
+            cpu_stb_data(env, vaddr_in, 0);
+            /* Now we can populate the other TLB entries, if any */
+            for (i = 0; i < maxidx; i++) {
+                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+                    cpu_stb_data(env, va, 0);
+                }
+            }
+        }
+
+        /* Slow path (probably attempt to do this to an I/O device or
+         * similar, or clearing of a block of code we have translations
+         * cached for). Just do a series of byte writes as the architecture
+         * demands. It's not worth trying to use a cpu_physical_memory_map(),
+         * memset(), unmap() sequence here because:
+         *  + we'd need to account for the blocksize being larger than a page
+         *  + the direct-RAM access case is almost always going to be dealt
+         *    with in the fastpath code above, so there's no speed benefit
+         *  + we would have to deal with the map returning NULL because the
+         *    bounce buffer was in use
+         */
+        for (i = 0; i < blocklen; i++) {
+            cpu_stb_data(env, vaddr + i, 0);
+        }
+    }
+#else
+    memset(g2h(vaddr), 0, blocklen);
+#endif
+}
+
 /* Note that signed overflow is undefined in C.  The following routines are
    careful to use unsigned types where modulo arithmetic is required.
    Failure to do so _will_ break on newer gcc.  */
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 2729ea5..4c924e9 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -502,6 +502,7 @@ DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
 
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_2(dc_zva, void, env, i64)
 
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 7054fa1..4eaea64 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1331,6 +1331,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         tcg_rt = cpu_reg(s, rt);
         tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
         return;
+    case ARM_CP_DC_ZVA:
+        /* Writes clear the aligned block of memory which rt points into. */
+        tcg_rt = cpu_reg(s, rt);
+        gen_helper_dc_zva(cpu_env, tcg_rt);
+        return;
     default:
         break;
     }
-- 
1.9.0

  parent reply	other threads:[~2014-03-06 19:33 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-06 19:32 [Qemu-devel] [PATCH v4 00/21] AArch64 system emulation (boots a kernel!) Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 01/21] target-arm: Split out private-to-target functions into internals.h Peter Maydell
2014-03-17  7:13   ` Peter Crosthwaite
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 02/21] target-arm: Implement AArch64 DAIF system register Peter Maydell
2014-03-17  2:30   ` Peter Crosthwaite
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 03/21] target-arm: Define exception record for AArch64 exceptions Peter Maydell
2014-03-17  2:53   ` Peter Crosthwaite
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 04/21] target-arm: Provide correct syndrome information for cpreg access traps Peter Maydell
2014-03-17  3:05   ` Peter Crosthwaite
2014-03-17 12:32     ` Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 05/21] target-arm: Add support for generating exceptions with syndrome information Peter Maydell
2014-03-17  3:19   ` Peter Crosthwaite
2014-03-17 12:40     ` Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 06/21] target-arm: Provide syndrome information for MMU faults Peter Maydell
2014-03-17  3:28   ` Peter Crosthwaite
2014-03-17 12:41     ` Peter Maydell
2014-03-17 12:50       ` Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 07/21] target-arm: A64: Correctly fault FP/Neon if CPACR.FPEN set Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 08/21] target-arm: A64: Add assertion that FP access was checked Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 09/21] target-arm: Fix VFP enables for AArch32 EL0 under AArch64 EL1 Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 10/21] target-arm: Add v8 mmu translation support Peter Maydell
2014-03-20 18:20   ` Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 11/21] target-arm: Don't mention PMU in debug feature register Peter Maydell
2014-03-17  5:13   ` Peter Crosthwaite
2014-03-17 12:58     ` Peter Maydell
2014-03-17 13:11       ` Peter Crosthwaite
2014-03-06 19:32 ` Peter Maydell [this message]
2014-03-07 14:51   ` [Qemu-devel] [PATCH v4 12/21] target-arm: A64: Implement DC ZVA Richard Henderson
2014-03-07 15:11     ` Peter Maydell
2014-03-07 15:25       ` Richard Henderson
2014-03-07 15:40       ` Richard Henderson
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 13/21] target-arm: Use dedicated CPU state fields for ARM946 access bit registers Peter Maydell
2014-03-17  5:20   ` Peter Crosthwaite
2014-03-17 13:03     ` Peter Maydell
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 14/21] target-arm: Implement AArch64 views of fault status and data registers Peter Maydell
2014-03-17  5:30   ` Peter Crosthwaite
2014-03-17 13:06     ` Peter Maydell
2014-03-17 13:17       ` Peter Crosthwaite
2014-03-06 19:32 ` [Qemu-devel] [PATCH v4 15/21] target-arm: Add AArch64 ELR_EL1 register Peter Maydell
2014-03-17  5:33   ` Peter Crosthwaite
2014-03-06 19:33 ` [Qemu-devel] [PATCH v4 16/21] target-arm: Implement SP_EL0, SP_EL1 Peter Maydell
2014-03-17  7:02   ` Peter Crosthwaite
2014-03-17  7:31     ` Peter Crosthwaite
2014-03-20 17:12     ` Peter Maydell
2014-03-06 19:33 ` [Qemu-devel] [PATCH v4 17/21] target-arm: Implement AArch64 SPSR_EL1 Peter Maydell
2014-03-06 19:33 ` [Qemu-devel] [PATCH v4 18/21] target-arm: Move arm_log_exception() into internals.h Peter Maydell
2014-03-17  7:04   ` Peter Crosthwaite
2014-03-06 19:33 ` [Qemu-devel] [PATCH v4 19/21] target-arm: Implement AArch64 EL1 exception handling Peter Maydell
2014-03-06 19:33 ` [Qemu-devel] [PATCH v4 20/21] target-arm: Add Cortex-A57 processor Peter Maydell
2014-03-20 19:18   ` Peter Maydell
2014-03-26  2:34   ` Rob Herring
2014-03-06 19:33 ` [Qemu-devel] [PATCH v4 21/21] hw/arm/virt: Add support for Cortex-A57 Peter Maydell
2014-03-17  7:12   ` Peter Crosthwaite
2014-04-10 15:02     ` Peter Maydell
2014-04-10 19:41       ` Rob Herring
2014-04-10 21:16         ` Peter Maydell
2014-03-07  4:09 ` [Qemu-devel] [PATCH v4 00/21] AArch64 system emulation (boots a kernel!) Xuebing Wang
2014-03-07  8:47   ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1394134385-1727-13-git-send-email-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=agraf@suse.de \
    --cc=alex.bennee@linaro.org \
    --cc=christoffer.dall@linaro.org \
    --cc=claudio.fontana@huawei.com \
    --cc=dmueller@suse.de \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=laurent.desnogues@gmail.com \
    --cc=matz@suse.de \
    --cc=patches@linaro.org \
    --cc=peter.crosthwaite@xilinx.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rob.herring@linaro.org \
    --cc=rth@twiddle.net \
    --cc=will.newton@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.