QEMU-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode
@ 2019-10-11 13:47 Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 01/22] target/arm: Add MTE_ACTIVE to tb_flags Richard Henderson
                   ` (23 more replies)
  0 siblings, 24 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

This is an update of the v4 patch from March.

I believe I've fixed the address space issues that Peter noticed.
If the board model does not supply tag memory, then I downgrade
the cpu support to "instructions only" (id_aa64pfr1.mte == 1),
which does not allow tag memory access to be enabled in the cpu.

I did not update the arm_hcr_el2_eff comment re ARMv8.4, because
I have not done a complete audit of all of the v8.5 bits.

The hacky kernel testing patch has needed some updates since March.
The following applies to v5.4-rc2.


r~


Richard Henderson (22):
  target/arm: Add MTE_ACTIVE to tb_flags
  target/arm: Add regime_has_2_ranges
  target/arm: Add MTE system registers
  target/arm: Add helper_mte_check{1,2,3}
  target/arm: Suppress tag check for sp+offset
  target/arm: Implement the IRG instruction
  target/arm: Implement ADDG, SUBG instructions
  target/arm: Implement the GMI instruction
  target/arm: Implement the SUBP instruction
  target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY
  target/arm: Implement LDG, STG, ST2G instructions
  target/arm: Implement the STGP instruction
  target/arm: Implement the LDGM and STGM instructions
  target/arm: Implement the access tag cache flushes
  target/arm: Clean address for DC ZVA
  target/arm: Implement data cache set allocation tags
  target/arm: Set PSTATE.TCO on exception entry
  target/arm: Enable MTE
  target/arm: Cache the Tagged bit for a page in MemTxAttrs
  target/arm: Create tagged ram when MTE is enabled
  target/arm: Add mmu indexes for tag memory
  target/arm: Add allocation tag storage for system mode

 target/arm/cpu-param.h     |   2 +-
 target/arm/cpu.h           |  37 ++-
 target/arm/helper-a64.h    |  17 ++
 target/arm/internals.h     |  45 +++
 target/arm/translate.h     |   2 +
 hw/arm/virt.c              |  54 ++++
 target/arm/cpu.c           |  63 +++-
 target/arm/cpu64.c         |   1 +
 target/arm/helper.c        | 277 ++++++++++++++---
 target/arm/mte_helper.c    | 601 +++++++++++++++++++++++++++++++++++++
 target/arm/tlb_helper.c    |   3 +-
 target/arm/translate-a64.c | 342 ++++++++++++++++++---
 target/arm/Makefile.objs   |   1 +
 13 files changed, 1345 insertions(+), 100 deletions(-)
 create mode 100644 target/arm/mte_helper.c

--- kernel patch

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index f19fe4b9acc4..ee6b7f387a9a 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -52,7 +52,8 @@
 #define ARM64_HAS_IRQ_PRIO_MASKING		42
 #define ARM64_HAS_DCPODP			43
 #define ARM64_WORKAROUND_1463225		44
+#define ARM64_HAS_MTE				45
 
-#define ARM64_NCAPS				45
+#define ARM64_NCAPS				46
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ddf9d762ac62..5825130bd8eb 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,6 +12,7 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
+#define HCR_ATA		(UL(1) << 56)
 #define HCR_FWB		(UL(1) << 46)
 #define HCR_API		(UL(1) << 41)
 #define HCR_APK		(UL(1) << 40)
@@ -78,8 +79,8 @@
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
-#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_ATA)
 
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1		((1 << 31) | (1 << 23))
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 972d196c7714..2a65831f6e0f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -482,6 +482,7 @@
 
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_DSSBS	(BIT(44))
+#define SCTLR_ELx_ATA	(BIT(43))
 #define SCTLR_ELx_ENIA	(BIT(31))
 #define SCTLR_ELx_ENIB	(BIT(30))
 #define SCTLR_ELx_ENDA	(BIT(27))
@@ -510,6 +511,7 @@
 #endif
 
 /* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_ATA0		(BIT(42))
 #define SCTLR_EL1_UCI		(BIT(26))
 #define SCTLR_EL1_E0E		(BIT(24))
 #define SCTLR_EL1_SPAN		(BIT(23))
@@ -598,6 +600,7 @@
 #define ID_AA64PFR0_EL0_32BIT_64BIT	0x2
 
 /* id_aa64pfr1 */
+#define ID_AA64PFR1_MTE_SHIFT		8
 #define ID_AA64PFR1_SSBS_SHIFT		4
 
 #define ID_AA64PFR1_SSBS_PSTATE_NI	0
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index cabebf1a7976..6a122ed7f76b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -171,6 +171,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
 	ARM64_FTR_END,
 };
@@ -1261,6 +1262,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
 }
 #endif
 
+static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+{
+	sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -1561,6 +1567,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 1,
 	},
 #endif
+	{
+		.desc = "Memory Tagging",
+		.capability = ARM64_HAS_MTE,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64PFR1_EL1,
+		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 2,
+		.cpu_enable = cpu_enable_mte,
+	},
 	{},
 };
 
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..32cfa35195ae 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -424,14 +424,14 @@ ENTRY(__cpu_setup)
 	 *   DEVICE_nGnRE	001	00000100
 	 *   DEVICE_GRE		010	00001100
 	 *   NORMAL_NC		011	01000100
-	 *   NORMAL		100	11111111
+	 *   NORMAL		100	11110000 (Tag)
 	 *   NORMAL_WT		101	10111011
 	 */
 	ldr	x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
 		     MAIR(0x04, MT_DEVICE_nGnRE) | \
 		     MAIR(0x0c, MT_DEVICE_GRE) | \
 		     MAIR(0x44, MT_NORMAL_NC) | \
-		     MAIR(0xff, MT_NORMAL) | \
+		     MAIR(0xf0, MT_NORMAL) | \
 		     MAIR(0xbb, MT_NORMAL_WT)
 	msr	mair_el1, x5
 	/*

--- mte smoke test

/*
 * Memory tagging, basic pass cases.
 */

#include <stdio.h>
#include <assert.h>
#include <sys/mman.h>

asm(".arch armv8.5-a+memtag");

int data[16 / sizeof(int)] __attribute__((aligned(16)));

int main(int ac, char **av)
{
    int *p0 = data;
    int *p1, *p2;
    long c;

    if (mlock(data, sizeof(data)) < 0) {
        perror("mlock");
        return 1;
    }

    asm("irg %0,%1,%2" : "=r"(p1) : "r"(p0), "r"(1));
    assert(p1 != p0);
    asm("subp %0,%1,%2" : "=r"(c) : "r"(p0), "r"(p1));
    assert(c == 0);

    asm("stg %0, [%0]" : : "r"(p1));
    asm("ldg %0, [%1]" : "=r"(p2) : "r"(p0), "0"(p0));
    assert(p1 == p2);

    return 0;
}



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 01/22] target/arm: Add MTE_ACTIVE to tb_flags
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 02/22] target/arm: Add regime_has_2_ranges Richard Henderson
                   ` (22 subsequent siblings)
  23 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

When MTE is fully enabled, i.e. access to tags are enabled and
tag checks affect the PE, then arrange to perform the check
while stripping the TBI.

The check is not yet implemented, just the plumbing to that point.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Clean TBI bits exactly.
    Fix license to lgpl 2.1.
v3: Remove stub helper_mte_check; moved to a later patch.
---
 target/arm/cpu.h           | 12 ++++++++
 target/arm/internals.h     | 19 ++++++++++++
 target/arm/translate.h     |  2 ++
 target/arm/helper.c        | 61 ++++++++++++++++++++++++++++++--------
 target/arm/translate-a64.c |  1 +
 5 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 297ad5e47a..408d749b7a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1201,6 +1201,7 @@ void pmu_init(ARMCPU *cpu);
 #define PSTATE_BTYPE (3U << 10)
 #define PSTATE_IL (1U << 20)
 #define PSTATE_SS (1U << 21)
+#define PSTATE_TCO (1U << 25)
 #define PSTATE_V (1U << 28)
 #define PSTATE_C (1U << 29)
 #define PSTATE_Z (1U << 30)
@@ -3196,6 +3197,7 @@ FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
 FIELD(TBFLAG_A64, BT, 9, 1)
 FIELD(TBFLAG_A64, BTYPE, 10, 2)
 FIELD(TBFLAG_A64, TBID, 12, 2)
+FIELD(TBFLAG_A64, MTE_ACTIVE, 14, 1)
 
 static inline bool bswap_code(bool sctlr_b)
 {
@@ -3598,6 +3600,16 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
 }
 
+static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0;
+}
+
+static inline bool isar_feature_aa64_mte(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2;
+}
+
 /*
  * Forward to the above feature tests given an ARMCPU pointer.
  */
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 232d963875..dcc5d6cca3 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -983,6 +983,7 @@ typedef struct ARMVAParameters {
     bool tbid       : 1;
     bool epd        : 1;
     bool hpd        : 1;
+    bool tcma       : 1;
     bool using16k   : 1;
     bool using64k   : 1;
 } ARMVAParameters;
@@ -1007,6 +1008,24 @@ static inline int exception_target_el(CPUARMState *env)
     return target_el;
 }
 
+/* Determine if allocation tags are available.  */
+static inline bool allocation_tag_access_enabled(CPUARMState *env, int el,
+                                                 uint64_t sctlr)
+{
+    if (el < 3
+        && arm_feature(env, ARM_FEATURE_EL3)
+        && !(env->cp15.scr_el3 & SCR_ATA)) {
+        return false;
+    }
+    if (el < 2
+        && arm_feature(env, ARM_FEATURE_EL2)
+        && !(arm_hcr_el2_eff(env) & HCR_ATA)) {
+        return false;
+    }
+    sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA);
+    return sctlr != 0;
+}
+
 #ifndef CONFIG_USER_ONLY
 
 /* Security attributes for an address, as returned by v8m_security_lookup. */
diff --git a/target/arm/translate.h b/target/arm/translate.h
index dd24f91f26..9913c35cc2 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -75,6 +75,8 @@ typedef struct DisasContext {
     bool is_ldex;
     /* True if v8.3-PAuth is active.  */
     bool pauth_active;
+    /* True if v8.5-MTE tag checks affect the PE.  */
+    bool mte_active;
     /* True with v8.5-BTI and SCTLR_ELx.BT* set.  */
     bool bt;
     /*
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0d9a2d2ab7..b690eda136 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1904,6 +1904,9 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     if (cpu_isar_feature(aa64_pauth, cpu)) {
         valid_mask |= SCR_API | SCR_APK;
     }
+    if (cpu_isar_feature(aa64_mte, cpu)) {
+        valid_mask |= SCR_ATA;
+    }
 
     /* Clear all-context RES0 bits.  */
     value &= valid_mask;
@@ -4158,22 +4161,31 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     ARMCPU *cpu = env_archcpu(env);
 
-    if (raw_read(env, ri) == value) {
-        /* Skip the TLB flush if nothing actually changed; Linux likes
-         * to do a lot of pointless SCTLR writes.
-         */
-        return;
-    }
-
     if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
         /* M bit is RAZ/WI for PMSA with no MPU implemented */
         value &= ~SCTLR_M;
     }
 
-    raw_write(env, ri, value);
+    if (!cpu_isar_feature(aa64_mte, cpu)) {
+        if (ri->opc1 == 6) { /* SCTLR_EL3 */
+            value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA);
+        } else {
+            value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF |
+                       SCTLR_ATA0 | SCTLR_ATA);
+        }
+    }
+
     /* ??? Lots of these bits are not implemented.  */
-    /* This may enable/disable the MMU, so do a TLB flush.  */
-    tlb_flush(CPU(cpu));
+
+    if (raw_read(env, ri) != value) {
+        /*
+         * This may enable/disable the MMU, so do a TLB flush.
+         * Skip the TLB flush if nothing actually changed;
+         * Linux likes to do a lot of pointless SCTLR writes.
+         */
+        raw_write(env, ri, value);
+        tlb_flush(CPU(cpu));
+    }
 }
 
 static CPAccessResult fpexc32_access(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -4679,6 +4691,9 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
     if (cpu_isar_feature(aa64_pauth, cpu)) {
         valid_mask |= HCR_API | HCR_APK;
     }
+    if (cpu_isar_feature(aa64_mte, cpu)) {
+        valid_mask |= HCR_ATA;
+    }
 
     /* Clear RES0 bits.  */
     value &= valid_mask;
@@ -9302,7 +9317,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
 {
     uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
     uint32_t el = regime_el(env, mmu_idx);
-    bool tbi, tbid, epd, hpd, using16k, using64k;
+    bool tbi, tbid, epd, hpd, tcma, using16k, using64k;
     int select, tsz;
 
     /*
@@ -9317,11 +9332,12 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         using16k = extract32(tcr, 15, 1);
         if (mmu_idx == ARMMMUIdx_S2NS) {
             /* VTCR_EL2 */
-            tbi = tbid = hpd = false;
+            tbi = tbid = hpd = tcma = false;
         } else {
             tbi = extract32(tcr, 20, 1);
             hpd = extract32(tcr, 24, 1);
             tbid = extract32(tcr, 29, 1);
+            tcma = extract32(tcr, 30, 1);
         }
         epd = false;
     } else if (!select) {
@@ -9332,6 +9348,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         tbi = extract64(tcr, 37, 1);
         hpd = extract64(tcr, 41, 1);
         tbid = extract64(tcr, 51, 1);
+        tcma = extract64(tcr, 57, 1);
     } else {
         int tg = extract32(tcr, 30, 2);
         using16k = tg == 1;
@@ -9341,6 +9358,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         tbi = extract64(tcr, 38, 1);
         hpd = extract64(tcr, 42, 1);
         tbid = extract64(tcr, 52, 1);
+        tcma = extract64(tcr, 58, 1);
     }
     tsz = MIN(tsz, 39);  /* TODO: ARMv8.4-TTST */
     tsz = MAX(tsz, 16);  /* TODO: ARMv8.2-LVA  */
@@ -9352,6 +9370,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         .tbid = tbid,
         .epd = epd,
         .hpd = hpd,
+        .tcma = tcma,
         .using16k = using16k,
         .using64k = using64k,
     };
@@ -11065,6 +11084,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
     if (is_a64(env)) {
         ARMCPU *cpu = env_archcpu(env);
         uint64_t sctlr;
+        int tbid;
 
         *pc = env->pc;
         flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
@@ -11073,7 +11093,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
         {
             ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
             ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
-            int tbii, tbid;
+            int tbii;
 
             /* FIXME: ARMv8.1-VHE S2 translation regime.  */
             if (regime_el(env, stage1) < 2) {
@@ -11126,6 +11146,21 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             }
             flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
         }
+
+        /*
+         * Set MTE_ACTIVE if any access may be Checked, and leave clear
+         * if all accesses must be Unchecked:
+         * 1) If no TBI, then there are no tags in the address to check,
+         * 2) If Tag Check Override, then all accesses are Unchecked,
+         * 3) If Tag Check Fail == 0, then Checked access have no effect,
+         * 4) If no Allocation Tag Access, then all accesses are Unchecked.
+         */
+        if (tbid
+            && !(env->pstate & PSTATE_TCO)
+            && (sctlr & (current_el == 0 ? SCTLR_TCF0 : SCTLR_TCF))
+            && allocation_tag_access_enabled(env, current_el, sctlr)) {
+            flags = FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1);
+        }
     } else {
         *pc = env->regs[15];
         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 2d6cd09634..51f3af9cd9 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14161,6 +14161,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
     dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
     dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
     dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
+    dc->mte_active = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE);
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->cp_regs = arm_cpu->cp_regs;
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 02/22] target/arm: Add regime_has_2_ranges
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 01/22] target/arm: Add MTE_ACTIVE to tb_flags Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-03 11:01   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 03/22] target/arm: Add MTE system registers Richard Henderson
                   ` (21 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

A translation with 2 ranges has both positive and negative addresses.
This is true for the EL1&0 and the as-yet unimplemented EL2&0 regimes.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/internals.h     | 14 ++++++++++++++
 target/arm/helper.c        | 22 +++++-----------------
 target/arm/translate-a64.c |  3 +--
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index dcc5d6cca3..9486680b87 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -804,6 +804,20 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu)
     }
 }
 
+/* Return true if this address translation regime has two ranges.  */
+static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx)
+{
+    switch (mmu_idx) {
+    case ARMMMUIdx_S12NSE0:
+    case ARMMMUIdx_S12NSE1:
+    case ARMMMUIdx_S1NSE0:
+    case ARMMMUIdx_S1NSE1:
+        return true;
+    default:
+        return false;
+    }
+}
+
 /* Return true if this address translation regime is secure */
 static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index b690eda136..f9dee51ede 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8774,15 +8774,8 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
     }
 
     if (is_aa64) {
-        switch (regime_el(env, mmu_idx)) {
-        case 1:
-            if (!is_user) {
-                xn = pxn || (user_rw & PAGE_WRITE);
-            }
-            break;
-        case 2:
-        case 3:
-            break;
+        if (regime_has_2_ranges(mmu_idx) && !is_user) {
+            xn = pxn || (user_rw & PAGE_WRITE);
         }
     } else if (arm_feature(env, ARM_FEATURE_V7)) {
         switch (regime_el(env, mmu_idx)) {
@@ -9316,7 +9309,6 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
                                         ARMMMUIdx mmu_idx)
 {
     uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
-    uint32_t el = regime_el(env, mmu_idx);
     bool tbi, tbid, epd, hpd, tcma, using16k, using64k;
     int select, tsz;
 
@@ -9326,7 +9318,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
      */
     select = extract64(va, 55, 1);
 
-    if (el > 1) {
+    if (!regime_has_2_ranges(mmu_idx)) {
         tsz = extract32(tcr, 0, 6);
         using64k = extract32(tcr, 14, 1);
         using16k = extract32(tcr, 15, 1);
@@ -9486,10 +9478,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         param = aa64_va_parameters(env, address, mmu_idx,
                                    access_type != MMU_INST_FETCH);
         level = 0;
-        /* If we are in 64-bit EL2 or EL3 then there is no TTBR1, so mark it
-         * invalid.
-         */
-        ttbr1_valid = (el < 2);
+        ttbr1_valid = regime_has_2_ranges(mmu_idx);
         addrsize = 64 - 8 * param.tbi;
         inputsize = 64 - param.tsz;
     } else {
@@ -11095,8 +11084,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
             int tbii;
 
-            /* FIXME: ARMv8.1-VHE S2 translation regime.  */
-            if (regime_el(env, stage1) < 2) {
+            if (regime_has_2_ranges(mmu_idx)) {
                 ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
                 tbid = (p1.tbi << 1) | p0.tbi;
                 tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 51f3af9cd9..c85db69db4 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -175,8 +175,7 @@ static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
     if (tbi == 0) {
         /* Load unmodified address */
         tcg_gen_mov_i64(dst, src);
-    } else if (s->current_el >= 2) {
-        /* FIXME: ARMv8.1-VHE S2 translation regime.  */
+    } else if (!regime_has_2_ranges(s->mmu_idx)) {
         /* Force tag byte to all zero */
         tcg_gen_extract_i64(dst, src, 0, 56);
     } else {
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 03/22] target/arm: Add MTE system registers
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 01/22] target/arm: Add MTE_ACTIVE to tb_flags Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 02/22] target/arm: Add regime_has_2_ranges Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-03 11:48   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3} Richard Henderson
                   ` (20 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

This is TFSRE0_EL1, TFSR_EL1, TFSR_EL2, TFSR_EL3,
RGSR_EL1, GCR_EL1, GMID_EL1, and PSTATE.TCO.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Add GMID; add access_mte.
v4: Define only TCO at mte_insn_reg.
---
 target/arm/cpu.h           |  3 ++
 target/arm/internals.h     |  6 ++++
 target/arm/helper.c        | 73 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c | 11 ++++++
 4 files changed, 93 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 408d749b7a..d99bb5e956 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -486,6 +486,9 @@ typedef struct CPUARMState {
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
         uint64_t vpidr_el2; /* Virtualization Processor ID Register */
         uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+        uint64_t tfsr_el[4]; /* tfsrel0_el1 is index 0.  */
+        uint64_t gcr_el1;
+        uint64_t rgsr_el1;
     } cp15;
 
     struct {
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 9486680b87..bfa243be06 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1079,4 +1079,10 @@ void arm_log_exception(int idx);
 
 #endif /* !CONFIG_USER_ONLY */
 
+/*
+ * The log2 of the words in the tag block, for GMID_EL1.BS.
+ * The is the maximum, 256 bytes, which manipulates 64-bits of tags.
+ */
+#define GMID_EL1_BS  6
+
 #endif
diff --git a/target/arm/helper.c b/target/arm/helper.c
index f9dee51ede..f435a8d8bd 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5921,6 +5921,73 @@ static const ARMCPRegInfo rndr_reginfo[] = {
       .access = PL0_R, .readfn = rndr_readfn },
     REGINFO_SENTINEL
 };
+
+static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 bool isread)
+{
+    int el = arm_current_el(env);
+
+    if (el < 2 &&
+        arm_feature(env, ARM_FEATURE_EL2) &&
+        !(arm_hcr_el2_eff(env) & HCR_ATA)) {
+        return CP_ACCESS_TRAP_EL2;
+    }
+    if (el < 3 &&
+        arm_feature(env, ARM_FEATURE_EL3) &&
+        !(env->cp15.scr_el3 & SCR_ATA)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
+
+static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return env->pstate & PSTATE_TCO;
+}
+
+static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+    env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO);
+}
+
+static const ARMCPRegInfo mte_reginfo[] = {
+    { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 6, .crm = 6, .opc2 = 1,
+      .access = PL1_RW, .accessfn = access_mte,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) },
+    { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 6, .crm = 5, .opc2 = 0,
+      .access = PL1_RW, .accessfn = access_mte,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
+    { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 5, .opc2 = 0,
+      .access = PL2_RW, .accessfn = access_mte,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) },
+    { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 6, .opc2 = 0,
+      .access = PL3_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) },
+    { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5,
+      .access = PL1_RW, .accessfn = access_mte,
+      .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) },
+    { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6,
+      .access = PL1_RW, .accessfn = access_mte,
+      .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) },
+    { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
+      .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS },
+    REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo mte_tco_reginfo[] = {
+    { .name = "TCO", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
+      .type = ARM_CP_NO_RAW,
+      .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },
+    REGINFO_SENTINEL
+};
 #endif
 
 static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -6881,6 +6948,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     if (cpu_isar_feature(aa64_rndr, cpu)) {
         define_arm_cp_regs(cpu, rndr_reginfo);
     }
+    if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {
+        define_arm_cp_regs(cpu, mte_tco_reginfo);
+    }
+    if (cpu_isar_feature(aa64_mte, cpu)) {
+        define_arm_cp_regs(cpu, mte_reginfo);
+    }
 #endif
 
     /*
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index c85db69db4..62bdf50796 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1611,6 +1611,17 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
         s->base.is_jmp = DISAS_UPDATE;
         break;
 
+    case 0x1c: /* TCO */
+        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
+            goto do_unallocated;
+        }
+        if (crm & 1) {
+            set_pstate_bits(PSTATE_TCO);
+        } else {
+            clear_pstate_bits(PSTATE_TCO);
+        }
+        break;
+
     default:
     do_unallocated:
         unallocated_encoding(s);
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3}
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (2 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 03/22] target/arm: Add MTE system registers Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-03 13:42   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 05/22] target/arm: Suppress tag check for sp+offset Richard Henderson
                   ` (19 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Implements the rules of "PE generation of Checked and Unchecked
accesses" which aren't already implied by TB_FLAGS_MTE_ACTIVE.
Implements the rules of "PE handling of Tag Check Failure".

Does not implement tag physical address space, so all operations
reduce to unchecked so far.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Fix TFSR update.
v3: Split helper_mte_check per {1,2} IAs; take tbi data from translate.
v5: Split helper_mte_check3, the only one that needs a runtime check for tbi.
---
 target/arm/helper-a64.h    |   4 +
 target/arm/mte_helper.c    | 167 +++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c |  15 +++-
 target/arm/Makefile.objs   |   1 +
 4 files changed, 186 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/mte_helper.c

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index a915c1247f..a82e21f15a 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -102,3 +102,7 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
+
+DEF_HELPER_FLAGS_2(mte_check1, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_2(mte_check2, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(mte_check3, TCG_CALL_NO_WG, i64, env, i64, i32)
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
new file mode 100644
index 0000000000..bbb90cbe86
--- /dev/null
+++ b/target/arm/mte_helper.c
@@ -0,0 +1,167 @@
+/*
+ * ARM v8.5-MemTag Operations
+ *
+ * Copyright (c) 2019 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "exec/helper-proto.h"
+
+
+static int get_allocation_tag(CPUARMState *env, uint64_t ptr, uintptr_t ra)
+{
+    /* Tag storage not implemented.  */
+    return -1;
+}
+
+static int allocation_tag_from_addr(uint64_t ptr)
+{
+    ptr += 1ULL << 55;  /* carry ptr[55] into ptr[59:56].  */
+    return extract64(ptr, 56, 4);
+}
+
+/*
+ * Perform a checked access for MTE.
+ * On arrival, TBI is known to enabled, as is allocation_tag_access_enabled.
+ */
+static uint64_t do_mte_check(CPUARMState *env, uint64_t dirty_ptr,
+                             uint64_t clean_ptr, uint32_t select,
+                             uintptr_t ra)
+{
+    ARMMMUIdx stage1 = arm_stage1_mmu_idx(env);
+    int ptr_tag, mem_tag;
+
+    /*
+     * If TCMA is enabled, then physical tag 0 is unchecked.
+     * Note the rules in D6.8.1 are written with logical tags, where
+     * the corresponding physical tag rule is simpler: equal to 0.
+     * We will need the physical tag below anyway.
+     */
+    ptr_tag = allocation_tag_from_addr(dirty_ptr);
+    if (ptr_tag == 0) {
+        ARMVAParameters p = aa64_va_parameters(env, dirty_ptr, stage1, true);
+        if (p.tcma) {
+            return clean_ptr;
+        }
+    }
+
+    /*
+     * If an access is made to an address that does not provide tag
+     * storage, the result is IMPLEMENTATION DEFINED.  We choose to
+     * treat the access as unchecked.
+     * This is similar to MemAttr != Tagged, which are also unchecked.
+     */
+    mem_tag = get_allocation_tag(env, clean_ptr, ra);
+    if (mem_tag < 0) {
+        return clean_ptr;
+    }
+
+    /* If the tags do not match, the tag check operation fails.  */
+    if (unlikely(ptr_tag != mem_tag)) {
+        int el, regime_el, tcf;
+        uint64_t sctlr;
+
+        el = arm_current_el(env);
+        regime_el = (el ? el : 1);   /* TODO: ARMv8.1-VHE EL2&0 regime */
+        sctlr = env->cp15.sctlr_el[regime_el];
+        if (el == 0) {
+            tcf = extract64(sctlr, 38, 2);
+        } else {
+            tcf = extract64(sctlr, 40, 2);
+        }
+
+        switch (tcf) {
+        case 1:
+            /*
+             * Tag check fail causes a synchronous exception.
+             *
+             * In restore_state_to_opc, we set the exception syndrome
+             * for the load or store operation.  Do that first so we
+             * may overwrite that with the syndrome for the tag check.
+             */
+            cpu_restore_state(env_cpu(env), ra, true);
+            env->exception.vaddress = dirty_ptr;
+            raise_exception(env, EXCP_DATA_ABORT,
+                            syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0x11),
+                            exception_target_el(env));
+            /* noreturn; fall through to assert anyway */
+
+        case 0:
+            /*
+             * Tag check fail does not affect the PE.
+             * We eliminate this case by not setting MTE_ACTIVE
+             * in tb_flags, so that we never make this runtime call.
+             */
+            g_assert_not_reached();
+
+        case 2:
+            /* Tag check fail causes asynchronous flag set.  */
+            env->cp15.tfsr_el[regime_el] |= 1 << select;
+            break;
+
+        default:
+            /* Case 3: Reserved. */
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Tag check failure with SCTLR_EL%d.TCF "
+                          "set to reserved value %d\n", regime_el, tcf);
+            break;
+        }
+    }
+
+    return clean_ptr;
+}
+
+/*
+ * Perform check in translation regime w/single IA range.
+ * It is known that TBI is enabled on entry.
+ */
+uint64_t HELPER(mte_check1)(CPUARMState *env, uint64_t dirty_ptr)
+{
+    uint64_t clean_ptr = extract64(dirty_ptr, 0, 56);
+    return do_mte_check(env, dirty_ptr, clean_ptr, 0, GETPC());
+}
+
+/*
+ * Perform check in translation regime w/two IA ranges.
+ * It is known that TBI is enabled on entry.
+ */
+uint64_t HELPER(mte_check2)(CPUARMState *env, uint64_t dirty_ptr)
+{
+    uint32_t select = extract64(dirty_ptr, 55, 1);
+    uint64_t clean_ptr = sextract64(dirty_ptr, 0, 56);
+    return do_mte_check(env, dirty_ptr, clean_ptr, select, GETPC());
+}
+
+/*
+ * Perform check in translation regime w/two IA ranges.
+ * The TBI argument is the concatenation of TBI1:TBI0.
+ */
+uint64_t HELPER(mte_check3)(CPUARMState *env, uint64_t dirty_ptr, uint32_t tbi)
+{
+    uint32_t select = extract64(dirty_ptr, 55, 1);
+    uint64_t clean_ptr = sextract64(dirty_ptr, 0, 56);
+
+    if ((tbi >> select) & 1) {
+        return do_mte_check(env, dirty_ptr, clean_ptr, select, GETPC());
+    } else {
+        /* TBI is disabled; the access is unchecked.  */
+        return dirty_ptr;
+    }
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 62bdf50796..8e4fea6b4c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -214,7 +214,20 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
 {
     TCGv_i64 clean = new_tmp_a64(s);
-    gen_top_byte_ignore(s, clean, addr, s->tbid);
+
+    /* Note that s->mte_active already includes a check for s->tbid != 0. */
+    if (!s->mte_active) {
+        gen_top_byte_ignore(s, clean, addr, s->tbid);
+    } else if (!regime_has_2_ranges(s->mmu_idx)) {
+        gen_helper_mte_check1(clean, cpu_env, addr);
+    } else if (s->tbid == 3) {
+        /* Both TBI1:TBI0 are set; no need to check at runtime. */
+        gen_helper_mte_check2(clean, cpu_env, addr);
+    } else {
+        TCGv_i32 tbi = tcg_const_i32(s->tbid);
+        gen_helper_mte_check3(clean, cpu_env, addr, tbi);
+        tcg_temp_free_i32(tbi);
+    }
     return clean;
 }
 
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index cf26c16f5f..8fd7d086c8 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -67,3 +67,4 @@ obj-$(CONFIG_SOFTMMU) += psci.o
 obj-$(TARGET_AARCH64) += translate-a64.o helper-a64.o
 obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
 obj-$(TARGET_AARCH64) += pauth_helper.o
+obj-$(TARGET_AARCH64) += mte_helper.o
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 05/22] target/arm: Suppress tag check for sp+offset
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (3 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3} Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-03 14:07   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 06/22] target/arm: Implement the IRG instruction Richard Henderson
                   ` (18 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

R0078 specifies that base register, or base register plus immediate
offset, is unchecked when the base register is SP.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Include writeback addresses as checked.
---
 target/arm/translate-a64.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8e4fea6b4c..18d45fba87 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -211,12 +211,12 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
  * This is always a fresh temporary, as we need to be able to
  * increment this independently of a dirty write-back address.
  */
-static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
+static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr, bool check)
 {
     TCGv_i64 clean = new_tmp_a64(s);
 
     /* Note that s->mte_active already includes a check for s->tbid != 0. */
-    if (!s->mte_active) {
+    if (!check || !s->mte_active) {
         gen_top_byte_ignore(s, clean, addr, s->tbid);
     } else if (!regime_has_2_ranges(s->mmu_idx)) {
         gen_helper_mte_check1(clean, cpu_env, addr);
@@ -2334,7 +2334,7 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
     if (rn == 31) {
         gen_check_sp_alignment(s);
     }
-    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
                                size | MO_ALIGN | s->be_data);
 }
@@ -2352,7 +2352,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
     if (rn == 31) {
         gen_check_sp_alignment(s);
     }
-    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
 
     if (size == 2) {
         TCGv_i64 cmp = tcg_temp_new_i64();
@@ -2477,7 +2477,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         if (is_lasr) {
             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
         }
-        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
         gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
         return;
 
@@ -2486,7 +2486,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         if (rn == 31) {
             gen_check_sp_alignment(s);
         }
-        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
         s->is_ldex = true;
         gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
         if (is_lasr) {
@@ -2506,7 +2506,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
             gen_check_sp_alignment(s);
         }
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
-        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
         do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
         return;
@@ -2522,7 +2522,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         if (rn == 31) {
             gen_check_sp_alignment(s);
         }
-        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
@@ -2536,7 +2536,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
             }
-            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
             gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
             return;
         }
@@ -2554,7 +2554,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
             if (rn == 31) {
                 gen_check_sp_alignment(s);
             }
-            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
             s->is_ldex = true;
             gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
             if (is_lasr) {
@@ -2744,7 +2744,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     if (!postindex) {
         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
     }
-    clean_addr = clean_data_tbi(s, dirty_addr);
+    clean_addr = clean_data_tbi(s, dirty_addr, wback || rn != 31);
 
     if (is_vector) {
         if (is_load) {
@@ -2882,7 +2882,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
     if (!post_index) {
         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
     }
-    clean_addr = clean_data_tbi(s, dirty_addr);
+    clean_addr = clean_data_tbi(s, dirty_addr, writeback || rn != 31);
 
     if (is_vector) {
         if (is_store) {
@@ -2989,7 +2989,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
 
     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
-    clean_addr = clean_data_tbi(s, dirty_addr);
+    clean_addr = clean_data_tbi(s, dirty_addr, true);
 
     if (is_vector) {
         if (is_store) {
@@ -3074,7 +3074,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
     dirty_addr = read_cpu_reg_sp(s, rn, 1);
     offset = imm12 << size;
     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
-    clean_addr = clean_data_tbi(s, dirty_addr);
+    clean_addr = clean_data_tbi(s, dirty_addr, rn != 31);
 
     if (is_vector) {
         if (is_store) {
@@ -3158,7 +3158,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     if (rn == 31) {
         gen_check_sp_alignment(s);
     }
-    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn), rn != 31);
     tcg_rs = read_cpu_reg(s, rs, true);
 
     if (o3_opc == 1) { /* LDCLR */
@@ -3220,7 +3220,7 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn,
     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
 
     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
-    clean_addr = clean_data_tbi(s, dirty_addr);
+    clean_addr = clean_data_tbi(s, dirty_addr, is_wback || rn != 31);
 
     tcg_rt = cpu_reg(s, rt);
     do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
@@ -3380,7 +3380,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
     elements = (is_q ? 16 : 8) / ebytes;
 
     tcg_rn = cpu_reg_sp(s, rn);
-    clean_addr = clean_data_tbi(s, tcg_rn);
+    clean_addr = clean_data_tbi(s, tcg_rn, is_postidx || rn != 31);
     tcg_ebytes = tcg_const_i64(ebytes);
 
     for (r = 0; r < rpt; r++) {
@@ -3523,7 +3523,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
     }
 
     tcg_rn = cpu_reg_sp(s, rn);
-    clean_addr = clean_data_tbi(s, tcg_rn);
+    clean_addr = clean_data_tbi(s, tcg_rn, is_postidx || rn != 31);
     tcg_ebytes = tcg_const_i64(ebytes);
 
     for (xs = 0; xs < selem; xs++) {
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 06/22] target/arm: Implement the IRG instruction
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (4 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 05/22] target/arm: Suppress tag check for sp+offset Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-03 14:26   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 07/22] target/arm: Implement ADDG, SUBG instructions Richard Henderson
                   ` (17 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Update to 00eac5.
    Merge choose_random_nonexcluded_tag into helper_irg since
    that pseudo function no longer exists separately.
---
 target/arm/helper-a64.h    |  1 +
 target/arm/mte_helper.c    | 57 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c |  7 +++++
 3 files changed, 65 insertions(+)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index a82e21f15a..6ff7f5b756 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -106,3 +106,4 @@ DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(mte_check1, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_2(mte_check2, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_3(mte_check3, TCG_CALL_NO_WG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index bbb90cbe86..9848849a91 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -37,6 +37,31 @@ static int allocation_tag_from_addr(uint64_t ptr)
     return extract64(ptr, 56, 4);
 }
 
+static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
+{
+    if (exclude == 0xffff) {
+        return 0;
+    }
+    if (offset == 0) {
+        while (exclude & (1 << tag)) {
+            tag = (tag + 1) & 15;
+        }
+    } else {
+        do {
+            do {
+                tag = (tag + 1) & 15;
+            } while (exclude & (1 << tag));
+        } while (--offset > 0);
+    }
+    return tag;
+}
+
+static uint64_t address_with_allocation_tag(uint64_t ptr, int rtag)
+{
+    rtag -= extract64(ptr, 55, 1);
+    return deposit64(ptr, 56, 4, rtag);
+}
+
 /*
  * Perform a checked access for MTE.
  * On arrival, TBI is known to enabled, as is allocation_tag_access_enabled.
@@ -165,3 +190,35 @@ uint64_t HELPER(mte_check3)(CPUARMState *env, uint64_t dirty_ptr, uint32_t tbi)
         return dirty_ptr;
     }
 }
+
+uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
+{
+    int el = arm_current_el(env);
+    uint64_t sctlr = arm_sctlr(env, el);
+    int rtag = 0;
+
+    if (allocation_tag_access_enabled(env, el, sctlr)) {
+        /*
+         * Our IMPDEF choice for GCR_EL1.RRND==1 is to behave as if
+         * GCR_EL1.RRND==0, always producing deterministic results.
+         */
+        uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
+        int start = extract32(env->cp15.rgsr_el1, 0, 4);
+        int seed = extract32(env->cp15.rgsr_el1, 8, 16);
+        int offset, i;
+
+        /* RandomTag */
+        for (i = offset = 0; i < 4; ++i) {
+            /* NextRandomTagBit */
+            int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
+                       extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
+            seed = (top << 15) | (seed >> 1);
+            offset |= top << i;
+        }
+        rtag = choose_nonexcluded_tag(start, offset, exclude);
+
+        env->cp15.rgsr_el1 = rtag | (seed << 8);
+    }
+
+    return address_with_allocation_tag(rn, rtag);
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 18d45fba87..83d253d67f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5156,6 +5156,13 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
     case 3: /* SDIV */
         handle_div(s, true, sf, rm, rn, rd);
         break;
+    case 4: /* IRG */
+        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+            goto do_unallocated;
+        }
+        gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
+                       cpu_reg_sp(s, rn), cpu_reg(s, rm));
+        break;
     case 8: /* LSLV */
         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
         break;
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 07/22] target/arm: Implement ADDG, SUBG instructions
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (5 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 06/22] target/arm: Implement the IRG instruction Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 08/22] target/arm: Implement the GMI instruction Richard Henderson
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Shift offset in translate; use extract32.
---
 target/arm/helper-a64.h    |  2 ++
 target/arm/internals.h     |  4 +++
 target/arm/mte_helper.c    | 32 +++++++++++++++++
 target/arm/translate-a64.c | 71 ++++++++++++++++++++++++++------------
 4 files changed, 86 insertions(+), 23 deletions(-)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 6ff7f5b756..268c114b79 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -107,3 +107,5 @@ DEF_HELPER_FLAGS_2(mte_check1, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_2(mte_check2, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_3(mte_check3, TCG_CALL_NO_WG, i64, env, i64, i32)
 DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_4(addg, TCG_CALL_NO_RWG_SE, i64, env, i64, i32, i32)
+DEF_HELPER_FLAGS_4(subg, TCG_CALL_NO_RWG_SE, i64, env, i64, i32, i32)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index bfa243be06..a434743b15 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1085,4 +1085,8 @@ void arm_log_exception(int idx);
  */
 #define GMID_EL1_BS  6
 
+/* We associate one allocation tag per 16 bytes, the minimum.  */
+#define LOG2_TAG_GRANULE 4
+#define TAG_GRANULE      (1 << LOG2_TAG_GRANULE)
+
 #endif
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 9848849a91..c3edc51bba 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -222,3 +222,35 @@ uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
 
     return address_with_allocation_tag(rn, rtag);
 }
+
+uint64_t HELPER(addg)(CPUARMState *env, uint64_t ptr,
+                      uint32_t offset, uint32_t tag_offset)
+{
+    int el = arm_current_el(env);
+    uint64_t sctlr = arm_sctlr(env, el);
+    int rtag = 0;
+
+    if (allocation_tag_access_enabled(env, el, sctlr)) {
+        int start_tag = allocation_tag_from_addr(ptr);
+        uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
+        rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);
+    }
+
+    return address_with_allocation_tag(ptr + offset, rtag);
+}
+
+uint64_t HELPER(subg)(CPUARMState *env, uint64_t ptr,
+                      uint32_t offset, uint32_t tag_offset)
+{
+    int el = arm_current_el(env);
+    uint64_t sctlr = arm_sctlr(env, el);
+    int rtag = 0;
+
+    if (allocation_tag_access_enabled(env, el, sctlr)) {
+        int start_tag = allocation_tag_from_addr(ptr);
+        uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
+        rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);
+    }
+
+    return address_with_allocation_tag(ptr - offset, rtag);
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 83d253d67f..26aee0c1c9 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3628,7 +3628,9 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
  *    sf: 0 -> 32bit, 1 -> 64bit
  *    op: 0 -> add  , 1 -> sub
  *     S: 1 -> set flags
- * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
+ * shift: 00 -> LSL imm by 0,
+ *        01 -> LSL imm by 12
+ *        10 -> ADDG, SUBG
  */
 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
 {
@@ -3639,10 +3641,10 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
     bool setflags = extract32(insn, 29, 1);
     bool sub_op = extract32(insn, 30, 1);
     bool is_64bit = extract32(insn, 31, 1);
+    bool is_tag = false;
 
     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
-    TCGv_i64 tcg_result;
 
     switch (shift) {
     case 0x0:
@@ -3650,35 +3652,58 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
     case 0x1:
         imm <<= 12;
         break;
+    case 0x2:
+        /* ADDG, SUBG */
+        if (!is_64bit || setflags || (imm & 0x30) ||
+            !dc_isar_feature(aa64_mte_insn_reg, s)) {
+            goto do_unallocated;
+        }
+        is_tag = true;
+        break;
     default:
+    do_unallocated:
         unallocated_encoding(s);
         return;
     }
 
-    tcg_result = tcg_temp_new_i64();
-    if (!setflags) {
-        if (sub_op) {
-            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
-        } else {
-            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
-        }
-    } else {
-        TCGv_i64 tcg_imm = tcg_const_i64(imm);
-        if (sub_op) {
-            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
-        } else {
-            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
-        }
-        tcg_temp_free_i64(tcg_imm);
-    }
+    if (is_tag) {
+        TCGv_i32 tag_offset = tcg_const_i32(imm & 15);
+        TCGv_i32 offset = tcg_const_i32((imm >> 6) << LOG2_TAG_GRANULE);
 
-    if (is_64bit) {
-        tcg_gen_mov_i64(tcg_rd, tcg_result);
+        if (sub_op) {
+            gen_helper_subg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset);
+        } else {
+            gen_helper_addg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset);
+        }
+        tcg_temp_free_i32(tag_offset);
+        tcg_temp_free_i32(offset);
     } else {
-        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
-    }
+        TCGv_i64 tcg_result;
 
-    tcg_temp_free_i64(tcg_result);
+        if (!setflags) {
+            tcg_result = tcg_rd;
+            if (sub_op) {
+                tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
+            } else {
+                tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
+            }
+        } else {
+            TCGv_i64 tcg_imm = tcg_const_i64(imm);
+            tcg_result = new_tmp_a64(s);
+            if (sub_op) {
+                gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
+            } else {
+                gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
+            }
+            tcg_temp_free_i64(tcg_imm);
+        }
+
+        if (is_64bit) {
+            tcg_gen_mov_i64(tcg_rd, tcg_result);
+        } else {
+            tcg_gen_ext32u_i64(tcg_rd, tcg_result);
+        }
+    }
 }
 
 /* The input should be a value in the bottom e bits (with higher
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 08/22] target/arm: Implement the GMI instruction
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (6 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 07/22] target/arm: Implement ADDG, SUBG instructions Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 09/22] target/arm: Implement the SUBP instruction Richard Henderson
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-a64.h    | 1 +
 target/arm/mte_helper.c    | 6 ++++++
 target/arm/translate-a64.c | 6 ++++++
 3 files changed, 13 insertions(+)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 268c114b79..31f848ca03 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -109,3 +109,4 @@ DEF_HELPER_FLAGS_3(mte_check3, TCG_CALL_NO_WG, i64, env, i64, i32)
 DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_4(addg, TCG_CALL_NO_RWG_SE, i64, env, i64, i32, i32)
 DEF_HELPER_FLAGS_4(subg, TCG_CALL_NO_RWG_SE, i64, env, i64, i32, i32)
+DEF_HELPER_FLAGS_2(gmi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index c3edc51bba..251dfff1e1 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -254,3 +254,9 @@ uint64_t HELPER(subg)(CPUARMState *env, uint64_t ptr,
 
     return address_with_allocation_tag(ptr - offset, rtag);
 }
+
+uint64_t HELPER(gmi)(uint64_t ptr, uint64_t mask)
+{
+    int tag = allocation_tag_from_addr(ptr);
+    return mask | (1ULL << tag);
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 26aee0c1c9..4184d65d97 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5188,6 +5188,12 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
         gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
                        cpu_reg_sp(s, rn), cpu_reg(s, rm));
         break;
+    case 5: /* GMI */
+        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+            goto do_unallocated;
+        }
+        gen_helper_gmi(cpu_reg(s, rd), cpu_reg_sp(s, rn), cpu_reg(s, rm));
+        break;
     case 8: /* LSLV */
         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
         break;
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 09/22] target/arm: Implement the SUBP instruction
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (7 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 08/22] target/arm: Implement the GMI instruction Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 10/22] target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY Richard Henderson
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Fix extraction length.
---
 target/arm/translate-a64.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4184d65d97..cf341c98d3 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5162,19 +5162,39 @@ static void handle_crc32(DisasContext *s,
  */
 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
 {
-    unsigned int sf, rm, opcode, rn, rd;
+    unsigned int sf, rm, opcode, rn, rd, setflag;
     sf = extract32(insn, 31, 1);
+    setflag = extract32(insn, 29, 1);
     rm = extract32(insn, 16, 5);
     opcode = extract32(insn, 10, 6);
     rn = extract32(insn, 5, 5);
     rd = extract32(insn, 0, 5);
 
-    if (extract32(insn, 29, 1)) {
+    if (setflag && opcode != 0) {
         unallocated_encoding(s);
         return;
     }
 
     switch (opcode) {
+    case 0: /* SUBP(S) */
+        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+            goto do_unallocated;
+        } else {
+            TCGv_i64 tcg_n, tcg_m, tcg_d;
+
+            tcg_n = read_cpu_reg_sp(s, rn, true);
+            tcg_m = read_cpu_reg_sp(s, rm, true);
+            tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
+            tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
+            tcg_d = cpu_reg(s, rd);
+
+            if (setflag) {
+                gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
+            } else {
+                tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
+            }
+        }
+        break;
     case 2: /* UDIV */
         handle_div(s, false, sf, rm, rn, rd);
         break;
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 10/22] target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (8 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 09/22] target/arm: Implement the SUBP instruction Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 16:12   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 11/22] target/arm: Implement LDG, STG, ST2G instructions Richard Henderson
                   ` (13 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

We will need this to raise unaligned exceptions from user mode.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tlb_helper.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 5feb312941..29b92a1149 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -10,8 +10,6 @@
 #include "internals.h"
 #include "exec/exec-all.h"
 
-#if !defined(CONFIG_USER_ONLY)
-
 static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
                                             unsigned int target_el,
                                             bool same_el, bool ea,
@@ -122,6 +120,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
     arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
 }
 
+#ifndef CONFIG_USER_ONLY
 /*
  * arm_cpu_do_transaction_failed: handle a memory system error response
  * (eg "no device/memory present at address") by raising an external abort
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 11/22] target/arm: Implement LDG, STG, ST2G instructions
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (9 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 10/22] target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 17:07   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 12/22] target/arm: Implement the STGP instruction Richard Henderson
                   ` (12 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Split out allocation_tag_mem.  Handle atomicity of stores.
v3: Add X[t] input to these insns; require pre-cleaned addresses.
v5: Fix !32-byte aligned operation of st2g.
---
 target/arm/helper-a64.h    |   5 ++
 target/arm/mte_helper.c    | 154 +++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c | 115 +++++++++++++++++++++++++++
 3 files changed, 274 insertions(+)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 31f848ca03..88a0241915 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -110,3 +110,8 @@ DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_4(addg, TCG_CALL_NO_RWG_SE, i64, env, i64, i32, i32)
 DEF_HELPER_FLAGS_4(subg, TCG_CALL_NO_RWG_SE, i64, env, i64, i32, i32)
 DEF_HELPER_FLAGS_2(gmi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 251dfff1e1..f1dd1cc0dd 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -25,8 +25,21 @@
 #include "exec/helper-proto.h"
 
 
+static uint8_t *allocation_tag_mem(CPUARMState *env, uint64_t ptr,
+                                   bool write, uintptr_t ra)
+{
+    /* Tag storage not implemented.  */
+    return NULL;
+}
+
 static int get_allocation_tag(CPUARMState *env, uint64_t ptr, uintptr_t ra)
 {
+    uint8_t *mem = allocation_tag_mem(env, ptr, false, ra);
+
+    if (mem) {
+        int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+        return extract32(atomic_read(mem), ofs, 4);
+    }
     /* Tag storage not implemented.  */
     return -1;
 }
@@ -260,3 +273,144 @@ uint64_t HELPER(gmi)(uint64_t ptr, uint64_t mask)
     int tag = allocation_tag_from_addr(ptr);
     return mask | (1ULL << tag);
 }
+
+uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+    int el;
+    uint64_t sctlr;
+    int rtag;
+
+    /* Trap if accessing an invalid page.  */
+    rtag = get_allocation_tag(env, ptr, GETPC());
+
+    /*
+     * The tag is squashed to zero if the page does not support tags,
+     * or if the OS is denying access to the tags.
+     */
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    if (rtag < 0 || !allocation_tag_access_enabled(env, el, sctlr)) {
+        rtag = 0;
+    }
+
+    return address_with_allocation_tag(xt, rtag);
+}
+
+static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
+{
+    if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
+        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
+                                    cpu_mmu_index(env, false), ra);
+        g_assert_not_reached();
+    }
+}
+
+/* For use in a non-parallel context, store to the given nibble.  */
+static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+{
+    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+    uint8_t old = atomic_read(mem);
+    uint8_t new = deposit32(old, ofs, 4, tag);
+
+    atomic_set(mem, new);
+}
+
+/* For use in a parallel context, atomically store to the given nibble.  */
+static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
+{
+    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+    uint8_t old = atomic_read(mem);
+
+    while (1) {
+        uint8_t new = deposit32(old, ofs, 4, tag);
+        uint8_t cmp = atomic_cmpxchg(mem, old, new);
+        if (likely(cmp == old)) {
+            return;
+        }
+        old = cmp;
+    }
+}
+
+typedef void stg_store1(uint64_t, uint8_t *, int);
+
+static void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
+                   uintptr_t ra, stg_store1 store1)
+{
+    int el;
+    uint64_t sctlr;
+    uint8_t *mem;
+
+    check_tag_aligned(env, ptr, ra);
+
+    /* Trap if accessing an invalid page.  */
+    mem = allocation_tag_mem(env, ptr, true, ra);
+
+    /* Store if page supports tags and access is enabled.  */
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    if (mem && allocation_tag_access_enabled(env, el, sctlr)) {
+        store1(ptr, mem, allocation_tag_from_addr(xt));
+    }
+}
+
+void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+    do_stg(env, ptr, xt, GETPC(), store_tag1);
+}
+
+void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+    do_stg(env, ptr, xt, GETPC(), store_tag1_parallel);
+}
+
+static void do_st2g(CPUARMState *env, uint64_t ptr1, uint64_t xt,
+                    uintptr_t ra, stg_store1 store1)
+{
+    int el, tag;
+    uint64_t ptr2, sctlr;
+    uint8_t *mem1, *mem2;
+
+    check_tag_aligned(env, ptr1, ra);
+
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    tag = allocation_tag_from_addr(xt);
+
+    /*
+     * Trap if accessing an invalid page(s).
+     * This takes priority over !allocation_tag_access_enabled.
+     */
+    mem1 = allocation_tag_mem(env, ptr1, true, ra);
+
+    if (ptr1 & TAG_GRANULE) {
+        /* The two stores are unaligned and modify two bytes.  */
+        ptr2 = ptr1 + TAG_GRANULE;
+        mem2 = allocation_tag_mem(env, ptr2, true, ra);
+
+        /* Store if page supports tags and access is enabled.  */
+        if ((mem1 || mem2) && allocation_tag_access_enabled(env, el, sctlr)) {
+            if (mem1) {
+                store1(ptr1, mem1, tag);
+            }
+            if (mem2) {
+                store1(ptr2, mem2, tag);
+            }
+        }
+    } else {
+        /* The two stores are aligned 32, and modify one byte.  */
+        if (mem1 && allocation_tag_access_enabled(env, el, sctlr)) {
+            tag |= tag << 4;
+            atomic_set(mem1, tag);
+        }
+    }
+}
+
+void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+    do_st2g(env, ptr, xt, GETPC(), store_tag1);
+}
+
+void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+    do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index cf341c98d3..c17b36ebb2 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3559,6 +3559,118 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
     }
 }
 
+/*
+ * Load/Store memory tags
+ *
+ *  31 30 29         24     22  21     12    10      5      0
+ * +-----+-------------+-----+---+------+-----+------+------+
+ * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
+ * +-----+-------------+-----+---+------+-----+------+------+
+ */
+static void disas_ldst_tag(DisasContext *s, uint32_t insn)
+{
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
+    int op2 = extract32(insn, 10, 3);
+    int op1 = extract32(insn, 22, 2);
+    bool is_load = false, is_pair = false, is_zero = false;
+    int index = 0;
+    TCGv_i64 dirty_addr, clean_addr, tcg_rt;
+
+    if ((insn & 0xff200000) != 0xd9200000
+        || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+        goto do_unallocated;
+    }
+
+    switch (op1) {
+    case 0: /* STG */
+        if (op2 != 0) {
+            /* STG */
+            index = op2 - 2;
+            break;
+        }
+        goto do_unallocated;
+    case 1:
+        if (op2 != 0) {
+            /* STZG */
+            is_zero = true;
+            index = op2 - 2;
+        } else {
+            /* LDG */
+            is_load = true;
+        }
+        break;
+    case 2:
+        if (op2 != 0) {
+            /* ST2G */
+            is_pair = true;
+            index = op2 - 2;
+            break;
+        }
+        goto do_unallocated;
+    case 3:
+        if (op2 != 0) {
+            /* STZ2G */
+            is_pair = is_zero = true;
+            index = op2 - 2;
+            break;
+        }
+        goto do_unallocated;
+
+    default:
+    do_unallocated:
+        unallocated_encoding(s);
+        return;
+    }
+
+    dirty_addr = read_cpu_reg_sp(s, rn, true);
+    if (index <= 0) {
+        /* pre-index or signed offset */
+        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
+    }
+
+    clean_addr = clean_data_tbi(s, dirty_addr, false);
+    tcg_rt = cpu_reg(s, rt);
+
+    if (is_load) {
+        gen_helper_ldg(tcg_rt, cpu_env, clean_addr, tcg_rt);
+    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        if (is_pair) {
+            gen_helper_st2g_parallel(cpu_env, clean_addr, tcg_rt);
+        } else {
+            gen_helper_stg_parallel(cpu_env, clean_addr, tcg_rt);
+        }
+    } else {
+        if (is_pair) {
+            gen_helper_st2g(cpu_env, clean_addr, tcg_rt);
+        } else {
+            gen_helper_stg(cpu_env, clean_addr, tcg_rt);
+        }
+    }
+
+    if (is_zero) {
+        TCGv_i64 tcg_zero = tcg_const_i64(0);
+        int mem_index = get_mem_index(s);
+        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
+
+        for (i = 0; i < n; i += 8) {
+            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q);
+            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+        }
+        tcg_temp_free_i64(tcg_zero);
+    }
+
+    if (index != 0) {
+        /* pre-index or post-index */
+        if (index > 0) {
+            /* post-index */
+            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
+        }
+        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
+    }
+}
+
 /* Loads and stores */
 static void disas_ldst(DisasContext *s, uint32_t insn)
 {
@@ -3583,6 +3695,9 @@ static void disas_ldst(DisasContext *s, uint32_t insn)
     case 0x0d: /* AdvSIMD load/store single structure */
         disas_ldst_single_struct(s, insn);
         break;
+    case 0x19: /* Load/store tag */
+        disas_ldst_tag(s, insn);
+        break;
     default:
         unallocated_encoding(s);
         break;
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 12/22] target/arm: Implement the STGP instruction
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (10 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 11/22] target/arm: Implement LDG, STG, ST2G instructions Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 17:15   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 13/22] target/arm: Implement the LDGM and STGM instructions Richard Henderson
                   ` (11 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Handle atomicity, require pre-cleaned address.
---
 target/arm/translate-a64.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index c17b36ebb2..4ecb0a2fb7 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2657,7 +2657,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
  * +-----+-------+---+---+-------+---+-------+-------+------+------+
  *
  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
- *      LDPSW                    01
+ *      LDPSW/STGP               01
  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
  *   V: 0 -> GPR, 1 -> Vector
  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
@@ -2682,6 +2682,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     bool is_signed = false;
     bool postindex = false;
     bool wback = false;
+    bool set_tag = false;
 
     TCGv_i64 clean_addr, dirty_addr;
 
@@ -2694,6 +2695,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
 
     if (is_vector) {
         size = 2 + opc;
+    } else if (opc == 1 && !is_load) {
+        /* STGP */
+        if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
+            unallocated_encoding(s);
+            return;
+        }
+        size = 3;
+        set_tag = true;
     } else {
         size = 2 + extract32(opc, 1, 1);
         is_signed = extract32(opc, 0, 1);
@@ -2746,6 +2755,15 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     }
     clean_addr = clean_data_tbi(s, dirty_addr, wback || rn != 31);
 
+    if (set_tag) {
+        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
+        if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+            gen_helper_stg_parallel(cpu_env, clean_addr, tcg_rn);
+        } else {
+            gen_helper_stg(cpu_env, clean_addr, tcg_rn);
+        }
+    }
+
     if (is_vector) {
         if (is_load) {
             do_fp_ld(s, rt, clean_addr, size);
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 13/22] target/arm: Implement the LDGM and STGM instructions
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (11 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 12/22] target/arm: Implement the STGP instruction Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 17:42   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 14/22] target/arm: Implement the access tag cache flushes Richard Henderson
                   ` (10 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Require pre-cleaned addresses.
---
 target/arm/helper-a64.h    |  3 ++
 target/arm/mte_helper.c    | 96 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c | 42 +++++++++++++----
 3 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 88a0241915..405aa60016 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -115,3 +115,6 @@ DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stzgm, TCG_CALL_NO_WG, void, env, i64, i64)
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index f1dd1cc0dd..f1315bae37 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -414,3 +414,99 @@ void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
 {
     do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
 }
+
+uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
+{
+    const int size = 4 << GMID_EL1_BS;
+    int el;
+    uint64_t sctlr;
+    void *mem;
+
+    ptr = QEMU_ALIGN_DOWN(ptr, size);
+
+    /* Trap if accessing an invalid page(s).  */
+    mem = allocation_tag_mem(env, ptr, false, GETPC());
+
+    /*
+     * The tag is squashed to zero if the page does not support tags,
+     * or if the OS is denying access to the tags.
+     */
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {
+        return 0;
+    }
+
+#if GMID_EL1_BS != 6
+# error "Fill in the blanks for other sizes"
+#endif
+    /*
+     * We are loading 64-bits worth of tags.  The ordering of elements
+     * within the word corresponds to a 64-bit little-endian operation.
+     */
+    return ldq_le_p(mem);
+}
+
+static uint64_t do_stgm(CPUARMState *env, uint64_t ptr,
+                        uint64_t val, uintptr_t ra)
+{
+    const int size = 4 << GMID_EL1_BS;
+    int el;
+    uint64_t sctlr;
+    void *mem;
+
+    ptr = QEMU_ALIGN_DOWN(ptr, size);
+
+    /* Trap if accessing an invalid page(s).  */
+    mem = allocation_tag_mem(env, ptr, true, ra);
+
+    /*
+     * No action if the page does not support tags,
+     * or if the OS is denying access to the tags.
+     */
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {
+        return ptr;
+    }
+
+#if GMID_EL1_BS != 6
+# error "Fill in the blanks for other sizes"
+#endif
+    /*
+     * We are storing 64-bits worth of tags.  The ordering of elements
+     * within the word corresponds to a 64-bit little-endian operation.
+     */
+    stq_le_p(mem, val);
+
+    return ptr;
+}
+
+void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+    do_stgm(env, ptr, val, GETPC());
+}
+
+void HELPER(stzgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+    int i, mmu_idx, size = 4 << GMID_EL1_BS;
+    uintptr_t ra = GETPC();
+    void *mem;
+
+    ptr = do_stgm(env, ptr, val, ra);
+
+    /*
+     * We will have just probed this virtual address in do_stgm.
+     * If the tlb_vaddr_to_host fails, then the memory is not ram,
+     * or is monitored in some other way.  Fall back to stores.
+     */
+    mmu_idx = cpu_mmu_index(env, false);
+    mem = tlb_vaddr_to_host(env, ptr, MMU_DATA_STORE, mmu_idx);
+    if (mem) {
+        memset(mem, 0, size);
+    } else {
+        for (i = 0; i < size; i += 8) {
+            cpu_stq_data_ra(env, ptr + i, 0, ra);
+        }
+    }
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4ecb0a2fb7..4e049bb4aa 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3592,7 +3592,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
     int op2 = extract32(insn, 10, 3);
     int op1 = extract32(insn, 22, 2);
-    bool is_load = false, is_pair = false, is_zero = false;
+    bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
     int index = 0;
     TCGv_i64 dirty_addr, clean_addr, tcg_rt;
 
@@ -3602,13 +3602,18 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
     }
 
     switch (op1) {
-    case 0: /* STG */
+    case 0:
         if (op2 != 0) {
             /* STG */
             index = op2 - 2;
-            break;
+        } else {
+            /* STZGM */
+            if (s->current_el == 0 || offset != 0) {
+                goto do_unallocated;
+            }
+            is_mult = is_zero = true;
         }
-        goto do_unallocated;
+        break;
     case 1:
         if (op2 != 0) {
             /* STZG */
@@ -3624,17 +3629,27 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
             /* ST2G */
             is_pair = true;
             index = op2 - 2;
-            break;
+        } else {
+            /* STGM */
+            if (s->current_el == 0 || offset != 0) {
+                goto do_unallocated;
+            }
+            is_mult = true;
         }
-        goto do_unallocated;
+        break;
     case 3:
         if (op2 != 0) {
             /* STZ2G */
             is_pair = is_zero = true;
             index = op2 - 2;
-            break;
+        } else {
+            /* LDGM */
+            if (s->current_el == 0 || offset != 0) {
+                goto do_unallocated;
+            }
+            is_mult = is_load = true;
         }
-        goto do_unallocated;
+        break;
 
     default:
     do_unallocated:
@@ -3651,7 +3666,16 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
     clean_addr = clean_data_tbi(s, dirty_addr, false);
     tcg_rt = cpu_reg(s, rt);
 
-    if (is_load) {
+    if (is_mult) {
+        if (is_load) {
+            gen_helper_ldgm(tcg_rt, cpu_env, clean_addr);
+        } else if (is_zero) {
+            gen_helper_stzgm(cpu_env, clean_addr, tcg_rt);
+        } else {
+            gen_helper_stgm(cpu_env, clean_addr, tcg_rt);
+        }
+        return;
+    } else if (is_load) {
         gen_helper_ldg(tcg_rt, cpu_env, clean_addr, tcg_rt);
     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
         if (is_pair) {
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 14/22] target/arm: Implement the access tag cache flushes
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (12 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 13/22] target/arm: Implement the LDGM and STGM instructions Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 17:49   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 15/22] target/arm: Clean address for DC ZVA Richard Henderson
                   ` (9 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Like the regular data cache flushes, these are nops within qemu.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.c | 48 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index f435a8d8bd..33bc176e1c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5978,6 +5978,54 @@ static const ARMCPRegInfo mte_reginfo[] = {
     { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
       .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS },
+    { .name = "IGVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "IGSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "IGDVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "IGDSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGDSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CIGSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CIGDSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGDVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGVAP", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGDVAP", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGVADP", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CGDVADP", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CIGVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "CIGDVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL1_W },
     REGINFO_SENTINEL
 };
 
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 15/22] target/arm: Clean address for DC ZVA
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (13 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 14/22] target/arm: Implement the access tag cache flushes Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 17:54   ` Peter Maydell
  2019-12-05 18:58   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 16/22] target/arm: Implement data cache set allocation tags Richard Henderson
                   ` (8 subsequent siblings)
  23 siblings, 2 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

This data access was forgotten in the previous patch.

Fixes: 3a471103ac1823ba
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4e049bb4aa..49817b96ae 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1766,7 +1766,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         return;
     case ARM_CP_DC_ZVA:
         /* Writes clear the aligned block of memory which rt points into. */
-        tcg_rt = cpu_reg(s, rt);
+        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
         gen_helper_dc_zva(cpu_env, tcg_rt);
         return;
     default:
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 16/22] target/arm: Implement data cache set allocation tags
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (14 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 15/22] target/arm: Clean address for DC ZVA Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 18:17   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 17/22] target/arm: Set PSTATE.TCO on exception entry Richard Henderson
                   ` (7 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

This is DC GVA and DC GZVA.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Use allocation_tag_mem + memset.
v3: Require pre-cleaned addresses.
---
 target/arm/cpu.h           |  4 +++-
 target/arm/helper-a64.h    |  1 +
 target/arm/helper.c        | 16 ++++++++++++++++
 target/arm/mte_helper.c    | 28 ++++++++++++++++++++++++++++
 target/arm/translate-a64.c |  9 +++++++++
 5 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d99bb5e956..93a362708b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2233,7 +2233,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
 #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
 #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
-#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
+#define ARM_CP_DC_GVA            (ARM_CP_SPECIAL | 0x0600)
+#define ARM_CP_DC_GZVA           (ARM_CP_SPECIAL | 0x0700)
+#define ARM_LAST_SPECIAL         ARM_CP_DC_GZVA
 #define ARM_CP_FPU               0x1000
 #define ARM_CP_SVE               0x2000
 #define ARM_CP_NO_GDB            0x4000
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 405aa60016..c82605b51e 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -118,3 +118,4 @@ DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_3(stzgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(dc_gva, TCG_CALL_NO_RWG, void, env, i64)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 33bc176e1c..eec9064d88 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6026,6 +6026,22 @@ static const ARMCPRegInfo mte_reginfo[] = {
     { .name = "CIGDVAC", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5,
       .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "GVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3,
+      .access = PL0_W, .type = ARM_CP_DC_GVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
+    { .name = "GZVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4,
+      .access = PL0_W, .type = ARM_CP_DC_GZVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
     REGINFO_SENTINEL
 };
 
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index f1315bae37..e8d8a6bedb 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -510,3 +510,31 @@ void HELPER(stzgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
         }
     }
 }
+
+void HELPER(dc_gva)(CPUARMState *env, uint64_t ptr)
+{
+    ARMCPU *cpu = env_archcpu(env);
+    size_t blocklen = 4 << cpu->dcz_blocksize;
+    int el;
+    uint64_t sctlr;
+    uint8_t *mem;
+    int rtag;
+
+    ptr = QEMU_ALIGN_DOWN(ptr, blocklen);
+
+    /* Trap if accessing an invalid page.  */
+    mem = allocation_tag_mem(env, ptr, true, GETPC());
+
+    /* No action if page does not support tags, or if access is disabled.  */
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {
+        return;
+    }
+
+    rtag = allocation_tag_from_addr(ptr);
+    rtag |= rtag << 4;
+
+    assert(QEMU_IS_ALIGNED(blocklen, 2 * TAG_GRANULE));
+    memset(mem, rtag, blocklen / (2 * TAG_GRANULE));
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 49817b96ae..31260f97f9 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1769,6 +1769,15 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
         gen_helper_dc_zva(cpu_env, tcg_rt);
         return;
+    case ARM_CP_DC_GVA:
+        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
+        gen_helper_dc_gva(cpu_env, tcg_rt);
+        return;
+    case ARM_CP_DC_GZVA:
+        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
+        gen_helper_dc_zva(cpu_env, tcg_rt);
+        gen_helper_dc_gva(cpu_env, tcg_rt);
+        return;
     default:
         break;
     }
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 17/22] target/arm: Set PSTATE.TCO on exception entry
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (15 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 16/22] target/arm: Implement data cache set allocation tags Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-10-11 13:47 ` [PATCH v5 18/22] target/arm: Enable MTE Richard Henderson
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

D1.10 specifies that exception handlers begin with tag checks overridden.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Only set if MTE feature present.
---
 target/arm/helper.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index eec9064d88..e988398fce 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8401,6 +8401,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
     target_ulong addr = env->cp15.vbar_el[new_el];
     unsigned int new_mode = aarch64_pstate_mode(new_el, true);
     unsigned int cur_el = arm_current_el(env);
+    unsigned int new_pstate;
 
     /*
      * Note that new_el can never be 0.  If cur_el is 0, then
@@ -8494,7 +8495,11 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
     qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n",
                   env->elr_el[new_el]);
 
-    pstate_write(env, PSTATE_DAIF | new_mode);
+    new_pstate = new_mode | PSTATE_DAIF;
+    if (cpu_isar_feature(aa64_mte, cpu)) {
+        new_pstate |= PSTATE_TCO;
+    }
+    pstate_write(env, new_pstate);
     env->aarch64 = 1;
     aarch64_restore_sp(env, new_el);
 
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 18/22] target/arm: Enable MTE
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (16 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 17/22] target/arm: Set PSTATE.TCO on exception entry Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 18:23   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 19/22] target/arm: Cache the Tagged bit for a page in MemTxAttrs Richard Henderson
                   ` (5 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

We now implement all of the components of MTE, without actually
supporting any tagged memory.  All MTE instructions will work,
trivially, so we can enable support.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.c   | 10 ++++++++++
 target/arm/cpu64.c |  1 +
 2 files changed, 11 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 2399c14471..12fffa3ee4 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -209,6 +209,16 @@ static void arm_cpu_reset(CPUState *s)
          * make no difference to the user-level emulation.
          */
         env->cp15.tcr_el[1].raw_tcr = (3ULL << 37);
+        /* Enable MTE allocation tags.  */
+        env->cp15.hcr_el2 |= HCR_ATA;
+        env->cp15.scr_el3 |= SCR_ATA;
+        env->cp15.sctlr_el[1] |= SCTLR_ATA0;
+        /* Enable synchronous tag check failures.  */
+        env->cp15.sctlr_el[1] |= 1ull << 38;
+#ifdef TARGET_AARCH64
+        /* Set MTE seed to non-zero value, otherwise RandomTag fails.  */
+        env->cp15.rgsr_el1 = 0x123400;
+#endif
 #else
         /* Reset into the highest available EL */
         if (arm_feature(env, ARM_FEATURE_EL3)) {
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index d7f5bf610a..ac1e2dc2c4 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -350,6 +350,7 @@ static void aarch64_max_initfn(Object *obj)
 
         t = cpu->isar.id_aa64pfr1;
         t = FIELD_DP64(t, ID_AA64PFR1, BT, 1);
+        t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
         cpu->isar.id_aa64pfr1 = t;
 
         t = cpu->isar.id_aa64mmfr1;
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 19/22] target/arm: Cache the Tagged bit for a page in MemTxAttrs
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (17 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 18/22] target/arm: Enable MTE Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 18:32   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled Richard Henderson
                   ` (4 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

This "bit" is a particular value of the page's MemAttr.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index e988398fce..17981d7c48 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -9609,6 +9609,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     uint64_t descaddrmask;
     bool aarch64 = arm_el_is_aa64(env, el);
     bool guarded = false;
+    uint8_t memattr;
 
     /* TODO:
      * This code does not handle the different format TCR for VTCR_EL2.
@@ -9836,17 +9837,21 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         txattrs->target_tlb_bit0 = true;
     }
 
+    if (mmu_idx == ARMMMUIdx_S2NS) {
+        memattr = convert_stage2_attrs(env, extract32(attrs, 0, 4));
+    } else {
+        /* Index into MAIR registers for cache attributes */
+        uint64_t mair = env->cp15.mair_el[el];
+        memattr = extract64(mair, extract32(attrs, 0, 3) * 8, 8);
+    }
+
+    /* When in aarch64 mode, and MTE is enabled, remember Tagged in IOTLB.  */
+    if (aarch64 && memattr == 0xf0 && cpu_isar_feature(aa64_mte, cpu)) {
+        txattrs->target_tlb_bit1 = true;
+    }
+
     if (cacheattrs != NULL) {
-        if (mmu_idx == ARMMMUIdx_S2NS) {
-            cacheattrs->attrs = convert_stage2_attrs(env,
-                                                     extract32(attrs, 0, 4));
-        } else {
-            /* Index into MAIR registers for cache attributes */
-            uint8_t attrindx = extract32(attrs, 0, 3);
-            uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
-            assert(attrindx <= 7);
-            cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
-        }
+        cacheattrs->attrs = memattr;
         cacheattrs->shareability = extract32(attrs, 6, 2);
     }
 
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (18 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 19/22] target/arm: Cache the Tagged bit for a page in MemTxAttrs Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-05 18:40   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory Richard Henderson
                   ` (3 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v5: Assign cs->num_ases to the final value first.
    Downgrade to ID_AA64PFR1.MTE=1 if tag memory is not available.
v6: Add secure tag memory for EL3.
---
 target/arm/cpu.h |  6 ++++++
 hw/arm/virt.c    | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 target/arm/cpu.c | 53 ++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 93a362708b..faca43ea78 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -765,6 +765,10 @@ struct ARMCPU {
     /* MemoryRegion to use for secure physical accesses */
     MemoryRegion *secure_memory;
 
+    /* MemoryRegion to use for allocation tag accesses */
+    MemoryRegion *tag_memory;
+    MemoryRegion *secure_tag_memory;
+
     /* For v8M, pointer to the IDAU interface provided by board/SoC */
     Object *idau;
 
@@ -2956,6 +2960,8 @@ int cpu_mmu_index(CPUARMState *env, bool ifetch);
 typedef enum ARMASIdx {
     ARMASIdx_NS = 0,
     ARMASIdx_S = 1,
+    ARMASIdx_TagNS = 2,
+    ARMASIdx_TagS = 3,
 } ARMASIdx;
 
 /* Return the Exception Level targeted by debug exceptions. */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d74538b021..573988ba4d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1330,6 +1330,18 @@ static void create_secure_ram(VirtMachineState *vms,
     g_free(nodename);
 }
 
+static void create_tag_ram(MemoryRegion *tag_sysmem,
+                           hwaddr base, hwaddr size,
+                           const char *name)
+{
+    MemoryRegion *tagram = g_new(MemoryRegion, 1);
+
+    memory_region_init_ram(tagram, NULL, name, size / 32, &error_fatal);
+    memory_region_add_subregion(tag_sysmem, base / 32, tagram);
+
+    /* ??? Do we really need an fdt entry, or is MemTag enabled sufficient.  */
+}
+
 static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
 {
     const VirtMachineState *board = container_of(binfo, VirtMachineState,
@@ -1485,6 +1497,8 @@ static void machvirt_init(MachineState *machine)
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *secure_sysmem = NULL;
+    MemoryRegion *tag_sysmem = NULL;
+    MemoryRegion *secure_tag_sysmem = NULL;
     int n, virt_max_cpus;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     bool firmware_loaded;
@@ -1652,6 +1666,35 @@ static void machvirt_init(MachineState *machine)
                                      "secure-memory", &error_abort);
         }
 
+        /*
+         * The cpu adds the property iff MemTag is supported.
+         * If it is, we must allocate the ram to back that up.
+         */
+        if (object_property_find(cpuobj, "tag-memory", NULL)) {
+            if (!tag_sysmem) {
+                tag_sysmem = g_new(MemoryRegion, 1);
+                memory_region_init(tag_sysmem, OBJECT(machine),
+                                   "tag-memory", UINT64_MAX / 32);
+
+                if (vms->secure) {
+                    secure_tag_sysmem = g_new(MemoryRegion, 1);
+                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
+                                       "secure-tag-memory", UINT64_MAX / 32);
+
+                    /* As with ram, secure-tag takes precedence over tag.  */
+                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
+                                                        tag_sysmem, -1);
+                }
+            }
+
+            object_property_set_link(cpuobj, OBJECT(tag_sysmem),
+                                     "tag-memory", &error_abort);
+            if (vms->secure) {
+                object_property_set_link(cpuobj, OBJECT(secure_tag_sysmem),
+                                         "secure-tag-memory", &error_abort);
+            }
+        }
+
         object_property_set_bool(cpuobj, true, "realized", &error_fatal);
         object_unref(cpuobj);
     }
@@ -1695,6 +1738,17 @@ static void machvirt_init(MachineState *machine)
         create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
     }
 
+    if (tag_sysmem) {
+        create_tag_ram(tag_sysmem, vms->memmap[VIRT_MEM].base,
+                       machine->ram_size, "mach-virt.tag");
+        if (vms->secure) {
+            create_tag_ram(secure_tag_sysmem,
+                           vms->memmap[VIRT_SECURE_MEM].base,
+                           vms->memmap[VIRT_SECURE_MEM].size,
+                           "mach-virt.secure-tag");
+        }
+    }
+
     vms->highmem_ecam &= vms->highmem && (!firmware_loaded || aarch64);
 
     create_rtc(vms, pic);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 12fffa3ee4..a3a49cd5bf 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1182,6 +1182,27 @@ void arm_cpu_post_init(Object *obj)
 
     qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property,
                              &error_abort);
+
+#ifndef CONFIG_USER_ONLY
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) &&
+        cpu_isar_feature(aa64_mte, cpu)) {
+        object_property_add_link(obj, "tag-memory",
+                                 TYPE_MEMORY_REGION,
+                                 (Object **)&cpu->tag_memory,
+                                 qdev_prop_allow_set_link_before_realize,
+                                 OBJ_PROP_LINK_STRONG,
+                                 &error_abort);
+
+        if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) {
+            object_property_add_link(obj, "secure-tag-memory",
+                                     TYPE_MEMORY_REGION,
+                                     (Object **)&cpu->secure_tag_memory,
+                                     qdev_prop_allow_set_link_before_realize,
+                                     OBJ_PROP_LINK_STRONG,
+                                     &error_abort);
+        }
+    }
+#endif
 }
 
 static void arm_cpu_finalizefn(Object *obj)
@@ -1632,17 +1653,43 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     MachineState *ms = MACHINE(qdev_get_machine());
     unsigned int smp_cpus = ms->smp.cpus;
 
-    if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+    /*
+     * We must set cs->num_ases to the final value before
+     * the first call to cpu_address_space_init.
+     */
+    if (cpu->tag_memory != NULL) {
+        cs->num_ases = 4;
+    } else if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
         cs->num_ases = 2;
+    } else {
+        cs->num_ases = 1;
+    }
 
+    if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
         if (!cpu->secure_memory) {
             cpu->secure_memory = cs->memory;
         }
         cpu_address_space_init(cs, ARMASIdx_S, "cpu-secure-memory",
                                cpu->secure_memory);
-    } else {
-        cs->num_ases = 1;
     }
+
+    if (cpu->tag_memory != NULL) {
+        cpu_address_space_init(cs, ARMASIdx_TagNS, "cpu-tag-memory",
+                               cpu->tag_memory);
+        if (cpu->has_el3) {
+            cpu_address_space_init(cs, ARMASIdx_TagS, "cpu-tag-memory",
+                                   cpu->secure_tag_memory);
+        }
+    } else if (cpu_isar_feature(aa64_mte, cpu)) {
+        /*
+         * Since there is no tag memory, we can't meaningfully support MTE
+         * to its fullest.  To avoid problems later, when we would come to
+         * use the tag memory, downgrade support to insns only.
+         */
+        cpu->isar.id_aa64pfr1 =
+            FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
+    }
+
     cpu_address_space_init(cs, ARMASIdx_NS, "cpu-memory", cs->memory);
 
     /* No core_count specified, default to smp_cpus. */
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (19 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-06 11:46   ` Peter Maydell
  2019-10-11 13:47 ` [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode Richard Henderson
                   ` (2 subsequent siblings)
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

The process by which one goes from an address space plus physical
address to a host pointer is complex.  It is easiest to reuse the
mechanism already present within cputlb, and letting that cache
the results.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu-param.h |  2 +-
 target/arm/cpu.h       | 12 +++++++++---
 target/arm/internals.h |  2 ++
 target/arm/helper.c    | 25 +++++++++++++++++++++++--
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
index 6e6948e960..18ac562346 100644
--- a/target/arm/cpu-param.h
+++ b/target/arm/cpu-param.h
@@ -29,6 +29,6 @@
 # define TARGET_PAGE_BITS_MIN  10
 #endif
 
-#define NB_MMU_MODES 8
+#define NB_MMU_MODES 9
 
 #endif
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index faca43ea78..c3609ef9d5 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2854,8 +2854,8 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
 #define ARM_MMU_IDX_M_NEGPRI 0x2
 #define ARM_MMU_IDX_M_S 0x4
 
-#define ARM_MMU_IDX_TYPE_MASK (~0x7)
-#define ARM_MMU_IDX_COREIDX_MASK 0x7
+#define ARM_MMU_IDX_TYPE_MASK (~0xf)
+#define ARM_MMU_IDX_COREIDX_MASK 0xf
 
 typedef enum ARMMMUIdx {
     ARMMMUIdx_S12NSE0 = 0 | ARM_MMU_IDX_A,
@@ -2865,6 +2865,9 @@ typedef enum ARMMMUIdx {
     ARMMMUIdx_S1SE0 = 4 | ARM_MMU_IDX_A,
     ARMMMUIdx_S1SE1 = 5 | ARM_MMU_IDX_A,
     ARMMMUIdx_S2NS = 6 | ARM_MMU_IDX_A,
+    ARMMMUIdx_TagNS = 7 | ARM_MMU_IDX_A,
+    ARMMMUIdx_TagS = 8 | ARM_MMU_IDX_A,
+
     ARMMMUIdx_MUser = 0 | ARM_MMU_IDX_M,
     ARMMMUIdx_MPriv = 1 | ARM_MMU_IDX_M,
     ARMMMUIdx_MUserNegPri = 2 | ARM_MMU_IDX_M,
@@ -2891,6 +2894,8 @@ typedef enum ARMMMUIdxBit {
     ARMMMUIdxBit_S1SE0 = 1 << 4,
     ARMMMUIdxBit_S1SE1 = 1 << 5,
     ARMMMUIdxBit_S2NS = 1 << 6,
+    ARMMMUIdxBit_TagNS = 1 << 7,
+    ARMMMUIdxBit_TagS = 1 << 8,
     ARMMMUIdxBit_MUser = 1 << 0,
     ARMMMUIdxBit_MPriv = 1 << 1,
     ARMMMUIdxBit_MUserNegPri = 1 << 2,
@@ -3254,7 +3259,8 @@ enum {
 /* Return the address space index to use for a memory access */
 static inline int arm_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs)
 {
-    return attrs.secure ? ARMASIdx_S : ARMASIdx_NS;
+    return ((attrs.target_tlb_bit2 ? ARMASIdx_TagNS : ARMASIdx_NS)
+            + attrs.secure);
 }
 
 /* Return the AddressSpace to use for a memory access
diff --git a/target/arm/internals.h b/target/arm/internals.h
index a434743b15..dfa395eb35 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -828,6 +828,7 @@ static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx)
     case ARMMMUIdx_S1NSE1:
     case ARMMMUIdx_S1E2:
     case ARMMMUIdx_S2NS:
+    case ARMMMUIdx_TagNS:
     case ARMMMUIdx_MPrivNegPri:
     case ARMMMUIdx_MUserNegPri:
     case ARMMMUIdx_MPriv:
@@ -836,6 +837,7 @@ static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx)
     case ARMMMUIdx_S1E3:
     case ARMMMUIdx_S1SE0:
     case ARMMMUIdx_S1SE1:
+    case ARMMMUIdx_TagS:
     case ARMMMUIdx_MSPrivNegPri:
     case ARMMMUIdx_MSUserNegPri:
     case ARMMMUIdx_MSPriv:
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 17981d7c48..3147469899 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8658,9 +8658,18 @@ static inline bool regime_translation_disabled(CPUARMState *env,
         }
     }
 
-    if (mmu_idx == ARMMMUIdx_S2NS) {
+    switch (mmu_idx) {
+    case ARMMMUIdx_S2NS:
         /* HCR.DC means HCR.VM behaves as 1 */
         return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0;
+
+    case ARMMMUIdx_TagS:
+    case ARMMMUIdx_TagNS:
+        /* These indexes are qemu internal, and are physically mapped.  */
+        return true;
+
+    default:
+        break;
     }
 
     if (env->cp15.hcr_el2 & HCR_TGE) {
@@ -10662,7 +10671,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
                    target_ulong *page_size,
                    ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
 {
-    if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
+    switch (mmu_idx) {
+    case ARMMMUIdx_S12NSE0:
+    case ARMMMUIdx_S12NSE1:
         /* Call ourselves recursively to do the stage 1 and then stage 2
          * translations.
          */
@@ -10713,6 +10724,16 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
              */
             mmu_idx = stage_1_mmu_idx(mmu_idx);
         }
+        break;
+
+    case ARMMMUIdx_TagS:
+    case ARMMMUIdx_TagNS:
+        /* Indicate tag memory to arm_asidx_from_attrs.  */
+        attrs->target_tlb_bit2 = true;
+        break;
+
+    default:
+        break;
     }
 
     /* The page table entries may downgrade secure to non-secure, but
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (20 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory Richard Henderson
@ 2019-10-11 13:47 ` Richard Henderson
  2019-12-06 13:02   ` Peter Maydell
  2019-10-11 19:32 ` [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, " no-reply
  2019-10-15 20:39 ` Evgenii Stepanov
  23 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-10-11 13:47 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/mte_helper.c | 61 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index e8d8a6bedb..657383ba0e 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -28,8 +28,69 @@
 static uint8_t *allocation_tag_mem(CPUARMState *env, uint64_t ptr,
                                    bool write, uintptr_t ra)
 {
+#ifdef CONFIG_USER_ONLY
     /* Tag storage not implemented.  */
     return NULL;
+#else
+    CPUState *cs = env_cpu(env);
+    uintptr_t index;
+    int mmu_idx;
+    CPUTLBEntry *entry;
+    CPUIOTLBEntry *iotlbentry;
+    MemoryRegionSection *section;
+    hwaddr physaddr, tag_physaddr;
+
+    /*
+     * Find the TLB entry for this access.
+     * As a side effect, this also raises an exception for invalid access.
+     *
+     * TODO: Perhaps there should be a cputlb helper that returns a
+     * matching tlb entry + iotlb entry.  That would also be able to
+     * make use of the victim tlb cache, which is currently private.
+     */
+    mmu_idx = cpu_mmu_index(env, false);
+    index = tlb_index(env, mmu_idx, ptr);
+    entry = tlb_entry(env, mmu_idx, ptr);
+    if (!tlb_hit(write ? tlb_addr_write(entry) : entry->addr_read, ptr)) {
+        bool ok = arm_cpu_tlb_fill(cs, ptr, 16,
+                                   write ? MMU_DATA_STORE : MMU_DATA_LOAD,
+                                   mmu_idx, false, ra);
+        assert(ok);
+        index = tlb_index(env, mmu_idx, ptr);
+        entry = tlb_entry(env, mmu_idx, ptr);
+    }
+
+    /* If the virtual page MemAttr != Tagged, nothing to do.  */
+    iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+    if (!iotlbentry->attrs.target_tlb_bit1) {
+        return NULL;
+    }
+
+    /*
+     * Find the physical address for the virtual access.
+     *
+     * TODO: It should be possible to have the tag mmu_idx map
+     * from main memory ram_addr to tag memory host address.
+     * that would allow this lookup step to be cached as well.
+     */
+    section = iotlb_to_section(cs, iotlbentry->addr, iotlbentry->attrs);
+    physaddr = ((iotlbentry->addr & TARGET_PAGE_MASK) + ptr
+                + section->offset_within_address_space
+                - section->offset_within_region);
+
+    /* Convert to the physical address in tag space.  */
+    tag_physaddr = physaddr >> (LOG2_TAG_GRANULE + 1);
+
+    /* Choose the tlb index to use for the tag physical access.  */
+    mmu_idx = iotlbentry->attrs.secure ? ARMMMUIdx_TagS : ARMMMUIdx_TagNS;
+    mmu_idx = arm_to_core_mmu_idx(mmu_idx);
+
+    /*
+     * FIXME: Get access length and type so that we can use
+     * probe_access, so that pages are marked dirty for migration.
+     */
+    return tlb_vaddr_to_host(env, tag_physaddr, MMU_DATA_LOAD, mmu_idx);
+#endif
 }
 
 static int get_allocation_tag(CPUARMState *env, uint64_t ptr, uintptr_t ra)
-- 
2.17.1



^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (21 preceding siblings ...)
  2019-10-11 13:47 ` [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode Richard Henderson
@ 2019-10-11 19:32 ` " no-reply
  2019-10-15 20:39 ` Evgenii Stepanov
  23 siblings, 0 replies; 53+ messages in thread
From: no-reply @ 2019-10-11 19:32 UTC (permalink / raw)
  To: richard.henderson; +Cc: peter.maydell, qemu-arm, qemu-devel

Patchew URL: https://patchew.org/QEMU/20191011134744.2477-1-richard.henderson@linaro.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode
Type: series
Message-id: 20191011134744.2477-1-richard.henderson@linaro.org

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
2cb4ac6 target/arm: Add allocation tag storage for system mode
04c91b4 target/arm: Add mmu indexes for tag memory
1e0ff9d target/arm: Create tagged ram when MTE is enabled
b751fc4 target/arm: Cache the Tagged bit for a page in MemTxAttrs
9ac60fd target/arm: Enable MTE
513b429 target/arm: Set PSTATE.TCO on exception entry
b34fae3 target/arm: Implement data cache set allocation tags
e821379 target/arm: Clean address for DC ZVA
aff90e1 target/arm: Implement the access tag cache flushes
0caee2b target/arm: Implement the LDGM and STGM instructions
2b4a576 target/arm: Implement the STGP instruction
8dc4ae2 target/arm: Implement LDG, STG, ST2G instructions
2f4a984 target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY
3af0a57 target/arm: Implement the SUBP instruction
b26b9b0 target/arm: Implement the GMI instruction
83744f3 target/arm: Implement ADDG, SUBG instructions
454811d target/arm: Implement the IRG instruction
3570a15 target/arm: Suppress tag check for sp+offset
1409fa4 target/arm: Add helper_mte_check{1,2,3}
bbbd12d target/arm: Add MTE system registers
bac1b74 target/arm: Add regime_has_2_ranges
498eda0 target/arm: Add MTE_ACTIVE to tb_flags

=== OUTPUT BEGIN ===
1/22 Checking commit 498eda06038b (target/arm: Add MTE_ACTIVE to tb_flags)
ERROR: spaces prohibited around that ':' (ctx:WxW)
#214: FILE: target/arm/internals.h:986:
+    bool tcma       : 1;
                     ^

total: 1 errors, 0 warnings, 213 lines checked

Patch 1/22 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/22 Checking commit bac1b74f8ee9 (target/arm: Add regime_has_2_ranges)
3/22 Checking commit bbbd12d06479 (target/arm: Add MTE system registers)
4/22 Checking commit 1409fa4954ca (target/arm: Add helper_mte_check{1,2,3})
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#39: 
new file mode 100644

total: 0 errors, 1 warnings, 199 lines checked

Patch 4/22 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
5/22 Checking commit 3570a158fe8a (target/arm: Suppress tag check for sp+offset)
6/22 Checking commit 454811d5fbee (target/arm: Implement the IRG instruction)
7/22 Checking commit 83744f3effbf (target/arm: Implement ADDG, SUBG instructions)
8/22 Checking commit b26b9b0ff90d (target/arm: Implement the GMI instruction)
9/22 Checking commit 3af0a5757328 (target/arm: Implement the SUBP instruction)
10/22 Checking commit 2f4a98446331 (target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY)
11/22 Checking commit 8dc4ae2e080d (target/arm: Implement LDG, STG, ST2G instructions)
12/22 Checking commit 2b4a576928a6 (target/arm: Implement the STGP instruction)
13/22 Checking commit 0caee2b19728 (target/arm: Implement the LDGM and STGM instructions)
14/22 Checking commit aff90e1ac887 (target/arm: Implement the access tag cache flushes)
15/22 Checking commit e82137999567 (target/arm: Clean address for DC ZVA)
16/22 Checking commit b34fae36a523 (target/arm: Implement data cache set allocation tags)
17/22 Checking commit 513b42914b4f (target/arm: Set PSTATE.TCO on exception entry)
18/22 Checking commit 9ac60fd7222d (target/arm: Enable MTE)
19/22 Checking commit b751fc4415fe (target/arm: Cache the Tagged bit for a page in MemTxAttrs)
20/22 Checking commit 1e0ff9daa24d (target/arm: Create tagged ram when MTE is enabled)
21/22 Checking commit 04c91b4cfe18 (target/arm: Add mmu indexes for tag memory)
22/22 Checking commit 2cb4ac638a77 (target/arm: Add allocation tag storage for system mode)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191011134744.2477-1-richard.henderson@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode
  2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
                   ` (22 preceding siblings ...)
  2019-10-11 19:32 ` [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, " no-reply
@ 2019-10-15 20:39 ` Evgenii Stepanov
  2019-10-15 22:04   ` Richard Henderson
  23 siblings, 1 reply; 53+ messages in thread
From: Evgenii Stepanov @ 2019-10-15 20:39 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Peter Maydell, qemu-arm, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 8451 bytes --]

Hi,

please find attached three random fixes for instruction translation
and one for syscall emulation.

On Fri, Oct 11, 2019 at 6:48 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This is an update of the v4 patch from March.
>
> I believe I've fixed the address space issues that Peter noticed.
> If the board model does not supply tag memory, then I downgrade
> the cpu support to "instructions only" (id_aa64pfr1.mte == 1),
> which does not allow tag memory access to be enabled in the cpu.
>
> I did not update the arm_hcr_el2_eff comment re ARMv8.4, because
> I have not done a complete audit of all of the v8.5 bits.
>
> The hacky kernel testing patch has needed some updates since March.
> The following applies to v5.4-rc2.
>
>
> r~
>
>
> Richard Henderson (22):
>   target/arm: Add MTE_ACTIVE to tb_flags
>   target/arm: Add regime_has_2_ranges
>   target/arm: Add MTE system registers
>   target/arm: Add helper_mte_check{1,2,3}
>   target/arm: Suppress tag check for sp+offset
>   target/arm: Implement the IRG instruction
>   target/arm: Implement ADDG, SUBG instructions
>   target/arm: Implement the GMI instruction
>   target/arm: Implement the SUBP instruction
>   target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY
>   target/arm: Implement LDG, STG, ST2G instructions
>   target/arm: Implement the STGP instruction
>   target/arm: Implement the LDGM and STGM instructions
>   target/arm: Implement the access tag cache flushes
>   target/arm: Clean address for DC ZVA
>   target/arm: Implement data cache set allocation tags
>   target/arm: Set PSTATE.TCO on exception entry
>   target/arm: Enable MTE
>   target/arm: Cache the Tagged bit for a page in MemTxAttrs
>   target/arm: Create tagged ram when MTE is enabled
>   target/arm: Add mmu indexes for tag memory
>   target/arm: Add allocation tag storage for system mode
>
>  target/arm/cpu-param.h     |   2 +-
>  target/arm/cpu.h           |  37 ++-
>  target/arm/helper-a64.h    |  17 ++
>  target/arm/internals.h     |  45 +++
>  target/arm/translate.h     |   2 +
>  hw/arm/virt.c              |  54 ++++
>  target/arm/cpu.c           |  63 +++-
>  target/arm/cpu64.c         |   1 +
>  target/arm/helper.c        | 277 ++++++++++++++---
>  target/arm/mte_helper.c    | 601 +++++++++++++++++++++++++++++++++++++
>  target/arm/tlb_helper.c    |   3 +-
>  target/arm/translate-a64.c | 342 ++++++++++++++++++---
>  target/arm/Makefile.objs   |   1 +
>  13 files changed, 1345 insertions(+), 100 deletions(-)
>  create mode 100644 target/arm/mte_helper.c
>
> --- kernel patch
>
> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index f19fe4b9acc4..ee6b7f387a9a 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -52,7 +52,8 @@
>  #define ARM64_HAS_IRQ_PRIO_MASKING             42
>  #define ARM64_HAS_DCPODP                       43
>  #define ARM64_WORKAROUND_1463225               44
> +#define ARM64_HAS_MTE                          45
>
> -#define ARM64_NCAPS                            45
> +#define ARM64_NCAPS                            46
>
>  #endif /* __ASM_CPUCAPS_H */
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index ddf9d762ac62..5825130bd8eb 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -12,6 +12,7 @@
>  #include <asm/types.h>
>
>  /* Hyp Configuration Register (HCR) bits */
> +#define HCR_ATA                (UL(1) << 56)
>  #define HCR_FWB                (UL(1) << 46)
>  #define HCR_API                (UL(1) << 41)
>  #define HCR_APK                (UL(1) << 40)
> @@ -78,8 +79,8 @@
>                          HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
>                          HCR_FMO | HCR_IMO)
>  #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
> -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
> -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
> +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
> +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_ATA)
>
>  /* TCR_EL2 Registers bits */
>  #define TCR_EL2_RES1           ((1 << 31) | (1 << 23))
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 972d196c7714..2a65831f6e0f 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -482,6 +482,7 @@
>
>  /* Common SCTLR_ELx flags. */
>  #define SCTLR_ELx_DSSBS        (BIT(44))
> +#define SCTLR_ELx_ATA  (BIT(43))
>  #define SCTLR_ELx_ENIA (BIT(31))
>  #define SCTLR_ELx_ENIB (BIT(30))
>  #define SCTLR_ELx_ENDA (BIT(27))
> @@ -510,6 +511,7 @@
>  #endif
>
>  /* SCTLR_EL1 specific flags. */
> +#define SCTLR_EL1_ATA0         (BIT(42))
>  #define SCTLR_EL1_UCI          (BIT(26))
>  #define SCTLR_EL1_E0E          (BIT(24))
>  #define SCTLR_EL1_SPAN         (BIT(23))
> @@ -598,6 +600,7 @@
>  #define ID_AA64PFR0_EL0_32BIT_64BIT    0x2
>
>  /* id_aa64pfr1 */
> +#define ID_AA64PFR1_MTE_SHIFT          8
>  #define ID_AA64PFR1_SSBS_SHIFT         4
>
>  #define ID_AA64PFR1_SSBS_PSTATE_NI     0
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index cabebf1a7976..6a122ed7f76b 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -171,6 +171,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
>  };
>
>  static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
> +       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, 0),
>         ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
>         ARM64_FTR_END,
>  };
> @@ -1261,6 +1262,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
>  }
>  #endif
>
> +static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
> +{
> +       sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
> +}
> +
>  static const struct arm64_cpu_capabilities arm64_features[] = {
>         {
>                 .desc = "GIC system register CPU interface",
> @@ -1561,6 +1567,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>                 .min_field_value = 1,
>         },
>  #endif
> +       {
> +               .desc = "Memory Tagging",
> +               .capability = ARM64_HAS_MTE,
> +               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +               .matches = has_cpuid_feature,
> +               .sys_reg = SYS_ID_AA64PFR1_EL1,
> +               .field_pos = ID_AA64PFR1_MTE_SHIFT,
> +               .sign = FTR_UNSIGNED,
> +               .min_field_value = 2,
> +               .cpu_enable = cpu_enable_mte,
> +       },
>         {},
>  };
>
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index a1e0592d1fbc..32cfa35195ae 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -424,14 +424,14 @@ ENTRY(__cpu_setup)
>          *   DEVICE_nGnRE       001     00000100
>          *   DEVICE_GRE         010     00001100
>          *   NORMAL_NC          011     01000100
> -        *   NORMAL             100     11111111
> +        *   NORMAL             100     11110000 (Tag)
>          *   NORMAL_WT          101     10111011
>          */
>         ldr     x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
>                      MAIR(0x04, MT_DEVICE_nGnRE) | \
>                      MAIR(0x0c, MT_DEVICE_GRE) | \
>                      MAIR(0x44, MT_NORMAL_NC) | \
> -                    MAIR(0xff, MT_NORMAL) | \
> +                    MAIR(0xf0, MT_NORMAL) | \
>                      MAIR(0xbb, MT_NORMAL_WT)
>         msr     mair_el1, x5
>         /*
>
> --- mte smoke test
>
> /*
>  * Memory tagging, basic pass cases.
>  */
>
> #include <stdio.h>
> #include <assert.h>
> #include <sys/mman.h>
>
> asm(".arch armv8.5-a+memtag");
>
> int data[16 / sizeof(int)] __attribute__((aligned(16)));
>
> int main(int ac, char **av)
> {
>     int *p0 = data;
>     int *p1, *p2;
>     long c;
>
>     if (mlock(data, sizeof(data)) < 0) {
>         perror("mlock");
>         return 1;
>     }
>
>     asm("irg %0,%1,%2" : "=r"(p1) : "r"(p0), "r"(1));
>     assert(p1 != p0);
>     asm("subp %0,%1,%2" : "=r"(c) : "r"(p0), "r"(p1));
>     assert(c == 0);
>
>     asm("stg %0, [%0]" : : "r"(p1));
>     asm("ldg %0, [%1]" : "=r"(p2) : "r"(p0), "0"(p0));
>     assert(p1 == p2);
>
>     return 0;
> }
>
>

[-- Attachment #2: 0004-Fix-pre-post-index-confusion-in-disas_ldst_tag.patch --]
[-- Type: text/x-patch, Size: 1111 bytes --]

From 7dfe3f53bc606d2c5bb81e5828e6cf32225f6b72 Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov <eugenis@google.com>
Date: Tue, 27 Aug 2019 16:14:37 -0700
Subject: [PATCH 4/4] Fix pre/post-index confusion in disas_ldst_tag.

---
 target/arm/translate-a64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index a85bae1f27..5728b68ccc 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3679,7 +3679,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
     }
 
     dirty_addr = read_cpu_reg_sp(s, rn, true);
-    if (index <= 0) {
+    if (index >= 0) {
         /* pre-index or signed offset */
         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
     }
@@ -3726,7 +3726,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
 
     if (index != 0) {
         /* pre-index or post-index */
-        if (index > 0) {
+        if (index < 0) {
             /* post-index */
             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
         }
-- 
2.23.0.700.g56cf767bdb-goog


[-- Attachment #3: 0003-Fix-wrong-field-size-in-disas_ldst_tag.patch --]
[-- Type: text/x-patch, Size: 944 bytes --]

From 1ecf4f7baedfe2de80a97c408fa2cc64ccd99dbe Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov <eugenis@google.com>
Date: Tue, 27 Aug 2019 16:13:26 -0700
Subject: [PATCH 3/4] Fix wrong field size in disas_ldst_tag.

---
 target/arm/translate-a64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0a168506d2..a85bae1f27 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3611,7 +3611,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
     int rt = extract32(insn, 0, 5);
     int rn = extract32(insn, 5, 5);
     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
-    int op2 = extract32(insn, 10, 3);
+    int op2 = extract32(insn, 10, 2);
     int op1 = extract32(insn, 22, 2);
     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
     int index = 0;
-- 
2.23.0.700.g56cf767bdb-goog


[-- Attachment #4: 0001-Fix-STGP-offset-scale.patch --]
[-- Type: text/x-patch, Size: 748 bytes --]

From a8a024202d1ba80142eacc09dab10c7780874582 Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov <eugenis@google.com>
Date: Thu, 22 Aug 2019 18:05:34 -0700
Subject: [PATCH 1/4] Fix STGP offset scale.

---
 target/arm/translate-a64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 9a554856e9..0a168506d2 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2763,7 +2763,8 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
         return;
     }
 
-    offset <<= size;
+    // STGP offset is 16-scaled.
+    offset <<= (size + set_tag);
 
     if (rn == 31) {
         gen_check_sp_alignment(s);
-- 
2.23.0.700.g56cf767bdb-goog


[-- Attachment #5: 0002-Untag-userspace-addresses-in-syscall-emulation.patch --]
[-- Type: text/x-patch, Size: 2133 bytes --]

From bde3007cbe33ccbbba4648c7ee093534be08ccd0 Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov <eugenis@google.com>
Date: Tue, 27 Aug 2019 16:12:38 -0700
Subject: [PATCH 2/4] Untag userspace addresses in syscall emulation.

---
 linux-user/qemu.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index aac0334627..a8f0a8eee9 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -456,8 +456,16 @@ extern unsigned long guest_stack_size;
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1 /* implies read access */
 
+static inline abi_ulong untagged_addr(abi_ulong addr) {
+#if TARGET_ABI_BITS == 64
+    addr &= (((abi_ulong)-1) >> 8);
+#endif
+    return addr;
+}
+
 static inline int access_ok(int type, abi_ulong addr, abi_ulong size)
 {
+    addr = untagged_addr(addr);
     return guest_addr_valid(addr) &&
            (size == 0 || guest_addr_valid(addr + size - 1)) &&
            page_check_range((target_ulong)addr, size,
@@ -601,6 +609,7 @@ static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy
 {
     if (!access_ok(type, guest_addr, len))
         return NULL;
+    guest_addr = untagged_addr(guest_addr);
 #ifdef DEBUG_REMAP
     {
         void *addr;
@@ -642,7 +651,7 @@ abi_long target_strlen(abi_ulong gaddr);
 static inline void *lock_user_string(abi_ulong guest_addr)
 {
     abi_long len;
-    len = target_strlen(guest_addr);
+    len = target_strlen(untagged_addr(guest_addr));
     if (len < 0)
         return NULL;
     return lock_user(VERIFY_READ, guest_addr, (long)(len + 1), 1);
@@ -650,7 +659,7 @@ static inline void *lock_user_string(abi_ulong guest_addr)
 
 /* Helper macros for locking/unlocking a target struct.  */
 #define lock_user_struct(type, host_ptr, guest_addr, copy)	\
-    (host_ptr = lock_user(type, guest_addr, sizeof(*host_ptr), copy))
+    (host_ptr = lock_user(type, untagged_addr(guest_addr), sizeof(*host_ptr), copy))
 #define unlock_user_struct(host_ptr, guest_addr, copy)		\
     unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0)
 
-- 
2.23.0.700.g56cf767bdb-goog


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode
  2019-10-15 20:39 ` Evgenii Stepanov
@ 2019-10-15 22:04   ` Richard Henderson
  0 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-10-15 22:04 UTC (permalink / raw)
  To: Evgenii Stepanov; +Cc: Peter Maydell, qemu-arm, qemu-devel

On 10/15/19 1:39 PM, Evgenii Stepanov wrote:
> Hi,
> 
> please find attached three random fixes for instruction translation
> and one for syscall emulation.

Thanks for the patches.

> @@ -2763,7 +2763,8 @@ static void disas_ldst_pair
>          return;
>      }
>  
> -    offset <<= size;
> +    // STGP offset is 16-scaled.
> +    offset <<= (size + set_tag);

Right.  I'll fix this with

    offset <<= (set_tag ? LOG2_TAG_GRANULE : size);

which I think is a bit clearer.

> @@ -3611,7 +3611,7 @@ static void disas_ldst_tag
>      int rt = extract32(insn, 0, 5);
>      int rn = extract32(insn, 5, 5);
>      uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
> -    int op2 = extract32(insn, 10, 3);
> +    int op2 = extract32(insn, 10, 2);

Yep.

> @@ -3679,7 +3679,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
>      }
>  
>      dirty_addr = read_cpu_reg_sp(s, rn, true);
> -    if (index <= 0) {
> +    if (index >= 0) {
>          /* pre-index or signed offset */
>          tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
>      }
> @@ -3726,7 +3726,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
>  
>      if (index != 0) {
>          /* pre-index or post-index */
> -        if (index > 0) {
> +        if (index < 0) {
>              /* post-index */
>              tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
>          }

Yep.

Ideally there'd be a kernel patch for MTE that works well enough to run RISU on
the fast model, and I'd be able to compare results.  I suppose in the meantime
more unit testing will have to do.

> +++ b/linux-user/qemu.h
> @@ -456,8 +456,16 @@ extern unsigned long guest_stack_size;
>  #define VERIFY_READ 0
>  #define VERIFY_WRITE 1 /* implies read access */
>  
> +static inline abi_ulong untagged_addr(abi_ulong addr) {
> +#if TARGET_ABI_BITS == 64
> +    addr &= (((abi_ulong)-1) >> 8);
> +#endif
> +    return addr;
> +}

At minimum this needs TARGET_AARCH64, because this kernel feature doesn't apply
to other targets.  But I'll see if I can do this such that it doesn't put
target-specific stuff in linux-user/qemu.h.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 02/22] target/arm: Add regime_has_2_ranges
  2019-10-11 13:47 ` [PATCH v5 02/22] target/arm: Add regime_has_2_ranges Richard Henderson
@ 2019-12-03 11:01   ` Peter Maydell
  2019-12-03 15:09     ` Richard Henderson
  0 siblings, 1 reply; 53+ messages in thread
From: Peter Maydell @ 2019-12-03 11:01 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:48, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> A translation with 2 ranges has both positive and negative addresses.
> This is true for the EL1&0 and the as-yet unimplemented EL2&0 regimes.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/internals.h     | 14 ++++++++++++++
>  target/arm/helper.c        | 22 +++++-----------------
>  target/arm/translate-a64.c |  3 +--
>  3 files changed, 20 insertions(+), 19 deletions(-)
>
> diff --git a/target/arm/internals.h b/target/arm/internals.h
> index dcc5d6cca3..9486680b87 100644
> --- a/target/arm/internals.h
> +++ b/target/arm/internals.h
> @@ -804,6 +804,20 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu)
>      }
>  }
>
> +/* Return true if this address translation regime has two ranges.  */
> +static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx)
> +{
> +    switch (mmu_idx) {
> +    case ARMMMUIdx_S12NSE0:
> +    case ARMMMUIdx_S12NSE1:
> +    case ARMMMUIdx_S1NSE0:
> +    case ARMMMUIdx_S1NSE1:
> +        return true;

Don't S1SE0 and S1SE1 also need to be here?

> +    default:
> +        return false;
> +    }
> +}
> +
>  /* Return true if this address translation regime is secure */
>  static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx)
>  {
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index b690eda136..f9dee51ede 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -8774,15 +8774,8 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
>      }
>
>      if (is_aa64) {
> -        switch (regime_el(env, mmu_idx)) {
> -        case 1:
> -            if (!is_user) {
> -                xn = pxn || (user_rw & PAGE_WRITE);
> -            }
> -            break;
> -        case 2:
> -        case 3:
> -            break;
> +        if (regime_has_2_ranges(mmu_idx) && !is_user) {
> +            xn = pxn || (user_rw & PAGE_WRITE);
>          }

(I was sceptical that 'regime_has_2_ranges()' was the right condition
here, but the Arm ARM really does define it as "valid only when stage
1 of the translation regime can support two VA ranges".)

>      } else if (arm_feature(env, ARM_FEATURE_V7)) {
>          switch (regime_el(env, mmu_idx)) {
> @@ -9316,7 +9309,6 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
>                                          ARMMMUIdx mmu_idx)
>  {
>      uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
> -    uint32_t el = regime_el(env, mmu_idx);
>      bool tbi, tbid, epd, hpd, tcma, using16k, using64k;
>      int select, tsz;
>
> @@ -9326,7 +9318,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
>       */
>      select = extract64(va, 55, 1);
>
> -    if (el > 1) {
> +    if (!regime_has_2_ranges(mmu_idx)) {
>          tsz = extract32(tcr, 0, 6);
>          using64k = extract32(tcr, 14, 1);
>          using16k = extract32(tcr, 15, 1);
> @@ -9486,10 +9478,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
>          param = aa64_va_parameters(env, address, mmu_idx,
>                                     access_type != MMU_INST_FETCH);
>          level = 0;
> -        /* If we are in 64-bit EL2 or EL3 then there is no TTBR1, so mark it
> -         * invalid.
> -         */
> -        ttbr1_valid = (el < 2);
> +        ttbr1_valid = regime_has_2_ranges(mmu_idx);
>          addrsize = 64 - 8 * param.tbi;
>          inputsize = 64 - param.tsz;
>      } else {
> @@ -11095,8 +11084,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
>              ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
>              int tbii;
>
> -            /* FIXME: ARMv8.1-VHE S2 translation regime.  */
> -            if (regime_el(env, stage1) < 2) {
> +            if (regime_has_2_ranges(mmu_idx)) {

Now that the rebuild_hflags patchset has landed this is in
rebuild_hflags_a64().

>                  ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
>                  tbid = (p1.tbi << 1) | p0.tbi;
>                  tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 51f3af9cd9..c85db69db4 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -175,8 +175,7 @@ static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
>      if (tbi == 0) {
>          /* Load unmodified address */
>          tcg_gen_mov_i64(dst, src);
> -    } else if (s->current_el >= 2) {
> -        /* FIXME: ARMv8.1-VHE S2 translation regime.  */
> +    } else if (!regime_has_2_ranges(s->mmu_idx)) {
>          /* Force tag byte to all zero */
>          tcg_gen_extract_i64(dst, src, 0, 56);
>      } else {

The comment above this function also needs updating to no longer
refer to "EL2 and EL3" vs "EL0 and EL1". (You might also remove
the use of the imperial 'We' in the last sentence in it ;-))

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 03/22] target/arm: Add MTE system registers
  2019-10-11 13:47 ` [PATCH v5 03/22] target/arm: Add MTE system registers Richard Henderson
@ 2019-12-03 11:48   ` Peter Maydell
  2019-12-06 14:47     ` Richard Henderson
  0 siblings, 1 reply; 53+ messages in thread
From: Peter Maydell @ 2019-12-03 11:48 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:48, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This is TFSRE0_EL1, TFSR_EL1, TFSR_EL2, TFSR_EL3,
> RGSR_EL1, GCR_EL1, GMID_EL1, and PSTATE.TCO.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v3: Add GMID; add access_mte.
> v4: Define only TCO at mte_insn_reg.
> ---
>  target/arm/cpu.h           |  3 ++
>  target/arm/internals.h     |  6 ++++
>  target/arm/helper.c        | 73 ++++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c | 11 ++++++
>  4 files changed, 93 insertions(+)


> +    { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
> +      .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS },

This should trap if HCR_EL2.TID5 is 1 (since we're adding
support for the TID* ID reg trap bits now).

> +    REGINFO_SENTINEL
> +};
> +
> +static const ARMCPRegInfo mte_tco_reginfo[] = {
> +    { .name = "TCO", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
> +      .type = ARM_CP_NO_RAW,
> +      .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },
> +    REGINFO_SENTINEL
> +};
>  #endif
>
>  static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
> @@ -6881,6 +6948,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
>      if (cpu_isar_feature(aa64_rndr, cpu)) {
>          define_arm_cp_regs(cpu, rndr_reginfo);
>      }

So, aa64_mte_insn_reg here is checking for ID_AA64PFR1_EL1 != 0
("instructions accessible at EL0 are implemented")
and aa64_mte is checking for >= 2 ("full implementation").
I think a couple of brief comments would clarify:

> +    if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {
           /* EL0-visible MTE registers, present even for dummy
implementation */
> +        define_arm_cp_regs(cpu, mte_tco_reginfo);
> +    }
> +    if (cpu_isar_feature(aa64_mte, cpu)) {
           /* MTE registers present for a full implementation */
> +        define_arm_cp_regs(cpu, mte_reginfo);
> +    }

(The other way to arrange this would be to have the 'real'
TCO regdef in mte_reginfo, and separately have "reginfo
if we only have the dummy visible-from-EL0-parts-only
which defines a constant 0 TCO" (and also make the MSR_i
code implement a RAZ/WI for this case, for consistency).
An implementation that allows the guest to toggle the PSTATE.TCO
bit to no visible effect is architecturally valid, though.)

>  #endif
>
>      /*
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index c85db69db4..62bdf50796 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -1611,6 +1611,17 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
>          s->base.is_jmp = DISAS_UPDATE;
>          break;
>
> +    case 0x1c: /* TCO */
> +        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
> +            goto do_unallocated;
> +        }
> +        if (crm & 1) {
> +            set_pstate_bits(PSTATE_TCO);
> +        } else {
> +            clear_pstate_bits(PSTATE_TCO);
> +        }
> +        break;
> +
>      default:
>      do_unallocated:
>          unallocated_encoding(s);
> --
> 2.17.1

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>


thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3}
  2019-10-11 13:47 ` [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3} Richard Henderson
@ 2019-12-03 13:42   ` Peter Maydell
  2019-12-03 16:06     ` Richard Henderson
  2019-12-03 16:14     ` Richard Henderson
  0 siblings, 2 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-03 13:42 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:49, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Implements the rules of "PE generation of Checked and Unchecked
> accesses" which aren't already implied by TB_FLAGS_MTE_ACTIVE.
> Implements the rules of "PE handling of Tag Check Failure".
>
> Does not implement tag physical address space, so all operations
> reduce to unchecked so far.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Fix TFSR update.
> v3: Split helper_mte_check per {1,2} IAs; take tbi data from translate.
> v5: Split helper_mte_check3, the only one that needs a runtime check for tbi.
> ---
>  target/arm/helper-a64.h    |   4 +
>  target/arm/mte_helper.c    | 167 +++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c |  15 +++-
>  target/arm/Makefile.objs   |   1 +
>  4 files changed, 186 insertions(+), 1 deletion(-)
>  create mode 100644 target/arm/mte_helper.c
>
> diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
> index a915c1247f..a82e21f15a 100644
> --- a/target/arm/helper-a64.h
> +++ b/target/arm/helper-a64.h
> @@ -102,3 +102,7 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64)
>  DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
>  DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
>  DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
> +
> +DEF_HELPER_FLAGS_2(mte_check1, TCG_CALL_NO_WG, i64, env, i64)
> +DEF_HELPER_FLAGS_2(mte_check2, TCG_CALL_NO_WG, i64, env, i64)
> +DEF_HELPER_FLAGS_3(mte_check3, TCG_CALL_NO_WG, i64, env, i64, i32)
> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
> new file mode 100644
> index 0000000000..bbb90cbe86
> --- /dev/null
> +++ b/target/arm/mte_helper.c
> @@ -0,0 +1,167 @@
> +/*
> + * ARM v8.5-MemTag Operations
> + *
> + * Copyright (c) 2019 Linaro, Ltd.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "internals.h"
> +#include "exec/exec-all.h"
> +#include "exec/cpu_ldst.h"
> +#include "exec/helper-proto.h"
> +
> +
> +static int get_allocation_tag(CPUARMState *env, uint64_t ptr, uintptr_t ra)
> +{
> +    /* Tag storage not implemented.  */
> +    return -1;
> +}
> +
> +static int allocation_tag_from_addr(uint64_t ptr)
> +{
> +    ptr += 1ULL << 55;  /* carry ptr[55] into ptr[59:56].  */
> +    return extract64(ptr, 56, 4);

What's the carry-bit-55 logic for? The pseudocode
AArch64.AllocationTagFromAddress just returns bits [59:56].

> +}
> +
> +/*
> + * Perform a checked access for MTE.
> + * On arrival, TBI is known to enabled, as is allocation_tag_access_enabled.

"to be"

> + */
> +static uint64_t do_mte_check(CPUARMState *env, uint64_t dirty_ptr,
> +                             uint64_t clean_ptr, uint32_t select,
> +                             uintptr_t ra)
> +{
> +    ARMMMUIdx stage1 = arm_stage1_mmu_idx(env);
> +    int ptr_tag, mem_tag;
> +
> +    /*
> +     * If TCMA is enabled, then physical tag 0 is unchecked.
> +     * Note the rules in D6.8.1 are written with logical tags, where
> +     * the corresponding physical tag rule is simpler: equal to 0.
> +     * We will need the physical tag below anyway.
> +     */

This reads a bit oddly, because (in the final version of the spec)
physical and logical tags are identical (AArch64.PhysicalTag()
just returns bits [59:56] of the vaddr).

> +    ptr_tag = allocation_tag_from_addr(dirty_ptr);
> +    if (ptr_tag == 0) {
> +        ARMVAParameters p = aa64_va_parameters(env, dirty_ptr, stage1, true);
> +        if (p.tcma) {
> +            return clean_ptr;
> +        }
> +    }

I don't think this logic gets the "regime has two address ranges"
case correct. For a two-address-range translation regime (where
TCR_ELx has TCMA0 and TCMA1 bits, rather than just a single TCMA bit),
then the 'select' argument to this function needs to be involved,
because we should do a tag-unchecked access if:
 * addr[59:55]==0b00000 (ie select == 0 and ptr_tag == 0)
   and TCR_ELx.TCMA0 is set
 * addr[59:55]==0b11111 (ie select == 1 and ptr_tag == 0xf)
   and TCR_ELx.TCMA1 is set
(the pseudocode for this is in AArch64.AccessTagIsChecked(),
and the TCR_EL1.TCMA[01] bit definitions agree; the text in
D6.8.1 appears to be confused.)

> +
> +    /*
> +     * If an access is made to an address that does not provide tag
> +     * storage, the result is IMPLEMENTATION DEFINED.  We choose to
> +     * treat the access as unchecked.
> +     * This is similar to MemAttr != Tagged, which are also unchecked.
> +     */
> +    mem_tag = get_allocation_tag(env, clean_ptr, ra);
> +    if (mem_tag < 0) {
> +        return clean_ptr;
> +    }
> +
> +    /* If the tags do not match, the tag check operation fails.  */
> +    if (unlikely(ptr_tag != mem_tag)) {
> +        int el, regime_el, tcf;
> +        uint64_t sctlr;
> +
> +        el = arm_current_el(env);
> +        regime_el = (el ? el : 1);   /* TODO: ARMv8.1-VHE EL2&0 regime */

We could write this as "regime_el(env, stage1)" if that function
wasn't local to helper.c, right ?

> +        sctlr = env->cp15.sctlr_el[regime_el];
> +        if (el == 0) {
> +            tcf = extract64(sctlr, 38, 2);
> +        } else {
> +            tcf = extract64(sctlr, 40, 2);
> +        }
> +
> +        switch (tcf) {
> +        case 1:
> +            /*
> +             * Tag check fail causes a synchronous exception.
> +             *
> +             * In restore_state_to_opc, we set the exception syndrome
> +             * for the load or store operation.  Do that first so we
> +             * may overwrite that with the syndrome for the tag check.
> +             */
> +            cpu_restore_state(env_cpu(env), ra, true);
> +            env->exception.vaddress = dirty_ptr;
> +            raise_exception(env, EXCP_DATA_ABORT,
> +                            syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0x11),
> +                            exception_target_el(env));
> +            /* noreturn; fall through to assert anyway */

hopefully this fallthrough comment syntax doesn't confuse any
of our compilers/static analyzers...

> +
> +        case 0:
> +            /*
> +             * Tag check fail does not affect the PE.
> +             * We eliminate this case by not setting MTE_ACTIVE
> +             * in tb_flags, so that we never make this runtime call.
> +             */
> +            g_assert_not_reached();
> +
> +        case 2:
> +            /* Tag check fail causes asynchronous flag set.  */
> +            env->cp15.tfsr_el[regime_el] |= 1 << select;

Won't this incorrectly accumulate tagfails for EL0 into
TFSR_EL1 rather than TFSRE0_EL1 ? I think you want "[el]".

> +            break;
> +
> +        default:
> +            /* Case 3: Reserved. */
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Tag check failure with SCTLR_EL%d.TCF "
> +                          "set to reserved value %d\n", regime_el, tcf);

Technically this message is going to be wrong for the
case of el==0 (where it's SCTLR_EL1.TCF0, not .TCF, that's
been mis-set).

> +            break;
> +        }
> +    }
> +
> +    return clean_ptr;
> +}
> +
> +/*
> + * Perform check in translation regime w/single IA range.
> + * It is known that TBI is enabled on entry.
> + */
> +uint64_t HELPER(mte_check1)(CPUARMState *env, uint64_t dirty_ptr)
> +{
> +    uint64_t clean_ptr = extract64(dirty_ptr, 0, 56);
> +    return do_mte_check(env, dirty_ptr, clean_ptr, 0, GETPC());
> +}
> +
> +/*
> + * Perform check in translation regime w/two IA ranges.
> + * It is known that TBI is enabled on entry.
> + */
> +uint64_t HELPER(mte_check2)(CPUARMState *env, uint64_t dirty_ptr)
> +{
> +    uint32_t select = extract64(dirty_ptr, 55, 1);
> +    uint64_t clean_ptr = sextract64(dirty_ptr, 0, 56);
> +    return do_mte_check(env, dirty_ptr, clean_ptr, select, GETPC());
> +}
> +
> +/*
> + * Perform check in translation regime w/two IA ranges.
> + * The TBI argument is the concatenation of TBI1:TBI0.
> + */
> +uint64_t HELPER(mte_check3)(CPUARMState *env, uint64_t dirty_ptr, uint32_t tbi)
> +{
> +    uint32_t select = extract64(dirty_ptr, 55, 1);
> +    uint64_t clean_ptr = sextract64(dirty_ptr, 0, 56);
> +
> +    if ((tbi >> select) & 1) {
> +        return do_mte_check(env, dirty_ptr, clean_ptr, select, GETPC());
> +    } else {
> +        /* TBI is disabled; the access is unchecked.  */
> +        return dirty_ptr;
> +    }
> +}
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 62bdf50796..8e4fea6b4c 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -214,7 +214,20 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
>  static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
>  {
>      TCGv_i64 clean = new_tmp_a64(s);
> -    gen_top_byte_ignore(s, clean, addr, s->tbid);
> +
> +    /* Note that s->mte_active already includes a check for s->tbid != 0. */
> +    if (!s->mte_active) {
> +        gen_top_byte_ignore(s, clean, addr, s->tbid);
> +    } else if (!regime_has_2_ranges(s->mmu_idx)) {
> +        gen_helper_mte_check1(clean, cpu_env, addr);
> +    } else if (s->tbid == 3) {
> +        /* Both TBI1:TBI0 are set; no need to check at runtime. */
> +        gen_helper_mte_check2(clean, cpu_env, addr);
> +    } else {
> +        TCGv_i32 tbi = tcg_const_i32(s->tbid);
> +        gen_helper_mte_check3(clean, cpu_env, addr, tbi);
> +        tcg_temp_free_i32(tbi);
> +    }
>      return clean;
>  }
>
> diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
> index cf26c16f5f..8fd7d086c8 100644
> --- a/target/arm/Makefile.objs
> +++ b/target/arm/Makefile.objs
> @@ -67,3 +67,4 @@ obj-$(CONFIG_SOFTMMU) += psci.o
>  obj-$(TARGET_AARCH64) += translate-a64.o helper-a64.o
>  obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
>  obj-$(TARGET_AARCH64) += pauth_helper.o
> +obj-$(TARGET_AARCH64) += mte_helper.o
> --
> 2.17.1

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 05/22] target/arm: Suppress tag check for sp+offset
  2019-10-11 13:47 ` [PATCH v5 05/22] target/arm: Suppress tag check for sp+offset Richard Henderson
@ 2019-12-03 14:07   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-03 14:07 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:49, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> R0078 specifies that base register, or base register plus immediate
> offset, is unchecked when the base register is SP.

It looks like rule-numbers didn't make it into the final Arm ARM,
so I guess the reference here would just be to section D6.8.1 ?

Also, this phrasing is slightly ambiguous about whether the
"when base is SP" condition applies to both "base register"
and "base register + immediate", or just to the last of the two;
the correct reading is the latter of these (and the D6.8.1
Arm ARM text is in error; trust the pseudocode here).

We could perhaps say something like:

D6.8.1 specifies that accesses are tag-unchecked for loads and
stores (including exclusives, compare-and-swap, etc) whose addresses are:
 * base-register only, where the base register is SP
 * base-register plus immediate, where the base register is SP
   (not including reg+imm with writeback addressing forms)
and also that literal (pc-relative) loads are tag-unchecked.

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Include writeback addresses as checked.

The load-literal case is implicitly tag-unchecked because
the address calculation doesn't go via clean_data_tbi(), right?

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 06/22] target/arm: Implement the IRG instruction
  2019-10-11 13:47 ` [PATCH v5 06/22] target/arm: Implement the IRG instruction Richard Henderson
@ 2019-12-03 14:26   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-03 14:26 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:49, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Update to 00eac5.
>     Merge choose_random_nonexcluded_tag into helper_irg since
>     that pseudo function no longer exists separately.
> ---
>  target/arm/helper-a64.h    |  1 +
>  target/arm/mte_helper.c    | 57 ++++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c |  7 +++++
>  3 files changed, 65 insertions(+)
>
> diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
> index a82e21f15a..6ff7f5b756 100644
> --- a/target/arm/helper-a64.h
> +++ b/target/arm/helper-a64.h
> @@ -106,3 +106,4 @@ DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
>  DEF_HELPER_FLAGS_2(mte_check1, TCG_CALL_NO_WG, i64, env, i64)
>  DEF_HELPER_FLAGS_2(mte_check2, TCG_CALL_NO_WG, i64, env, i64)
>  DEF_HELPER_FLAGS_3(mte_check3, TCG_CALL_NO_WG, i64, env, i64, i32)
> +DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
> index bbb90cbe86..9848849a91 100644
> --- a/target/arm/mte_helper.c
> +++ b/target/arm/mte_helper.c
> @@ -37,6 +37,31 @@ static int allocation_tag_from_addr(uint64_t ptr)
>      return extract64(ptr, 56, 4);
>  }
>
> +static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
> +{
> +    if (exclude == 0xffff) {
> +        return 0;
> +    }
> +    if (offset == 0) {
> +        while (exclude & (1 << tag)) {
> +            tag = (tag + 1) & 15;
> +        }
> +    } else {
> +        do {
> +            do {
> +                tag = (tag + 1) & 15;
> +            } while (exclude & (1 << tag));
> +        } while (--offset > 0);
> +    }

I feel like this would be easier to review if it matched
the logic the pseudocode uses, though I think the end result
comes out the same.

> +    return tag;
> +}
> +
> +static uint64_t address_with_allocation_tag(uint64_t ptr, int rtag)
> +{
> +    rtag -= extract64(ptr, 55, 1);
> +    return deposit64(ptr, 56, 4, rtag);

This doesn't match AArch64.AddressWithAllocationTag -- the
fiddling with bit 55 is unwanted.

> +}
> +
>  /*
>   * Perform a checked access for MTE.
>   * On arrival, TBI is known to enabled, as is allocation_tag_access_enabled.
> @@ -165,3 +190,35 @@ uint64_t HELPER(mte_check3)(CPUARMState *env, uint64_t dirty_ptr, uint32_t tbi)
>          return dirty_ptr;
>      }
>  }
> +
> +uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
> +{
> +    int el = arm_current_el(env);
> +    uint64_t sctlr = arm_sctlr(env, el);
> +    int rtag = 0;
> +
> +    if (allocation_tag_access_enabled(env, el, sctlr)) {
> +        /*
> +         * Our IMPDEF choice for GCR_EL1.RRND==1 is to behave as if
> +         * GCR_EL1.RRND==0, always producing deterministic results.
> +         */
> +        uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
> +        int start = extract32(env->cp15.rgsr_el1, 0, 4);
> +        int seed = extract32(env->cp15.rgsr_el1, 8, 16);
> +        int offset, i;
> +
> +        /* RandomTag */
> +        for (i = offset = 0; i < 4; ++i) {
> +            /* NextRandomTagBit */
> +            int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
> +                       extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
> +            seed = (top << 15) | (seed >> 1);
> +            offset |= top << i;
> +        }
> +        rtag = choose_nonexcluded_tag(start, offset, exclude);
> +
> +        env->cp15.rgsr_el1 = rtag | (seed << 8);
> +    }
> +
> +    return address_with_allocation_tag(rn, rtag);
> +}
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 18d45fba87..83d253d67f 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -5156,6 +5156,13 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
>      case 3: /* SDIV */
>          handle_div(s, true, sf, rm, rn, rd);
>          break;
> +    case 4: /* IRG */
> +        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
> +            goto do_unallocated;
> +        }
> +        gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
> +                       cpu_reg_sp(s, rn), cpu_reg(s, rm));

In the case of "we only have mte_insn_reg, not full MTE",
the allocation tag we insert into the address must always
be zero, so you could just special case this and emit code
inline to clear bits [59:56]. The code as it stands works
because we ensure that the guest can't set the SCTLR.*ATA*
bits. (That's a bit inconsistent with our approach to the
PSTATE.TCO bit, which we do allow a guest to toggle, but
the inconsistency is permitted by the architecture.) I'm
not sure whether "we only have the EL0 visible bits" is
going to be a common enough config to care about to
special-case.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 02/22] target/arm: Add regime_has_2_ranges
  2019-12-03 11:01   ` Peter Maydell
@ 2019-12-03 15:09     ` Richard Henderson
  0 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-12-03 15:09 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

On 12/3/19 11:01 AM, Peter Maydell wrote:
>> +/* Return true if this address translation regime has two ranges.  */
>> +static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx)
>> +{
>> +    switch (mmu_idx) {
>> +    case ARMMMUIdx_S12NSE0:
>> +    case ARMMMUIdx_S12NSE1:
>> +    case ARMMMUIdx_S1NSE0:
>> +    case ARMMMUIdx_S1NSE1:
>> +        return true;
> 
> Don't S1SE0 and S1SE1 also need to be here?

Whoops, yes.  I'll need to fix that in the VHE patch set too.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3}
  2019-12-03 13:42   ` Peter Maydell
@ 2019-12-03 16:06     ` Richard Henderson
  2019-12-03 16:26       ` Peter Maydell
  2019-12-03 16:14     ` Richard Henderson
  1 sibling, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-12-03 16:06 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

On 12/3/19 1:42 PM, Peter Maydell wrote:
>> +static int allocation_tag_from_addr(uint64_t ptr)
>> +{
>> +    ptr += 1ULL << 55;  /* carry ptr[55] into ptr[59:56].  */
>> +    return extract64(ptr, 56, 4);
> 
> What's the carry-bit-55 logic for? The pseudocode
> AArch64.AllocationTagFromAddress just returns bits [59:56].

This was the old physical tag extraction.

>> +static uint64_t do_mte_check(CPUARMState *env, uint64_t dirty_ptr,
>> +                             uint64_t clean_ptr, uint32_t select,
>> +                             uintptr_t ra)
>> +{
>> +    ARMMMUIdx stage1 = arm_stage1_mmu_idx(env);
>> +    int ptr_tag, mem_tag;
>> +
>> +    /*
>> +     * If TCMA is enabled, then physical tag 0 is unchecked.
>> +     * Note the rules in D6.8.1 are written with logical tags, where
>> +     * the corresponding physical tag rule is simpler: equal to 0.
>> +     * We will need the physical tag below anyway.
>> +     */
> 
> This reads a bit oddly, because (in the final version of the spec)
> physical and logical tags are identical (AArch64.PhysicalTag()
> just returns bits [59:56] of the vaddr).

I missed that change between draft and final.

Wow, that's really annoying.  If they were going to drop physical vs logical
tags, why did they keep the language?

Frankly, it made a *lot* of sense as a way to handle addresses in TTBR1, which
now have asymmetric special cases.  In particular, ADDG will, as I read it now,
with allocation tag access disabled, munge a TTBR1 address to <59:56> = 0.
Which is fine so long as access is disabled, but when re-enabled (e.g. via
PSTATE.TCO) the address will no longer pass the TCMA test.

Is this really intentional?


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3}
  2019-12-03 13:42   ` Peter Maydell
  2019-12-03 16:06     ` Richard Henderson
@ 2019-12-03 16:14     ` Richard Henderson
  1 sibling, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-12-03 16:14 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

Oh, to finish up on the replies...

On 12/3/19 1:42 PM, Peter Maydell wrote:
>> +    ptr_tag = allocation_tag_from_addr(dirty_ptr);
>> +    if (ptr_tag == 0) {
>> +        ARMVAParameters p = aa64_va_parameters(env, dirty_ptr, stage1, true);
>> +        if (p.tcma) {
>> +            return clean_ptr;
>> +        }
>> +    }
> 
> I don't think this logic gets the "regime has two address ranges"
> case correct. For a two-address-range translation regime (where
> TCR_ELx has TCMA0 and TCMA1 bits, rather than just a single TCMA bit),
> then the 'select' argument to this function needs to be involved,
> because we should do a tag-unchecked access if:
>  * addr[59:55]==0b00000 (ie select == 0 and ptr_tag == 0)
>    and TCR_ELx.TCMA0 is set
>  * addr[59:55]==0b11111 (ie select == 1 and ptr_tag == 0xf)
>    and TCR_ELx.TCMA1 is set
> (the pseudocode for this is in AArch64.AccessTagIsChecked(),
> and the TCR_EL1.TCMA[01] bit definitions agree; the text in
> D6.8.1 appears to be confused.)

It used to be correct.

That was the lovely bit about physical vs logical tags.  Add 1 bit bit 55, let
the carry ripple up, so that the physical tag check for TCMA was always against 0.

That seems to be broken now in the final spec.

>> +        el = arm_current_el(env);
>> +        regime_el = (el ? el : 1);   /* TODO: ARMv8.1-VHE EL2&0 regime */
> 
> We could write this as "regime_el(env, stage1)" if that function
> wasn't local to helper.c, right ?

Yes.

>> +            /* noreturn; fall through to assert anyway */
> 
> hopefully this fallthrough comment syntax doesn't confuse any
> of our compilers/static analyzers...

It shouldn't...

>> +            /* Tag check fail causes asynchronous flag set.  */
>> +            env->cp15.tfsr_el[regime_el] |= 1 << select;
> 
> Won't this incorrectly accumulate tagfails for EL0 into
> TFSR_EL1 rather than TFSRE0_EL1 ? I think you want "[el]".

Yep.

>> +            /* Case 3: Reserved. */
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "Tag check failure with SCTLR_EL%d.TCF "
>> +                          "set to reserved value %d\n", regime_el, tcf);
> 
> Technically this message is going to be wrong for the
> case of el==0 (where it's SCTLR_EL1.TCF0, not .TCF, that's
> been mis-set).

Yep.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3}
  2019-12-03 16:06     ` Richard Henderson
@ 2019-12-03 16:26       ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-03 16:26 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Tue, 3 Dec 2019 at 16:06, Richard Henderson
<richard.henderson@linaro.org> wrote:
> On 12/3/19 1:42 PM, Peter Maydell wrote:
> > This reads a bit oddly, because (in the final version of the spec)
> > physical and logical tags are identical (AArch64.PhysicalTag()
> > just returns bits [59:56] of the vaddr).
>
> I missed that change between draft and final.
>
> Wow, that's really annoying.  If they were going to drop physical vs logical
> tags, why did they keep the language?

It leaves space for a potential future architecture making
the mapping something other than 1:1.

> Is this really intentional?

Yes.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 10/22] target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY
  2019-10-11 13:47 ` [PATCH v5 10/22] target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY Richard Henderson
@ 2019-12-05 16:12   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 16:12 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We will need this to raise unaligned exceptions from user mode.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tlb_helper.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
> index 5feb312941..29b92a1149 100644
> --- a/target/arm/tlb_helper.c
> +++ b/target/arm/tlb_helper.c
> @@ -10,8 +10,6 @@
>  #include "internals.h"
>  #include "exec/exec-all.h"
>
> -#if !defined(CONFIG_USER_ONLY)
> -
>  static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
>                                              unsigned int target_el,
>                                              bool same_el, bool ea,
> @@ -122,6 +120,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
>      arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
>  }
>
> +#ifndef CONFIG_USER_ONLY
>  /*
>   * arm_cpu_do_transaction_failed: handle a memory system error response
>   * (eg "no device/memory present at address") by raising an external abort
> --
> 2.17.1

Isn't this just enabling compilation of the softmmu
arm_cpu_do_unaligned_access() on linux-user? That codepath
does a lot of softmmu stuff including calling arm_deliver_fault()
(which implicitly does somewhat more looking at EL1 system
register state than we necessarily have set up correctly
for the user-mode code).

For arm_cpu_tlb_fill() which handles prefetch/data aborts
we just have a separate much simpler codepath for
CONFIG_USER_ONLY which doesn't call arm_deliver_fault().
I think being consistent here about how we handle the
CONFIG_USER_ONLY case would help avoid having a codepath
that isn't very well tested because it's only used in
the odd special case of unaligned-address exceptions.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 11/22] target/arm: Implement LDG, STG, ST2G instructions
  2019-10-11 13:47 ` [PATCH v5 11/22] target/arm: Implement LDG, STG, ST2G instructions Richard Henderson
@ 2019-12-05 17:07   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 17:07 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Split out allocation_tag_mem.  Handle atomicity of stores.
> v3: Add X[t] input to these insns; require pre-cleaned addresses.
> v5: Fix !32-byte aligned operation of st2g.
> ---
>  target/arm/helper-a64.h    |   5 ++
>  target/arm/mte_helper.c    | 154 +++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c | 115 +++++++++++++++++++++++++++
>  3 files changed, 274 insertions(+)
>

> --- a/target/arm/mte_helper.c
> +++ b/target/arm/mte_helper.c
> @@ -25,8 +25,21 @@
>  #include "exec/helper-proto.h"
>
>
> +static uint8_t *allocation_tag_mem(CPUARMState *env, uint64_t ptr,
> +                                   bool write, uintptr_t ra)
> +{
> +    /* Tag storage not implemented.  */
> +    return NULL;
> +}
> +
>  static int get_allocation_tag(CPUARMState *env, uint64_t ptr, uintptr_t ra)
>  {
> +    uint8_t *mem = allocation_tag_mem(env, ptr, false, ra);
> +
> +    if (mem) {
> +        int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
> +        return extract32(atomic_read(mem), ofs, 4);

Can we have a comment somewhere describing what our tag
storage looks like? I guess from the code that we're doing
it as a byte array where each byte stores 2 4-bit tags
(in which order?), but documenting it would be nice.

> +    }
>      /* Tag storage not implemented.  */
>      return -1;
>  }

> +static void do_st2g(CPUARMState *env, uint64_t ptr1, uint64_t xt,
> +                    uintptr_t ra, stg_store1 store1)
> +{
> +    int el, tag;
> +    uint64_t ptr2, sctlr;
> +    uint8_t *mem1, *mem2;
> +
> +    check_tag_aligned(env, ptr1, ra);
> +
> +    el = arm_current_el(env);
> +    sctlr = arm_sctlr(env, el);
> +    tag = allocation_tag_from_addr(xt);
> +
> +    /*
> +     * Trap if accessing an invalid page(s).
> +     * This takes priority over !allocation_tag_access_enabled.
> +     */
> +    mem1 = allocation_tag_mem(env, ptr1, true, ra);
> +
> +    if (ptr1 & TAG_GRANULE) {
> +        /* The two stores are unaligned and modify two bytes.  */
> +        ptr2 = ptr1 + TAG_GRANULE;
> +        mem2 = allocation_tag_mem(env, ptr2, true, ra);
> +
> +        /* Store if page supports tags and access is enabled.  */
> +        if ((mem1 || mem2) && allocation_tag_access_enabled(env, el, sctlr)) {
> +            if (mem1) {
> +                store1(ptr1, mem1, tag);
> +            }
> +            if (mem2) {
> +                store1(ptr2, mem2, tag);
> +            }
> +        }
> +    } else {
> +        /* The two stores are aligned 32, and modify one byte.  */

Not sure what the '32' means here?

> +        if (mem1 && allocation_tag_access_enabled(env, el, sctlr)) {
> +            tag |= tag << 4;
> +            atomic_set(mem1, tag);
> +        }
> +    }
> +}

> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index cf341c98d3..c17b36ebb2 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -3559,6 +3559,118 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
>      }
>  }
>
> +/*
> + * Load/Store memory tags
> + *
> + *  31 30 29         24     22  21     12    10      5      0
> + * +-----+-------------+-----+---+------+-----+------+------+
> + * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
> + * +-----+-------------+-----+---+------+-----+------+------+
> + */
> +static void disas_ldst_tag(DisasContext *s, uint32_t insn)
> +{
> +    int rt = extract32(insn, 0, 5);
> +    int rn = extract32(insn, 5, 5);
> +    uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
> +    int op2 = extract32(insn, 10, 3);

Typo ? op2 is only 2 bits, not 3.

> +    int op1 = extract32(insn, 22, 2);

The Arm ARM calls this field 'opc', fwiw.

> +    bool is_load = false, is_pair = false, is_zero = false;
> +    int index = 0;
> +    TCGv_i64 dirty_addr, clean_addr, tcg_rt;
> +
> +    if ((insn & 0xff200000) != 0xd9200000
> +        || !dc_isar_feature(aa64_mte_insn_reg, s)) {
> +        goto do_unallocated;
> +    }

Bits 28:24 are already checked by the decode that got us here.

I did wonder about maybe doing the decode of
[31:30] and [21] in the caller (which would match the
structure of the decode tables in the manual), but
we do the same sort of thing for bit [31] in
disas_ldst_multiple_struct() and disas_ldst_single_struct(),
so this is fine.

Not all the insns in this encoding group are present
for the mte_insn_reg cut-down implementation:
LDGM, STGM and STZGM should UNDEF unless we have
full-fat MTE. We haven't added any of those in this patch,
but it might affect how you want to structure the
conditional for doing the feature bit test. (Looking
ahead, patch 13 which adds those insns doesn't update the
feature bit test.)


> +
> +    switch (op1) {
> +    case 0: /* STG */
> +        if (op2 != 0) {
> +            /* STG */
> +            index = op2 - 2;

What does 'index' represent? It looks from the rest of
the code like it's some sort of tristate between
'preindex', 'postindex' and 'not indexed'; if so
a comment explaining what the valid values and meanings
are would be helpful. Alternatively, follow the approach
of disas_ldst_reg_imm9() and just have separate
'post_index' and 'writeback' bools.

> +            break;
> +        }
> +        goto do_unallocated;
> +    case 1:
> +        if (op2 != 0) {
> +            /* STZG */
> +            is_zero = true;
> +            index = op2 - 2;
> +        } else {
> +            /* LDG */
> +            is_load = true;
> +        }
> +        break;
> +    case 2:
> +        if (op2 != 0) {
> +            /* ST2G */
> +            is_pair = true;
> +            index = op2 - 2;
> +            break;
> +        }
> +        goto do_unallocated;
> +    case 3:
> +        if (op2 != 0) {
> +            /* STZ2G */
> +            is_pair = is_zero = true;
> +            index = op2 - 2;
> +            break;
> +        }
> +        goto do_unallocated;
> +
> +    default:
> +    do_unallocated:
> +        unallocated_encoding(s);
> +        return;
> +    }

Should there be a
    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
here?

> +
> +    dirty_addr = read_cpu_reg_sp(s, rn, true);
> +    if (index <= 0) {
> +        /* pre-index or signed offset */
> +        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
> +    }
> +
> +    clean_addr = clean_data_tbi(s, dirty_addr, false);
> +    tcg_rt = cpu_reg(s, rt);

I think this is only correct for LDG, where the Rt field
is 'specifies the Xt register to use'; for STG and ST2G
it's an '<Xn|SP>' form where 31 means "use SP" and you
want cpu_reg_sp() for those.


> +
> +    if (is_load) {
> +        gen_helper_ldg(tcg_rt, cpu_env, clean_addr, tcg_rt);
> +    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +        if (is_pair) {
> +            gen_helper_st2g_parallel(cpu_env, clean_addr, tcg_rt);
> +        } else {
> +            gen_helper_stg_parallel(cpu_env, clean_addr, tcg_rt);
> +        }
> +    } else {
> +        if (is_pair) {
> +            gen_helper_st2g(cpu_env, clean_addr, tcg_rt);
> +        } else {
> +            gen_helper_stg(cpu_env, clean_addr, tcg_rt);
> +        }
> +    }
> +
> +    if (is_zero) {
> +        TCGv_i64 tcg_zero = tcg_const_i64(0);
> +        int mem_index = get_mem_index(s);
> +        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
> +
> +        for (i = 0; i < n; i += 8) {
> +            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q);
> +            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +        }
> +        tcg_temp_free_i64(tcg_zero);
> +    }
> +
> +    if (index != 0) {
> +        /* pre-index or post-index */
> +        if (index > 0) {
> +            /* post-index */
> +            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
> +        }
> +        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
> +    }
> +}

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 12/22] target/arm: Implement the STGP instruction
  2019-10-11 13:47 ` [PATCH v5 12/22] target/arm: Implement the STGP instruction Richard Henderson
@ 2019-12-05 17:15   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 17:15 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v3: Handle atomicity, require pre-cleaned address.
> ---
>  target/arm/translate-a64.c | 20 +++++++++++++++++++-
>  1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index c17b36ebb2..4ecb0a2fb7 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -2657,7 +2657,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
>   * +-----+-------+---+---+-------+---+-------+-------+------+------+
>   *
>   * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
> - *      LDPSW                    01
> + *      LDPSW/STGP               01
>   *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
>   *   V: 0 -> GPR, 1 -> Vector
>   * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
> @@ -2682,6 +2682,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
>      bool is_signed = false;
>      bool postindex = false;
>      bool wback = false;
> +    bool set_tag = false;
>
>      TCGv_i64 clean_addr, dirty_addr;
>
> @@ -2694,6 +2695,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
>
>      if (is_vector) {
>          size = 2 + opc;
> +    } else if (opc == 1 && !is_load) {
> +        /* STGP */
> +        if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
> +            unallocated_encoding(s);
> +            return;
> +        }
> +        size = 3;
> +        set_tag = true;
>      } else {
>          size = 2 + extract32(opc, 1, 1);
>          is_signed = extract32(opc, 0, 1);
> @@ -2746,6 +2755,15 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
>      }
>      clean_addr = clean_data_tbi(s, dirty_addr, wback || rn != 31);

Don't we need to adjust the 'check' argument to clean_data_tbi()
here? The pseudocode says STGP doesn't do tag-checking.

>
> +    if (set_tag) {
> +        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
> +        if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +            gen_helper_stg_parallel(cpu_env, clean_addr, tcg_rn);
> +        } else {
> +            gen_helper_stg(cpu_env, clean_addr, tcg_rn);
> +        }
> +    }
> +
>      if (is_vector) {
>          if (is_load) {
>              do_fp_ld(s, rt, clean_addr, size);
> --
> 2.17.1

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 13/22] target/arm: Implement the LDGM and STGM instructions
  2019-10-11 13:47 ` [PATCH v5 13/22] target/arm: Implement the LDGM and STGM instructions Richard Henderson
@ 2019-12-05 17:42   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 17:42 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v3: Require pre-cleaned addresses.
> ---
>  target/arm/helper-a64.h    |  3 ++
>  target/arm/mte_helper.c    | 96 ++++++++++++++++++++++++++++++++++++++
>  target/arm/translate-a64.c | 42 +++++++++++++----
>  3 files changed, 132 insertions(+), 9 deletions(-)
>

> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 4ecb0a2fb7..4e049bb4aa 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -3592,7 +3592,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
>      uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
>      int op2 = extract32(insn, 10, 3);
>      int op1 = extract32(insn, 22, 2);
> -    bool is_load = false, is_pair = false, is_zero = false;
> +    bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
>      int index = 0;
>      TCGv_i64 dirty_addr, clean_addr, tcg_rt;
>
> @@ -3602,13 +3602,18 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
>      }
>

These are the insns that should UNDEF if we have only
the insn_reg dummy flavour of MTE.

(Also, unlike STG and ST2G, none of the M insns want
Xt to be <Xt|SP>.)


Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 14/22] target/arm: Implement the access tag cache flushes
  2019-10-11 13:47 ` [PATCH v5 14/22] target/arm: Implement the access tag cache flushes Richard Henderson
@ 2019-12-05 17:49   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 17:49 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Like the regular data cache flushes, these are nops within qemu.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/helper.c | 48 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index f435a8d8bd..33bc176e1c 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -5978,6 +5978,54 @@ static const ARMCPRegInfo mte_reginfo[] = {
>      { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
>        .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
>        .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS },
> +    { .name = "IGVAC", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "IGSW", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "IGDVAC", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "IGDSW", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGSW", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGDSW", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CIGSW", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CIGDSW", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGVAC", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGDVAC", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGVAP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGDVAP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGVADP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CGDVADP", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CIGVAC", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3,
> +      .type = ARM_CP_NOP, .access = PL1_W },
> +    { .name = "CIGDVAC", .state = ARM_CP_STATE_AA64,
> +      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5,
> +      .type = ARM_CP_NOP, .access = PL1_W },
>      REGINFO_SENTINEL
>  };

Some of these, but not all, are conditionally available at EL0,
which means that for those that are:
 * .access should be PL0_W
 * .accessfn should be aa64_cacheop_access() (which checks
    SCTLR_EL1.UCI)
 * they need to be in a reginfo that makes them available
   for the insns-and-regs-only flavour of MTE

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 15/22] target/arm: Clean address for DC ZVA
  2019-10-11 13:47 ` [PATCH v5 15/22] target/arm: Clean address for DC ZVA Richard Henderson
@ 2019-12-05 17:54   ` Peter Maydell
  2019-12-05 18:58   ` Peter Maydell
  1 sibling, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 17:54 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This data access was forgotten in the previous patch.

Do you mean "in the patch where we added support for cleaning
addresses of TBI information"? As written it sounds like you're
referring to the previous patch in this patchseries.

> Fixes: 3a471103ac1823ba
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 16/22] target/arm: Implement data cache set allocation tags
  2019-10-11 13:47 ` [PATCH v5 16/22] target/arm: Implement data cache set allocation tags Richard Henderson
@ 2019-12-05 18:17   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 18:17 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This is DC GVA and DC GZVA.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Use allocation_tag_mem + memset.
> v3: Require pre-cleaned addresses.
> ---

> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
> index f1315bae37..e8d8a6bedb 100644
> --- a/target/arm/mte_helper.c
> +++ b/target/arm/mte_helper.c
> @@ -510,3 +510,31 @@ void HELPER(stzgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
>          }
>      }
>  }
> +
> +void HELPER(dc_gva)(CPUARMState *env, uint64_t ptr)
> +{
> +    ARMCPU *cpu = env_archcpu(env);
> +    size_t blocklen = 4 << cpu->dcz_blocksize;
> +    int el;
> +    uint64_t sctlr;
> +    uint8_t *mem;
> +    int rtag;
> +
> +    ptr = QEMU_ALIGN_DOWN(ptr, blocklen);
> +
> +    /* Trap if accessing an invalid page.  */
> +    mem = allocation_tag_mem(env, ptr, true, GETPC());
> +
> +    /* No action if page does not support tags, or if access is disabled.  */
> +    el = arm_current_el(env);
> +    sctlr = arm_sctlr(env, el);
> +    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {
> +        return;
> +    }
> +
> +    rtag = allocation_tag_from_addr(ptr);
> +    rtag |= rtag << 4;
> +
> +    assert(QEMU_IS_ALIGNED(blocklen, 2 * TAG_GRANULE));

Could we assert this on CPU init rather than in this helper?
That way if anybody tries to create a CPU whose dcz_blocksize
doesn't work with the TAG_GRANULE they'll realize immediately
rather than only if they happen to run a guest workload that
use DC GVA or DC GZVA.

I also had to think a bit to work out which way round this
assert is checking: it's testing that the ZVA block length
(usually 64 bytes) is a multiple of (twice the TAG_GRANULE),
which is to say a multiple of 32. Given that the blocksize
is stored as a log2 value, this can only fail for blocksizes
16, 8, 4, 2 or 1, which are all fairly unlikely.

> +    memset(mem, rtag, blocklen / (2 * TAG_GRANULE));
> +}
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 49817b96ae..31260f97f9 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -1769,6 +1769,15 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
>          tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
>          gen_helper_dc_zva(cpu_env, tcg_rt);
>          return;
> +    case ARM_CP_DC_GVA:
> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
> +        gen_helper_dc_gva(cpu_env, tcg_rt);
> +        return;
> +    case ARM_CP_DC_GZVA:
> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
> +        gen_helper_dc_zva(cpu_env, tcg_rt);
> +        gen_helper_dc_gva(cpu_env, tcg_rt);

I think this means that if there's a watchpoint set on the memory
partway through the block we're zeroing then we'll take the
watchpoint with some of the memory zeroed but without the
corresponding tags having been updated. But that's probably OK
(at any rate I wouldn't worry about it for now...)

> +        return;
>      default:
>          break;
>      }
> --

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 18/22] target/arm: Enable MTE
  2019-10-11 13:47 ` [PATCH v5 18/22] target/arm: Enable MTE Richard Henderson
@ 2019-12-05 18:23   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 18:23 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We now implement all of the components of MTE, without actually
> supporting any tagged memory.  All MTE instructions will work,
> trivially, so we can enable support.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu.c   | 10 ++++++++++
>  target/arm/cpu64.c |  1 +
>  2 files changed, 11 insertions(+)
>
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 2399c14471..12fffa3ee4 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -209,6 +209,16 @@ static void arm_cpu_reset(CPUState *s)
>           * make no difference to the user-level emulation.
>           */
>          env->cp15.tcr_el[1].raw_tcr = (3ULL << 37);
> +        /* Enable MTE allocation tags.  */
> +        env->cp15.hcr_el2 |= HCR_ATA;
> +        env->cp15.scr_el3 |= SCR_ATA;
> +        env->cp15.sctlr_el[1] |= SCTLR_ATA0;
> +        /* Enable synchronous tag check failures.  */
> +        env->cp15.sctlr_el[1] |= 1ull << 38;

Isn't this making assumptions about the Linux ABI for
memtag (ie that it actually will expose it to userspace
and that it will make tag check failures synchronous)?

> +#ifdef TARGET_AARCH64
> +        /* Set MTE seed to non-zero value, otherwise RandomTag fails.  */
> +        env->cp15.rgsr_el1 = 0x123400;
> +#endif

Does anything go wrong if we don't bother with the #ifdef?

>  #else
>          /* Reset into the highest available EL */
>          if (arm_feature(env, ARM_FEATURE_EL3)) {
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index d7f5bf610a..ac1e2dc2c4 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -350,6 +350,7 @@ static void aarch64_max_initfn(Object *obj)
>
>          t = cpu->isar.id_aa64pfr1;
>          t = FIELD_DP64(t, ID_AA64PFR1, BT, 1);
> +        t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
>          cpu->isar.id_aa64pfr1 = t;
>
>          t = cpu->isar.id_aa64mmfr1;
> --

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 19/22] target/arm: Cache the Tagged bit for a page in MemTxAttrs
  2019-10-11 13:47 ` [PATCH v5 19/22] target/arm: Cache the Tagged bit for a page in MemTxAttrs Richard Henderson
@ 2019-12-05 18:32   ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 18:32 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This "bit" is a particular value of the page's MemAttr.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/helper.c | 25 +++++++++++++++----------
>  1 file changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index e988398fce..17981d7c48 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -9609,6 +9609,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
>      uint64_t descaddrmask;
>      bool aarch64 = arm_el_is_aa64(env, el);
>      bool guarded = false;
> +    uint8_t memattr;
>
>      /* TODO:
>       * This code does not handle the different format TCR for VTCR_EL2.
> @@ -9836,17 +9837,21 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
>          txattrs->target_tlb_bit0 = true;
>      }
>
> +    if (mmu_idx == ARMMMUIdx_S2NS) {
> +        memattr = convert_stage2_attrs(env, extract32(attrs, 0, 4));
> +    } else {
> +        /* Index into MAIR registers for cache attributes */
> +        uint64_t mair = env->cp15.mair_el[el];
> +        memattr = extract64(mair, extract32(attrs, 0, 3) * 8, 8);
> +    }
> +
> +    /* When in aarch64 mode, and MTE is enabled, remember Tagged in IOTLB.  */
> +    if (aarch64 && memattr == 0xf0 && cpu_isar_feature(aa64_mte, cpu)) {
> +        txattrs->target_tlb_bit1 = true;
> +    }

A comment somewhere saying that 0xf0 is the "Tagged Normal Memory"
attribute would probably be helpful.

> +
>      if (cacheattrs != NULL) {
> -        if (mmu_idx == ARMMMUIdx_S2NS) {
> -            cacheattrs->attrs = convert_stage2_attrs(env,
> -                                                     extract32(attrs, 0, 4));
> -        } else {
> -            /* Index into MAIR registers for cache attributes */
> -            uint8_t attrindx = extract32(attrs, 0, 3);
> -            uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
> -            assert(attrindx <= 7);
> -            cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
> -        }
> +        cacheattrs->attrs = memattr;
>          cacheattrs->shareability = extract32(attrs, 6, 2);
>      }

Don't we also need to care about the stage 2 attributes
somewhere ? If the combo of stage 1 + stage 2 doesn't
say it's Normal Inner&OuterShareable WB NT RA WA then
it shouldn't be Tagged even if the stage 1 attributes ask
for that.

There's also the special case for "stage 1 translation
disabled" -- depends on HCR_EL2.DC and HCR_EL2.DCT.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled
  2019-10-11 13:47 ` [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled Richard Henderson
@ 2019-12-05 18:40   ` Peter Maydell
  2019-12-05 19:24     ` Richard Henderson
  0 siblings, 1 reply; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 18:40 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v5: Assign cs->num_ases to the final value first.
>     Downgrade to ID_AA64PFR1.MTE=1 if tag memory is not available.
> v6: Add secure tag memory for EL3.
> ---
>  target/arm/cpu.h |  6 ++++++
>  hw/arm/virt.c    | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
>  target/arm/cpu.c | 53 ++++++++++++++++++++++++++++++++++++++++++++---
>  3 files changed, 110 insertions(+), 3 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 93a362708b..faca43ea78 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -765,6 +765,10 @@ struct ARMCPU {
>      /* MemoryRegion to use for secure physical accesses */
>      MemoryRegion *secure_memory;
>
> +    /* MemoryRegion to use for allocation tag accesses */
> +    MemoryRegion *tag_memory;
> +    MemoryRegion *secure_tag_memory;
> +
>      /* For v8M, pointer to the IDAU interface provided by board/SoC */
>      Object *idau;
>
> @@ -2956,6 +2960,8 @@ int cpu_mmu_index(CPUARMState *env, bool ifetch);
>  typedef enum ARMASIdx {
>      ARMASIdx_NS = 0,
>      ARMASIdx_S = 1,
> +    ARMASIdx_TagNS = 2,
> +    ARMASIdx_TagS = 3,
>  } ARMASIdx;
>
>  /* Return the Exception Level targeted by debug exceptions. */
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index d74538b021..573988ba4d 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1330,6 +1330,18 @@ static void create_secure_ram(VirtMachineState *vms,
>      g_free(nodename);
>  }
>
> +static void create_tag_ram(MemoryRegion *tag_sysmem,
> +                           hwaddr base, hwaddr size,
> +                           const char *name)
> +{
> +    MemoryRegion *tagram = g_new(MemoryRegion, 1);
> +
> +    memory_region_init_ram(tagram, NULL, name, size / 32, &error_fatal);
> +    memory_region_add_subregion(tag_sysmem, base / 32, tagram);
> +
> +    /* ??? Do we really need an fdt entry, or is MemTag enabled sufficient.  */

What's this '???' asking about? I would be surprised if the
kernel expected to have any kind of FDT for tag RAM
(with the exception that an implementation that puts tags
in a special part of normal-ram will want that not
to be described in the fdt as ram usable by the kernel), but
we should ask the kernel folks.

> +}
> +
>  static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
>  {
>      const VirtMachineState *board = container_of(binfo, VirtMachineState,
> @@ -1485,6 +1497,8 @@ static void machvirt_init(MachineState *machine)
>      qemu_irq pic[NUM_IRQS];
>      MemoryRegion *sysmem = get_system_memory();
>      MemoryRegion *secure_sysmem = NULL;
> +    MemoryRegion *tag_sysmem = NULL;
> +    MemoryRegion *secure_tag_sysmem = NULL;
>      int n, virt_max_cpus;
>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>      bool firmware_loaded;
> @@ -1652,6 +1666,35 @@ static void machvirt_init(MachineState *machine)
>                                       "secure-memory", &error_abort);
>          }
>
> +        /*
> +         * The cpu adds the property iff MemTag is supported.

We've had confusion before from non-native-speakers and
non-maths-geeks about 'iff' in comments; better to expand to
'if and only if'.

> +         * If it is, we must allocate the ram to back that up.
> +         */
> +        if (object_property_find(cpuobj, "tag-memory", NULL)) {
> +            if (!tag_sysmem) {
> +                tag_sysmem = g_new(MemoryRegion, 1);
> +                memory_region_init(tag_sysmem, OBJECT(machine),
> +                                   "tag-memory", UINT64_MAX / 32);
> +
> +                if (vms->secure) {
> +                    secure_tag_sysmem = g_new(MemoryRegion, 1);
> +                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
> +                                       "secure-tag-memory", UINT64_MAX / 32);
> +
> +                    /* As with ram, secure-tag takes precedence over tag.  */
> +                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
> +                                                        tag_sysmem, -1);
> +                }
> +            }

Are there really separate S and NS tag RAMs?

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 15/22] target/arm: Clean address for DC ZVA
  2019-10-11 13:47 ` [PATCH v5 15/22] target/arm: Clean address for DC ZVA Richard Henderson
  2019-12-05 17:54   ` Peter Maydell
@ 2019-12-05 18:58   ` Peter Maydell
  1 sibling, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-05 18:58 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This data access was forgotten in the previous patch.
>
> Fixes: 3a471103ac1823ba
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate-a64.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 4e049bb4aa..49817b96ae 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -1766,7 +1766,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
>          return;
>      case ARM_CP_DC_ZVA:
>          /* Writes clear the aligned block of memory which rt points into. */
> -        tcg_rt = cpu_reg(s, rt);
> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
>          gen_helper_dc_zva(cpu_env, tcg_rt);
>          return;

...doesn't this mean we don't do a tag check for DC ZVA?
Or is that handled in the helper ? (I guess it has to be,
the DC ZVA will span multiple tag granules).

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled
  2019-12-05 18:40   ` Peter Maydell
@ 2019-12-05 19:24     ` Richard Henderson
  2019-12-06  9:51       ` Peter Maydell
  0 siblings, 1 reply; 53+ messages in thread
From: Richard Henderson @ 2019-12-05 19:24 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

On 12/5/19 10:40 AM, Peter Maydell wrote:
>> +         * If it is, we must allocate the ram to back that up.
>> +         */
>> +        if (object_property_find(cpuobj, "tag-memory", NULL)) {
>> +            if (!tag_sysmem) {
>> +                tag_sysmem = g_new(MemoryRegion, 1);
>> +                memory_region_init(tag_sysmem, OBJECT(machine),
>> +                                   "tag-memory", UINT64_MAX / 32);
>> +
>> +                if (vms->secure) {
>> +                    secure_tag_sysmem = g_new(MemoryRegion, 1);
>> +                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
>> +                                       "secure-tag-memory", UINT64_MAX / 32);
>> +
>> +                    /* As with ram, secure-tag takes precedence over tag.  */
>> +                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
>> +                                                        tag_sysmem, -1);
>> +                }
>> +            }
> 
> Are there really separate S and NS tag RAMs?

Implementation defined, I believe.  As with everything about tag storage, it
would seem.  But since there are separate S and NS normal RAMS, I create
separate tag spaces to match.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled
  2019-12-05 19:24     ` Richard Henderson
@ 2019-12-06  9:51       ` Peter Maydell
  0 siblings, 0 replies; 53+ messages in thread
From: Peter Maydell @ 2019-12-06  9:51 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Thu, 5 Dec 2019 at 19:24, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 12/5/19 10:40 AM, Peter Maydell wrote:
> > Are there really separate S and NS tag RAMs?
>
> Implementation defined, I believe.  As with everything about tag storage, it
> would seem.  But since there are separate S and NS normal RAMS, I create
> separate tag spaces to match.

Yeah, it seems like the obvious design. I just couldn't find anything
in the spec that said it was possible. I'm probably missing the
relevant paragraph.

-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory
  2019-10-11 13:47 ` [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory Richard Henderson
@ 2019-12-06 11:46   ` Peter Maydell
  2019-12-06 14:03     ` Richard Henderson
  0 siblings, 1 reply; 53+ messages in thread
From: Peter Maydell @ 2019-12-06 11:46 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The process by which one goes from an address space plus physical
> address to a host pointer is complex.  It is easiest to reuse the
> mechanism already present within cputlb, and letting that cache
> the results.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu-param.h |  2 +-
>  target/arm/cpu.h       | 12 +++++++++---
>  target/arm/internals.h |  2 ++
>  target/arm/helper.c    | 25 +++++++++++++++++++++++--
>  4 files changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
> index 6e6948e960..18ac562346 100644
> --- a/target/arm/cpu-param.h
> +++ b/target/arm/cpu-param.h
> @@ -29,6 +29,6 @@
>  # define TARGET_PAGE_BITS_MIN  10
>  #endif
>
> -#define NB_MMU_MODES 8
> +#define NB_MMU_MODES 9
>
>  #endif
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index faca43ea78..c3609ef9d5 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -2854,8 +2854,8 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
>  #define ARM_MMU_IDX_M_NEGPRI 0x2
>  #define ARM_MMU_IDX_M_S 0x4
>
> -#define ARM_MMU_IDX_TYPE_MASK (~0x7)
> -#define ARM_MMU_IDX_COREIDX_MASK 0x7
> +#define ARM_MMU_IDX_TYPE_MASK (~0xf)
> +#define ARM_MMU_IDX_COREIDX_MASK 0xf
>
>  typedef enum ARMMMUIdx {
>      ARMMMUIdx_S12NSE0 = 0 | ARM_MMU_IDX_A,
> @@ -2865,6 +2865,9 @@ typedef enum ARMMMUIdx {
>      ARMMMUIdx_S1SE0 = 4 | ARM_MMU_IDX_A,
>      ARMMMUIdx_S1SE1 = 5 | ARM_MMU_IDX_A,
>      ARMMMUIdx_S2NS = 6 | ARM_MMU_IDX_A,
> +    ARMMMUIdx_TagNS = 7 | ARM_MMU_IDX_A,
> +    ARMMMUIdx_TagS = 8 | ARM_MMU_IDX_A,
> +
>      ARMMMUIdx_MUser = 0 | ARM_MMU_IDX_M,
>      ARMMMUIdx_MPriv = 1 | ARM_MMU_IDX_M,
>      ARMMMUIdx_MUserNegPri = 2 | ARM_MMU_IDX_M,
> @@ -2891,6 +2894,8 @@ typedef enum ARMMMUIdxBit {
>      ARMMMUIdxBit_S1SE0 = 1 << 4,
>      ARMMMUIdxBit_S1SE1 = 1 << 5,
>      ARMMMUIdxBit_S2NS = 1 << 6,
> +    ARMMMUIdxBit_TagNS = 1 << 7,
> +    ARMMMUIdxBit_TagS = 1 << 8,
>      ARMMMUIdxBit_MUser = 1 << 0,
>      ARMMMUIdxBit_MPriv = 1 << 1,
>      ARMMMUIdxBit_MUserNegPri = 1 << 2,
> @@ -3254,7 +3259,8 @@ enum {
>  /* Return the address space index to use for a memory access */
>  static inline int arm_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs)
>  {
> -    return attrs.secure ? ARMASIdx_S : ARMASIdx_NS;
> +    return ((attrs.target_tlb_bit2 ? ARMASIdx_TagNS : ARMASIdx_NS)
> +            + attrs.secure);

If you want to do the "just add attrs.secure" can we have
a build-time assert that ARMASIdx_S is ARMASIdx_NS + 1, and
ditto for TagNS/TagS, please? It seems like the kind of thing
that will catch us out later on.

>  }

>      if (env->cp15.hcr_el2 & HCR_TGE) {
> @@ -10662,7 +10671,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
>                     target_ulong *page_size,
>                     ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
>  {
> -    if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
> +    switch (mmu_idx) {
> +    case ARMMMUIdx_S12NSE0:
> +    case ARMMMUIdx_S12NSE1:
>          /* Call ourselves recursively to do the stage 1 and then stage 2
>           * translations.
>           */
> @@ -10713,6 +10724,16 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
>               */
>              mmu_idx = stage_1_mmu_idx(mmu_idx);
>          }
> +        break;
> +
> +    case ARMMMUIdx_TagS:
> +    case ARMMMUIdx_TagNS:
> +        /* Indicate tag memory to arm_asidx_from_attrs.  */
> +        attrs->target_tlb_bit2 = true;
> +        break;

So here we fall through to the "handle a stage 1 lookup" code, which:
 * sets attrs->secure
 * sets attrs->user (always false, so we could have left it alone)
 * skips the FCSE handling (as we're v8)
 * skips the PMSA handling
 * hits the regime_translation_disabled() check, which fills in
   *phys_ptr, *prot and *page_size and returns

Maybe it would be clearer if this case here just did all of that:

    case ARMMMUIdx_TagS:
        attrs->secure = true;
        /* fall through */
    case ARMMMUIdx_TagNS:
        /* Indicate tag memory to arm_asidx_from_attrs.  */
        attrs->target_tlb_bit2 = true;
        *phys_ptr = address;
        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        *page_size = TARGET_PAGE_SIZE;
        return 0;

Did I miss anything out?
Or are we going to want more things which are common between
the stage 1 codepath and the "just a tag ram access" case
in future?

thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode
  2019-10-11 13:47 ` [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode Richard Henderson
@ 2019-12-06 13:02   ` Peter Maydell
  2019-12-06 14:14     ` Richard Henderson
  0 siblings, 1 reply; 53+ messages in thread
From: Peter Maydell @ 2019-12-06 13:02 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-arm, QEMU Developers

On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/mte_helper.c | 61 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 61 insertions(+)
>
> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
> index e8d8a6bedb..657383ba0e 100644
> --- a/target/arm/mte_helper.c
> +++ b/target/arm/mte_helper.c
> @@ -28,8 +28,69 @@
>  static uint8_t *allocation_tag_mem(CPUARMState *env, uint64_t ptr,
>                                     bool write, uintptr_t ra)
>  {
> +#ifdef CONFIG_USER_ONLY
>      /* Tag storage not implemented.  */
>      return NULL;
> +#else
> +    CPUState *cs = env_cpu(env);
> +    uintptr_t index;
> +    int mmu_idx;
> +    CPUTLBEntry *entry;
> +    CPUIOTLBEntry *iotlbentry;
> +    MemoryRegionSection *section;
> +    hwaddr physaddr, tag_physaddr;
> +
> +    /*
> +     * Find the TLB entry for this access.
> +     * As a side effect, this also raises an exception for invalid access.
> +     *
> +     * TODO: Perhaps there should be a cputlb helper that returns a
> +     * matching tlb entry + iotlb entry.  That would also be able to
> +     * make use of the victim tlb cache, which is currently private.
> +     */
> +    mmu_idx = cpu_mmu_index(env, false);
> +    index = tlb_index(env, mmu_idx, ptr);
> +    entry = tlb_entry(env, mmu_idx, ptr);
> +    if (!tlb_hit(write ? tlb_addr_write(entry) : entry->addr_read, ptr)) {
> +        bool ok = arm_cpu_tlb_fill(cs, ptr, 16,
> +                                   write ? MMU_DATA_STORE : MMU_DATA_LOAD,
> +                                   mmu_idx, false, ra);
> +        assert(ok);
> +        index = tlb_index(env, mmu_idx, ptr);
> +        entry = tlb_entry(env, mmu_idx, ptr);
> +    }
> +
> +    /* If the virtual page MemAttr != Tagged, nothing to do.  */
> +    iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> +    if (!iotlbentry->attrs.target_tlb_bit1) {
> +        return NULL;
> +    }
> +
> +    /*
> +     * Find the physical address for the virtual access.
> +     *
> +     * TODO: It should be possible to have the tag mmu_idx map
> +     * from main memory ram_addr to tag memory host address.
> +     * that would allow this lookup step to be cached as well.
> +     */
> +    section = iotlb_to_section(cs, iotlbentry->addr, iotlbentry->attrs);
> +    physaddr = ((iotlbentry->addr & TARGET_PAGE_MASK) + ptr
> +                + section->offset_within_address_space
> +                - section->offset_within_region);

I'm surprised that going from vaddr to (physaddr, attrs) requires
this much effort, it seems like the kind of thing we would
already have a function to do.

> +
> +    /* Convert to the physical address in tag space.  */
> +    tag_physaddr = physaddr >> (LOG2_TAG_GRANULE + 1);
> +
> +    /* Choose the tlb index to use for the tag physical access.  */
> +    mmu_idx = iotlbentry->attrs.secure ? ARMMMUIdx_TagS : ARMMMUIdx_TagNS;
> +    mmu_idx = arm_to_core_mmu_idx(mmu_idx);
> +
> +    /*
> +     * FIXME: Get access length and type so that we can use
> +     * probe_access, so that pages are marked dirty for migration.
> +     */
> +    return tlb_vaddr_to_host(env, tag_physaddr, MMU_DATA_LOAD, mmu_idx);

Hmm, does that mean that a setup with MemTag is not migratable?
If so, we should at least install a migration-blocker for CPUs
in that configuration.

> +#endif
>  }
>
>  static int get_allocation_tag(CPUARMState *env, uint64_t ptr, uintptr_t ra)
> --
> 2.17.1
>


thanks
-- PMM


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory
  2019-12-06 11:46   ` Peter Maydell
@ 2019-12-06 14:03     ` Richard Henderson
  0 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-12-06 14:03 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

On 12/6/19 3:46 AM, Peter Maydell wrote:
>> +    case ARMMMUIdx_TagS:
>> +    case ARMMMUIdx_TagNS:
>> +        /* Indicate tag memory to arm_asidx_from_attrs.  */
>> +        attrs->target_tlb_bit2 = true;
>> +        break;
> 
> So here we fall through to the "handle a stage 1 lookup" code, which:
>  * sets attrs->secure
>  * sets attrs->user (always false, so we could have left it alone)
>  * skips the FCSE handling (as we're v8)
>  * skips the PMSA handling
>  * hits the regime_translation_disabled() check, which fills in
>    *phys_ptr, *prot and *page_size and returns

Exactly.

> Maybe it would be clearer if this case here just did all of that:
> 
>     case ARMMMUIdx_TagS:
>         attrs->secure = true;
>         /* fall through */
>     case ARMMMUIdx_TagNS:
>         /* Indicate tag memory to arm_asidx_from_attrs.  */
>         attrs->target_tlb_bit2 = true;
>         *phys_ptr = address;
>         *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
>         *page_size = TARGET_PAGE_SIZE;
>         return 0;
> 
> Did I miss anything out?

I think that's about it.  I thought about doing exactly this.

Also, this is a better location if I ever do something about the TODO in tne
next patch, wherein I talk about mapping not from physical address but from the
normal ram's ramaddr_t, so as to
cache that mapping step as well.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode
  2019-12-06 13:02   ` Peter Maydell
@ 2019-12-06 14:14     ` Richard Henderson
  0 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-12-06 14:14 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

On 12/6/19 5:02 AM, Peter Maydell wrote:
>> +    /*
>> +     * Find the physical address for the virtual access.
>> +     *
>> +     * TODO: It should be possible to have the tag mmu_idx map
>> +     * from main memory ram_addr to tag memory host address.
>> +     * that would allow this lookup step to be cached as well.
>> +     */
>> +    section = iotlb_to_section(cs, iotlbentry->addr, iotlbentry->attrs);
>> +    physaddr = ((iotlbentry->addr & TARGET_PAGE_MASK) + ptr
>> +                + section->offset_within_address_space
>> +                - section->offset_within_region);
> 
> I'm surprised that going from vaddr to (physaddr, attrs) requires
> this much effort, it seems like the kind of thing we would
> already have a function to do.

There are very few places that need to talk about the actual physical address.
 Mostly because that doesn't mean much within qemu -- physical address within
which address space?  Usually we want the ramaddr_t (which is a sort of
combination of pa + as), or the host address, or the device the exists at the
pa + as.

>> +    /*
>> +     * FIXME: Get access length and type so that we can use
>> +     * probe_access, so that pages are marked dirty for migration.
>> +     */
>> +    return tlb_vaddr_to_host(env, tag_physaddr, MMU_DATA_LOAD, mmu_idx);
> 
> Hmm, does that mean that a setup with MemTag is not migratable?
> If so, we should at least install a migration-blocker for CPUs
> in that configuration.

It probably does as written.  I intend to fix this properly before final.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [PATCH v5 03/22] target/arm: Add MTE system registers
  2019-12-03 11:48   ` Peter Maydell
@ 2019-12-06 14:47     ` Richard Henderson
  0 siblings, 0 replies; 53+ messages in thread
From: Richard Henderson @ 2019-12-06 14:47 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-arm, QEMU Developers

On 12/3/19 3:48 AM, Peter Maydell wrote:
>> +    { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
>> +      .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
>> +      .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS },
> 
> This should trap if HCR_EL2.TID5 is 1 (since we're adding
> support for the TID* ID reg trap bits now).

Done.

> So, aa64_mte_insn_reg here is checking for ID_AA64PFR1_EL1 != 0
> ("instructions accessible at EL0 are implemented")
> and aa64_mte is checking for >= 2 ("full implementation").
> I think a couple of brief comments would clarify:

Done.

> (The other way to arrange this would be to have the 'real'
> TCO regdef in mte_reginfo, and separately have "reginfo
> if we only have the dummy visible-from-EL0-parts-only
> which defines a constant 0 TCO" (and also make the MSR_i
> code implement a RAZ/WI for this case, for consistency).

Done.  I agree this is a better way to treat the EL0-only case...

> An implementation that allows the guest to toggle the PSTATE.TCO
> bit to no visible effect is architecturally valid, though.

... because this could turn out to be slightly confusing.


r~


^ permalink raw reply	[flat|nested] 53+ messages in thread

end of thread, back to index

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-11 13:47 [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
2019-10-11 13:47 ` [PATCH v5 01/22] target/arm: Add MTE_ACTIVE to tb_flags Richard Henderson
2019-10-11 13:47 ` [PATCH v5 02/22] target/arm: Add regime_has_2_ranges Richard Henderson
2019-12-03 11:01   ` Peter Maydell
2019-12-03 15:09     ` Richard Henderson
2019-10-11 13:47 ` [PATCH v5 03/22] target/arm: Add MTE system registers Richard Henderson
2019-12-03 11:48   ` Peter Maydell
2019-12-06 14:47     ` Richard Henderson
2019-10-11 13:47 ` [PATCH v5 04/22] target/arm: Add helper_mte_check{1,2,3} Richard Henderson
2019-12-03 13:42   ` Peter Maydell
2019-12-03 16:06     ` Richard Henderson
2019-12-03 16:26       ` Peter Maydell
2019-12-03 16:14     ` Richard Henderson
2019-10-11 13:47 ` [PATCH v5 05/22] target/arm: Suppress tag check for sp+offset Richard Henderson
2019-12-03 14:07   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 06/22] target/arm: Implement the IRG instruction Richard Henderson
2019-12-03 14:26   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 07/22] target/arm: Implement ADDG, SUBG instructions Richard Henderson
2019-10-11 13:47 ` [PATCH v5 08/22] target/arm: Implement the GMI instruction Richard Henderson
2019-10-11 13:47 ` [PATCH v5 09/22] target/arm: Implement the SUBP instruction Richard Henderson
2019-10-11 13:47 ` [PATCH v5 10/22] target/arm: Define arm_cpu_do_unaligned_access for CONFIG_USER_ONLY Richard Henderson
2019-12-05 16:12   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 11/22] target/arm: Implement LDG, STG, ST2G instructions Richard Henderson
2019-12-05 17:07   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 12/22] target/arm: Implement the STGP instruction Richard Henderson
2019-12-05 17:15   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 13/22] target/arm: Implement the LDGM and STGM instructions Richard Henderson
2019-12-05 17:42   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 14/22] target/arm: Implement the access tag cache flushes Richard Henderson
2019-12-05 17:49   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 15/22] target/arm: Clean address for DC ZVA Richard Henderson
2019-12-05 17:54   ` Peter Maydell
2019-12-05 18:58   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 16/22] target/arm: Implement data cache set allocation tags Richard Henderson
2019-12-05 18:17   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 17/22] target/arm: Set PSTATE.TCO on exception entry Richard Henderson
2019-10-11 13:47 ` [PATCH v5 18/22] target/arm: Enable MTE Richard Henderson
2019-12-05 18:23   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 19/22] target/arm: Cache the Tagged bit for a page in MemTxAttrs Richard Henderson
2019-12-05 18:32   ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 20/22] target/arm: Create tagged ram when MTE is enabled Richard Henderson
2019-12-05 18:40   ` Peter Maydell
2019-12-05 19:24     ` Richard Henderson
2019-12-06  9:51       ` Peter Maydell
2019-10-11 13:47 ` [PATCH v5 21/22] target/arm: Add mmu indexes for tag memory Richard Henderson
2019-12-06 11:46   ` Peter Maydell
2019-12-06 14:03     ` Richard Henderson
2019-10-11 13:47 ` [PATCH v5 22/22] target/arm: Add allocation tag storage for system mode Richard Henderson
2019-12-06 13:02   ` Peter Maydell
2019-12-06 14:14     ` Richard Henderson
2019-10-11 19:32 ` [PATCH v5 00/22] target/arm: Implement ARMv8.5-MemTag, " no-reply
2019-10-15 20:39 ` Evgenii Stepanov
2019-10-15 22:04   ` Richard Henderson

QEMU-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/qemu-devel/0 qemu-devel/git/0.git
	git clone --mirror https://lore.kernel.org/qemu-devel/1 qemu-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 qemu-devel qemu-devel/ https://lore.kernel.org/qemu-devel \
		qemu-devel@nongnu.org
	public-inbox-index qemu-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.nongnu.qemu-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git