All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] RISC-V: Add support for Ztso
  2023-11-13  9:56 [RFC PATCH 0/2] RISC-V: Add TSO extensions (Ztso/Ssdtso) Christoph Muellner
@ 2023-11-13  9:56 ` Christoph Muellner
  2023-11-13  9:56 ` [RFC PATCH 2/2] RISC-V: Add support for Ssdtso Christoph Muellner
  1 sibling, 0 replies; 10+ messages in thread
From: Palmer Dabbelt @ 2022-09-17  7:26 UTC (permalink / raw)
  To: qemu-devel, Richard Henderson; +Cc: Palmer Dabbelt

The Ztso extension was recently frozen, this adds it as a CPU property
and adds various fences throughout the port in order to allow TSO
targets to function on weaker hosts.  We need no fences for AMOs as
they're already SC, the placess we need barriers are described.

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
Like the v1 this has been pretty minimally tested, but I figured I'd
just send it.  This is what I would have done the first time had I not
read that comment near TCG_GUEST_DEFAULT_MO, I think trying to describe
that when I ran into Richard at the Cauldron was probably more confusing
than just digging up the code and sending it along.
---
 target/riscv/cpu.c                      |  3 +++
 target/riscv/cpu.h                      |  1 +
 target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
 target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
 target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
 target/riscv/translate.c                |  3 +++
 6 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ac6f82ebd0..d66169efa5 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -919,6 +919,8 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false),
     DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
 
+    DEFINE_PROP_BOOL("ztso", RISCVCPU, cfg.ext_ztso, false),
+
     /* Vendor-specific custom extensions */
     DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
 
@@ -1094,6 +1096,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
         ISA_EDATA_ENTRY(zksed, ext_zksed),
         ISA_EDATA_ENTRY(zksh, ext_zksh),
         ISA_EDATA_ENTRY(zkt, ext_zkt),
+        ISA_EDATA_ENTRY(ztso, ext_ztso),
         ISA_EDATA_ENTRY(zve32f, ext_zve32f),
         ISA_EDATA_ENTRY(zve64f, ext_zve64f),
         ISA_EDATA_ENTRY(zhinx, ext_zhinx),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 5c7acc055a..c64fd4e258 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -433,6 +433,7 @@ struct RISCVCPUConfig {
     bool ext_zve32f;
     bool ext_zve64f;
     bool ext_zmmul;
+    bool ext_ztso;
     bool rvv_ta_all_1s;
 
     uint32_t mvendorid;
diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
index 45db82c9be..9066e1bde3 100644
--- a/target/riscv/insn_trans/trans_rva.c.inc
+++ b/target/riscv/insn_trans/trans_rva.c.inc
@@ -26,7 +26,11 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop)
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
     }
     tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
-    if (a->aq) {
+    /*
+     * TSO defines AMOs as acquire+release-RCsc, but does not define LR/SC as
+     * AMOs.  Instead treat them like loads.
+     */
+    if (a->aq || ctx->ztso) {
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
     }
 
@@ -61,9 +65,10 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop)
     gen_set_label(l1);
     /*
      * Address comparison failure.  However, we still need to
-     * provide the memory barrier implied by AQ/RL.
+     * provide the memory barrier implied by AQ/RL/TSO.
      */
-    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
+    TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0;
+    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl);
     gen_set_gpr(ctx, a->rd, tcg_constant_tl(1));
 
     gen_set_label(l2);
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index ca8e3d1ea1..9bef42a3e5 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -261,11 +261,19 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
 
 static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
 {
+    bool out;
+
     if (get_xl(ctx) == MXL_RV128) {
-        return gen_load_i128(ctx, a, memop);
+        out = gen_load_i128(ctx, a, memop);
     } else {
-        return gen_load_tl(ctx, a, memop);
+        out = gen_load_tl(ctx, a, memop);
+    }
+
+    if (ctx->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
     }
+
+    return out;
 }
 
 static bool trans_lb(DisasContext *ctx, arg_lb *a)
@@ -322,6 +330,10 @@ static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
     TCGv addr = get_address(ctx, a->rs1, a->imm);
     TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
 
+    if (ctx->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
     tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, memop);
     return true;
 }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 6c091824b6..1994b38035 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -671,8 +671,28 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
     tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
 
+    /*
+     * According to the specification
+     *
+     *   Additionally, if the Ztso extension is implemented, then vector memory
+     *   instructions in the V extension and Zve family of extensions follow
+     *   RVTSO at the instruction level.  The Ztso extension does not
+     *   strengthen the ordering of intra-instruction element accesses.
+     *
+     * as a result neither ordered nor unordered accesses from the V
+     * instructions need ordering within the loop but we do still need barriers
+     * around the loop.
+     */
+    if (is_store && s->ztso) {
+      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
     fn(dest, mask, base, cpu_env, desc);
 
+    if (!is_store && s->ztso) {
+      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    }
+
     tcg_temp_free_ptr(dest);
     tcg_temp_free_ptr(mask);
 
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 63b04e8a94..c7c574b09f 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -109,6 +109,8 @@ typedef struct DisasContext {
     /* PointerMasking extension */
     bool pm_mask_enabled;
     bool pm_base_enabled;
+    /* Ztso */
+    bool ztso;
     /* TCG of the current insn_start */
     TCGOp *insn_start;
 } DisasContext;
@@ -1109,6 +1111,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     memset(ctx->ftemp, 0, sizeof(ctx->ftemp));
     ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
     ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
+    ctx->ztso = cpu->cfg.ext_ztso;
     ctx->zero = tcg_constant_tl(0);
 }
 
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] RISC-V: Add support for Ztso
  2023-11-13  9:56 ` Christoph Muellner
  (?)
@ 2022-09-23  4:35 ` Alistair Francis
  2022-09-23 10:49   ` Palmer Dabbelt
  -1 siblings, 1 reply; 10+ messages in thread
From: Alistair Francis @ 2022-09-23  4:35 UTC (permalink / raw)
  To: Palmer Dabbelt; +Cc: qemu-devel@nongnu.org Developers, Richard Henderson

On Sat, Sep 17, 2022 at 6:12 PM Palmer Dabbelt <palmer@rivosinc.com> wrote:
>
> The Ztso extension was recently frozen, this adds it as a CPU property
> and adds various fences throughout the port in order to allow TSO
> targets to function on weaker hosts.  We need no fences for AMOs as
> they're already SC, the placess we need barriers are described.
>
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> ---
> Like the v1 this has been pretty minimally tested, but I figured I'd
> just send it.  This is what I would have done the first time had I not
> read that comment near TCG_GUEST_DEFAULT_MO, I think trying to describe
> that when I ran into Richard at the Cauldron was probably more confusing
> than just digging up the code and sending it along.

At a quick glance this looks reasonable. Did I miss anything or is
this ready to go?

Alistair

> ---
>  target/riscv/cpu.c                      |  3 +++
>  target/riscv/cpu.h                      |  1 +
>  target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
>  target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
>  target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
>  target/riscv/translate.c                |  3 +++
>  6 files changed, 49 insertions(+), 5 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index ac6f82ebd0..d66169efa5 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -919,6 +919,8 @@ static Property riscv_cpu_extensions[] = {
>      DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false),
>      DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
>
> +    DEFINE_PROP_BOOL("ztso", RISCVCPU, cfg.ext_ztso, false),
> +
>      /* Vendor-specific custom extensions */
>      DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
>
> @@ -1094,6 +1096,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
>          ISA_EDATA_ENTRY(zksed, ext_zksed),
>          ISA_EDATA_ENTRY(zksh, ext_zksh),
>          ISA_EDATA_ENTRY(zkt, ext_zkt),
> +        ISA_EDATA_ENTRY(ztso, ext_ztso),
>          ISA_EDATA_ENTRY(zve32f, ext_zve32f),
>          ISA_EDATA_ENTRY(zve64f, ext_zve64f),
>          ISA_EDATA_ENTRY(zhinx, ext_zhinx),
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 5c7acc055a..c64fd4e258 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -433,6 +433,7 @@ struct RISCVCPUConfig {
>      bool ext_zve32f;
>      bool ext_zve64f;
>      bool ext_zmmul;
> +    bool ext_ztso;
>      bool rvv_ta_all_1s;
>
>      uint32_t mvendorid;
> diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
> index 45db82c9be..9066e1bde3 100644
> --- a/target/riscv/insn_trans/trans_rva.c.inc
> +++ b/target/riscv/insn_trans/trans_rva.c.inc
> @@ -26,7 +26,11 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop)
>          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
>      }
>      tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
> -    if (a->aq) {
> +    /*
> +     * TSO defines AMOs as acquire+release-RCsc, but does not define LR/SC as
> +     * AMOs.  Instead treat them like loads.
> +     */
> +    if (a->aq || ctx->ztso) {
>          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>      }
>
> @@ -61,9 +65,10 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop)
>      gen_set_label(l1);
>      /*
>       * Address comparison failure.  However, we still need to
> -     * provide the memory barrier implied by AQ/RL.
> +     * provide the memory barrier implied by AQ/RL/TSO.
>       */
> -    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
> +    TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0;
> +    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl);
>      gen_set_gpr(ctx, a->rd, tcg_constant_tl(1));
>
>      gen_set_label(l2);
> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
> index ca8e3d1ea1..9bef42a3e5 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -261,11 +261,19 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
>
>  static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
>  {
> +    bool out;
> +
>      if (get_xl(ctx) == MXL_RV128) {
> -        return gen_load_i128(ctx, a, memop);
> +        out = gen_load_i128(ctx, a, memop);
>      } else {
> -        return gen_load_tl(ctx, a, memop);
> +        out = gen_load_tl(ctx, a, memop);
> +    }
> +
> +    if (ctx->ztso) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>      }
> +
> +    return out;
>  }
>
>  static bool trans_lb(DisasContext *ctx, arg_lb *a)
> @@ -322,6 +330,10 @@ static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
>      TCGv addr = get_address(ctx, a->rs1, a->imm);
>      TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
>
> +    if (ctx->ztso) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> +    }
> +
>      tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, memop);
>      return true;
>  }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 6c091824b6..1994b38035 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -671,8 +671,28 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
>      tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
>      tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
>
> +    /*
> +     * According to the specification
> +     *
> +     *   Additionally, if the Ztso extension is implemented, then vector memory
> +     *   instructions in the V extension and Zve family of extensions follow
> +     *   RVTSO at the instruction level.  The Ztso extension does not
> +     *   strengthen the ordering of intra-instruction element accesses.
> +     *
> +     * as a result neither ordered nor unordered accesses from the V
> +     * instructions need ordering within the loop but we do still need barriers
> +     * around the loop.
> +     */
> +    if (is_store && s->ztso) {
> +      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> +    }
> +
>      fn(dest, mask, base, cpu_env, desc);
>
> +    if (!is_store && s->ztso) {
> +      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
> +    }
> +
>      tcg_temp_free_ptr(dest);
>      tcg_temp_free_ptr(mask);
>
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 63b04e8a94..c7c574b09f 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -109,6 +109,8 @@ typedef struct DisasContext {
>      /* PointerMasking extension */
>      bool pm_mask_enabled;
>      bool pm_base_enabled;
> +    /* Ztso */
> +    bool ztso;
>      /* TCG of the current insn_start */
>      TCGOp *insn_start;
>  } DisasContext;
> @@ -1109,6 +1111,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>      memset(ctx->ftemp, 0, sizeof(ctx->ftemp));
>      ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
>      ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
> +    ctx->ztso = cpu->cfg.ext_ztso;
>      ctx->zero = tcg_constant_tl(0);
>  }
>
> --
> 2.34.1
>
>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] RISC-V: Add support for Ztso
  2022-09-23  4:35 ` Alistair Francis
@ 2022-09-23 10:49   ` Palmer Dabbelt
  2022-09-25 10:28     ` Richard Henderson
  0 siblings, 1 reply; 10+ messages in thread
From: Palmer Dabbelt @ 2022-09-23 10:49 UTC (permalink / raw)
  To: alistair23; +Cc: qemu-devel, Richard Henderson

On Thu, 22 Sep 2022 21:35:26 PDT (-0700), alistair23@gmail.com wrote:
> On Sat, Sep 17, 2022 at 6:12 PM Palmer Dabbelt <palmer@rivosinc.com> wrote:
>>
>> The Ztso extension was recently frozen, this adds it as a CPU property
>> and adds various fences throughout the port in order to allow TSO
>> targets to function on weaker hosts.  We need no fences for AMOs as
>> they're already SC, the placess we need barriers are described.
>>
>> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
>> ---
>> Like the v1 this has been pretty minimally tested, but I figured I'd
>> just send it.  This is what I would have done the first time had I not
>> read that comment near TCG_GUEST_DEFAULT_MO, I think trying to describe
>> that when I ran into Richard at the Cauldron was probably more confusing
>> than just digging up the code and sending it along.
>
> At a quick glance this looks reasonable. Did I miss anything or is
> this ready to go?

Richard and I ended up chatting about it at the Cauldron: the original 
plan was to have TCG insert these fences, but that would require a lot 
of plumbing for the RISC-V use case where we can have heterogenous harts 
(and likely soon dynamic harts, IIRC there's some HW that already 
supports that).

Maybe Richard can chime in, but I think the rough plan was to go with 
this and just ammend the commit message explaining why it's different 
than the x86-on-arm64 support that's planned.  I think

    The Ztso extension was recently frozen, this adds it as a CPU property
    and adds various fences throughout the port in order to allow TSO
    targets to function on weaker hosts.  We need no fences for AMOs as
    they're already SC, the placess we need barriers are described.  
    These fences are placed in the RISC-V backend rather than TCG as is 
    planned for x86-on-arm64 because RISC-V allows heterogenous (and 
    likely soon dynamic) hart memory models.

should do it.

> Alistair
>
>> ---
>>  target/riscv/cpu.c                      |  3 +++
>>  target/riscv/cpu.h                      |  1 +
>>  target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
>>  target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
>>  target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
>>  target/riscv/translate.c                |  3 +++
>>  6 files changed, 49 insertions(+), 5 deletions(-)
>>
>> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
>> index ac6f82ebd0..d66169efa5 100644
>> --- a/target/riscv/cpu.c
>> +++ b/target/riscv/cpu.c
>> @@ -919,6 +919,8 @@ static Property riscv_cpu_extensions[] = {
>>      DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false),
>>      DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
>>
>> +    DEFINE_PROP_BOOL("ztso", RISCVCPU, cfg.ext_ztso, false),
>> +
>>      /* Vendor-specific custom extensions */
>>      DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
>>
>> @@ -1094,6 +1096,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
>>          ISA_EDATA_ENTRY(zksed, ext_zksed),
>>          ISA_EDATA_ENTRY(zksh, ext_zksh),
>>          ISA_EDATA_ENTRY(zkt, ext_zkt),
>> +        ISA_EDATA_ENTRY(ztso, ext_ztso),
>>          ISA_EDATA_ENTRY(zve32f, ext_zve32f),
>>          ISA_EDATA_ENTRY(zve64f, ext_zve64f),
>>          ISA_EDATA_ENTRY(zhinx, ext_zhinx),
>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>> index 5c7acc055a..c64fd4e258 100644
>> --- a/target/riscv/cpu.h
>> +++ b/target/riscv/cpu.h
>> @@ -433,6 +433,7 @@ struct RISCVCPUConfig {
>>      bool ext_zve32f;
>>      bool ext_zve64f;
>>      bool ext_zmmul;
>> +    bool ext_ztso;
>>      bool rvv_ta_all_1s;
>>
>>      uint32_t mvendorid;
>> diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
>> index 45db82c9be..9066e1bde3 100644
>> --- a/target/riscv/insn_trans/trans_rva.c.inc
>> +++ b/target/riscv/insn_trans/trans_rva.c.inc
>> @@ -26,7 +26,11 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop)
>>          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
>>      }
>>      tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
>> -    if (a->aq) {
>> +    /*
>> +     * TSO defines AMOs as acquire+release-RCsc, but does not define LR/SC as
>> +     * AMOs.  Instead treat them like loads.
>> +     */
>> +    if (a->aq || ctx->ztso) {
>>          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>>      }
>>
>> @@ -61,9 +65,10 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop)
>>      gen_set_label(l1);
>>      /*
>>       * Address comparison failure.  However, we still need to
>> -     * provide the memory barrier implied by AQ/RL.
>> +     * provide the memory barrier implied by AQ/RL/TSO.
>>       */
>> -    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
>> +    TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0;
>> +    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl);
>>      gen_set_gpr(ctx, a->rd, tcg_constant_tl(1));
>>
>>      gen_set_label(l2);
>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
>> index ca8e3d1ea1..9bef42a3e5 100644
>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>> @@ -261,11 +261,19 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
>>
>>  static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
>>  {
>> +    bool out;
>> +
>>      if (get_xl(ctx) == MXL_RV128) {
>> -        return gen_load_i128(ctx, a, memop);
>> +        out = gen_load_i128(ctx, a, memop);
>>      } else {
>> -        return gen_load_tl(ctx, a, memop);
>> +        out = gen_load_tl(ctx, a, memop);
>> +    }
>> +
>> +    if (ctx->ztso) {
>> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>>      }
>> +
>> +    return out;
>>  }
>>
>>  static bool trans_lb(DisasContext *ctx, arg_lb *a)
>> @@ -322,6 +330,10 @@ static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
>>      TCGv addr = get_address(ctx, a->rs1, a->imm);
>>      TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
>>
>> +    if (ctx->ztso) {
>> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
>> +    }
>> +
>>      tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, memop);
>>      return true;
>>  }
>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
>> index 6c091824b6..1994b38035 100644
>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>> @@ -671,8 +671,28 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
>>      tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
>>      tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
>>
>> +    /*
>> +     * According to the specification
>> +     *
>> +     *   Additionally, if the Ztso extension is implemented, then vector memory
>> +     *   instructions in the V extension and Zve family of extensions follow
>> +     *   RVTSO at the instruction level.  The Ztso extension does not
>> +     *   strengthen the ordering of intra-instruction element accesses.
>> +     *
>> +     * as a result neither ordered nor unordered accesses from the V
>> +     * instructions need ordering within the loop but we do still need barriers
>> +     * around the loop.
>> +     */
>> +    if (is_store && s->ztso) {
>> +      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
>> +    }
>> +
>>      fn(dest, mask, base, cpu_env, desc);
>>
>> +    if (!is_store && s->ztso) {
>> +      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>> +    }
>> +
>>      tcg_temp_free_ptr(dest);
>>      tcg_temp_free_ptr(mask);
>>
>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>> index 63b04e8a94..c7c574b09f 100644
>> --- a/target/riscv/translate.c
>> +++ b/target/riscv/translate.c
>> @@ -109,6 +109,8 @@ typedef struct DisasContext {
>>      /* PointerMasking extension */
>>      bool pm_mask_enabled;
>>      bool pm_base_enabled;
>> +    /* Ztso */
>> +    bool ztso;
>>      /* TCG of the current insn_start */
>>      TCGOp *insn_start;
>>  } DisasContext;
>> @@ -1109,6 +1111,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>>      memset(ctx->ftemp, 0, sizeof(ctx->ftemp));
>>      ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
>>      ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
>> +    ctx->ztso = cpu->cfg.ext_ztso;
>>      ctx->zero = tcg_constant_tl(0);
>>  }
>>
>> --
>> 2.34.1
>>
>>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] RISC-V: Add support for Ztso
  2022-09-23 10:49   ` Palmer Dabbelt
@ 2022-09-25 10:28     ` Richard Henderson
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2022-09-25 10:28 UTC (permalink / raw)
  To: Palmer Dabbelt, alistair23; +Cc: qemu-devel

On 9/23/22 10:49, Palmer Dabbelt wrote:
>     The Ztso extension was recently frozen, this adds it as a CPU property
>     and adds various fences throughout the port in order to allow TSO
>     targets to function on weaker hosts.  We need no fences for AMOs as
>     they're already SC, the placess we need barriers are described.    These fences are 
> placed in the RISC-V backend rather than TCG as is    planned for x86-on-arm64 because 
> RISC-V allows heterogenous (and    likely soon dynamic) hart memory models.

Heterogenous shouldn't have been a problem (no more than Arm a-profile co-existing with 
m-profile), but dynamic would have been difficult to do generically for sure.

Otherwise this description addition looks good.


r~


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RFC PATCH 0/2] RISC-V: Add TSO extensions (Ztso/Ssdtso)
@ 2023-11-13  9:56 Christoph Muellner
  2023-11-13  9:56 ` [RFC PATCH 1/2] RISC-V: Add support for Ztso Christoph Muellner
  2023-11-13  9:56 ` [RFC PATCH 2/2] RISC-V: Add support for Ssdtso Christoph Muellner
  0 siblings, 2 replies; 10+ messages in thread
From: Christoph Muellner @ 2023-11-13  9:56 UTC (permalink / raw)
  To: qemu-riscv, qemu-devel, Alistair Francis, Bin Meng,
	Philipp Tomsich, Palmer Dabbelt, Richard Henderson
  Cc: Christoph Müllner

From: Christoph Müllner <christoph.muellner@vrull.eu>

This series picks up an earlier v2 Ztso patch from Palmer
and adds a second to support Ssdtso.

Palmer's v2 Ztso patch can be found here:
  https://patchwork.kernel.org/project/qemu-devel/patch/20220917072635.11616-1-palmer@rivosinc.com/
This patch did not apply cleanly but the necessary changes were trivial.
There was a request to extend the commit message, which is part of the
posted patch of this series.  As this patch was reviewed a year ago,
I believe it could be merged.

The second patch adds support for dynamic TSO following the second
draft of the Ssdtso extension, which was published recently:
  https://lists.riscv.org/g/tech-arch-review/message/183
Note, that the Ssdtso specification is in development state
(i.e., not frozen or even ratified) which is also the reason
why I marked the series as RFC.

Relevant in this context might be also, that Richard's patch to improve
TCG's memory barrier selection depending on host and guest memory ordering
landed in June:
  https://lore.kernel.org/all/a313b36b-dcc1-f812-ccbd-afed1cbd523b@linaro.org/T/

Christoph Müllner (1):
  RISC-V: Add support for Ssdtso

Palmer Dabbelt (1):
  RISC-V: Add support for Ztso

 target/riscv/cpu.c                      |  4 ++++
 target/riscv/cpu_bits.h                 |  3 +++
 target/riscv/cpu_cfg.h                  |  2 ++
 target/riscv/csr.c                      |  9 ++++++---
 target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
 target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
 target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
 target/riscv/translate.c                |  3 +++
 8 files changed, 60 insertions(+), 8 deletions(-)

-- 
2.41.0



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RFC PATCH 1/2] RISC-V: Add support for Ztso
  2023-11-13  9:56 [RFC PATCH 0/2] RISC-V: Add TSO extensions (Ztso/Ssdtso) Christoph Muellner
@ 2023-11-13  9:56 ` Christoph Muellner
  2023-11-14 13:22   ` Daniel Henrique Barboza
  2023-11-13  9:56 ` [RFC PATCH 2/2] RISC-V: Add support for Ssdtso Christoph Muellner
  1 sibling, 1 reply; 10+ messages in thread
From: Christoph Muellner @ 2023-11-13  9:56 UTC (permalink / raw)
  To: qemu-riscv, qemu-devel, Alistair Francis, Bin Meng,
	Philipp Tomsich, Palmer Dabbelt, Richard Henderson
  Cc: Palmer Dabbelt, Christoph Müllner, Weiwei Li,
	Daniel Henrique Barboza, Liu Zhiwei

From: Palmer Dabbelt <palmer@rivosinc.com>

The Ztso extension is already ratified, this adds it as a CPU property
and adds various fences throughout the port in order to allow TSO
targets to function on weaker hosts.  We need no fences for AMOs as
they're already SC, the placess we need barriers are described.
These fences are placed in the RISC-V backend rather than TCG as is
planned for x86-on-arm64 because RISC-V allows heterogenous (and
likely soon dynamic) hart memory models.

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
 target/riscv/cpu.c                      |  2 ++
 target/riscv/cpu_cfg.h                  |  1 +
 target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
 target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
 target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
 target/riscv/translate.c                |  3 +++
 6 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 83c7c0cf07..b446e553b1 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -118,6 +118,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed),
     ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh),
     ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt),
+    ISA_EXT_DATA_ENTRY(ztso, PRIV_VERSION_1_12_0, ext_ztso),
     ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb),
     ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
     ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
@@ -1336,6 +1337,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     MULTI_EXT_CFG_BOOL("zksed", ext_zksed, false),
     MULTI_EXT_CFG_BOOL("zksh", ext_zksh, false),
     MULTI_EXT_CFG_BOOL("zkt", ext_zkt, false),
+    MULTI_EXT_CFG_BOOL("ztso", ext_ztso, false),
 
     MULTI_EXT_CFG_BOOL("zdinx", ext_zdinx, false),
     MULTI_EXT_CFG_BOOL("zfinx", ext_zfinx, false),
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index f4605fb190..a0f951d9c1 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -70,6 +70,7 @@ struct RISCVCPUConfig {
     bool ext_zihintntl;
     bool ext_zihintpause;
     bool ext_zihpm;
+    bool ext_ztso;
     bool ext_smstateen;
     bool ext_sstc;
     bool ext_svadu;
diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
index 5f194a447b..85c7e31f2a 100644
--- a/target/riscv/insn_trans/trans_rva.c.inc
+++ b/target/riscv/insn_trans/trans_rva.c.inc
@@ -28,7 +28,11 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop)
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
     }
     tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
-    if (a->aq) {
+    /*
+     * TSO defines AMOs as acquire+release-RCsc, but does not define LR/SC as
+     * AMOs.  Instead treat them like loads.
+     */
+    if (a->aq || ctx->ztso) {
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
     }
 
@@ -64,9 +68,10 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop)
     gen_set_label(l1);
     /*
      * Address comparison failure.  However, we still need to
-     * provide the memory barrier implied by AQ/RL.
+     * provide the memory barrier implied by AQ/RL/TSO.
      */
-    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
+    TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0;
+    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl);
     gen_set_gpr(ctx, a->rd, tcg_constant_tl(1));
 
     gen_set_label(l2);
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index faf6d65064..ad40d3e87f 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -266,12 +266,20 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
 
 static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
 {
+    bool out;
+
     decode_save_opc(ctx);
     if (get_xl(ctx) == MXL_RV128) {
-        return gen_load_i128(ctx, a, memop);
+        out = gen_load_i128(ctx, a, memop);
     } else {
-        return gen_load_tl(ctx, a, memop);
+        out = gen_load_tl(ctx, a, memop);
+    }
+
+    if (ctx->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
     }
+
+    return out;
 }
 
 static bool trans_lb(DisasContext *ctx, arg_lb *a)
@@ -328,6 +336,10 @@ static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
     TCGv addr = get_address(ctx, a->rs1, a->imm);
     TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
 
+    if (ctx->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
     tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, memop);
     return true;
 }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 78bd363310..76e63fcbca 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -636,8 +636,28 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
     tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));
 
+    /*
+     * According to the specification
+     *
+     *   Additionally, if the Ztso extension is implemented, then vector memory
+     *   instructions in the V extension and Zve family of extensions follow
+     *   RVTSO at the instruction level.  The Ztso extension does not
+     *   strengthen the ordering of intra-instruction element accesses.
+     *
+     * as a result neither ordered nor unordered accesses from the V
+     * instructions need ordering within the loop but we do still need barriers
+     * around the loop.
+     */
+    if (is_store && s->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
     fn(dest, mask, base, tcg_env, desc);
 
+    if (!is_store && s->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    }
+
     if (!is_store) {
         mark_vs_dirty(s);
     }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index f0be79bb16..ab56051d6d 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -109,6 +109,8 @@ typedef struct DisasContext {
     /* PointerMasking extension */
     bool pm_mask_enabled;
     bool pm_base_enabled;
+    /* Ztso */
+    bool ztso;
     /* Use icount trigger for native debug */
     bool itrigger;
     /* FRM is known to contain a valid value. */
@@ -1194,6 +1196,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->cs = cs;
     ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
     ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
+    ctx->ztso = cpu->cfg.ext_ztso;
     ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER);
     ctx->zero = tcg_constant_tl(0);
     ctx->virt_inst_excp = false;
-- 
2.41.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC PATCH 2/2] RISC-V: Add support for Ssdtso
  2023-11-13  9:56 [RFC PATCH 0/2] RISC-V: Add TSO extensions (Ztso/Ssdtso) Christoph Muellner
  2023-11-13  9:56 ` [RFC PATCH 1/2] RISC-V: Add support for Ztso Christoph Muellner
@ 2023-11-13  9:56 ` Christoph Muellner
  2023-11-14 13:23   ` Daniel Henrique Barboza
  1 sibling, 1 reply; 10+ messages in thread
From: Christoph Muellner @ 2023-11-13  9:56 UTC (permalink / raw)
  To: qemu-riscv, qemu-devel, Alistair Francis, Bin Meng,
	Philipp Tomsich, Palmer Dabbelt, Richard Henderson
  Cc: Christoph Müllner, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei

From: Christoph Müllner <christoph.muellner@vrull.eu>

The Ssdtso extension introduces a DTSO field to the {m,s,h}envcfg
register to enable TSO at run-time.  Building on top of Ztso support,
this patch treates Ssdtso just like Ztso (always execute in TSO mode),
which should be fine from a correctness perspective.

Similar like Ztso, this is expected to have little overhead on
host machines that operate in TSO mode (e.g. x86).
However, executing the TSO fences on guests without TSO, will
have a negative performance impact, regardless if TSO is enabled
in the guest or not (e.g. running a RV guest with Ssdtso and disabled
DTSO bit on an aarch64 host).

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
 target/riscv/cpu.c       | 2 ++
 target/riscv/cpu_bits.h  | 3 +++
 target/riscv/cpu_cfg.h   | 1 +
 target/riscv/csr.c       | 9 ++++++---
 target/riscv/translate.c | 2 +-
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index b446e553b1..e01bc56471 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -149,6 +149,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
     ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
     ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
+    ISA_EXT_DATA_ENTRY(ssdtso, PRIV_VERSION_1_12_0, ext_ssdtso),
     ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc),
     ISA_EXT_DATA_ENTRY(svadu, PRIV_VERSION_1_12_0, ext_svadu),
     ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval),
@@ -1308,6 +1309,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false),
     MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false),
     MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
+    MULTI_EXT_CFG_BOOL("ssdtso", ext_ssdtso, false),
     MULTI_EXT_CFG_BOOL("sstc", ext_sstc, true),
 
     MULTI_EXT_CFG_BOOL("smepmp", ext_smepmp, false),
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index ebd7917d49..166f1a879d 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -751,6 +751,7 @@ typedef enum RISCVException {
 #define MENVCFG_CBIE                       (3UL << 4)
 #define MENVCFG_CBCFE                      BIT(6)
 #define MENVCFG_CBZE                       BIT(7)
+#define MENVCFG_DTSO                       BIT(8)
 #define MENVCFG_ADUE                       (1ULL << 61)
 #define MENVCFG_PBMTE                      (1ULL << 62)
 #define MENVCFG_STCE                       (1ULL << 63)
@@ -764,11 +765,13 @@ typedef enum RISCVException {
 #define SENVCFG_CBIE                       MENVCFG_CBIE
 #define SENVCFG_CBCFE                      MENVCFG_CBCFE
 #define SENVCFG_CBZE                       MENVCFG_CBZE
+#define SENVCFG_DTSO                       MENVCFG_DTSO
 
 #define HENVCFG_FIOM                       MENVCFG_FIOM
 #define HENVCFG_CBIE                       MENVCFG_CBIE
 #define HENVCFG_CBCFE                      MENVCFG_CBCFE
 #define HENVCFG_CBZE                       MENVCFG_CBZE
+#define HENVCFG_DTSO                       MENVCFG_DTSO
 #define HENVCFG_ADUE                       MENVCFG_ADUE
 #define HENVCFG_PBMTE                      MENVCFG_PBMTE
 #define HENVCFG_STCE                       MENVCFG_STCE
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index a0f951d9c1..bdf0d2bbc4 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -72,6 +72,7 @@ struct RISCVCPUConfig {
     bool ext_zihpm;
     bool ext_ztso;
     bool ext_smstateen;
+    bool ext_ssdtso;
     bool ext_sstc;
     bool ext_svadu;
     bool ext_svinval;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index fde7ce1a53..ee40f01185 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -2034,7 +2034,8 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno,
                                     target_ulong val)
 {
     const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
-    uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE | MENVCFG_CBZE;
+    uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE |
+                    MENVCFG_CBZE | MENVCFG_DTSO;
 
     if (riscv_cpu_mxl(env) == MXL_RV64) {
         mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
@@ -2084,7 +2085,8 @@ static RISCVException read_senvcfg(CPURISCVState *env, int csrno,
 static RISCVException write_senvcfg(CPURISCVState *env, int csrno,
                                     target_ulong val)
 {
-    uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE | SENVCFG_CBZE;
+    uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE |
+                    SENVCFG_CBZE | SENVCFG_DTSO;
     RISCVException ret;
 
     ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG);
@@ -2119,7 +2121,8 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno,
 static RISCVException write_henvcfg(CPURISCVState *env, int csrno,
                                     target_ulong val)
 {
-    uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE;
+    uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE |
+                    HENVCFG_CBZE | HENVCFG_DTSO;
     RISCVException ret;
 
     ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index ab56051d6d..a9c2061099 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1196,7 +1196,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->cs = cs;
     ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
     ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
-    ctx->ztso = cpu->cfg.ext_ztso;
+    ctx->ztso = cpu->cfg.ext_ztso || cpu->cfg.ext_ssdtso;
     ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER);
     ctx->zero = tcg_constant_tl(0);
     ctx->virt_inst_excp = false;
-- 
2.41.0



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2] RISC-V: Add support for Ztso
@ 2023-11-13  9:56 ` Christoph Muellner
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Muellner @ 2023-11-13  9:56 UTC (permalink / raw)
  To: qemu-riscv, qemu-devel, Alistair Francis, Bin Meng,
	Philipp Tomsich, Palmer Dabbelt, Richard Henderson
  Cc: Palmer Dabbelt, Weiwei Li, Daniel Henrique Barboza, Liu Zhiwei

From: Palmer Dabbelt <palmer@rivosinc.com>

The Ztso extension was recently frozen, this adds it as a CPU property
and adds various fences throughout the port in order to allow TSO
targets to function on weaker hosts.  We need no fences for AMOs as
they're already SC, the placess we need barriers are described.

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
Like the v1 this has been pretty minimally tested, but I figured I'd
just send it.  This is what I would have done the first time had I not
read that comment near TCG_GUEST_DEFAULT_MO, I think trying to describe
that when I ran into Richard at the Cauldron was probably more confusing
than just digging up the code and sending it along.
---
 target/riscv/cpu.c                      |  3 +++
 target/riscv/cpu.h                      |  1 +
 target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
 target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
 target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
 target/riscv/translate.c                |  3 +++
 6 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ac6f82ebd0..d66169efa5 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -919,6 +919,8 @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false),
     DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
 
+    DEFINE_PROP_BOOL("ztso", RISCVCPU, cfg.ext_ztso, false),
+
     /* Vendor-specific custom extensions */
     DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
 
@@ -1094,6 +1096,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
         ISA_EDATA_ENTRY(zksed, ext_zksed),
         ISA_EDATA_ENTRY(zksh, ext_zksh),
         ISA_EDATA_ENTRY(zkt, ext_zkt),
+        ISA_EDATA_ENTRY(ztso, ext_ztso),
         ISA_EDATA_ENTRY(zve32f, ext_zve32f),
         ISA_EDATA_ENTRY(zve64f, ext_zve64f),
         ISA_EDATA_ENTRY(zhinx, ext_zhinx),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 5c7acc055a..c64fd4e258 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -433,6 +433,7 @@ struct RISCVCPUConfig {
     bool ext_zve32f;
     bool ext_zve64f;
     bool ext_zmmul;
+    bool ext_ztso;
     bool rvv_ta_all_1s;
 
     uint32_t mvendorid;
diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
index 45db82c9be..9066e1bde3 100644
--- a/target/riscv/insn_trans/trans_rva.c.inc
+++ b/target/riscv/insn_trans/trans_rva.c.inc
@@ -26,7 +26,11 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop)
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
     }
     tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
-    if (a->aq) {
+    /*
+     * TSO defines AMOs as acquire+release-RCsc, but does not define LR/SC as
+     * AMOs.  Instead treat them like loads.
+     */
+    if (a->aq || ctx->ztso) {
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
     }
 
@@ -61,9 +65,10 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop)
     gen_set_label(l1);
     /*
      * Address comparison failure.  However, we still need to
-     * provide the memory barrier implied by AQ/RL.
+     * provide the memory barrier implied by AQ/RL/TSO.
      */
-    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
+    TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0;
+    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl);
     gen_set_gpr(ctx, a->rd, tcg_constant_tl(1));
 
     gen_set_label(l2);
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index ca8e3d1ea1..9bef42a3e5 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -261,11 +261,19 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
 
 static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
 {
+    bool out;
+
     if (get_xl(ctx) == MXL_RV128) {
-        return gen_load_i128(ctx, a, memop);
+        out = gen_load_i128(ctx, a, memop);
     } else {
-        return gen_load_tl(ctx, a, memop);
+        out = gen_load_tl(ctx, a, memop);
+    }
+
+    if (ctx->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
     }
+
+    return out;
 }
 
 static bool trans_lb(DisasContext *ctx, arg_lb *a)
@@ -322,6 +330,10 @@ static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
     TCGv addr = get_address(ctx, a->rs1, a->imm);
     TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
 
+    if (ctx->ztso) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
     tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, memop);
     return true;
 }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 6c091824b6..1994b38035 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -671,8 +671,28 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
     tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
 
+    /*
+     * According to the specification
+     *
+     *   Additionally, if the Ztso extension is implemented, then vector memory
+     *   instructions in the V extension and Zve family of extensions follow
+     *   RVTSO at the instruction level.  The Ztso extension does not
+     *   strengthen the ordering of intra-instruction element accesses.
+     *
+     * as a result neither ordered nor unordered accesses from the V
+     * instructions need ordering within the loop but we do still need barriers
+     * around the loop.
+     */
+    if (is_store && s->ztso) {
+      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
     fn(dest, mask, base, cpu_env, desc);
 
+    if (!is_store && s->ztso) {
+      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    }
+
     tcg_temp_free_ptr(dest);
     tcg_temp_free_ptr(mask);
 
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 63b04e8a94..c7c574b09f 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -109,6 +109,8 @@ typedef struct DisasContext {
     /* PointerMasking extension */
     bool pm_mask_enabled;
     bool pm_base_enabled;
+    /* Ztso */
+    bool ztso;
     /* TCG of the current insn_start */
     TCGOp *insn_start;
 } DisasContext;
@@ -1109,6 +1111,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     memset(ctx->ftemp, 0, sizeof(ctx->ftemp));
     ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
     ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
+    ctx->ztso = cpu->cfg.ext_ztso;
     ctx->zero = tcg_constant_tl(0);
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 1/2] RISC-V: Add support for Ztso
  2023-11-13  9:56 ` [RFC PATCH 1/2] RISC-V: Add support for Ztso Christoph Muellner
@ 2023-11-14 13:22   ` Daniel Henrique Barboza
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Henrique Barboza @ 2023-11-14 13:22 UTC (permalink / raw)
  To: Christoph Muellner, qemu-riscv, qemu-devel, Alistair Francis,
	Bin Meng, Philipp Tomsich, Palmer Dabbelt, Richard Henderson
  Cc: Palmer Dabbelt, Weiwei Li, Liu Zhiwei



On 11/13/23 06:56, Christoph Muellner wrote:
> From: Palmer Dabbelt <palmer@rivosinc.com>
> 
> The Ztso extension is already ratified, this adds it as a CPU property
> and adds various fences throughout the port in order to allow TSO
> targets to function on weaker hosts.  We need no fences for AMOs as
> they're already SC, the placess we need barriers are described.

s/placess/places

> These fences are placed in the RISC-V backend rather than TCG as is
> planned for x86-on-arm64 because RISC-V allows heterogenous (and

s/heterogenous/heterogeneous

> likely soon dynamic) hart memory models.
> 
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> ---


Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

>   target/riscv/cpu.c                      |  2 ++
>   target/riscv/cpu_cfg.h                  |  1 +
>   target/riscv/insn_trans/trans_rva.c.inc | 11 ++++++++---
>   target/riscv/insn_trans/trans_rvi.c.inc | 16 ++++++++++++++--
>   target/riscv/insn_trans/trans_rvv.c.inc | 20 ++++++++++++++++++++
>   target/riscv/translate.c                |  3 +++
>   6 files changed, 48 insertions(+), 5 deletions(-)
> 
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 83c7c0cf07..b446e553b1 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -118,6 +118,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
>       ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed),
>       ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh),
>       ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt),
> +    ISA_EXT_DATA_ENTRY(ztso, PRIV_VERSION_1_12_0, ext_ztso),
>       ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb),
>       ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
>       ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
> @@ -1336,6 +1337,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
>       MULTI_EXT_CFG_BOOL("zksed", ext_zksed, false),
>       MULTI_EXT_CFG_BOOL("zksh", ext_zksh, false),
>       MULTI_EXT_CFG_BOOL("zkt", ext_zkt, false),
> +    MULTI_EXT_CFG_BOOL("ztso", ext_ztso, false),
>   
>       MULTI_EXT_CFG_BOOL("zdinx", ext_zdinx, false),
>       MULTI_EXT_CFG_BOOL("zfinx", ext_zfinx, false),
> diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
> index f4605fb190..a0f951d9c1 100644
> --- a/target/riscv/cpu_cfg.h
> +++ b/target/riscv/cpu_cfg.h
> @@ -70,6 +70,7 @@ struct RISCVCPUConfig {
>       bool ext_zihintntl;
>       bool ext_zihintpause;
>       bool ext_zihpm;
> +    bool ext_ztso;
>       bool ext_smstateen;
>       bool ext_sstc;
>       bool ext_svadu;
> diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
> index 5f194a447b..85c7e31f2a 100644
> --- a/target/riscv/insn_trans/trans_rva.c.inc
> +++ b/target/riscv/insn_trans/trans_rva.c.inc
> @@ -28,7 +28,11 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop)
>           tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
>       }
>       tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
> -    if (a->aq) {
> +    /*
> +     * TSO defines AMOs as acquire+release-RCsc, but does not define LR/SC as
> +     * AMOs.  Instead treat them like loads.
> +     */
> +    if (a->aq || ctx->ztso) {
>           tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>       }
>   
> @@ -64,9 +68,10 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop)
>       gen_set_label(l1);
>       /*
>        * Address comparison failure.  However, we still need to
> -     * provide the memory barrier implied by AQ/RL.
> +     * provide the memory barrier implied by AQ/RL/TSO.
>        */
> -    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
> +    TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0;
> +    tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl);
>       gen_set_gpr(ctx, a->rd, tcg_constant_tl(1));
>   
>       gen_set_label(l2);
> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
> index faf6d65064..ad40d3e87f 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -266,12 +266,20 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
>   
>   static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
>   {
> +    bool out;
> +
>       decode_save_opc(ctx);
>       if (get_xl(ctx) == MXL_RV128) {
> -        return gen_load_i128(ctx, a, memop);
> +        out = gen_load_i128(ctx, a, memop);
>       } else {
> -        return gen_load_tl(ctx, a, memop);
> +        out = gen_load_tl(ctx, a, memop);
> +    }
> +
> +    if (ctx->ztso) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
>       }
> +
> +    return out;
>   }
>   
>   static bool trans_lb(DisasContext *ctx, arg_lb *a)
> @@ -328,6 +336,10 @@ static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
>       TCGv addr = get_address(ctx, a->rs1, a->imm);
>       TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
>   
> +    if (ctx->ztso) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> +    }
> +
>       tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, memop);
>       return true;
>   }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 78bd363310..76e63fcbca 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -636,8 +636,28 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
>       tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
>       tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));
>   
> +    /*
> +     * According to the specification
> +     *
> +     *   Additionally, if the Ztso extension is implemented, then vector memory
> +     *   instructions in the V extension and Zve family of extensions follow
> +     *   RVTSO at the instruction level.  The Ztso extension does not
> +     *   strengthen the ordering of intra-instruction element accesses.
> +     *
> +     * as a result neither ordered nor unordered accesses from the V
> +     * instructions need ordering within the loop but we do still need barriers
> +     * around the loop.
> +     */
> +    if (is_store && s->ztso) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
> +    }
> +
>       fn(dest, mask, base, tcg_env, desc);
>   
> +    if (!is_store && s->ztso) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
> +    }
> +
>       if (!is_store) {
>           mark_vs_dirty(s);
>       }
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index f0be79bb16..ab56051d6d 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -109,6 +109,8 @@ typedef struct DisasContext {
>       /* PointerMasking extension */
>       bool pm_mask_enabled;
>       bool pm_base_enabled;
> +    /* Ztso */
> +    bool ztso;
>       /* Use icount trigger for native debug */
>       bool itrigger;
>       /* FRM is known to contain a valid value. */
> @@ -1194,6 +1196,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>       ctx->cs = cs;
>       ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
>       ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
> +    ctx->ztso = cpu->cfg.ext_ztso;
>       ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER);
>       ctx->zero = tcg_constant_tl(0);
>       ctx->virt_inst_excp = false;


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH 2/2] RISC-V: Add support for Ssdtso
  2023-11-13  9:56 ` [RFC PATCH 2/2] RISC-V: Add support for Ssdtso Christoph Muellner
@ 2023-11-14 13:23   ` Daniel Henrique Barboza
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Henrique Barboza @ 2023-11-14 13:23 UTC (permalink / raw)
  To: Christoph Muellner, qemu-riscv, qemu-devel, Alistair Francis,
	Bin Meng, Philipp Tomsich, Palmer Dabbelt, Richard Henderson
  Cc: Weiwei Li, Liu Zhiwei



On 11/13/23 06:56, Christoph Muellner wrote:
> From: Christoph Müllner <christoph.muellner@vrull.eu>
> 
> The Ssdtso extension introduces a DTSO field to the {m,s,h}envcfg
> register to enable TSO at run-time.  Building on top of Ztso support,
> this patch treates Ssdtso just like Ztso (always execute in TSO mode),

s/treates/treats

> which should be fine from a correctness perspective.
> 
> Similar like Ztso, this is expected to have little overhead on
> host machines that operate in TSO mode (e.g. x86).
> However, executing the TSO fences on guests without TSO, will
> have a negative performance impact, regardless if TSO is enabled
> in the guest or not (e.g. running a RV guest with Ssdtso and disabled
> DTSO bit on an aarch64 host).
> 
> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> ---

Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

>   target/riscv/cpu.c       | 2 ++
>   target/riscv/cpu_bits.h  | 3 +++
>   target/riscv/cpu_cfg.h   | 1 +
>   target/riscv/csr.c       | 9 ++++++---
>   target/riscv/translate.c | 2 +-
>   5 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index b446e553b1..e01bc56471 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -149,6 +149,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
>       ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
>       ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
>       ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
> +    ISA_EXT_DATA_ENTRY(ssdtso, PRIV_VERSION_1_12_0, ext_ssdtso),
>       ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc),
>       ISA_EXT_DATA_ENTRY(svadu, PRIV_VERSION_1_12_0, ext_svadu),
>       ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval),
> @@ -1308,6 +1309,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
>       MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false),
>       MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false),
>       MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
> +    MULTI_EXT_CFG_BOOL("ssdtso", ext_ssdtso, false),
>       MULTI_EXT_CFG_BOOL("sstc", ext_sstc, true),
>   
>       MULTI_EXT_CFG_BOOL("smepmp", ext_smepmp, false),
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index ebd7917d49..166f1a879d 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -751,6 +751,7 @@ typedef enum RISCVException {
>   #define MENVCFG_CBIE                       (3UL << 4)
>   #define MENVCFG_CBCFE                      BIT(6)
>   #define MENVCFG_CBZE                       BIT(7)
> +#define MENVCFG_DTSO                       BIT(8)
>   #define MENVCFG_ADUE                       (1ULL << 61)
>   #define MENVCFG_PBMTE                      (1ULL << 62)
>   #define MENVCFG_STCE                       (1ULL << 63)
> @@ -764,11 +765,13 @@ typedef enum RISCVException {
>   #define SENVCFG_CBIE                       MENVCFG_CBIE
>   #define SENVCFG_CBCFE                      MENVCFG_CBCFE
>   #define SENVCFG_CBZE                       MENVCFG_CBZE
> +#define SENVCFG_DTSO                       MENVCFG_DTSO
>   
>   #define HENVCFG_FIOM                       MENVCFG_FIOM
>   #define HENVCFG_CBIE                       MENVCFG_CBIE
>   #define HENVCFG_CBCFE                      MENVCFG_CBCFE
>   #define HENVCFG_CBZE                       MENVCFG_CBZE
> +#define HENVCFG_DTSO                       MENVCFG_DTSO
>   #define HENVCFG_ADUE                       MENVCFG_ADUE
>   #define HENVCFG_PBMTE                      MENVCFG_PBMTE
>   #define HENVCFG_STCE                       MENVCFG_STCE
> diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
> index a0f951d9c1..bdf0d2bbc4 100644
> --- a/target/riscv/cpu_cfg.h
> +++ b/target/riscv/cpu_cfg.h
> @@ -72,6 +72,7 @@ struct RISCVCPUConfig {
>       bool ext_zihpm;
>       bool ext_ztso;
>       bool ext_smstateen;
> +    bool ext_ssdtso;
>       bool ext_sstc;
>       bool ext_svadu;
>       bool ext_svinval;
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index fde7ce1a53..ee40f01185 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -2034,7 +2034,8 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno,
>                                       target_ulong val)
>   {
>       const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
> -    uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE | MENVCFG_CBZE;
> +    uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE |
> +                    MENVCFG_CBZE | MENVCFG_DTSO;
>   
>       if (riscv_cpu_mxl(env) == MXL_RV64) {
>           mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
> @@ -2084,7 +2085,8 @@ static RISCVException read_senvcfg(CPURISCVState *env, int csrno,
>   static RISCVException write_senvcfg(CPURISCVState *env, int csrno,
>                                       target_ulong val)
>   {
> -    uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE | SENVCFG_CBZE;
> +    uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE |
> +                    SENVCFG_CBZE | SENVCFG_DTSO;
>       RISCVException ret;
>   
>       ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG);
> @@ -2119,7 +2121,8 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno,
>   static RISCVException write_henvcfg(CPURISCVState *env, int csrno,
>                                       target_ulong val)
>   {
> -    uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE;
> +    uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE |
> +                    HENVCFG_CBZE | HENVCFG_DTSO;
>       RISCVException ret;
>   
>       ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index ab56051d6d..a9c2061099 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -1196,7 +1196,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>       ctx->cs = cs;
>       ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
>       ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
> -    ctx->ztso = cpu->cfg.ext_ztso;
> +    ctx->ztso = cpu->cfg.ext_ztso || cpu->cfg.ext_ssdtso;
>       ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER);
>       ctx->zero = tcg_constant_tl(0);
>       ctx->virt_inst_excp = false;


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-11-14 13:23 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-17  7:26 [PATCH v2] RISC-V: Add support for Ztso Palmer Dabbelt
2023-11-13  9:56 ` Christoph Muellner
2022-09-23  4:35 ` Alistair Francis
2022-09-23 10:49   ` Palmer Dabbelt
2022-09-25 10:28     ` Richard Henderson
2023-11-13  9:56 [RFC PATCH 0/2] RISC-V: Add TSO extensions (Ztso/Ssdtso) Christoph Muellner
2023-11-13  9:56 ` [RFC PATCH 1/2] RISC-V: Add support for Ztso Christoph Muellner
2023-11-14 13:22   ` Daniel Henrique Barboza
2023-11-13  9:56 ` [RFC PATCH 2/2] RISC-V: Add support for Ssdtso Christoph Muellner
2023-11-14 13:23   ` Daniel Henrique Barboza

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.