qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/1] target/arm: Reduce aa64_va_parameter overhead
@ 2020-02-06 13:08 Richard Henderson
  2020-02-06 13:08 ` [PATCH 1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid Richard Henderson
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2020-02-06 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

Based-on: <20200206105448.4726-1-richard.henderson@linaro.org>
("target/arm: Implement ARMv8.1-VHE")

It's not really dependent on VHE, but there's a minor patch
conflict which I've resolved in favor of the larger patch set.


r~


Richard Henderson (1):
  target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid

 target/arm/internals.h |  3 --
 target/arm/helper.c    | 68 +++++++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 34 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid
  2020-02-06 13:08 [PATCH 0/1] target/arm: Reduce aa64_va_parameter overhead Richard Henderson
@ 2020-02-06 13:08 ` Richard Henderson
  2020-02-07 14:00   ` Peter Maydell
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2020-02-06 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

For the purpose of rebuild_hflags_a64, we do not need to compute
all of the va parameters, only tbi.  Moreover, we can compute them
in a form that is more useful to storing in hflags.

This eliminates the need for aa64_va_parameter_both, so fold that
in to aa64_va_parameter.  The remaining calls to aa64_va_parameter
are in get_phys_addr_lpae and in pauth_helper.c.

This reduces the total cpu consumption of aa64_va_parameter in a
kernel boot plus a kvm guest kernel boot from 3% to 0.5%.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/internals.h |  3 --
 target/arm/helper.c    | 68 +++++++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 6d4a942bde..6ac84bbca7 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1042,15 +1042,12 @@ typedef struct ARMVAParameters {
     unsigned tsz    : 8;
     unsigned select : 1;
     bool tbi        : 1;
-    bool tbid       : 1;
     bool epd        : 1;
     bool hpd        : 1;
     bool using16k   : 1;
     bool using64k   : 1;
 } ARMVAParameters;
 
-ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
-                                        ARMMMUIdx mmu_idx);
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
                                    ARMMMUIdx mmu_idx, bool data);
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7d15d5c933..d2e9332696 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -10067,12 +10067,34 @@ static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
 }
 #endif /* !CONFIG_USER_ONLY */
 
-ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
-                                        ARMMMUIdx mmu_idx)
+static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
+{
+    if (regime_has_2_ranges(mmu_idx)) {
+        return extract64(tcr, 37, 2);
+    } else if (mmu_idx == ARMMMUIdx_Stage2) {
+        return 0; /* VTCR_EL2 */
+    } else {
+        return extract32(tcr, 20, 1);
+    }
+}
+
+static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
+{
+    if (regime_has_2_ranges(mmu_idx)) {
+        return extract64(tcr, 51, 2);
+    } else if (mmu_idx == ARMMMUIdx_Stage2) {
+        return 0; /* VTCR_EL2 */
+    } else {
+        return extract32(tcr, 29, 1);
+    }
+}
+
+ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
+                                   ARMMMUIdx mmu_idx, bool data)
 {
     uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
-    bool tbi, tbid, epd, hpd, using16k, using64k;
-    int select, tsz;
+    bool epd, hpd, using16k, using64k;
+    int select, tsz, tbi;
 
     /*
      * Bit 55 is always between the two regions, and is canonical for
@@ -10086,11 +10108,9 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         using16k = extract32(tcr, 15, 1);
         if (mmu_idx == ARMMMUIdx_Stage2) {
             /* VTCR_EL2 */
-            tbi = tbid = hpd = false;
+            hpd = false;
         } else {
-            tbi = extract32(tcr, 20, 1);
             hpd = extract32(tcr, 24, 1);
-            tbid = extract32(tcr, 29, 1);
         }
         epd = false;
     } else if (!select) {
@@ -10098,27 +10118,29 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
         epd = extract32(tcr, 7, 1);
         using64k = extract32(tcr, 14, 1);
         using16k = extract32(tcr, 15, 1);
-        tbi = extract64(tcr, 37, 1);
         hpd = extract64(tcr, 41, 1);
-        tbid = extract64(tcr, 51, 1);
     } else {
         int tg = extract32(tcr, 30, 2);
         using16k = tg == 1;
         using64k = tg == 3;
         tsz = extract32(tcr, 16, 6);
         epd = extract32(tcr, 23, 1);
-        tbi = extract64(tcr, 38, 1);
         hpd = extract64(tcr, 42, 1);
-        tbid = extract64(tcr, 52, 1);
     }
     tsz = MIN(tsz, 39);  /* TODO: ARMv8.4-TTST */
     tsz = MAX(tsz, 16);  /* TODO: ARMv8.2-LVA  */
 
+    /* Present TBI as a composite with TBID.  */
+    tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+    if (!data) {
+        tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
+    }
+    tbi = (tbi >> select) & 1;
+
     return (ARMVAParameters) {
         .tsz = tsz,
         .select = select,
         .tbi = tbi,
-        .tbid = tbid,
         .epd = epd,
         .hpd = hpd,
         .using16k = using16k,
@@ -10126,16 +10148,6 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
     };
 }
 
-ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
-                                   ARMMMUIdx mmu_idx, bool data)
-{
-    ARMVAParameters ret = aa64_va_parameters_both(env, va, mmu_idx);
-
-    /* Present TBI as a composite with TBID.  */
-    ret.tbi &= (data || !ret.tbid);
-    return ret;
-}
-
 #ifndef CONFIG_USER_ONLY
 static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
                                           ARMMMUIdx mmu_idx)
@@ -11955,21 +11967,15 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
 {
     uint32_t flags = rebuild_hflags_aprofile(env);
     ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
-    ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
+    uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
     uint64_t sctlr;
     int tbii, tbid;
 
     flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
 
     /* Get control bits for tagged addresses.  */
-    if (regime_has_2_ranges(mmu_idx)) {
-        ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
-        tbid = (p1.tbi << 1) | p0.tbi;
-        tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
-    } else {
-        tbid = p0.tbi;
-        tbii = tbid & !p0.tbid;
-    }
+    tbid = aa64_va_parameter_tbi(tcr, mmu_idx);
+    tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx);
 
     flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
     flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid
  2020-02-06 13:08 ` [PATCH 1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid Richard Henderson
@ 2020-02-07 14:00   ` Peter Maydell
  2020-02-07 14:30     ` Richard Henderson
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Maydell @ 2020-02-07 14:00 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers

On Thu, 6 Feb 2020 at 13:08, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> For the purpose of rebuild_hflags_a64, we do not need to compute
> all of the va parameters, only tbi.  Moreover, we can compute them
> in a form that is more useful to storing in hflags.
>
> This eliminates the need for aa64_va_parameter_both, so fold that
> in to aa64_va_parameter.  The remaining calls to aa64_va_parameter
> are in get_phys_addr_lpae and in pauth_helper.c.
>
> This reduces the total cpu consumption of aa64_va_parameter in a
> kernel boot plus a kvm guest kernel boot from 3% to 0.5%.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/internals.h |  3 --
>  target/arm/helper.c    | 68 +++++++++++++++++++++++-------------------
>  2 files changed, 37 insertions(+), 34 deletions(-)
>
> diff --git a/target/arm/internals.h b/target/arm/internals.h
> index 6d4a942bde..6ac84bbca7 100644
> --- a/target/arm/internals.h
> +++ b/target/arm/internals.h
> @@ -1042,15 +1042,12 @@ typedef struct ARMVAParameters {
>      unsigned tsz    : 8;
>      unsigned select : 1;
>      bool tbi        : 1;
> -    bool tbid       : 1;
>      bool epd        : 1;
>      bool hpd        : 1;
>      bool using16k   : 1;
>      bool using64k   : 1;
>  } ARMVAParameters;
>
> -ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
> -                                        ARMMMUIdx mmu_idx);
>  ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
>                                     ARMMMUIdx mmu_idx, bool data);
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 7d15d5c933..d2e9332696 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -10067,12 +10067,34 @@ static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
>  }
>  #endif /* !CONFIG_USER_ONLY */
>
> -ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
> -                                        ARMMMUIdx mmu_idx)
> +static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
> +{
> +    if (regime_has_2_ranges(mmu_idx)) {
> +        return extract64(tcr, 37, 2);
> +    } else if (mmu_idx == ARMMMUIdx_Stage2) {
> +        return 0; /* VTCR_EL2 */
> +    } else {
> +        return extract32(tcr, 20, 1);
> +    }

So, this function returns either the two TBI bits, for
the 2-ranges case, or a single TBI bit with bit 1 always
zero, in the 1-range case...

>
> +    /* Present TBI as a composite with TBID.  */
> +    tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
> +    if (!data) {
> +        tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
> +    }
> +    tbi = (tbi >> select) & 1;

...but aa64_va_parameters() always sets
    select = extract64(va, 55, 1);
even for the 1-range case, and then we assume in this bit
of code that we can pull the corresponding bit out of tbi.

Don't we need to either duplicate the bit returned by
aa64_va_parameter_tbi() in the 1-range case, or else
only shift tbi by 'select' in the 2-range case ?

thanks
-- PMM


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid
  2020-02-07 14:00   ` Peter Maydell
@ 2020-02-07 14:30     ` Richard Henderson
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2020-02-07 14:30 UTC (permalink / raw)
  To: Peter Maydell; +Cc: QEMU Developers

On 2/7/20 2:00 PM, Peter Maydell wrote:
>>
>> +    /* Present TBI as a composite with TBID.  */
>> +    tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
>> +    if (!data) {
>> +        tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
>> +    }
>> +    tbi = (tbi >> select) & 1;
> 
> ...but aa64_va_parameters() always sets
>     select = extract64(va, 55, 1);
> even for the 1-range case, and then we assume in this bit
> of code that we can pull the corresponding bit out of tbi.
> 
> Don't we need to either duplicate the bit returned by
> aa64_va_parameter_tbi() in the 1-range case, or else
> only shift tbi by 'select' in the 2-range case ?

I think we need to force select = 0 in the 1-range case, since "select == 1"
makes no sense in that case.


r~


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-02-07 14:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-06 13:08 [PATCH 0/1] target/arm: Reduce aa64_va_parameter overhead Richard Henderson
2020-02-06 13:08 ` [PATCH 1/1] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid Richard Henderson
2020-02-07 14:00   ` Peter Maydell
2020-02-07 14:30     ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).