qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] target/ppc: Use probe_access
@ 2020-01-29 23:50 Richard Henderson
  2020-01-29 23:50 ` [PATCH 1/4] target/ppc: Use probe_access for LSW, STSW Richard Henderson
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Richard Henderson @ 2020-01-29 23:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: hsp.cat7, qemu-ppc, david

The first two address the performance regression noticed
by Howard Spoelstra.  The last two are just something I
noticed at the same time.


r~


Richard Henderson (4):
  target/ppc: Use probe_access for LSW, STSW
  target/ppc: Use probe_access for LMW, STMW
  target/ppc: Remove redundant mask in DCBZ
  target/ppc: Use probe_write for DCBZ

 target/ppc/mem_helper.c | 197 +++++++++++++++++++++++++++++++++-------
 1 file changed, 162 insertions(+), 35 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/4] target/ppc: Use probe_access for LSW, STSW
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
@ 2020-01-29 23:50 ` Richard Henderson
  2020-01-29 23:50 ` [PATCH 2/4] target/ppc: Use probe_access for LMW, STMW Richard Henderson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2020-01-29 23:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: hsp.cat7, qemu-ppc, david

Use a minimum number of mmu lookups for the contiguous bytes
that are accessed.  If the lookup succeeds, we can finish the
operation with host addresses only.

Reported-by: Howard Spoelstra <hsp.cat7@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/mem_helper.c | 148 ++++++++++++++++++++++++++++++++++------
 1 file changed, 128 insertions(+), 20 deletions(-)

diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index e8e2a8ac2a..508d472a2f 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -56,6 +56,32 @@ static inline target_ulong addr_add(CPUPPCState *env, target_ulong addr,
     }
 }
 
+static void *probe_contiguous(CPUPPCState *env, target_ulong addr, uint32_t nb,
+                              MMUAccessType access_type, int mmu_idx,
+                              uintptr_t raddr)
+{
+    void *host1, *host2;
+    uint32_t nb_pg1, nb_pg2;
+
+    nb_pg1 = -(addr | TARGET_PAGE_MASK);
+    if (likely(nb <= nb_pg1)) {
+        /* The entire operation is on a single page.  */
+        return probe_access(env, addr, nb, access_type, mmu_idx, raddr);
+    }
+
+    /* The operation spans two pages.  */
+    nb_pg2 = nb - nb_pg1;
+    host1 = probe_access(env, addr, nb_pg1, access_type, mmu_idx, raddr);
+    addr = addr_add(env, addr, nb_pg1);
+    host2 = probe_access(env, addr, nb_pg2, access_type, mmu_idx, raddr);
+
+    /* If the two host pages are contiguous, optimize.  */
+    if (host2 == host1 + nb_pg1) {
+        return host1;
+    }
+    return NULL;
+}
+
 void helper_lmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
 {
     for (; reg < 32; reg++) {
@@ -84,23 +110,65 @@ void helper_stmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
 static void do_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
                    uint32_t reg, uintptr_t raddr)
 {
-    int sh;
+    int mmu_idx;
+    void *host;
+    uint32_t val;
 
-    for (; nb > 3; nb -= 4) {
-        env->gpr[reg] = cpu_ldl_data_ra(env, addr, raddr);
-        reg = (reg + 1) % 32;
-        addr = addr_add(env, addr, 4);
+    if (unlikely(nb == 0)) {
+        return;
     }
-    if (unlikely(nb > 0)) {
-        env->gpr[reg] = 0;
-        for (sh = 24; nb > 0; nb--, sh -= 8) {
-            env->gpr[reg] |= cpu_ldub_data_ra(env, addr, raddr) << sh;
-            addr = addr_add(env, addr, 1);
+
+    mmu_idx = cpu_mmu_index(env, false);
+    host = probe_contiguous(env, addr, nb, MMU_DATA_LOAD, mmu_idx, raddr);
+
+    if (likely(host)) {
+        /* Fast path -- the entire operation is in RAM at host.  */
+        for (; nb > 3; nb -= 4) {
+            env->gpr[reg] = (uint32_t)ldl_be_p(host);
+            reg = (reg + 1) % 32;
+            host += 4;
+        }
+        switch (nb) {
+        default:
+            return;
+        case 1:
+            val = ldub_p(host) << 24;
+            break;
+        case 2:
+            val = lduw_be_p(host) << 16;
+            break;
+        case 3:
+            val = (lduw_be_p(host) << 16) | (ldub_p(host + 2) << 8);
+            break;
+        }
+    } else {
+        /* Slow path -- at least some of the operation requires i/o.  */
+        for (; nb > 3; nb -= 4) {
+            env->gpr[reg] = cpu_ldl_mmuidx_ra(env, addr, mmu_idx, raddr);
+            reg = (reg + 1) % 32;
+            addr = addr_add(env, addr, 4);
+        }
+        switch (nb) {
+        default:
+            return;
+        case 1:
+            val = cpu_ldub_mmuidx_ra(env, addr, mmu_idx, raddr) << 24;
+            break;
+        case 2:
+            val = cpu_lduw_mmuidx_ra(env, addr, mmu_idx, raddr) << 16;
+            break;
+        case 3:
+            val = cpu_lduw_mmuidx_ra(env, addr, mmu_idx, raddr) << 16;
+            addr = addr_add(env, addr, 2);
+            val |= cpu_ldub_mmuidx_ra(env, addr, mmu_idx, raddr) << 8;
+            break;
         }
     }
+    env->gpr[reg] = val;
 }
 
-void helper_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg)
+void helper_lsw(CPUPPCState *env, target_ulong addr,
+                uint32_t nb, uint32_t reg)
 {
     do_lsw(env, addr, nb, reg, GETPC());
 }
@@ -130,17 +198,57 @@ void helper_lswx(CPUPPCState *env, target_ulong addr, uint32_t reg,
 void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
                  uint32_t reg)
 {
-    int sh;
+    uintptr_t raddr = GETPC();
+    int mmu_idx;
+    void *host;
+    uint32_t val;
 
-    for (; nb > 3; nb -= 4) {
-        cpu_stl_data_ra(env, addr, env->gpr[reg], GETPC());
-        reg = (reg + 1) % 32;
-        addr = addr_add(env, addr, 4);
+    if (unlikely(nb == 0)) {
+        return;
     }
-    if (unlikely(nb > 0)) {
-        for (sh = 24; nb > 0; nb--, sh -= 8) {
-            cpu_stb_data_ra(env, addr, (env->gpr[reg] >> sh) & 0xFF, GETPC());
-            addr = addr_add(env, addr, 1);
+
+    mmu_idx = cpu_mmu_index(env, false);
+    host = probe_contiguous(env, addr, nb, MMU_DATA_STORE, mmu_idx, raddr);
+
+    if (likely(host)) {
+        /* Fast path -- the entire operation is in RAM at host.  */
+        for (; nb > 3; nb -= 4) {
+            stl_be_p(host, env->gpr[reg]);
+            reg = (reg + 1) % 32;
+            host += 4;
+        }
+        val = env->gpr[reg];
+        switch (nb) {
+        case 1:
+            stb_p(host, val >> 24);
+            break;
+        case 2:
+            stw_be_p(host, val >> 16);
+            break;
+        case 3:
+            stw_be_p(host, val >> 16);
+            stb_p(host + 2, val >> 8);
+            break;
+        }
+    } else {
+        for (; nb > 3; nb -= 4) {
+            cpu_stl_mmuidx_ra(env, addr, env->gpr[reg], mmu_idx, raddr);
+            reg = (reg + 1) % 32;
+            addr = addr_add(env, addr, 4);
+        }
+        val = env->gpr[reg];
+        switch (nb) {
+        case 1:
+            cpu_stb_mmuidx_ra(env, addr, val >> 24, mmu_idx, raddr);
+            break;
+        case 2:
+            cpu_stw_mmuidx_ra(env, addr, val >> 16, mmu_idx, raddr);
+            break;
+        case 3:
+            cpu_stw_mmuidx_ra(env, addr, val >> 16, mmu_idx, raddr);
+            addr = addr_add(env, addr, 2);
+            cpu_stb_mmuidx_ra(env, addr, val >> 8, mmu_idx, raddr);
+            break;
         }
     }
 }
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] target/ppc: Use probe_access for LMW, STMW
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
  2020-01-29 23:50 ` [PATCH 1/4] target/ppc: Use probe_access for LSW, STSW Richard Henderson
@ 2020-01-29 23:50 ` Richard Henderson
  2020-01-29 23:50 ` [PATCH 3/4] target/ppc: Remove redundant mask in DCBZ Richard Henderson
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2020-01-29 23:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: hsp.cat7, qemu-ppc, david

Use a minimum number of mmu lookups for the contiguous bytes
that are accessed.  If the lookup succeeds, we can finish the
operation with host addresses only.

Reported-by: Howard Spoelstra <hsp.cat7@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/mem_helper.c | 45 +++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 508d472a2f..e7d3a79d96 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -84,26 +84,45 @@ static void *probe_contiguous(CPUPPCState *env, target_ulong addr, uint32_t nb,
 
 void helper_lmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
 {
-    for (; reg < 32; reg++) {
-        if (needs_byteswap(env)) {
-            env->gpr[reg] = bswap32(cpu_ldl_data_ra(env, addr, GETPC()));
-        } else {
-            env->gpr[reg] = cpu_ldl_data_ra(env, addr, GETPC());
+    uintptr_t raddr = GETPC();
+    int mmu_idx = cpu_mmu_index(env, false);
+    void *host = probe_contiguous(env, addr, (32 - reg) * 4,
+                                  MMU_DATA_LOAD, mmu_idx, raddr);
+
+    if (likely(host)) {
+        /* Fast path -- the entire operation is in RAM at host.  */
+        for (; reg < 32; reg++) {
+            env->gpr[reg] = (uint32_t)ldl_be_p(host);
+            host += 4;
+        }
+    } else {
+        /* Slow path -- at least some of the operation requires i/o.  */
+        for (; reg < 32; reg++) {
+            env->gpr[reg] = cpu_ldl_mmuidx_ra(env, addr, mmu_idx, raddr);
+            addr = addr_add(env, addr, 4);
         }
-        addr = addr_add(env, addr, 4);
     }
 }
 
 void helper_stmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
 {
-    for (; reg < 32; reg++) {
-        if (needs_byteswap(env)) {
-            cpu_stl_data_ra(env, addr, bswap32((uint32_t)env->gpr[reg]),
-                                                   GETPC());
-        } else {
-            cpu_stl_data_ra(env, addr, (uint32_t)env->gpr[reg], GETPC());
+    uintptr_t raddr = GETPC();
+    int mmu_idx = cpu_mmu_index(env, false);
+    void *host = probe_contiguous(env, addr, (32 - reg) * 4,
+                                  MMU_DATA_STORE, mmu_idx, raddr);
+
+    if (likely(host)) {
+        /* Fast path -- the entire operation is in RAM at host.  */
+        for (; reg < 32; reg++) {
+            stl_be_p(host, env->gpr[reg]);
+            host += 4;
+        }
+    } else {
+        /* Slow path -- at least some of the operation requires i/o.  */
+        for (; reg < 32; reg++) {
+            cpu_stl_mmuidx_ra(env, addr, env->gpr[reg], mmu_idx, raddr);
+            addr = addr_add(env, addr, 4);
         }
-        addr = addr_add(env, addr, 4);
     }
 }
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] target/ppc: Remove redundant mask in DCBZ
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
  2020-01-29 23:50 ` [PATCH 1/4] target/ppc: Use probe_access for LSW, STSW Richard Henderson
  2020-01-29 23:50 ` [PATCH 2/4] target/ppc: Use probe_access for LMW, STMW Richard Henderson
@ 2020-01-29 23:50 ` Richard Henderson
  2020-01-29 23:50 ` [PATCH 4/4] target/ppc: Use probe_write for DCBZ Richard Henderson
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2020-01-29 23:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: hsp.cat7, qemu-ppc, david

The value of addr has already been masked, just above.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/mem_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index e7d3a79d96..0cb78777e7 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -293,7 +293,7 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
     addr &= mask;
 
     /* Check reservation */
-    if ((env->reserve_addr & mask) == (addr & mask))  {
+    if ((env->reserve_addr & mask) == addr)  {
         env->reserve_addr = (target_ulong)-1ULL;
     }
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] target/ppc: Use probe_write for DCBZ
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
                   ` (2 preceding siblings ...)
  2020-01-29 23:50 ` [PATCH 3/4] target/ppc: Remove redundant mask in DCBZ Richard Henderson
@ 2020-01-29 23:50 ` Richard Henderson
  2020-01-30  1:35 ` [PATCH 0/4] target/ppc: Use probe_access Aleksandar Markovic
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2020-01-29 23:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: hsp.cat7, qemu-ppc, david

Using probe_write instead of tlb_vaddr_to_host means that we
process watchpoints and notdirty pages more efficiently.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/mem_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 0cb78777e7..98f589552b 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -298,7 +298,7 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
     }
 
     /* Try fast path translate */
-    haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
+    haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
     if (haddr) {
         memset(haddr, 0, dcbz_size);
     } else {
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
                   ` (3 preceding siblings ...)
  2020-01-29 23:50 ` [PATCH 4/4] target/ppc: Use probe_write for DCBZ Richard Henderson
@ 2020-01-30  1:35 ` Aleksandar Markovic
  2020-01-30 16:09   ` Richard Henderson
  2020-01-30  7:08 ` Howard Spoelstra
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 12+ messages in thread
From: Aleksandar Markovic @ 2020-01-30  1:35 UTC (permalink / raw)
  To: Richard Henderson; +Cc: david, qemu-ppc, QEMU Developers, Howard Spoelstra

[-- Attachment #1: Type: text/plain, Size: 1024 bytes --]

00:51 Čet, 30.01.2020. Richard Henderson <richard.henderson@linaro.org> је
написао/ла:
>
> The first two address the performance regression noticed
> by Howard Spoelstra.  The last two are just something I
> noticed at the same time.
>

But the performance regression, according to Howard's bisect analysis, happened
because of a change in target-independent code, and the fix presented
here is in target-specific code. This defies basic logic and deserves a clear
and detailed explanation.

My additional concern, of course, is: Are other targets exposed to
performance degradation, and why?

Thanks,
Aleksandar

>
> r~
>
>
> Richard Henderson (4):
>   target/ppc: Use probe_access for LSW, STSW
>   target/ppc: Use probe_access for LMW, STMW
>   target/ppc: Remove redundant mask in DCBZ
>   target/ppc: Use probe_write for DCBZ
>
>  target/ppc/mem_helper.c | 197 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 162 insertions(+), 35 deletions(-)
>
> --
> 2.20.1
>
>

[-- Attachment #2: Type: text/html, Size: 1348 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
                   ` (4 preceding siblings ...)
  2020-01-30  1:35 ` [PATCH 0/4] target/ppc: Use probe_access Aleksandar Markovic
@ 2020-01-30  7:08 ` Howard Spoelstra
  2020-01-30 15:09 ` Howard Spoelstra
  2020-01-30 23:54 ` David Gibson
  7 siblings, 0 replies; 12+ messages in thread
From: Howard Spoelstra @ 2020-01-30  7:08 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-ppc, qemu-devel qemu-devel, David Gibson

[-- Attachment #1: Type: text/plain, Size: 692 bytes --]

On Thu, Jan 30, 2020 at 12:50 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> The first two address the performance regression noticed
> by Howard Spoelstra.  The last two are just something I
> noticed at the same time.
>
>
> r~
>
>
> Richard Henderson (4):
>   target/ppc: Use probe_access for LSW, STSW
>   target/ppc: Use probe_access for LMW, STMW
>   target/ppc: Remove redundant mask in DCBZ
>   target/ppc: Use probe_write for DCBZ
>
>  target/ppc/mem_helper.c | 197 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 162 insertions(+), 35 deletions(-)
>
> --
> 2.20.1
>
Hi,

I can confirm these patches fix the performance issue I reported.

Thanks,
Howard

[-- Attachment #2: Type: text/html, Size: 1167 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
                   ` (5 preceding siblings ...)
  2020-01-30  7:08 ` Howard Spoelstra
@ 2020-01-30 15:09 ` Howard Spoelstra
  2020-01-30 16:07   ` Aleksandar Markovic
  2020-01-30 23:54 ` David Gibson
  7 siblings, 1 reply; 12+ messages in thread
From: Howard Spoelstra @ 2020-01-30 15:09 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-ppc, qemu-devel qemu-devel, David Gibson

[-- Attachment #1: Type: text/plain, Size: 793 bytes --]

As this patch set solved the performance issue and even led to the highest
scores I ever saw on the benchmark tool I used, let me add a:

Tested-by: Howard Spoelstra <hsp.cat7@gmail.com>

On Thu, Jan 30, 2020 at 12:50 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> The first two address the performance regression noticed
> by Howard Spoelstra.  The last two are just something I
> noticed at the same time.
>
>
> r~
>
>
> Richard Henderson (4):
>   target/ppc: Use probe_access for LSW, STSW
>   target/ppc: Use probe_access for LMW, STMW
>   target/ppc: Remove redundant mask in DCBZ
>   target/ppc: Use probe_write for DCBZ
>
>  target/ppc/mem_helper.c | 197 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 162 insertions(+), 35 deletions(-)
>
> --
> 2.20.1
>
>

[-- Attachment #2: Type: text/html, Size: 1266 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-30 15:09 ` Howard Spoelstra
@ 2020-01-30 16:07   ` Aleksandar Markovic
  0 siblings, 0 replies; 12+ messages in thread
From: Aleksandar Markovic @ 2020-01-30 16:07 UTC (permalink / raw)
  To: Howard Spoelstra
  Cc: qemu-ppc, Richard Henderson, qemu-devel qemu-devel, David Gibson

On Thu, Jan 30, 2020 at 4:09 PM Howard Spoelstra <hsp.cat7@gmail.com> wrote:
>
> As this patch set solved the performance issue and even led to the highest scores I ever saw on the benchmark tool I used, let me add a:
>

This makes my question to Richard more important:

Are other targets exposed to performance degradation, and why?

> Tested-by: Howard Spoelstra <hsp.cat7@gmail.com>
>
> On Thu, Jan 30, 2020 at 12:50 AM Richard Henderson <richard.henderson@linaro.org> wrote:
>>
>> The first two address the performance regression noticed
>> by Howard Spoelstra.  The last two are just something I
>> noticed at the same time.
>>
>>
>> r~
>>
>>
>> Richard Henderson (4):
>>   target/ppc: Use probe_access for LSW, STSW
>>   target/ppc: Use probe_access for LMW, STMW
>>   target/ppc: Remove redundant mask in DCBZ
>>   target/ppc: Use probe_write for DCBZ
>>
>>  target/ppc/mem_helper.c | 197 +++++++++++++++++++++++++++++++++-------
>>  1 file changed, 162 insertions(+), 35 deletions(-)
>>
>> --
>> 2.20.1
>>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-30  1:35 ` [PATCH 0/4] target/ppc: Use probe_access Aleksandar Markovic
@ 2020-01-30 16:09   ` Richard Henderson
  2020-01-30 16:42     ` Aleksandar Markovic
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Henderson @ 2020-01-30 16:09 UTC (permalink / raw)
  To: Aleksandar Markovic; +Cc: david, qemu-ppc, QEMU Developers, Howard Spoelstra

On 1/29/20 5:35 PM, Aleksandar Markovic wrote:
> 00:51 Čet, 30.01.2020. Richard Henderson <richard.henderson@linaro.org
> <mailto:richard.henderson@linaro.org>> је написао/ла:
>>
>> The first two address the performance regression noticed
>> by Howard Spoelstra.  The last two are just something I
>> noticed at the same time.
>>
> 
> But the performance regression, according to Howard's bisect analysis, happened
> because of a change in target-independent code, and the fix presented here is
> in target-specific code. This defies basic logic and deserves a clear and
> detailed explanation.
> 
> My additional concern, of course, is: Are other targets exposed to performance
> degradation, and why?

Potentially, yes.  However:

It requires lots of loads in a loop, on a hot path.  I would not have guessed
that the ppc32 Load Multiple Word (et al) was on a hot path at all, since the
instructions are deprecated.  But that's what an ancient OS gets you, I suppose.


r~


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-30 16:09   ` Richard Henderson
@ 2020-01-30 16:42     ` Aleksandar Markovic
  0 siblings, 0 replies; 12+ messages in thread
From: Aleksandar Markovic @ 2020-01-30 16:42 UTC (permalink / raw)
  To: Richard Henderson
  Cc: David Gibson, open list:ppc4xx, QEMU Developers, Howard Spoelstra

On Thu, Jan 30, 2020 at 5:09 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 1/29/20 5:35 PM, Aleksandar Markovic wrote:

> > My additional concern, of course, is: Are other targets exposed to performance
> > degradation, and why?
>
> Potentially, yes.  However:
>
> It requires lots of loads in a loop, on a hot path.  I would not have guessed
> that the ppc32 Load Multiple Word (et al) was on a hot path at all, since the
> instructions are deprecated.  But that's what an ancient OS gets you, I suppose.
>

OK.

>
> r~


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] target/ppc: Use probe_access
  2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
                   ` (6 preceding siblings ...)
  2020-01-30 15:09 ` Howard Spoelstra
@ 2020-01-30 23:54 ` David Gibson
  7 siblings, 0 replies; 12+ messages in thread
From: David Gibson @ 2020-01-30 23:54 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-ppc, qemu-devel, hsp.cat7

[-- Attachment #1: Type: text/plain, Size: 820 bytes --]

On Wed, Jan 29, 2020 at 03:50:36PM -0800, Richard Henderson wrote:
> The first two address the performance regression noticed
> by Howard Spoelstra.  The last two are just something I
> noticed at the same time.

Applied to ppc-for-5.0, thanks.

> 
> 
> r~
> 
> 
> Richard Henderson (4):
>   target/ppc: Use probe_access for LSW, STSW
>   target/ppc: Use probe_access for LMW, STMW
>   target/ppc: Remove redundant mask in DCBZ
>   target/ppc: Use probe_write for DCBZ
> 
>  target/ppc/mem_helper.c | 197 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 162 insertions(+), 35 deletions(-)
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-01-31  1:41 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-29 23:50 [PATCH 0/4] target/ppc: Use probe_access Richard Henderson
2020-01-29 23:50 ` [PATCH 1/4] target/ppc: Use probe_access for LSW, STSW Richard Henderson
2020-01-29 23:50 ` [PATCH 2/4] target/ppc: Use probe_access for LMW, STMW Richard Henderson
2020-01-29 23:50 ` [PATCH 3/4] target/ppc: Remove redundant mask in DCBZ Richard Henderson
2020-01-29 23:50 ` [PATCH 4/4] target/ppc: Use probe_write for DCBZ Richard Henderson
2020-01-30  1:35 ` [PATCH 0/4] target/ppc: Use probe_access Aleksandar Markovic
2020-01-30 16:09   ` Richard Henderson
2020-01-30 16:42     ` Aleksandar Markovic
2020-01-30  7:08 ` Howard Spoelstra
2020-01-30 15:09 ` Howard Spoelstra
2020-01-30 16:07   ` Aleksandar Markovic
2020-01-30 23:54 ` David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).