All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
@ 2015-05-05  7:18 Paolo Bonzini
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Paolo Bonzini @ 2015-05-05  7:18 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf, rth

Patches 1 and 2 enable support for more than 8 MMU modes in TCG (patch
1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
proportionally on targets where that is necessary.

Patch 3 uses the new support in the PPC target.

Paolo

v2->v3: - change i386 TCG_TARGET_TLB_DISPLACEMENT_BITS to 31 [rth]
        - tweak comment in patch 2 to account for
          offsetof(CPUArchState, tlb_table[mem_index][0].addend) [rth]


Paolo Bonzini (3):
  tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  softmmu: support up to 12 MMU modes
  target-ppc: use separate indices for various translation modes

 include/exec/cpu-defs.h  |  34 +++++++++++++++-
 include/exec/cpu_ldst.h  | 104 ++++++++++++++++++++++++++++++++++++++++++++---
 target-ppc/cpu.h         |  12 +++---
 target-ppc/excp_helper.c |   3 --
 target-ppc/helper_regs.h |  15 ++++---
 tcg/aarch64/tcg-target.h |   1 +
 tcg/arm/tcg-target.h     |   1 +
 tcg/i386/tcg-target.h    |   1 +
 tcg/ia64/tcg-target.h    |   2 +
 tcg/mips/tcg-target.h    |   1 +
 tcg/ppc/tcg-target.h     |   1 +
 tcg/s390/tcg-target.h    |   1 +
 tcg/sparc/tcg-target.h   |   1 +
 tcg/tci/tcg-target.h     |   1 +
 14 files changed, 156 insertions(+), 22 deletions(-)

-- 
2.3.5

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v2 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
@ 2015-05-05  7:18 ` Paolo Bonzini
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Paolo Bonzini @ 2015-05-05  7:18 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf, rth

This will be used to size the TLB when more than 8 MMU modes are
used by the target.  Limitations come from the limited size of
the immediate fields (which sometimes, as in the case of Aarch64,
extend to instructions that shift the immediate).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1424436345-37924-2-git-send-email-pbonzini@redhat.com>
---
 tcg/aarch64/tcg-target.h | 1 +
 tcg/arm/tcg-target.h     | 1 +
 tcg/i386/tcg-target.h    | 1 +
 tcg/ia64/tcg-target.h    | 2 ++
 tcg/mips/tcg-target.h    | 1 +
 tcg/ppc/tcg-target.h     | 1 +
 tcg/s390/tcg-target.h    | 1 +
 tcg/sparc/tcg-target.h   | 1 +
 tcg/tci/tcg-target.h     | 1 +
 9 files changed, 10 insertions(+)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 60c7493..8aec04d 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -14,6 +14,7 @@
 #define TCG_TARGET_AARCH64 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE  4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
 #undef TCG_TARGET_STACK_GROWSUP
 
 typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 1c719e2..6559f80 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -27,6 +27,7 @@
 
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 7a9980e..25b5133 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_I386 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE  1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
 
 #ifdef __x86_64__
 # define TCG_TARGET_REG_BITS  64
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index d675589..a04ed81 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -26,6 +26,8 @@
 #define TCG_TARGET_IA64 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 16
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+
 typedef struct {
     uint64_t lo __attribute__((aligned(16)));
     uint64_t hi;
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c88a1c9..f5ba52c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -27,6 +27,7 @@
 #define TCG_TARGET_MIPS 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 32ac442..7ce7048 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -32,6 +32,7 @@
 
 #define TCG_TARGET_NB_REGS 32
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
 typedef enum {
     TCG_REG_R0,  TCG_REG_R1,  TCG_REG_R2,  TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 5acc28c..91576d5 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_S390 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 2
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
 
 typedef enum TCGReg {
     TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 0c4c8af..f584de4 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -27,6 +27,7 @@
 #define TCG_TARGET_REG_BITS 64
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index bd1e974..4c41305 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -44,6 +44,7 @@
 
 #define TCG_TARGET_INTERPRETER 1
 #define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 
 #if UINTPTR_MAX == UINT32_MAX
 # define TCG_TARGET_REG_BITS 32
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v2 2/3] softmmu: support up to 12 MMU modes
  2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
@ 2015-05-05  7:18 ` Paolo Bonzini
  2015-05-18 17:00   ` Alexander Graf
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Paolo Bonzini @ 2015-05-05  7:18 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf, rth

At 8k per TLB (for 64-bit host or target), 8 or more modes
make the TLBs bigger than 64k, and some RISC TCG backends do
not like that.  On the affected hosts, cut the TLB size in
half---there is still a measurable speedup on PPC with the
next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1424436345-37924-3-git-send-email-pbonzini@redhat.com>
---
 include/exec/cpu-defs.h |  34 +++++++++++++++-
 include/exec/cpu_ldst.h | 104 +++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 3f56546..c667570 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -27,6 +27,7 @@
 #include <inttypes.h>
 #include "qemu/osdep.h"
 #include "qemu/queue.h"
+#include "tcg-target.h"
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
@@ -70,8 +71,6 @@ typedef uint64_t target_ulong;
 #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
 
 #if !defined(CONFIG_USER_ONLY)
-#define CPU_TLB_BITS 8
-#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
 /* use a fully associative victim tlb of 8 entries */
 #define CPU_VTLB_SIZE 8
 
@@ -81,6 +80,38 @@ typedef uint64_t target_ulong;
 #define CPU_TLB_ENTRY_BITS 5
 #endif
 
+/* TCG_TARGET_TLB_DISPLACEMENT_BITS is used in CPU_TLB_BITS to ensure that
+ * the TLB is not unnecessarily small, but still small enough for the
+ * TLB lookup instruction sequence used by the TCG target.
+ *
+ * TCG will have to generate an operand as large as the distance between
+ * env and the tlb_table[NB_MMU_MODES - 1][0].addend.  For simplicity,
+ * the TCG targets just round everything up to the next power of two, and
+ * count bits.  This works because: 1) the size of each TLB is a largish
+ * power of two, 2) the limit of the displacement is really close
+ * to a power of two, 3) the offset of tlb_table[0][0] inside env is smaller
+ * than the size of a TLB.
+ *
+ * For example, the maximum displacement is 0xFFF0 on PPC and MIPS, but TCG
+ * just says "the displacement is 16 bits".  TCG_TARGET_TLB_DISPLACEMENT_BITS
+ * then ensures that tlb_table is at least 0x8000 bytes large ("not
+ * unnecessarily small": 2^15).  The operand then will come up smaller than
+ * 0xFFF0 without any particular care, because the TLB for a single MMU mode
+ * is larger than 0x10000-0xFFF0=16 bytes.  In the end, the maximum value of
+ * the operand could be something like 0xC000 (the offset of the last TLB
+ * table) plus 0x18 (the offset of the addend field in each TLB entry) plus
+ * the offset of tlb_table inside env (which is non-trivial but not huge).
+ */
+#define CPU_TLB_BITS                                             \
+    MIN(8,                                                       \
+        TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -  \
+        (NB_MMU_MODES <= 1 ? 0 :                                 \
+         NB_MMU_MODES <= 2 ? 1 :                                 \
+         NB_MMU_MODES <= 4 ? 2 :                                 \
+         NB_MMU_MODES <= 8 ? 3 : 4))
+
+#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+
 typedef struct CPUTLBEntry {
     /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
        bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 1673287..0ec398c 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -263,12 +263,104 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 #undef MEMSUFFIX
 #endif /* (NB_MMU_MODES >= 7) */
 
-#if (NB_MMU_MODES > 7)
-/* Note that supporting NB_MMU_MODES == 9 would require
- * changes to at least the ARM TCG backend.
- */
-#error "NB_MMU_MODES > 7 is not supported for now"
-#endif /* (NB_MMU_MODES > 7) */
+#if (NB_MMU_MODES >= 8) && defined(MMU_MODE7_SUFFIX)
+
+#define CPU_MMU_INDEX 7
+#define MEMSUFFIX MMU_MODE7_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 8) */
+
+#if (NB_MMU_MODES >= 9) && defined(MMU_MODE8_SUFFIX)
+
+#define CPU_MMU_INDEX 8
+#define MEMSUFFIX MMU_MODE8_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 9) */
+
+#if (NB_MMU_MODES >= 10) && defined(MMU_MODE9_SUFFIX)
+
+#define CPU_MMU_INDEX 9
+#define MEMSUFFIX MMU_MODE9_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 10) */
+
+#if (NB_MMU_MODES >= 11) && defined(MMU_MODE10_SUFFIX)
+
+#define CPU_MMU_INDEX 10
+#define MEMSUFFIX MMU_MODE10_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 11) */
+
+#if (NB_MMU_MODES >= 12) && defined(MMU_MODE11_SUFFIX)
+
+#define CPU_MMU_INDEX 11
+#define MEMSUFFIX MMU_MODE11_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 12) */
+
+#if (NB_MMU_MODES > 12)
+#error "NB_MMU_MODES > 12 is not supported for now"
+#endif /* (NB_MMU_MODES > 12) */
 
 /* these access are slower, they must be as rare as possible */
 #define CPU_MMU_INDEX (cpu_mmu_index(env))
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v2 3/3] target-ppc: use separate indices for various translation modes
  2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
@ 2015-05-05  7:18 ` Paolo Bonzini
  2015-06-02 16:06   ` Alexander Graf
  2015-05-05 15:49 ` [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Richard Henderson
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Paolo Bonzini @ 2015-05-05  7:18 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf, rth

PowerPC TCG flushes the TLB on every IR/DR change, which basically
means on every user<->kernel context switch.  Encode IR/DR in the
MMU index.

This brings the number of TLB flushes down from ~900000 to ~50000
for starting up the Debian installer, which is in line with x86
and gives a ~10% performance improvement.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1424436345-37924-4-git-send-email-pbonzini@redhat.com>
---
 target-ppc/cpu.h         | 12 +++++++-----
 target-ppc/excp_helper.c |  3 ---
 target-ppc/helper_regs.h | 15 +++++++++------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index c05c503..2c41d49 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -944,7 +944,13 @@ struct ppc_segment_page_sizes {
 
 /*****************************************************************************/
 /* The whole PowerPC CPU context */
-#define NB_MMU_MODES 3
+#define NB_MMU_MODES 12
+#define MMU_IDX_IR   1
+#define MMU_IDX_DR   2
+#define MMU_IDX_PR   0
+#define MMU_IDX_SUP  4
+#define MMU_IDX_HV   8
+#define MMU_USER_IDX (MMU_IDX_PR|MMU_IDX_IR|MMU_IDX_DR)
 
 #define PPC_CPU_OPCODES_LEN          0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
@@ -1246,10 +1252,6 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 #define cpu_list ppc_cpu_list
 
 /* MMU modes definitions */
-#define MMU_MODE0_SUFFIX _user
-#define MMU_MODE1_SUFFIX _kernel
-#define MMU_MODE2_SUFFIX _hypv
-#define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env)
 {
     return env->mmu_idx;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index b803475..f608701 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
 
     if (env->spr[SPR_LPCR] & LPCR_AIL) {
         new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
-    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
-        /* If we disactivated any translation, flush TLBs */
-        tlb_flush(cs, 1);
     }
 
 #ifdef TARGET_PPC64
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 271fddf..5dfc54f 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -41,12 +41,17 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
 
 static inline void hreg_compute_mem_idx(CPUPPCState *env)
 {
+    int mmu_idx;
+
     /* Precompute MMU index */
-    if (msr_pr == 0 && msr_hv != 0) {
-        env->mmu_idx = 2;
+    if (msr_pr == 1) {
+        mmu_idx = MMU_IDX_PR;
     } else {
-        env->mmu_idx = 1 - msr_pr;
+        mmu_idx = msr_hv ? MMU_IDX_HV : MMU_IDX_SUP;
     }
+    mmu_idx |= msr_ir ? MMU_IDX_IR : 0;
+    mmu_idx |= msr_dr ? MMU_IDX_DR : 0;
+    env->mmu_idx = mmu_idx;
 }
 
 static inline void hreg_compute_hflags(CPUPPCState *env)
@@ -56,7 +61,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
     /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
     hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
         (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
-        (1 << MSR_LE) | (1 << MSR_VSX);
+        (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
     hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
     hreg_compute_mem_idx(env);
     env->hflags = env->msr & hflags_mask;
@@ -82,8 +87,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     }
     if (((value >> MSR_IR) & 1) != msr_ir ||
         ((value >> MSR_DR) & 1) != msr_dr) {
-        /* Flush all tlb when changing translation mode */
-        tlb_flush(cs, 1);
         excp = POWERPC_EXCP_NONE;
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
     }
-- 
2.3.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
                   ` (2 preceding siblings ...)
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
@ 2015-05-05 15:49 ` Richard Henderson
  2015-05-08  1:15   ` Alexander Graf
  2015-05-07 22:26 ` Alexander Graf
  2015-06-11  8:59 ` Artyom Tarasenko
  5 siblings, 1 reply; 13+ messages in thread
From: Richard Henderson @ 2015-05-05 15:49 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 05/05/2015 12:18 AM, Paolo Bonzini wrote:
> Patches 1 and 2 enable support from more than 8 MMU modes in TCG (patch
> 1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
> proportionally on targets where that is necessary.

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
                   ` (3 preceding siblings ...)
  2015-05-05 15:49 ` [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Richard Henderson
@ 2015-05-07 22:26 ` Alexander Graf
  2015-06-11  8:59 ` Artyom Tarasenko
  5 siblings, 0 replies; 13+ messages in thread
From: Alexander Graf @ 2015-05-07 22:26 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: rth



On 05.05.15 09:18, Paolo Bonzini wrote:
> Patches 1 and 2 enable support from more than 8 MMU modes in TCG (patch
> 1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
> proportionally on targets where that is necessary.
> 
> Patch 3 uses the new support in the PPC target.
> 
> Paolo
> 
> v2->v3: - change i386 TCG_TARGET_TLB_DISPLACEMENT_BITS to 31 [rth]
>         - tweak comment in patch 2 to account for
>           offsetof(CPUArchState, tlb_table[mem_index][0].addend) [rth]

Richard, I would greatly appreciate a final Reviewed-by from you on this
patch set :).


Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-05-05 15:49 ` [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Richard Henderson
@ 2015-05-08  1:15   ` Alexander Graf
  0 siblings, 0 replies; 13+ messages in thread
From: Alexander Graf @ 2015-05-08  1:15 UTC (permalink / raw)
  To: Richard Henderson, Paolo Bonzini, qemu-devel



On 05.05.15 17:49, Richard Henderson wrote:
> On 05/05/2015 12:18 AM, Paolo Bonzini wrote:
>> Patches 1 and 2 enable support from more than 8 MMU modes in TCG (patch
>> 1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
>> proportionally on targets where that is necessary.
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>

Oh, there is a reviewed-by! Hah.

Thanks, applied all to ppc-next.


Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/3] softmmu: support up to 12 MMU modes
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
@ 2015-05-18 17:00   ` Alexander Graf
  0 siblings, 0 replies; 13+ messages in thread
From: Alexander Graf @ 2015-05-18 17:00 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: rth

On 05/05/2015 09:18 AM, Paolo Bonzini wrote:
> At 8k per TLB (for 64-bit host or target), 8 or more modes
> make the TLBs bigger than 64k, and some RISC TCG backends do
> not like that.  On the affected hosts, cut the TLB size in
> half---there is still a measurable speedup on PPC with the
> next patch.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Message-Id: <1424436345-37924-3-git-send-email-pbonzini@redhat.com>

This patch breaks compilation with TCI:

   https://travis-ci.org/agraf/qemu/jobs/63013710


Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/3] target-ppc: use separate indices for various translation modes
  2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
@ 2015-06-02 16:06   ` Alexander Graf
  0 siblings, 0 replies; 13+ messages in thread
From: Alexander Graf @ 2015-06-02 16:06 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: rth

On 05/05/2015 09:18 AM, Paolo Bonzini wrote:
> PowerPC TCG flushes the TLB on every IR/DR change, which basically
> means on every user<->kernel context switch.  Encode IR/DR in the
> MMU index.
>
> This brings the number of TLB flushes down from ~900000 to ~50000
> for starting up the Debian installer, which is in line with x86
> and gives a ~10% performance improvement.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Message-Id: <1424436345-37924-4-git-send-email-pbonzini@redhat.com>

So this patch seems to break my Debian squeeze image with -M g3beige.

   $ ./ppc64-softmmu/qemu-system-ppc64 -vnc :3 -snapshot -M g3beige 
debian_squeeze_powerpc_standard.qcow2 -serial mon:stdio

I'll remove it from my queue for now.


Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
                   ` (4 preceding siblings ...)
  2015-05-07 22:26 ` Alexander Graf
@ 2015-06-11  8:59 ` Artyom Tarasenko
  2015-06-15 15:50   ` Richard Henderson
  5 siblings, 1 reply; 13+ messages in thread
From: Artyom Tarasenko @ 2015-06-11  8:59 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Richard Henderson, qemu-devel, Alexander Graf

Hi Paolo,

On Tue, May 5, 2015 at 9:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Patches 1 and 2 enable support from more than 8 MMU modes in TCG (patch
> 1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
> proportionally on targets where that is necessary.

This is a very promising approach. Would it also work with a large
number of MMU modes?
In particular, I wonder whether it would work for SPARC, where 32-bit
processors have up to 65536 MMU contexts.

Regards,
Artyom

> Patch 3 uses the new support in the PPC target.
>
> Paolo
>
> v2->v3: - change i386 TCG_TARGET_TLB_DISPLACEMENT_BITS to 31 [rth]
>         - tweak comment in patch 2 to account for
>           offsetof(CPUArchState, tlb_table[mem_index][0].addend) [rth]
>
>
> Paolo Bonzini (3):
>   tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
>   softmmu: support up to 12 MMU modes
>   target-ppc: use separate indices for various translation modes
>
>  include/exec/cpu-defs.h  |  34 +++++++++++++++-
>  include/exec/cpu_ldst.h  | 104 ++++++++++++++++++++++++++++++++++++++++++++---
>  target-ppc/cpu.h         |  12 +++---
>  target-ppc/excp_helper.c |   3 --
>  target-ppc/helper_regs.h |  15 ++++---
>  tcg/aarch64/tcg-target.h |   1 +
>  tcg/arm/tcg-target.h     |   1 +
>  tcg/i386/tcg-target.h    |   1 +
>  tcg/ia64/tcg-target.h    |   2 +
>  tcg/mips/tcg-target.h    |   1 +
>  tcg/ppc/tcg-target.h     |   1 +
>  tcg/s390/tcg-target.h    |   1 +
>  tcg/sparc/tcg-target.h   |   1 +
>  tcg/tci/tcg-target.h     |   1 +
>  14 files changed, 156 insertions(+), 22 deletions(-)
>
> --
> 2.3.5
>
>



-- 
Regards,
Artyom Tarasenko

SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-06-11  8:59 ` Artyom Tarasenko
@ 2015-06-15 15:50   ` Richard Henderson
  2015-06-15 16:00     ` Artyom Tarasenko
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Henderson @ 2015-06-15 15:50 UTC (permalink / raw)
  To: Artyom Tarasenko, Paolo Bonzini; +Cc: qemu-devel, Alexander Graf

On 06/11/2015 01:59 AM, Artyom Tarasenko wrote:
> This is a very promising approach. Would it also work on a large
> numbers of MMU modes?
> Particulary I wonder if it would work for SPARC, where 32-bit
> processors have up to 65536 MMU contexts.

No, it wouldn't.


r~

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-06-15 15:50   ` Richard Henderson
@ 2015-06-15 16:00     ` Artyom Tarasenko
  2015-06-15 16:29       ` Richard Henderson
  0 siblings, 1 reply; 13+ messages in thread
From: Artyom Tarasenko @ 2015-06-15 16:00 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paolo Bonzini, qemu-devel, Alexander Graf

On Mon, Jun 15, 2015 at 5:50 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 06/11/2015 01:59 AM, Artyom Tarasenko wrote:
>>
>> This is a very promising approach. Would it also work on a large
>> numbers of MMU modes?
>> Particulary I wonder if it would work for SPARC, where 32-bit
>> processors have up to 65536 MMU contexts.
>
>
> No, it wouldn't.
>

What would be a better approach there? Cache recently used contexts only?

Regards,
Artyom

-- 
Regards,
Artyom Tarasenko

SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
  2015-06-15 16:00     ` Artyom Tarasenko
@ 2015-06-15 16:29       ` Richard Henderson
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2015-06-15 16:29 UTC (permalink / raw)
  To: Artyom Tarasenko; +Cc: Paolo Bonzini, qemu-devel, Alexander Graf

On 06/15/2015 09:00 AM, Artyom Tarasenko wrote:
> On Mon, Jun 15, 2015 at 5:50 PM, Richard Henderson <rth@twiddle.net> wrote:
>> On 06/11/2015 01:59 AM, Artyom Tarasenko wrote:
>>>
>>> This is a very promising approach. Would it also work on a large
>>> numbers of MMU modes?
>>> Particulary I wonder if it would work for SPARC, where 32-bit
>>> processors have up to 65536 MMU contexts.
>>
>>
>> No, it wouldn't.
>>
>
> What would be a better approach there? Cache recently used contexts only?

On alpha and arm, we simply ignore Address Space Numbers, which is what I 
assume this is for Sparc.  I've tried several times to find an efficient way to 
do this for Alpha with no success.


r~

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-06-15 16:30 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-05  7:18 [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
2015-05-18 17:00   ` Alexander Graf
2015-05-05  7:18 ` [Qemu-devel] [PATCH v2 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
2015-06-02 16:06   ` Alexander Graf
2015-05-05 15:49 ` [Qemu-devel] [PATCH v3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Richard Henderson
2015-05-08  1:15   ` Alexander Graf
2015-05-07 22:26 ` Alexander Graf
2015-06-11  8:59 ` Artyom Tarasenko
2015-06-15 15:50   ` Richard Henderson
2015-06-15 16:00     ` Artyom Tarasenko
2015-06-15 16:29       ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.