All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform
@ 2015-11-11  0:27 Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 01/77] ppc: Remove MMU_MODEn_SUFFIX definitions Benjamin Herrenschmidt
                   ` (79 more replies)
  0 siblings, 80 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This requires an OPAL firmware file, which isn't included yet. It
will emulate enough to boot existing distros and run KVM
inside TCG.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 01/77] ppc: Remove MMU_MODEn_SUFFIX definitions
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts Benjamin Herrenschmidt
                   ` (78 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We don't use the resulting accessors and this gets in the way of
the split I/D TLB work.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index b34aed6..9ef0859 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1246,9 +1246,6 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 #define cpu_list ppc_cpu_list
 
 /* MMU modes definitions */
-#define MMU_MODE0_SUFFIX _user
-#define MMU_MODE1_SUFFIX _kernel
-#define MMU_MODE2_SUFFIX _hypv
 #define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 {
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 01/77] ppc: Remove MMU_MODEn_SUFFIX definitions Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  4:49   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-27 10:29   ` Alexander Graf
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes Benjamin Herrenschmidt
                   ` (77 subsequent siblings)
  79 siblings, 2 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We rework the way the MMU indices are calculated, providing separate
indices for I and D side based on MSR:IR and MSR:DR respectively,
and thus no longer need to flush the TLB on context changes. This also
adds correct support for HV as a separate address space.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h         | 11 +++++++---
 target-ppc/excp_helper.c | 11 ----------
 target-ppc/helper_regs.h | 54 +++++++++++++++++++++++++++++++++++++++++-------
 target-ppc/machine.c     |  4 +++-
 target-ppc/translate.c   |  7 ++++---
 5 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 9ef0859..aaa7117 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -462,6 +462,8 @@ struct ppc_slb_t {
 #define MSR_EP   6  /* Exception prefix on 601                               */
 #define MSR_IR   5  /* Instruction relocate                                  */
 #define MSR_DR   4  /* Data relocate                                         */
+#define MSR_IS   5  /* Instruction address space (BookE)                     */
+#define MSR_DS   4  /* Data address space (BookE)                            */
 #define MSR_PE   3  /* Protection enable on 403                              */
 #define MSR_PX   2  /* Protection exclusive on 403                  x        */
 #define MSR_PMM  2  /* Performance monitor mark on POWER            x        */
@@ -505,6 +507,8 @@ struct ppc_slb_t {
 #define msr_ep   ((env->msr >> MSR_EP)   & 1)
 #define msr_ir   ((env->msr >> MSR_IR)   & 1)
 #define msr_dr   ((env->msr >> MSR_DR)   & 1)
+#define msr_is   ((env->msr >> MSR_IS)   & 1)
+#define msr_ds   ((env->msr >> MSR_DS)   & 1)
 #define msr_pe   ((env->msr >> MSR_PE)   & 1)
 #define msr_px   ((env->msr >> MSR_PX)   & 1)
 #define msr_pmm  ((env->msr >> MSR_PMM)  & 1)
@@ -944,7 +948,7 @@ struct ppc_segment_page_sizes {
 
 /*****************************************************************************/
 /* The whole PowerPC CPU context */
-#define NB_MMU_MODES 3
+#define NB_MMU_MODES    8
 
 #define PPC_CPU_OPCODES_LEN          0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
@@ -1108,7 +1112,8 @@ struct CPUPPCState {
     /* Those resources are used only in QEMU core */
     target_ulong hflags;      /* hflags is a MSR & HFLAGS_MASK         */
     target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */
-    int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
+    int immu_idx;         /* precomputed MMU index to speed up insn access */
+    int dmmu_idx;         /* precomputed MMU index to speed up data accesses */
 
     /* Power management */
     int (*check_pow)(CPUPPCState *env);
@@ -1249,7 +1254,7 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 #define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 {
-    return env->mmu_idx;
+    return ifetch ? env->immu_idx : env->dmmu_idx;
 }
 
 #include "exec/cpu-all.h"
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 4250106..3e39098 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
 
     if (env->spr[SPR_LPCR] & LPCR_AIL) {
         new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
-    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
-        /* If we disactivated any translation, flush TLBs */
-        tlb_flush(cs, 1);
     }
 
 #ifdef TARGET_PPC64
@@ -674,14 +671,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     /* Reset exception state */
     cs->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
-
-    if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
-        (env->mmu_model == POWERPC_MMU_BOOKE206)) {
-        /* XXX: The BookE changes address space when switching modes,
-                we should probably implement that as different MMU indexes,
-                but for the moment we do it the slow way and flush all.  */
-        tlb_flush(cs, 1);
-    }
 }
 
 void ppc_cpu_do_interrupt(CPUState *cs)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 271fddf..f7edd5b 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -41,11 +41,50 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
 
 static inline void hreg_compute_mem_idx(CPUPPCState *env)
 {
-    /* Precompute MMU index */
-    if (msr_pr == 0 && msr_hv != 0) {
-        env->mmu_idx = 2;
+    /* This is our encoding for server processors
+     *
+     *   0 = Guest User space virtual mode
+     *   1 = Guest Kernel space virtual mode
+     *   2 = Guest Kernel space real mode
+     *   3 = HV User space virtual mode
+     *   4 = HV Kernel space virtual mode
+     *   5 = HV Kernel space real mode
+     *
+     * The combination PR=1 IR&DR=0 is invalid, we will treat
+     * it as IR=DR=1
+     *
+     * For BookE, we need 8 MMU modes as follow:
+     *
+     *  0 = AS 0 HV User space
+     *  1 = AS 0 HV Kernel space
+     *  2 = AS 1 HV User space
+     *  3 = AS 1 HV Kernel space
+     *  4 = AS 0 Guest User space
+     *  5 = AS 0 Guest Kernel space
+     *  6 = AS 1 Guest User space
+     *  7 = AS 1 Guest Kernel space
+     */
+    if (env->mmu_model & POWERPC_MMU_BOOKE) {
+        env->immu_idx = env->dmmu_idx = msr_pr ? 0 : 1;
+        env->immu_idx += msr_is ? 2 : 0;
+        env->dmmu_idx += msr_ds ? 2 : 0;
+        env->immu_idx += msr_gs ? 4 : 0;
+        env->dmmu_idx += msr_gs ? 4 : 0;
     } else {
-        env->mmu_idx = 1 - msr_pr;
+        /* First calculate a base value independent of HV */
+        if (msr_pr != 0) {
+            /* User space, ignore IR and DR */
+            env->immu_idx = env->dmmu_idx = 0;
+        } else {
+            /* Kernel, setup a base I/D value */
+            env->immu_idx = msr_ir ? 1 : 2;
+            env->dmmu_idx = msr_dr ? 1 : 2;
+        }
+        /* Then offset it for HV */
+        if (msr_hv) {
+            env->immu_idx += 3;
+            env->dmmu_idx += 3;
+        }
     }
 }
 
@@ -82,9 +121,10 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     }
     if (((value >> MSR_IR) & 1) != msr_ir ||
         ((value >> MSR_DR) & 1) != msr_dr) {
-        /* Flush all tlb when changing translation mode */
-        tlb_flush(cs, 1);
-        excp = POWERPC_EXCP_NONE;
+        cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+    }
+    if ((env->mmu_model & POWERPC_MMU_BOOKE) &&
+        ((value >> MSR_GS) & 1) != msr_gs) {
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
     }
     if (unlikely((env->flags & POWERPC_FLAG_TGPR) &&
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index f4ac761..b969492 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -90,9 +90,11 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
     qemu_get_betls(f, &env->nip);
     qemu_get_betls(f, &env->hflags);
     qemu_get_betls(f, &env->hflags_nmsr);
-    qemu_get_sbe32s(f, &env->mmu_idx);
     qemu_get_sbe32(f); /* Discard unused power_mode */
 
+    /* Ignore saved mmu_idx, recompute */
+    hreg_compute_mem_idx(env);
+
     return 0;
 }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 308ad68..6d9f252 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11220,8 +11220,9 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                 env->nip, env->lr, env->ctr, cpu_read_xer(env),
                 cs->cpu_index);
     cpu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
-                TARGET_FMT_lx " idx %d\n", env->msr, env->spr[SPR_HID0],
-                env->hflags, env->mmu_idx);
+                TARGET_FMT_lx " iidx %d didx %d\n",
+                env->msr, env->spr[SPR_HID0],
+                env->hflags, env->immu_idx, env->dmmu_idx);
 #if !defined(NO_TIMER_DUMP)
     cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
 #if !defined(CONFIG_USER_ONLY)
@@ -11426,7 +11427,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
     ctx.spr_cb = env->spr_cb;
     ctx.pr = msr_pr;
     ctx.hv = !msr_pr && msr_hv;
-    ctx.mem_idx = env->mmu_idx;
+    ctx.mem_idx = env->dmmu_idx;
     ctx.insns_flags = env->insns_flags;
     ctx.insns_flags2 = env->insns_flags2;
     ctx.access_type = -1;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 01/77] ppc: Remove MMU_MODEn_SUFFIX definitions Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:00   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions Benjamin Herrenschmidt
                   ` (76 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

On ppc64 especially, we flush the tlb on any slbie or tlbie instruction.

However, those instructions often come in bursts of 3 or more (a context
switch will favor a series of slbie's, for example, over an slbia if the
SLB has fewer than a certain number of entries in it, and tlbie's can
happen in a series; with PAPR, H_BULK_REMOVE can remove up to 4 entries
at a time).

Doing a tlb_flush() each time is a waste of time. We end up doing a memset
of the whole TLB, reloading it for the next instruction, memset'ing again,
etc...

Those instructions don't have to take effect immediately. For slbie, they
can wait for the next context synchronizing event. For tlbie, the next
tlbsync.

This implements batching by keeping a flag that indicates that we have a
TLB in need of flushing. We check it on interrupts, rfi's, isync's and
tlbsync and flush the TLB if needed.

This reduces the number of tlb_flush() calls on a boot to an Ubuntu
installer first dialog screen from roughly 360K down to 36K.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/spapr_hcall.c     | 12 +++++++++---
 target-ppc/cpu.h         |  2 ++
 target-ppc/excp_helper.c |  9 +++++++++
 target-ppc/helper.h      |  1 +
 target-ppc/helper_regs.h | 13 +++++++++++++
 target-ppc/mmu-hash64.c  | 12 +++---------
 target-ppc/mmu_helper.c  |  9 ++++++++-
 target-ppc/translate.c   | 39 ++++++++++++++++++++++++++++++++++++---
 8 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index cebceea..7e2cb4b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     switch (ret) {
     case REMOVE_SUCCESS:
+        check_tlb_flush(env);
         return H_SUCCESS;
 
     case REMOVE_NOT_FOUND:
@@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
+    target_ulong rc = H_SUCCESS;
     int i;
 
     for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
@@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             break;
 
         case REMOVE_PARM:
-            return H_PARAMETER;
+            rc = H_PARAMETER;
+            goto exit;
 
         case REMOVE_HW:
-            return H_HARDWARE;
+            rc = H_HARDWARE;
+            goto exit;
         }
     }
+ exit:
+    check_tlb_flush(env);
 
-    return H_SUCCESS;
+    return rc;
 }
 
 static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index aaa7117..e6c43f9 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1013,6 +1013,8 @@ struct CPUPPCState {
     /* PowerPC 64 SLB area */
     ppc_slb_t slb[MAX_SLB_ENTRIES];
     int32_t slb_nr;
+    /* tcg TLB needs flush (deferred slb inval instruction typically) */
+    uint32_t tlb_need_flush;
 #endif
     /* segment registers */
     hwaddr htab_base;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 3e39098..c1d6605 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     /* Reset exception state */
     cs->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
+
+    /* Any interrupt is context synchronizing, check if TCG TLB
+     * needs a delayed flush on ppc64
+     */
+    check_tlb_flush(env);
 }
 
 void ppc_cpu_do_interrupt(CPUState *cs)
@@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
                   __func__, env, env->pending_interrupts,
                   cs->interrupt_request, (int)msr_me, (int)msr_ee);
 #endif
+
     /* External reset */
     if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
         env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
@@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
      * as rfi is always the last insn of a TB
      */
     cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+
+    /* Context synchronizing: check if TCG TLB needs flush */
+    check_tlb_flush(env);
 }
 
 void helper_rfi(CPUPPCState *env)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 869be15..ff2d50b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
 #endif
+DEF_HELPER_1(check_tlb_flush, void, env)
 #endif
 
 DEF_HELPER_3(lmw, void, env, tl, i32)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index f7edd5b..57da931 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     return excp;
 }
 
+#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+static inline void check_tlb_flush(CPUPPCState *env)
+{
+    CPUState *cs = CPU(ppc_env_get_cpu(env));
+    if (env->tlb_need_flush) {
+        env->tlb_need_flush = 0;
+        tlb_flush(cs, 1);
+    }
+}
+#else
+static inline void check_tlb_flush(CPUPPCState *env) { }
+#endif
+
 #endif /* !defined(__HELPER_REGS_H__) */
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 7df6ede..71e1d14 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
 
 void helper_slbia(CPUPPCState *env)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
-    int n, do_invalidate;
+    int n;
 
-    do_invalidate = 0;
     /* XXX: Warning: slbia never invalidates the first segment */
     for (n = 1; n < env->slb_nr; n++) {
         ppc_slb_t *slb = &env->slb[n];
@@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env)
              *      and we still don't have a tlb_flush_mask(env, n, mask)
              *      in QEMU, we just invalidate all TLBs
              */
-            do_invalidate = 1;
+            env->tlb_need_flush = true;
         }
     }
-    if (do_invalidate) {
-        tlb_flush(CPU(cpu), 1);
-    }
 }
 
 void helper_slbie(CPUPPCState *env, target_ulong addr)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_slb_t *slb;
 
     slb = slb_lookup(env, addr);
@@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask)
          *      in QEMU, we just invalidate all TLBs
          */
-        tlb_flush(CPU(cpu), 1);
+        env->tlb_need_flush = true;
     }
 }
 
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index e52d0e5..54bc5d1 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -23,6 +23,7 @@
 #include "mmu-hash64.h"
 #include "mmu-hash32.h"
 #include "exec/cpu_ldst.h"
+#include "helper_regs.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_BATS
@@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
     case POWERPC_MMU_2_03:
     case POWERPC_MMU_2_06:
     case POWERPC_MMU_2_07:
+        env->tlb_need_flush = 0;
 #endif /* defined(TARGET_PPC64) */
         tlb_flush(CPU(cpu), 1);
         break;
@@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
          *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
          *      we just invalidate all TLBs
          */
-        tlb_flush(CPU(cpu), 1);
+        env->tlb_need_flush = 1;
         break;
 #endif /* defined(TARGET_PPC64) */
     default:
@@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
 }
 
 
+void helper_check_tlb_flush(CPUPPCState *env)
+{
+    check_tlb_flush(env);
+}
+
 /*****************************************************************************/
 
 /* try to fill the TLB and return an exception if error. If retaddr is
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 6d9f252..e18d204 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx)
 {
 }
 
+#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+static inline void gen_check_tlb_flush(DisasContext *ctx)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+    TCGLabel *l = gen_new_label();
+
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
+    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
+    gen_helper_check_tlb_flush(cpu_env);
+    gen_set_label(l);
+    tcg_temp_free_i32(t);
+}
+#else
+static inline void gen_check_tlb_flush(DisasContext *ctx) { }
+#endif
+
 /* isync */
 static void gen_isync(DisasContext *ctx)
 {
+    /*
+     * We need to check for a pending TLB flush. This can only happen in
+     * kernel mode however so check MSR_PR
+     */
+    if (!ctx->pr) {
+        gen_check_tlb_flush(ctx);
+    }
     gen_stop_exception(ctx);
 }
 
@@ -3458,6 +3481,15 @@ STCX(stqcx_, 16);
 /* sync */
 static void gen_sync(DisasContext *ctx)
 {
+    uint32_t l = (ctx->opcode >> 21) & 3;
+
+    /*
+     * For l == 2, it's a ptesync. We need to check for a pending TLB flush.
+     * This can only happen in kernel mode however so check MSR_PR as well.
+     */
+    if (l == 2 && !ctx->pr) {
+        gen_check_tlb_flush(ctx);
+    }
 }
 
 /* wait */
@@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
-    /* This has no effect: it should ensure that all previous
-     * tlbie have completed
+    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
+     * embedded however needs to deal with tlbsync. We don't try to be
+     * fancy and swallow the overhead of checking for both.
      */
-    gen_stop_exception(ctx);
+    gen_check_tlb_flush(ctx);
 #endif
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (2 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-11  0:59   ` [Qemu-devel] [Qemu-ppc] " Stewart Smith
  2015-11-16  5:01   ` David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 05/77] ppc: Update SPR definitions Benjamin Herrenschmidt
                   ` (75 subsequent siblings)
  79 siblings, 2 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We never released anything older than POWER8 DD2.0 and POWER8E DD2.1,
so let's use these versions. Otherwise, some firmware or Linux code
might fail to use some HW features that were non-functional in earlier,
internal-only spins of the chip.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu-models.c | 12 ++++++------
 target-ppc/cpu-models.h |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 4d5ab4b..349783e 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1138,10 +1138,10 @@
                 "POWER7 v2.3")
     POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,            POWER7,
                 "POWER7+ v2.1")
-    POWERPC_DEF("POWER8E_v1.0",  CPU_POWERPC_POWER8E_v10,            POWER8,
-                "POWER8E v1.0")
-    POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10,             POWER8,
-                "POWER8 v1.0")
+    POWERPC_DEF("POWER8E_v2.1",  CPU_POWERPC_POWER8E_v21,            POWER8,
+                "POWER8E v2.1")
+    POWERPC_DEF("POWER8_v2.0",   CPU_POWERPC_POWER8_v20,             POWER8,
+                "POWER8 v2.0")
     POWERPC_DEF("970_v2.2",      CPU_POWERPC_970_v22,                970,
                 "PowerPC 970 v2.2")
     POWERPC_DEF("970fx_v1.0",    CPU_POWERPC_970FX_v10,              970,
@@ -1389,8 +1389,8 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
     { "POWER5gs", "POWER5+_v2.1" },
     { "POWER7", "POWER7_v2.3" },
     { "POWER7+", "POWER7+_v2.1" },
-    { "POWER8E", "POWER8E_v1.0" },
-    { "POWER8", "POWER8_v1.0" },
+    { "POWER8E", "POWER8E_v2.1" },
+    { "POWER8", "POWER8_v2.0" },
     { "970", "970_v2.2" },
     { "970fx", "970fx_v3.1" },
     { "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index 9d80e72..2992427 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -557,9 +557,9 @@ enum {
     CPU_POWERPC_POWER7P_BASE       = 0x004A0000,
     CPU_POWERPC_POWER7P_v21        = 0x004A0201,
     CPU_POWERPC_POWER8E_BASE       = 0x004B0000,
-    CPU_POWERPC_POWER8E_v10        = 0x004B0100,
+    CPU_POWERPC_POWER8E_v21        = 0x004B0201,
     CPU_POWERPC_POWER8_BASE        = 0x004D0000,
-    CPU_POWERPC_POWER8_v10         = 0x004D0100,
+    CPU_POWERPC_POWER8_v20         = 0x004D0200,
     CPU_POWERPC_970_v22            = 0x00390202,
     CPU_POWERPC_970FX_v10          = 0x00391100,
     CPU_POWERPC_970FX_v20          = 0x003C0200,
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 05/77] ppc: Update SPR definitions
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (3 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:06   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs Benjamin Herrenschmidt
                   ` (74 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Add definitions for additional SPR numbers and SPR bit definitions
that will be relevant for subsequent improvements to POWER8 emulation.

Also fix the definition of LPIDR which was incorrect (and is different
for server and embedded).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index e6c43f9..611367f 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -470,9 +470,17 @@ struct ppc_slb_t {
 #define MSR_RI   1  /* Recoverable interrupt                        1        */
 #define MSR_LE   0  /* Little-endian mode                           1 hflags */
 
-#define LPCR_ILE (1 << (63-38))
-#define LPCR_AIL_SHIFT (63-40)      /* Alternate interrupt location */
-#define LPCR_AIL (3 << LPCR_AIL_SHIFT)
+/* LPCR bits */
+#define LPCR_VPM0         (1ull << (63-0))
+#define LPCR_VPM1         (1ull << (63-1))
+#define LPCR_ISL          (1ull << (63-2))
+#define LPCR_KBV          (1ull << (63-3))
+#define LPCR_ILE          (1ull << (63-38))
+#define LPCR_MER          (1ull << (63-52))
+#define LPCR_LPES0        (1ull << (63-60))
+#define LPCR_LPES1        (1ull << (63-61))
+#define LPCR_AIL_SHIFT    (63-40)      /* Alternate interrupt location */
+#define LPCR_AIL          (3ull << LPCR_AIL_SHIFT)
 
 #define msr_sf   ((env->msr >> MSR_SF)   & 1)
 #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
@@ -1338,6 +1346,10 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_UAMOR             (0x09D)
 #define SPR_MPC_ICTRL         (0x09E)
 #define SPR_MPC_BAR           (0x09F)
+#define SPR_DAWR              (0x0B4)
+#define SPR_RPR               (0x0BA)
+#define SPR_DAWRX             (0x0BC)
+#define SPR_HFSCR             (0x0BE)
 #define SPR_VRSAVE            (0x100)
 #define SPR_USPRG0            (0x100)
 #define SPR_USPRG1            (0x101)
@@ -1392,19 +1404,25 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_HSRR1             (0x13B)
 #define SPR_BOOKE_IAC4        (0x13B)
 #define SPR_BOOKE_DAC1        (0x13C)
-#define SPR_LPIDR             (0x13D)
+#define SPR_MMCRH             (0x13C)
 #define SPR_DABR2             (0x13D)
 #define SPR_BOOKE_DAC2        (0x13D)
+#define SPR_TFMR              (0x13D)
 #define SPR_BOOKE_DVC1        (0x13E)
 #define SPR_LPCR              (0x13E)
 #define SPR_BOOKE_DVC2        (0x13F)
+#define SPR_LPIDR             (0x13F)
 #define SPR_BOOKE_TSR         (0x150)
+#define SPR_HMER              (0x150)
+#define SPR_HMEER             (0x151)
 #define SPR_PCR               (0x152)
+#define SPR_BOOKE_LPIDR       (0x152)
 #define SPR_BOOKE_TCR         (0x154)
 #define SPR_BOOKE_TLB0PS      (0x158)
 #define SPR_BOOKE_TLB1PS      (0x159)
 #define SPR_BOOKE_TLB2PS      (0x15A)
 #define SPR_BOOKE_TLB3PS      (0x15B)
+#define SPR_AMOR	      (0x15D)
 #define SPR_BOOKE_MAS7_MAS3   (0x174)
 #define SPR_BOOKE_IVOR0       (0x190)
 #define SPR_BOOKE_IVOR1       (0x191)
@@ -1622,6 +1640,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_RCPU_L2U_RA3      (0x32B)
 #define SPR_TAR               (0x32F)
 #define SPR_VTB               (0x351)
+#define SPR_MMCRC             (0x353)
 #define SPR_440_INV0          (0x370)
 #define SPR_440_INV1          (0x371)
 #define SPR_440_INV2          (0x372)
@@ -1655,6 +1674,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_440_DVLIM         (0x398)
 #define SPR_750_WPAR          (0x399)
 #define SPR_440_IVLIM         (0x399)
+#define SPR_TSCR	      (0x399)
 #define SPR_750_DMAU          (0x39A)
 #define SPR_750_DMAL          (0x39B)
 #define SPR_440_RSTCFG        (0x39B)
@@ -1829,9 +1849,10 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define   L1CSR1_ICE		0x00000001	/* Instruction Cache Enable */
 
 /* HID0 bits */
-#define HID0_DEEPNAP        (1 << 24)
-#define HID0_DOZE           (1 << 23)
-#define HID0_NAP            (1 << 22)
+#define HID0_DEEPNAP        (1 << 24)	        /* pre-2.06 */
+#define HID0_DOZE           (1 << 23)	        /* pre-2.06 */
+#define HID0_NAP            (1 << 22)	        /* pre-2.06 */
+#define HID0_HILE           (1ull << (63-19))   /* POWER8 */
 
 /*****************************************************************************/
 /* PowerPC Instructions types definitions                                    */
@@ -2180,6 +2201,25 @@ enum {
     PCR_TM_DIS          = 1ull << (63-2), /* Trans. memory disable (POWER8) */
 };
 
+/* HMER/HMEER */
+enum {
+    HMER_MALFUNCTION_ALERT      = 1ull << (63-0),
+    HMER_PROC_RECV_DONE         = 1ull << (63-2),
+    HMER_PROC_RECV_ERROR_MASKED = 1ull << (63-3),
+    HMER_TFAC_ERROR             = 1ull << (63-4),
+    HMER_TFMR_PARITY_ERROR      = 1ull << (63-5),
+    HMER_XSCOM_FAIL             = 1ull << (63-8),
+    HMER_XSCOM_DONE             = 1ull << (63-9),
+    HMER_PROC_RECV_AGAIN        = 1ull << (63-11),
+    HMER_WARN_RISE              = 1ull << (63-14),
+    HMER_WARN_FALL              = 1ull << (63-15),
+    HMER_SCOM_FIR_HMI           = 1ull << (63-16),
+    HMER_TRIG_FIR_HMI           = 1ull << (63-17),
+    HMER_HYP_RESOURCE_ERR       = 1ull << (63-20),
+    HMER_XSCOM_STATUS_MASK      = 7ull << (63-23),
+    HMER_XSCOM_STATUS_LSH       = (63-23),
+};
+
 /*****************************************************************************/
 
 static inline target_ulong cpu_read_xer(CPUPPCState *env)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (4 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 05/77] ppc: Update SPR definitions Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:09   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s Benjamin Herrenschmidt
                   ` (73 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The current set of spr_register_* macros only takes the user and
supervisor function pointers. To make the transition easy, we
don't change that, but we add "_hv" variants that can be used to
register all 3 sets.

To simplify the transition, users of the "old" macro will set the
hypervisor callback to be the same as the supervisor one. The new
registration function only needs to be used for registers that are
either hypervisor only or behave differently in HV mode.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c      | 26 ++++++++++++++++----------
 target-ppc/translate_init.c | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index e18d204..a2fe1b5 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4299,14 +4299,17 @@ static inline void gen_op_mfspr(DisasContext *ctx)
     void (*read_cb)(DisasContext *ctx, int gprn, int sprn);
     uint32_t sprn = SPR(ctx->opcode);
 
-#if !defined(CONFIG_USER_ONLY)
-    if (ctx->hv)
+#if defined(CONFIG_USER_ONLY)
+    read_cb = ctx->spr_cb[sprn].uea_read;
+#else
+    if (ctx->pr) {
+        read_cb = ctx->spr_cb[sprn].uea_read;
+    } else if (ctx->hv) {
         read_cb = ctx->spr_cb[sprn].hea_read;
-    else if (!ctx->pr)
+    } else if (!ctx->pr) {
         read_cb = ctx->spr_cb[sprn].oea_read;
-    else
+    }
 #endif
-        read_cb = ctx->spr_cb[sprn].uea_read;
     if (likely(read_cb != NULL)) {
         if (likely(read_cb != SPR_NOACCESS)) {
             (*read_cb)(ctx, rD(ctx->opcode), sprn);
@@ -4450,14 +4453,17 @@ static void gen_mtspr(DisasContext *ctx)
     void (*write_cb)(DisasContext *ctx, int sprn, int gprn);
     uint32_t sprn = SPR(ctx->opcode);
 
-#if !defined(CONFIG_USER_ONLY)
-    if (ctx->hv)
+#if defined(CONFIG_USER_ONLY)
+    write_cb = ctx->spr_cb[sprn].uea_write;
+#else
+    if (ctx->pr) {
+        write_cb = ctx->spr_cb[sprn].uea_write;
+    } else if (ctx->hv) {
         write_cb = ctx->spr_cb[sprn].hea_write;
-    else if (!ctx->pr)
+    } else {
         write_cb = ctx->spr_cb[sprn].oea_write;
-    else
+    }
 #endif
-        write_cb = ctx->spr_cb[sprn].uea_write;
     if (likely(write_cb != NULL)) {
         if (likely(write_cb != SPR_NOACCESS)) {
             (*write_cb)(ctx, sprn, rS(ctx->opcode));
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index e88dc7f..30a03ce 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -578,17 +578,33 @@ static inline void vscr_init (CPUPPCState *env, uint32_t val)
 #define spr_register_kvm(env, num, name, uea_read, uea_write,                  \
                          oea_read, oea_write, one_reg_id, initial_value)       \
     _spr_register(env, num, name, uea_read, uea_write, initial_value)
+#define spr_register_kvm_hv(env, num, name, uea_read, uea_write,               \
+                            oea_read, oea_write, hea_read, hea_write,          \
+                            one_reg_id, initial_value)                         \
+    _spr_register(env, num, name, uea_read, uea_write, initial_value)
 #else
 #if !defined(CONFIG_KVM)
 #define spr_register_kvm(env, num, name, uea_read, uea_write,                  \
-                         oea_read, oea_write, one_reg_id, initial_value) \
+                         oea_read, oea_write, one_reg_id, initial_value)       \
+    _spr_register(env, num, name, uea_read, uea_write,                         \
+                  oea_read, oea_write, oea_read, oea_write, initial_value)
+#define spr_register_kvm_hv(env, num, name, uea_read, uea_write,               \
+                            oea_read, oea_write, hea_read, hea_write,          \
+                            one_reg_id, initial_value)                         \
     _spr_register(env, num, name, uea_read, uea_write,                         \
-                  oea_read, oea_write, initial_value)
+                  oea_read, oea_write, hea_read, hea_write, initial_value)
 #else
 #define spr_register_kvm(env, num, name, uea_read, uea_write,                  \
-                         oea_read, oea_write, one_reg_id, initial_value) \
+                         oea_read, oea_write, one_reg_id, initial_value)       \
+    _spr_register(env, num, name, uea_read, uea_write,                         \
+                  oea_read, oea_write, oea_read, oea_write,                    \
+                  one_reg_id, initial_value)
+#define spr_register_kvm_hv(env, num, name, uea_read, uea_write,               \
+                            oea_read, oea_write, hea_read, hea_write,          \
+                            one_reg_id, initial_value)                         \
     _spr_register(env, num, name, uea_read, uea_write,                         \
-                  oea_read, oea_write, one_reg_id, initial_value)
+                  oea_read, oea_write, hea_read, hea_write,                    \
+                  one_reg_id, initial_value)
 #endif
 #endif
 
@@ -597,6 +613,13 @@ static inline void vscr_init (CPUPPCState *env, uint32_t val)
     spr_register_kvm(env, num, name, uea_read, uea_write,                      \
                      oea_read, oea_write, 0, initial_value)
 
+#define spr_register_hv(env, num, name, uea_read, uea_write,                   \
+                        oea_read, oea_write, hea_read, hea_write,              \
+                        initial_value)                                         \
+    spr_register_kvm_hv(env, num, name, uea_read, uea_write,                   \
+                        oea_read, oea_write, hea_read, hea_write,              \
+                        0, initial_value)
+
 static inline void _spr_register(CPUPPCState *env, int num,
                                  const char *name,
                                  void (*uea_read)(DisasContext *ctx, int gprn, int sprn),
@@ -605,6 +628,8 @@ static inline void _spr_register(CPUPPCState *env, int num,
 
                                  void (*oea_read)(DisasContext *ctx, int gprn, int sprn),
                                  void (*oea_write)(DisasContext *ctx, int sprn, int gprn),
+                                 void (*hea_read)(DisasContext *opaque, int gprn, int sprn),
+                                 void (*hea_write)(DisasContext *opaque, int sprn, int gprn),
 #endif
 #if defined(CONFIG_KVM)
                                  uint64_t one_reg_id,
@@ -632,6 +657,8 @@ static inline void _spr_register(CPUPPCState *env, int num,
 #if !defined(CONFIG_USER_ONLY)
     spr->oea_read = oea_read;
     spr->oea_write = oea_write;
+    spr->hea_read = hea_read;
+    spr->hea_write = hea_write;
 #endif
 #if defined(CONFIG_KVM)
     spr->one_reg_id = one_reg_id,
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (5 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:11   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 08/77] ppc: Add number of threads per core to the processor definition Benjamin Herrenschmidt
                   ` (72 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We don't give them a KVM reg number yet as no current KVM version
supports HV mode.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 140 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 3 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 30a03ce..c743eb1 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -1104,6 +1104,11 @@ static void gen_spr_amr (CPUPPCState *env)
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_UAMOR, 0);
+    spr_register_hv(env, SPR_AMOR, "AMOR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0);
 #endif /* !CONFIG_USER_ONLY */
 }
 #endif /* TARGET_PPC64 */
@@ -7490,6 +7495,20 @@ static void gen_spr_book3s_dbg(CPUPPCState *env)
                      KVM_REG_PPC_DABRX, 0x00000000);
 }
 
+static void gen_spr_book3s_207_dbg(CPUPPCState *env)
+{
+    spr_register_hv(env, SPR_DAWR, "DAWR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0x00000000);
+    spr_register_hv(env, SPR_DAWRX, "DAWRX",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0x00000000);
+}
+
 static void gen_spr_970_dbg(CPUPPCState *env)
 {
     /* Breakpoints */
@@ -7654,15 +7673,116 @@ static void gen_spr_power5p_lpar(CPUPPCState *env)
     spr_register_kvm(env, SPR_LPCR, "LPCR",
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
-                     KVM_REG_PPC_LPCR, 0x00000000);
+                     KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
 }
 
+#if !defined(CONFIG_USER_ONLY)
+static void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv hmer = tcg_temp_new();
+
+    gen_load_spr(hmer, sprn);
+    tcg_gen_and_tl(hmer, cpu_gpr[gprn], hmer);
+    gen_store_spr(sprn, hmer);
+    spr_store_dump_spr(sprn);
+    tcg_temp_free(hmer);
+}
+#endif
+
 static void gen_spr_book3s_ids(CPUPPCState *env)
 {
+    /* FIXME: Will need to deal with thread vs core only SPRs */
+
     /* Processor identification */
-    spr_register(env, SPR_PIR, "PIR",
+    spr_register_hv(env, SPR_PIR, "PIR",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_pir,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, NULL,
+                 0x00000000);
+    spr_register_hv(env, SPR_HID0, "HID0",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_TSCR, "TSCR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HMER, "HMER",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_hmer,
+                 0x00000000);
+    spr_register_hv(env, SPR_HMEER, "HMEER",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_TFMR, "TFMR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_LPIDR, "LPIDR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HFSCR, "HFSCR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_MMCRC, "MMCRC",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_MMCRH, "MMCRH",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HSPRG0, "HSPRG0",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HSPRG1, "HSPRG1",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HSRR0, "HSRR0",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HSRR1, "HSRR1",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HDAR, "HDAR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HDSISR, "HDSISR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_RMOR, "RMOR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
+    spr_register_hv(env, SPR_HRMOR, "HRMOR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
                  0x00000000);
 }
 
@@ -7868,6 +7988,17 @@ static void gen_spr_power8_fscr(CPUPPCState *env)
                      KVM_REG_PPC_FSCR, initval);
 }
 
+static void gen_spr_power8_rpr(CPUPPCState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    spr_register_hv(env, SPR_RPR, "RPR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0x00000103070F1F3F);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
     gen_spr_ne_601(env);
@@ -7919,9 +8050,12 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         gen_spr_power8_pmu_user(env);
         gen_spr_power8_tm(env);
         gen_spr_vtb(env);
+        gen_spr_power8_rpr(env);
     }
     if (version < BOOK3S_CPU_POWER8) {
         gen_spr_book3s_dbg(env);
+    } else {
+        gen_spr_book3s_207_dbg(env);
     }
 #if !defined(CONFIG_USER_ONLY)
     switch (version) {
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 08/77] ppc: Add number of threads per core to the processor definition
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (6 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:16   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation Benjamin Herrenschmidt
                   ` (71 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Also use it to clamp the max SMT mode and ensure that the cpu_dt_id
values are offset by that value in order to preserve consistency with
the HW implementations.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu-qom.h        | 1 +
 target-ppc/translate_init.c | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 6967a80..fef23fd 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -68,6 +68,7 @@ typedef struct PowerPCCPUClass {
     uint32_t flags;
     int bfd_mach;
     uint32_t l1_dcache_size, l1_icache_size;
+    uint32_t threads_per_core;
 #if defined(TARGET_PPC64)
     const struct ppc_segment_page_sizes *sps;
 #endif
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index c743eb1..1d402e1 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8193,6 +8193,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data)
                  POWERPC_FLAG_BUS_CLK;
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x10000;
+    pcc->threads_per_core = 2;
 }
 
 static void powerpc_get_compat(Object *obj, Visitor *v,
@@ -8339,6 +8340,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x8000;
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
+    pcc->threads_per_core = 4;
 }
 
 static void init_proc_POWER8(CPUPPCState *env)
@@ -8419,6 +8421,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x8000;
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
+    pcc->threads_per_core = 8;
 }
 #endif /* defined (TARGET_PPC64) */
 
@@ -9074,6 +9077,9 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+    if (max_smt > pcc->threads_per_core) {
+        max_smt = pcc->threads_per_core;
+    }
     if (smp_threads > max_smt) {
         error_setg(errp, "Cannot support more than %d threads on PPC with %s",
                    max_smt, kvm_enabled() ? "KVM" : "TCG");
@@ -9094,7 +9100,7 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
     }
 
 #if !defined(CONFIG_USER_ONLY)
-    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
+    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * pcc->threads_per_core
         + (cs->cpu_index % smp_threads);
 #endif
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (7 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 08/77] ppc: Add number of threads per core to the processor definition Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:19   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 10/77] ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV Benjamin Herrenschmidt
                   ` (70 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

XXX This patch needs double checking... It fixed 32-bit userspace
but I'm not sure it's right. I wonder whether msr_is_64bit() should
be applied to env->msr, not msr, but I need to double check the
architecture.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/excp_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index c1d6605..00fae60 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -878,13 +878,13 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
     CPUState *cs = CPU(ppc_env_get_cpu(env));
 
 #if defined(TARGET_PPC64)
+    msr = msr & msrm;
     if (msr_is_64bit(env, msr)) {
         nip = (uint64_t)nip;
-        msr &= (uint64_t)msrm;
     } else {
         nip = (uint32_t)nip;
-        msr = (uint32_t)(msr & msrm);
         if (keep_msrh) {
+	    msr &= 0xffffffff;
             msr |= env->msr & ~((uint64_t)0xFFFFFFFF);
         }
     }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 10/77] ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (8 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper Benjamin Herrenschmidt
                   ` (69 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This helper is only used by the various instructions that can alter
the MSR, not by interrupts. Add a comment to that effect to the
interrupt code as well, in case somebody wants to change this.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/excp_helper.c | 8 ++++++--
 target-ppc/helper_regs.h | 4 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 00fae60..83e6c07 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -662,8 +662,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         }
     }
 #endif
-    /* XXX: we don't use hreg_store_msr here as already have treated
-     *      any special case that could occur. Just store MSR and update hflags
+    /* We don't use hreg_store_msr here as we have already treated
+     * any special case that could occur. Just store MSR and update hflags
+     *
+     * Note: We *MUST* not use hreg_store_msr() as-is anyway because it
+     * will prevent setting of the HV bit which some exceptions might need
+     * to do.
      */
     env->msr = new_msr & env->msr_mask;
     hreg_compute_hflags(env);
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 57da931..12af61c 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -114,8 +114,8 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     excp = 0;
     value &= env->msr_mask;
 #if !defined(CONFIG_USER_ONLY)
-    if (!alter_hv) {
-        /* mtmsr cannot alter the hypervisor state */
+    /* Neither mtmsr nor guest state can alter HV */
+    if (!alter_hv || !(env->msr & MSR_HVB)) {
         value &= ~MSR_HVB;
         value |= env->msr & MSR_HVB;
     }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (9 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 10/77] ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:30   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 12/77] ppc: Better figure out if processor has HV mode Benjamin Herrenschmidt
                   ` (68 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Add a cpu_ppc_set_papr() helper and move the code adjusting the MSR
mask and calling kvmppc_set_papr() into it. This allows us to add a few
more things, such as disabling setting of MSR:HV and setting the
appropriate LPCR bits, which will be used when fixing the exception model.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/spapr.c              | 12 +++---------
 target-ppc/cpu.h            |  1 +
 target-ppc/translate_init.c | 37 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 37d071e..610629e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1606,15 +1606,8 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu)
     /* Set time-base frequency to 512 MHz */
     cpu_ppc_tb_init(env, TIMEBASE_FREQ);
 
-    /* PAPR always has exception vectors in RAM not ROM. To ensure this,
-     * MSR[IP] should never be set.
-     */
-    env->msr_mask &= ~(1 << 6);
-
-    /* Tell KVM that we're in PAPR mode */
-    if (kvm_enabled()) {
-        kvmppc_set_papr(cpu);
-    }
+    /* Enable PAPR mode in TCG or KVM */
+    cpu_ppc_set_papr(cpu);
 
     if (cpu->max_compat) {
         if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
@@ -1791,6 +1784,7 @@ static void ppc_spapr_init(MachineState *machine)
             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
             exit(1);
         }
+
         spapr_cpu_init(spapr, cpu);
     }
 
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 611367f..357b6e7 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1229,6 +1229,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val);
 void store_booke_tsr (CPUPPCState *env, target_ulong val);
 void ppc_tlb_invalidate_all (CPUPPCState *env);
 void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
+void cpu_ppc_set_papr(PowerPCCPU *cpu);
 #endif
 #endif
 
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 1d402e1..7bcfbc0 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8423,8 +8423,43 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
     pcc->threads_per_core = 8;
 }
-#endif /* defined (TARGET_PPC64) */
 
+#if !defined(CONFIG_USER_ONLY)
+
+void cpu_ppc_set_papr(PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+    ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
+
+    /* PAPR always has exception vectors in RAM not ROM. To ensure this,
+     * MSR[IP] should never be set.
+     *
+     * We also disallow setting of MSR_HV
+     */
+    env->msr_mask &= ~((1ull << MSR_EP) | MSR_HVB);
+
+    /* Set emulated LPCR to not send interrupts to hypervisor. Note that
+     * under KVM, the actual HW LPCR will be set differently by KVM itself,
+     * the settings below ensure proper operations with TCG in absence of
+     * a real hypervisor
+     */
+    lpcr->default_value &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV);
+    lpcr->default_value |= LPCR_LPES0 | LPCR_LPES1;
+
+    /* We should be followed by a CPU reset but update the active value
+     * just in case...
+     */
+    env->spr[SPR_LPCR] = lpcr->default_value;
+
+    /* Tell KVM that we're in PAPR mode */
+    if (kvm_enabled()) {
+        kvmppc_set_papr(cpu);
+    }
+}
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#endif /* defined (TARGET_PPC64) */
 
 /*****************************************************************************/
 /* Generic CPU instantiation routine                                         */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 12/77] ppc: Better figure out if processor has HV mode
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (10 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:22   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only Benjamin Herrenschmidt
                   ` (67 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We use an env. flag which is set to the initial value of MSR_HVB in
the msr_mask. We also adjust the POWER8 mask to set SHV.

Also use this to adjust ctx.hv so that it is *set* when the processor
doesn't have an HV mode (970 with Apple mode for example), thus enabling
hypervisor instructions/SPRs.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  4 ++++
 target-ppc/translate.c      |  4 +++-
 target-ppc/translate_init.c | 21 ++++++++++++++++-----
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 357b6e7..062644e 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1113,6 +1113,10 @@ struct CPUPPCState {
     hwaddr mpic_iack;
     /* true when the external proxy facility mode is enabled */
     bool mpic_proxy;
+    /* set when the processor has an HV mode, thus HV priv
+     * instructions and SPRs are disallowed if MSR:HV is 0
+     */
+    bool has_hv_mode;
 #endif
 
     /* Those resources are used only during code translation */
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a2fe1b5..10eb9e3 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11465,8 +11465,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
     ctx.exception = POWERPC_EXCP_NONE;
     ctx.spr_cb = env->spr_cb;
     ctx.pr = msr_pr;
-    ctx.hv = !msr_pr && msr_hv;
     ctx.mem_idx = env->dmmu_idx;
+#if !defined(CONFIG_USER_ONLY)
+    ctx.hv = !msr_pr && (msr_hv || !env->has_hv_mode);
+#endif
     ctx.insns_flags = env->insns_flags;
     ctx.insns_flags2 = env->insns_flags2;
     ctx.access_type = -1;
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 7bcfbc0..76f20ea 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8391,7 +8391,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
                         PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
                         PPC2_TM;
     pcc->msr_mask = (1ull << MSR_SF) |
-                    (1ull << MSR_TM) |
+                    (1ull << MSR_SHV) |
+		    (1ull << MSR_TM) |
                     (1ull << MSR_VR) |
                     (1ull << MSR_VSX) |
                     (1ull << MSR_EE) |
@@ -9748,10 +9749,7 @@ static void ppc_cpu_reset(CPUState *s)
     pcc->parent_reset(s);
 
     msr = (target_ulong)0;
-    if (0) {
-        /* XXX: find a suitable condition to enable the hypervisor mode */
-        msr |= (target_ulong)MSR_HVB;
-    }
+    msr |= (target_ulong)MSR_HVB;
     msr |= (target_ulong)0 << MSR_AP; /* TO BE CHECKED */
     msr |= (target_ulong)0 << MSR_SA; /* TO BE CHECKED */
     msr |= (target_ulong)1 << MSR_EP;
@@ -9852,6 +9850,19 @@ static void ppc_cpu_initfn(Object *obj)
     env->bfd_mach = pcc->bfd_mach;
     env->check_pow = pcc->check_pow;
 
+    /* Mark HV mode as supported if the CPU has an MSR_HV bit
+     * in the msr_mask. The mask can later be cleared by PAPR
+     * mode but the hv mode support will remain, thus enforcing
+     * that we cannot use priv. instructions in guest in PAPR
+     * mode. For 970 we currently simply don't set HV in msr_mask
+     * thus simulating an "Apple mode" 970. If we ever want to
+     * support 970 HV mode, we'll have to add a processor attribute
+     * of some sort.
+     */
+#if !defined(CONFIG_USER_ONLY)
+    env->has_hv_mode = !!(env->msr_mask & MSR_HVB);
+#endif
+
 #if defined(TARGET_PPC64)
     if (pcc->sps) {
         env->sps = *pcc->sps;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (11 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 12/77] ppc: Better figure out if processor has HV mode Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:34   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie Benjamin Herrenschmidt
                   ` (66 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Not that anything remotely recent supports tlbia but ...

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 10eb9e3..014fe5e 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4836,7 +4836,7 @@ static void gen_tlbia(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(ctx->pr)) {
+    if (unlikely(ctx->pr || !ctx->hv)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4850,7 +4850,7 @@ static void gen_tlbiel(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(ctx->pr)) {
+    if (unlikely(ctx->pr || !ctx->hv)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
@@ -4864,7 +4864,7 @@ static void gen_tlbie(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
     gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 #else
-    if (unlikely(ctx->pr)) {
+    if (unlikely(ctx->pr || !ctx->hv)) {
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (12 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-20  7:02   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation Benjamin Herrenschmidt
                   ` (65 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Otherwise it will trip on the forms used in recent architecture.

Ideally, we should have different handlers for different architecture
levels but our current implementation of TLB flushing is dumb enough
that this will do for now.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 014fe5e..bd5df40 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9952,8 +9952,10 @@ GEN_HANDLER2(slbmfee, "slbmfee", 0x1F, 0x13, 0x1C, 0x001F0001, PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfev, "slbmfev", 0x1F, 0x13, 0x1A, 0x001F0001, PPC_SEGMENT_64B),
 #endif
 GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA),
-GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x03FF0001, PPC_MEM_TLBIE),
-GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x03FF0001, PPC_MEM_TLBIE),
+/* XXX Those instructions will need to be handled differently for
+ * different ISA versions */
+GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
+GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
 GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x03FFFC01, PPC_SLBI),
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (13 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:26   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 16/77] ppc: Get out of emulation on SMT "OR" ops Benjamin Herrenschmidt
                   ` (64 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: Michael Neuling, qemu-devel

From: Michael Neuling <mikey@neuling.org>

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index bd5df40..3974cd2 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4391,7 +4391,7 @@ static void gen_mtmsrd(DisasContext *ctx)
         /* Special form that does not need any synchronisation */
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE));
-        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~((1 << MSR_RI) | (1 << MSR_EE)));
+        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
         tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
         tcg_temp_free(t0);
     } else {
@@ -4422,7 +4422,7 @@ static void gen_mtmsr(DisasContext *ctx)
         /* Special form that does not need any synchronisation */
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE));
-        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~((1 << MSR_RI) | (1 << MSR_EE)));
+        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
         tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
         tcg_temp_free(t0);
     } else {
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 16/77] ppc: Get out of emulation on SMT "OR" ops
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (14 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:40   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 17/77] ppc: Add PPC_64H instruction flag to POWER7 and POWER8 Benjamin Herrenschmidt
                   ` (63 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Otherwise tight loops at smt_low for example, which OPAL does,
eat so much CPU that we can't boot a kernel anymore. With that,
I can boot 8 CPUs just fine with powernv.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 3974cd2..e8bbd59 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1396,6 +1396,19 @@ GEN_LOGICAL2(nand, tcg_gen_nand_tl, 0x0E, PPC_INTEGER);
 /* nor & nor. */
 GEN_LOGICAL2(nor, tcg_gen_nor_tl, 0x03, PPC_INTEGER);
 
+#if defined(TARGET_PPC64)
+static void gen_pause(DisasContext *ctx) /* raise EXCP_HLT to yield the CPU */
+{
+    TCGv_i32 t0 = tcg_const_i32(0);
+    tcg_gen_st_i32(t0, cpu_env, /* halted = 0 (offset is relative to env) */
+                   -offsetof(PowerPCCPU, env) + offsetof(CPUState, halted));
+    tcg_temp_free_i32(t0);
+
+    /* Stop translation, this gives other CPUs a chance to run */
+    gen_exception_err(ctx, EXCP_HLT, 1);
+}
+#endif /* defined(TARGET_PPC64) */
+
 /* or & or. */
 static void gen_or(DisasContext *ctx)
 {
@@ -1468,6 +1481,10 @@ static void gen_or(DisasContext *ctx)
             tcg_gen_ori_tl(t0, t0, ((uint64_t)prio) << 50);
             gen_store_spr(SPR_PPR, t0);
             tcg_temp_free(t0);
+            /* Pause us out of TCG otherwise spin loops with smt_low
+             * eat too much CPU and the kernel hangs
+             */
+            gen_pause(ctx);
         }
 #endif
     }
@@ -1493,8 +1510,6 @@ static void gen_ori(DisasContext *ctx)
     target_ulong uimm = UIMM(ctx->opcode);
 
     if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) {
-        /* NOP */
-        /* XXX: should handle special NOPs for POWER series */
         return;
     }
     tcg_gen_ori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], uimm);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 17/77] ppc: Add PPC_64H instruction flag to POWER7 and POWER8
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (15 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 16/77] ppc: Get out of emulation on SMT "OR" ops Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-16  5:41   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model Benjamin Herrenschmidt
                   ` (62 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This will enable decoding of hrfid

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 76f20ea..f11e7d0 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8303,7 +8303,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
                        PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
                        PPC_MEM_SYNC | PPC_MEM_EIEIO |
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
-                       PPC_64B | PPC_ALTIVEC |
+                       PPC_64B | PPC_64H | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
                        PPC_POPCNTB | PPC_POPCNTWD;
     pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
@@ -8380,7 +8380,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
                        PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
                        PPC_MEM_SYNC | PPC_MEM_EIEIO |
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
-                       PPC_64B | PPC_64BX | PPC_ALTIVEC |
+                       PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
                        PPC_POPCNTB | PPC_POPCNTWD;
     pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (16 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 17/77] ppc: Add PPC_64H instruction flag to POWER7 and POWER8 Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:44   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 19/77] ppc: Fix POWER7 and POWER8 exception definitions Benjamin Herrenschmidt
                   ` (61 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Properly implement LPES0/1 handling for HV vs. !HV mode and fix AIL
implementation.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |   2 +
 target-ppc/excp_helper.c    | 175 ++++++++++++++++++++++----------------------
 target-ppc/translate_init.c |   2 +-
 3 files changed, 92 insertions(+), 87 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 062644e..8185812 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -162,6 +162,8 @@ enum powerpc_excp_t {
     POWERPC_EXCP_970,
     /* POWER7 exception model           */
     POWERPC_EXCP_POWER7,
+    /* POWER8 exception model           */
+    POWERPC_EXCP_POWER8,
 #endif /* defined(TARGET_PPC64) */
 };
 
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 83e6c07..716b27b 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -74,22 +74,14 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
     target_ulong msr, new_msr, vector;
-    int srr0, srr1, asrr0, asrr1;
-    int lpes0, lpes1, lev;
+    int srr0, srr1, asrr0, asrr1, lev, ail;
+    bool lpes0;
 
-    if (0) {
-        /* XXX: find a suitable condition to enable the hypervisor mode */
-        lpes0 = (env->spr[SPR_LPCR] >> 1) & 1;
-        lpes1 = (env->spr[SPR_LPCR] >> 2) & 1;
-    } else {
-        /* Those values ensure we won't enter the hypervisor mode */
-        lpes0 = 0;
-        lpes1 = 1;
-    }
 
     qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
                   " => %08x (%02x)\n", env->nip, excp, env->error_code);
 
+
     /* new srr1 value excluding must-be-zero bits */
     if (excp_model == POWERPC_EXCP_BOOKE) {
         msr = env->msr;
@@ -97,8 +89,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         msr = env->msr & ~0x783f0000ULL;
     }
 
-    /* new interrupt handler msr */
-    new_msr = env->msr & ((target_ulong)1 << MSR_ME);
+    /* new interrupt handler msr preserves existing HV and ME unless
+     * explicitly overridden
+     */
+    new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
 
     /* target registers */
     srr0 = SPR_SRR0;
@@ -106,6 +100,33 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     asrr0 = -1;
     asrr1 = -1;
 
+    /* Exception targeting modifiers
+     *
+     * LPES0 is supported on POWER7/8
+     * LPES1 is not supported (old iSeries mode)
+     *
+     * On anything else, we behave as if LPES0 is 1
+     * (externals don't alter MSR:HV)
+     *
+     * AIL is initialized here but can be cleared by
+     * selected exceptions
+     */
+#if defined(TARGET_PPC64)
+    if (excp_model == POWERPC_EXCP_POWER7 ||
+        excp_model == POWERPC_EXCP_POWER8) {
+        lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0);
+        if (excp_model == POWERPC_EXCP_POWER8) {
+            ail = (env->spr[SPR_LPCR] & LPCR_AIL) >> LPCR_AIL_SHIFT;
+        } else {
+            ail = 0;
+        }
+    } else
+#endif /* defined(TARGET_PPC64) */
+    {
+        lpes0 = true;
+        ail = 0;
+    }
+
     switch (excp) {
     case POWERPC_EXCP_NONE:
         /* Should never happen */
@@ -141,10 +162,8 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
             cs->halted = 1;
             cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
         }
-        if (0) {
-            /* XXX: find a suitable condition to enable the hypervisor mode */
-            new_msr |= (target_ulong)MSR_HVB;
-        }
+        new_msr |= (target_ulong)MSR_HVB;
+        ail = 0;
 
         /* machine check exceptions don't have ME set */
         new_msr &= ~((target_ulong)1 << MSR_ME);
@@ -169,23 +188,20 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     case POWERPC_EXCP_DSI:       /* Data storage exception                   */
         LOG_EXCP("DSI exception: DSISR=" TARGET_FMT_lx" DAR=" TARGET_FMT_lx
                  "\n", env->spr[SPR_DSISR], env->spr[SPR_DAR]);
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_next;
     case POWERPC_EXCP_ISI:       /* Instruction storage exception            */
         LOG_EXCP("ISI exception: msr=" TARGET_FMT_lx ", nip=" TARGET_FMT_lx
                  "\n", msr, env->nip);
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         msr |= env->error_code;
         goto store_next;
     case POWERPC_EXCP_EXTERNAL:  /* External input                           */
         cs = CPU(cpu);
 
-        if (lpes0 == 1) {
+        if (!lpes0) {
             new_msr |= (target_ulong)MSR_HVB;
+            new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
+            srr0 = SPR_HSRR0;
+            srr1 = SPR_HSRR1;
         }
         if (env->mpic_proxy) {
             /* IACK the IRQ on delivery */
@@ -193,9 +209,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         }
         goto store_next;
     case POWERPC_EXCP_ALIGN:     /* Alignment exception                      */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
         env->spr[SPR_DSISR] |= (cpu_ldl_code(env, (env->nip - 4))
@@ -210,9 +223,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
                 env->error_code = 0;
                 return;
             }
-            if (lpes1 == 0) {
-                new_msr |= (target_ulong)MSR_HVB;
-            }
             msr |= 0x00100000;
             if (msr_fe0 == msr_fe1) {
                 goto store_next;
@@ -221,23 +231,14 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
             break;
         case POWERPC_EXCP_INVAL:
             LOG_EXCP("Invalid instruction at " TARGET_FMT_lx "\n", env->nip);
-            if (lpes1 == 0) {
-                new_msr |= (target_ulong)MSR_HVB;
-            }
             msr |= 0x00080000;
             env->spr[SPR_BOOKE_ESR] = ESR_PIL;
             break;
         case POWERPC_EXCP_PRIV:
-            if (lpes1 == 0) {
-                new_msr |= (target_ulong)MSR_HVB;
-            }
             msr |= 0x00040000;
             env->spr[SPR_BOOKE_ESR] = ESR_PPR;
             break;
         case POWERPC_EXCP_TRAP:
-            if (lpes1 == 0) {
-                new_msr |= (target_ulong)MSR_HVB;
-            }
             msr |= 0x00020000;
             env->spr[SPR_BOOKE_ESR] = ESR_PTR;
             break;
@@ -249,27 +250,23 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         }
         goto store_current;
     case POWERPC_EXCP_FPU:       /* Floating-point unavailable exception     */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_current;
     case POWERPC_EXCP_SYSCALL:   /* System call exception                    */
         dump_syscall(env);
         lev = env->error_code;
+
+        /* "PAPR mode" built-in hypercall emulation */
         if ((lev == 1) && cpu_ppc_hypercall) {
             cpu_ppc_hypercall(cpu);
             return;
         }
-        if (lev == 1 || (lpes0 == 0 && lpes1 == 0)) {
+        if (lev == 1) {
             new_msr |= (target_ulong)MSR_HVB;
         }
         goto store_next;
     case POWERPC_EXCP_APU:       /* Auxiliary processor unavailable          */
         goto store_current;
     case POWERPC_EXCP_DECR:      /* Decrementer exception                    */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_next;
     case POWERPC_EXCP_FIT:       /* Fixed-interval timer interrupt           */
         /* FIT on 4xx */
@@ -338,21 +335,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         } else {
             new_msr &= ~((target_ulong)1 << MSR_ME);
         }
-
-        if (0) {
-            /* XXX: find a suitable condition to enable the hypervisor mode */
-            new_msr |= (target_ulong)MSR_HVB;
-        }
+        new_msr |= (target_ulong)MSR_HVB;
+        ail = 0;
         goto store_next;
     case POWERPC_EXCP_DSEG:      /* Data segment exception                   */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_next;
     case POWERPC_EXCP_ISEG:      /* Instruction segment exception            */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_next;
     case POWERPC_EXCP_HDECR:     /* Hypervisor decrementer exception         */
         srr0 = SPR_HSRR0;
@@ -361,21 +349,20 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
         goto store_next;
     case POWERPC_EXCP_TRACE:     /* Trace exception                          */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_next;
     case POWERPC_EXCP_HDSI:      /* Hypervisor data storage exception        */
         srr0 = SPR_HSRR0;
         srr1 = SPR_HSRR1;
         new_msr |= (target_ulong)MSR_HVB;
         new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
+        ail = 0;
         goto store_next;
     case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
         srr0 = SPR_HSRR0;
         srr1 = SPR_HSRR1;
         new_msr |= (target_ulong)MSR_HVB;
         new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
+        ail = 0;
         goto store_next;
     case POWERPC_EXCP_HDSEG:     /* Hypervisor data segment exception        */
         srr0 = SPR_HSRR0;
@@ -390,19 +377,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
         goto store_next;
     case POWERPC_EXCP_VPU:       /* Vector unavailable exception             */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_current;
     case POWERPC_EXCP_VSXU:       /* VSX unavailable exception               */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_current;
     case POWERPC_EXCP_FU:         /* Facility unavailable exception          */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         goto store_current;
     case POWERPC_EXCP_PIT:       /* Programmable interval timer interrupt    */
         LOG_EXCP("PIT exception\n");
@@ -421,9 +399,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
                   "is not implemented yet !\n");
         goto store_next;
     case POWERPC_EXCP_IFTLB:     /* Instruction fetch TLB error              */
-        if (lpes1 == 0) { /* XXX: check this */
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         switch (excp_model) {
         case POWERPC_EXCP_602:
         case POWERPC_EXCP_603:
@@ -440,9 +415,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         }
         break;
     case POWERPC_EXCP_DLTLB:     /* Data load TLB miss                       */
-        if (lpes1 == 0) { /* XXX: check this */
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         switch (excp_model) {
         case POWERPC_EXCP_602:
         case POWERPC_EXCP_603:
@@ -459,9 +431,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         }
         break;
     case POWERPC_EXCP_DSTLB:     /* Data store TLB miss                      */
-        if (lpes1 == 0) { /* XXX: check this */
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         switch (excp_model) {
         case POWERPC_EXCP_602:
         case POWERPC_EXCP_603:
@@ -567,9 +536,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
                   "is not implemented yet !\n");
         goto store_next;
     case POWERPC_EXCP_PERFM:     /* Embedded performance monitor interrupt   */
-        if (lpes1 == 0) {
-            new_msr |= (target_ulong)MSR_HVB;
-        }
         /* XXX: TODO */
         cpu_abort(cs,
                   "Performance counter exception is not implemented yet !\n");
@@ -613,6 +579,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     }
     /* Save MSR */
     env->spr[srr1] = msr;
+
+    /* Sanity check */
+    if (!(env->msr_mask & MSR_HVB) && (srr0 == SPR_HSRR0)) {
+        cpu_abort(cs, "Trying to deliver HV exception %d with no HV support\n", excp);
+    }
+
     /* If any alternate SRR register are defined, duplicate saved values */
     if (asrr0 != -1) {
         env->spr[asrr0] = env->spr[srr0];
@@ -621,13 +593,20 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         env->spr[asrr1] = env->spr[srr1];
     }
 
-    if (env->spr[SPR_LPCR] & LPCR_AIL) {
-        new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
-    }
-
+    /* Sort out endianness of interrupt, this differs depending on the
+     * CPU, the HV mode, etc...
+     */
 #ifdef TARGET_PPC64
     if (excp_model == POWERPC_EXCP_POWER7) {
-        if (env->spr[SPR_LPCR] & LPCR_ILE) {
+        if (!(new_msr & MSR_HVB) && (env->spr[SPR_LPCR] & LPCR_ILE)) {
+            new_msr |= (target_ulong)1 << MSR_LE;
+        }
+    } else if (excp_model == POWERPC_EXCP_POWER8) {
+        if (new_msr & MSR_HVB) {
+            if (env->spr[SPR_HID0] & HID0_HILE) {
+                new_msr |= (target_ulong)1 << MSR_LE;
+            }
+        } else if (env->spr[SPR_LPCR] & LPCR_ILE) {
             new_msr |= (target_ulong)1 << MSR_LE;
         }
     } else if (msr_ile) {
@@ -646,6 +625,30 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
                   excp);
     }
     vector |= env->excp_prefix;
+
+    /* AIL only works if there is no HV transition and we are running with
+     * translations enabled
+     */
+    if (!((msr >> MSR_IR) & 1) || !((msr >> MSR_DR) & 1) ||
+        ((new_msr & MSR_HVB) && !(msr & MSR_HVB))) {
+        ail = 0;
+    }
+    /* Handle AIL */
+    if (ail) {
+        new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
+        switch(ail) {
+        case 2:
+            vector |= 0x18000;
+            break;
+        case 3:
+            vector |= 0xc000000000004000ull;
+            break;
+        default:
+            cpu_abort(cs, "Invalid AIL combination %d\n", ail);
+            break;
+        }
+    }
+
 #if defined(TARGET_PPC64)
     if (excp_model == POWERPC_EXCP_BOOKE) {
         if (env->spr[SPR_BOOKE_EPCR] & EPCR_ICM) {
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index f11e7d0..8a50273 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8412,7 +8412,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 #if defined(CONFIG_SOFTMMU)
     pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
 #endif
-    pcc->excp_model = POWERPC_EXCP_POWER7;
+    pcc->excp_model = POWERPC_EXCP_POWER8;
     pcc->bus_model = PPC_FLAGS_INPUT_POWER7;
     pcc->bfd_mach = bfd_mach_ppc64;
     pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 19/77] ppc: Fix POWER7 and POWER8 exception definitions
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (17 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:46   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 20/77] ppc: Fix generation if ISI/DSI vs. HV mode Benjamin Herrenschmidt
                   ` (60 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We were initializing unused ones and missing some

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            | 11 ++++++++++-
 target-ppc/translate_init.c | 27 +++++++++++++++++++++------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 8185812..23479b1 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -200,6 +200,9 @@ enum {
     POWERPC_EXCP_HYPPRIV  = 41, /* Embedded hypervisor priv instruction      */
     /* Vectors 42 to 63 are reserved                                         */
     /* Exceptions defined in the PowerPC server specification                */
+    /* Server doorbell variants */
+#define POWERPC_EXCP_SDOOR	POWERPC_EXCP_GDOORI
+#define POWERPC_EXCP_SDOOR_HV	POWERPC_EXCP_DOORI
     POWERPC_EXCP_RESET    = 64, /* System reset exception                    */
     POWERPC_EXCP_DSEG     = 65, /* Data segment exception                    */
     POWERPC_EXCP_ISEG     = 66, /* Instruction segment exception             */
@@ -242,8 +245,12 @@ enum {
     /* VSX Unavailable (Power ISA 2.06 and later)                            */
     POWERPC_EXCP_VSXU     = 94, /* VSX Unavailable                           */
     POWERPC_EXCP_FU       = 95, /* Facility Unavailable                      */
+    /* Additional ISA 2.06 and later server exceptions                       */
+    POWERPC_EXCP_HV_EMU   = 96, /* HV emulation assistance                   */
+    POWERPC_EXCP_HV_MAINT = 97, /* HMI                                       */
+    POWERPC_EXCP_HV_FU    = 98, /* Hypervisor Facility unavailable           */
     /* EOL                                                                   */
-    POWERPC_EXCP_NB       = 96,
+    POWERPC_EXCP_NB       = 99,
     /* QEMU exceptions: used internally during code translation              */
     POWERPC_EXCP_STOP         = 0x200, /* stop translation                   */
     POWERPC_EXCP_BRANCH       = 0x201, /* branch instruction                 */
@@ -2197,6 +2204,8 @@ enum {
     PPC_INTERRUPT_CDOORBELL,      /* Critical doorbell interrupt          */
     PPC_INTERRUPT_DOORBELL,       /* Doorbell interrupt                   */
     PPC_INTERRUPT_PERFM,          /* Performance monitor interrupt        */
+    PPC_INTERRUPT_HMI,            /* Hypervisor Maintenance interrupt     */
+    PPC_INTERRUPT_HDOORBELL,      /* Hypervisor Doorbell interrupt        */
 };
 
 /* Processor Compatibility mask (PCR) */
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 8a50273..5210b25 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -3105,18 +3105,30 @@ static void init_excp_POWER7 (CPUPPCState *env)
     env->excp_vectors[POWERPC_EXCP_HDECR]    = 0x00000980;
     env->excp_vectors[POWERPC_EXCP_SYSCALL]  = 0x00000C00;
     env->excp_vectors[POWERPC_EXCP_TRACE]    = 0x00000D00;
+    env->excp_vectors[POWERPC_EXCP_HDSI]     = 0x00000E00;
+    env->excp_vectors[POWERPC_EXCP_HISI]     = 0x00000E20;
+    env->excp_vectors[POWERPC_EXCP_HV_EMU]   = 0x00000E40;
+    env->excp_vectors[POWERPC_EXCP_HV_MAINT] = 0x00000E60;
     env->excp_vectors[POWERPC_EXCP_PERFM]    = 0x00000F00;
     env->excp_vectors[POWERPC_EXCP_VPU]      = 0x00000F20;
     env->excp_vectors[POWERPC_EXCP_VSXU]     = 0x00000F40;
-    env->excp_vectors[POWERPC_EXCP_FU]       = 0x00000F60;
-    env->excp_vectors[POWERPC_EXCP_IABR]     = 0x00001300;
-    env->excp_vectors[POWERPC_EXCP_MAINT]    = 0x00001600;
-    env->excp_vectors[POWERPC_EXCP_VPUA]     = 0x00001700;
-    env->excp_vectors[POWERPC_EXCP_THERM]    = 0x00001800;
     /* Hardware reset vector */
     env->hreset_vector = 0x0000000000000100ULL;
 #endif
 }
+
+static void init_excp_POWER8 (CPUPPCState *env)
+{
+    init_excp_POWER7(env);
+
+#if !defined(CONFIG_USER_ONLY)
+    env->excp_vectors[POWERPC_EXCP_SDOOR]    = 0x00000A00;
+    env->excp_vectors[POWERPC_EXCP_FU]       = 0x00000F60;
+    env->excp_vectors[POWERPC_EXCP_HV_FU]    = 0x00000F80;
+    env->excp_vectors[POWERPC_EXCP_SDOOR_HV] = 0x00000E80;
+#endif
+}
+
 #endif
 
 /*****************************************************************************/
@@ -8078,10 +8090,13 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         ppc970_irq_init(env);
         break;
     case BOOK3S_CPU_POWER7:
-    case BOOK3S_CPU_POWER8:
         init_excp_POWER7(env);
         ppcPOWER7_irq_init(env);
         break;
+    case BOOK3S_CPU_POWER8:
+        init_excp_POWER8(env);
+        ppcPOWER7_irq_init(env);
+        break;
     default:
         g_assert_not_reached();
     }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 20/77] ppc: Fix generation if ISI/DSI vs. HV mode
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (18 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 19/77] ppc: Fix POWER7 and POWER8 exception definitions Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  6:50   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts Benjamin Herrenschmidt
                   ` (59 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Under some circumstances, we need to direct ISI and DSI interrupts
at the hypervisor, turning them into HISI/HDSI, and using different
SPRs (HDSISR and HDAR) depending on the combination of MSR_DR and
the corresponding VPM bits in LPCR.

This moves part of the code into helpers that are fixed to select
the right exception type and registers. On pre-P7 processors, LPCR
is 0 which provides the old behaviour of directing the interrupts
at the supervisor.

Thanks to Andrei Warkentin for finding a bug when HV=1

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/mmu-hash64.c | 66 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 71e1d14..e489fa4 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -466,6 +466,44 @@ static hwaddr ppc_hash64_pte_raddr(ppc_slb_t *slb, ppc_hash_pte64_t pte,
     return (rpn & ~mask) | (eaddr & mask);
 }
 
+static void ppc_hash64_set_isi(CPUState *cs, CPUPPCState *env, uint64_t error_code)
+{
+    bool vpm;
+
+    if (msr_ir) {
+        vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
+    } else {
+        vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0);
+    }
+    if (vpm && !msr_hv) {
+        cs->exception_index = POWERPC_EXCP_HISI;
+    } else {
+        cs->exception_index = POWERPC_EXCP_ISI;
+    }
+    env->error_code = error_code;
+}
+
+/* Flag a data storage fault: HDSI (HDAR/HDSISR) when the relevant LPCR VPM
+ * bit is set and MSR:HV is clear, a regular DSI (DAR/DSISR) otherwise.
+ */
+static void ppc_hash64_set_dsi(CPUState *cs, CPUPPCState *env, uint64_t dar, uint64_t dsisr)
+{
+    /* LPCR:VPM1 is consulted when MSR:DR is set, LPCR:VPM0 otherwise */
+    bool vpm = !!(env->spr[SPR_LPCR] & (msr_dr ? LPCR_VPM1 : LPCR_VPM0));
+
+    /* Same HV test as ppc_hash64_set_isi: redirect only when not in HV mode */
+    if (vpm && !msr_hv) {
+        cs->exception_index = POWERPC_EXCP_HDSI;
+        env->spr[SPR_HDAR] = dar;
+        env->spr[SPR_HDSISR] = dsisr;
+    } else {
+        cs->exception_index = POWERPC_EXCP_DSI;
+        env->spr[SPR_DAR] = dar;
+        env->spr[SPR_DSISR] = dsisr;
+    }
+    env->error_code = 0;
+}
+
 int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
                                 int rwx, int mmu_idx)
 {
@@ -475,7 +513,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
     hwaddr pte_offset;
     ppc_hash_pte64_t pte;
     int pp_prot, amr_prot, prot;
-    uint64_t new_pte1;
+    uint64_t new_pte1, dsisr;
     const int need_prot[] = {PAGE_READ, PAGE_WRITE, PAGE_EXEC};
     hwaddr raddr;
 
@@ -509,26 +547,21 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
 
     /* 3. Check for segment level no-execute violation */
     if ((rwx == 2) && (slb->vsid & SLB_VSID_N)) {
-        cs->exception_index = POWERPC_EXCP_ISI;
-        env->error_code = 0x10000000;
+        ppc_hash64_set_isi(cs, env, 0x10000000);
         return 1;
     }
 
     /* 4. Locate the PTE in the hash table */
     pte_offset = ppc_hash64_htab_lookup(env, slb, eaddr, &pte);
     if (pte_offset == -1) {
+        dsisr = 0x40000000;
         if (rwx == 2) {
-            cs->exception_index = POWERPC_EXCP_ISI;
-            env->error_code = 0x40000000;
+            ppc_hash64_set_isi(cs, env, dsisr);
         } else {
-            cs->exception_index = POWERPC_EXCP_DSI;
-            env->error_code = 0;
-            env->spr[SPR_DAR] = eaddr;
             if (rwx == 1) {
-                env->spr[SPR_DSISR] = 0x42000000;
-            } else {
-                env->spr[SPR_DSISR] = 0x40000000;
+                dsisr |= 0x02000000;
             }
+            ppc_hash64_set_dsi(cs, env, eaddr, dsisr);
         }
         return 1;
     }
@@ -545,14 +578,9 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
         /* Access right violation */
         qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
         if (rwx == 2) {
-            cs->exception_index = POWERPC_EXCP_ISI;
-            env->error_code = 0x08000000;
+            ppc_hash64_set_isi(cs, env, 0x08000000);
         } else {
-            target_ulong dsisr = 0;
-
-            cs->exception_index = POWERPC_EXCP_DSI;
-            env->error_code = 0;
-            env->spr[SPR_DAR] = eaddr;
+            dsisr = 0;
             if (need_prot[rwx] & ~pp_prot) {
                 dsisr |= 0x08000000;
             }
@@ -562,7 +590,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
             if (need_prot[rwx] & ~amr_prot) {
                 dsisr |= 0x00200000;
             }
-            env->spr[SPR_DSISR] = dsisr;
+            ppc_hash64_set_dsi(cs, env, eaddr, dsisr);
         }
         return 1;
     }
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (19 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 20/77] ppc: Fix generation if ISI/DSI vs. HV mode Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-20  7:45   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8 Benjamin Herrenschmidt
                   ` (58 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Recent server processors use the Hypervisor Emulation Assistance
interrupt for illegal instructions and *some* types of SPR accesses.

Also, the code was always generating invalid-instruction exceptions, even
for privilege violations, because it set the wrong flags.

Finally, the checking for PR/HV was open coded everywhere.

This reworks it all, using little helper macros for checking, and
adding the HV interrupt (which gets converted back to program check
in the slow path of excp_helper.c on CPUs that don't want it).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 linux-user/main.c        |   1 +
 target-ppc/excp_helper.c |  19 ++
 target-ppc/translate.c   | 678 ++++++++++++++++++++---------------------------
 3 files changed, 302 insertions(+), 396 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 8acfe0f..beb621f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1654,6 +1654,7 @@ void cpu_loop(CPUPPCState *env)
             queue_signal(env, info.si_signo, &info);
             break;
         case POWERPC_EXCP_PROGRAM:  /* Program exception                     */
+        case POWERPC_EXCP_HV_EMU:   /* HV emulation                          */
             /* XXX: check this */
             switch (env->error_code & ~0xF) {
             case POWERPC_EXCP_FP:
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 716b27b..80a70f4 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -127,6 +127,19 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
         ail = 0;
     }
 
+    /* Hypervisor emulation assistance interrupt only exists on server
+     * arch 2.05 or later. We also don't want to generate it if
+     * we don't have HVB in msr_mask (PAPR mode).
+     */
+    if (excp == POWERPC_EXCP_HV_EMU
+#if defined(TARGET_PPC64)
+        && !((env->mmu_model & POWERPC_MMU_64) && (env->msr_mask & MSR_HVB))
+#endif /* defined(TARGET_PPC64) */
+
+    ) {
+        excp = POWERPC_EXCP_PROGRAM;
+    }
+
     switch (excp) {
     case POWERPC_EXCP_NONE:
         /* Should never happen */
@@ -249,6 +262,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
             break;
         }
         goto store_current;
+    case POWERPC_EXCP_HV_EMU:
+        srr0 = SPR_HSRR0;
+        srr1 = SPR_HSRR1;
+        new_msr |= (target_ulong)MSR_HVB;
+        new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
+        goto store_current;
     case POWERPC_EXCP_FPU:       /* Floating-point unavailable exception     */
         goto store_current;
     case POWERPC_EXCP_SYSCALL:   /* System call exception                    */
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index e8bbd59..3f657b1 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -321,7 +321,19 @@ static inline void gen_debug_exception(DisasContext *ctx)
 
 static inline void gen_inval_exception(DisasContext *ctx, uint32_t error)
 {
-    gen_exception_err(ctx, POWERPC_EXCP_PROGRAM, POWERPC_EXCP_INVAL | error);
+    /* Will be converted to program check if needed */
+    gen_exception_err(ctx, POWERPC_EXCP_HV_EMU, POWERPC_EXCP_INVAL | error);
+}
+
+static inline void gen_priv_exception(DisasContext *ctx, uint32_t error)
+{
+    gen_exception_err(ctx, POWERPC_EXCP_PROGRAM, POWERPC_EXCP_PRIV | error);
+}
+
+static inline void gen_hvpriv_exception(DisasContext *ctx, uint32_t error)
+{
+    /* Will be converted to program check if needed */
+    gen_exception_err(ctx, POWERPC_EXCP_HV_EMU, POWERPC_EXCP_PRIV | error);
 }
 
 /* Stop translation */
@@ -362,6 +374,20 @@ typedef struct opcode_t {
     const char *oname;
 } opcode_t;
 
+/* Helpers for priv. check */
+#define GEN_PRIV do { gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC); return; } while(0)
+
+#if defined(CONFIG_USER_ONLY)
+#define CHK_HV GEN_PRIV
+#define CHK_SV GEN_PRIV
+#else
+#define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
+#define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
+#endif
+
+#define CHK_NONE
+
+
 /*****************************************************************************/
 /***                           Instruction decoding                        ***/
 #define EXTRACT_HELPER(name, shift, nb)                                       \
@@ -2950,7 +2976,7 @@ static void gen_lq(DisasContext *ctx)
     bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 
     if (!legal_in_user_mode && ctx->pr) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+        gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
 
@@ -3073,7 +3099,7 @@ static void gen_std(DisasContext *ctx)
         bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 
         if (!legal_in_user_mode && ctx->pr) {
-            gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+            gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC);
             return;
         }
 
@@ -4086,13 +4112,10 @@ static void gen_mcrf(DisasContext *ctx)
 static void gen_rfi(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     /* Restore CPU state */
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_update_cfar(ctx, ctx->nip);
     gen_helper_rfi(cpu_env);
     gen_sync_exception(ctx);
@@ -4103,13 +4126,10 @@ static void gen_rfi(DisasContext *ctx)
 static void gen_rfid(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     /* Restore CPU state */
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_update_cfar(ctx, ctx->nip);
     gen_helper_rfid(cpu_env);
     gen_sync_exception(ctx);
@@ -4119,13 +4139,10 @@ static void gen_rfid(DisasContext *ctx)
 static void gen_hrfid(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     /* Restore CPU state */
-    if (unlikely(!ctx->hv)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_HV;
     gen_helper_hrfid(cpu_env);
     gen_sync_exception(ctx);
 #endif
@@ -4288,15 +4305,8 @@ static void gen_mfcr(DisasContext *ctx)
 /* mfmsr */
 static void gen_mfmsr(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_msr);
-#endif
 }
 
 static void spr_noaccess(DisasContext *ctx, int gprn, int sprn)
@@ -4340,7 +4350,7 @@ static inline void gen_op_mfspr(DisasContext *ctx)
                 printf("Trying to read privileged spr %d (0x%03x) at "
                        TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
             }
-            gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+            gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
         }
     } else {
         /* Not defined */
@@ -4348,7 +4358,25 @@ static inline void gen_op_mfspr(DisasContext *ctx)
                  TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
         printf("Trying to read invalid spr %d (0x%03x) at "
                TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
-        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+
+        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
+         * it can generate a priv, a hv emu or a no-op
+         */
+        if (sprn & 0x10) {
+            if (ctx->pr) {
+                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+            }
+        } else {
+            if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 || sprn == 6) {
+                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+            }
+        }
+#if !defined(CONFIG_USER_ONLY)
+        /* HV priv */
+        if (ctx->spr_cb[sprn].hea_read) {
+            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+        }
+#endif
     }
 }
 
@@ -4395,13 +4423,9 @@ static void gen_mtcrf(DisasContext *ctx)
 #if defined(TARGET_PPC64)
 static void gen_mtmsrd(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
+
+#if !defined(CONFIG_USER_ONLY)
     if (ctx->opcode & 0x00010000) {
         /* Special form that does not need any synchronisation */
         TCGv t0 = tcg_temp_new();
@@ -4420,20 +4444,16 @@ static void gen_mtmsrd(DisasContext *ctx)
         /* Note that mtmsr is not always defined as context-synchronizing */
         gen_stop_exception(ctx);
     }
-#endif
+#endif /* !defined(CONFIG_USER_ONLY) */
 }
-#endif
+#endif /* defined(TARGET_PPC64) */
 
 static void gen_mtmsr(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
-    if (ctx->opcode & 0x00010000) {
+    CHK_SV;
+
+#if !defined(CONFIG_USER_ONLY)
+   if (ctx->opcode & 0x00010000) {
         /* Special form that does not need any synchronisation */
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE));
@@ -4488,7 +4508,7 @@ static void gen_mtspr(DisasContext *ctx)
                      TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
             printf("Trying to write privileged spr %d (0x%03x) at "
                    TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
-            gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+            gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
         }
     } else {
         /* Not defined */
@@ -4496,7 +4516,25 @@ static void gen_mtspr(DisasContext *ctx)
                  TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
         printf("Trying to write invalid spr %d (0x%03x) at "
                TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
-        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+
+        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
+         * it can generate a priv, a hv emu or a no-op
+         */
+        if (sprn & 0x10) {
+            if (ctx->pr) {
+                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+            }
+        } else {
+            if (ctx->pr || sprn == 0) {
+                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+            }
+        }
+#if !defined(CONFIG_USER_ONLY)
+        /* HV priv */
+        if (ctx->spr_cb[sprn].hea_write) {
+            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+        }
+#endif
     }
 }
 
@@ -4518,13 +4556,11 @@ static void gen_dcbf(DisasContext *ctx)
 static void gen_dcbi(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv EA, val;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     EA = tcg_temp_new();
     gen_set_access_type(ctx, ACCESS_CACHE);
     gen_addr_reg_index(ctx, EA);
@@ -4534,7 +4570,7 @@ static void gen_dcbi(DisasContext *ctx)
     gen_qemu_st8(ctx, val, EA);
     tcg_temp_free(val);
     tcg_temp_free(EA);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* dcdst */
@@ -4655,72 +4691,64 @@ static void gen_dcba(DisasContext *ctx)
 static void gen_mfsr(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_const_tl(SR(ctx->opcode));
     gen_helper_load_sr(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mfsrin */
 static void gen_mfsrin(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     tcg_gen_shri_tl(t0, cpu_gpr[rB(ctx->opcode)], 28);
     tcg_gen_andi_tl(t0, t0, 0xF);
     gen_helper_load_sr(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mtsr */
 static void gen_mtsr(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_const_tl(SR(ctx->opcode));
     gen_helper_store_sr(cpu_env, t0, cpu_gpr[rS(ctx->opcode)]);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mtsrin */
 static void gen_mtsrin(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
+
     t0 = tcg_temp_new();
     tcg_gen_shri_tl(t0, cpu_gpr[rB(ctx->opcode)], 28);
     tcg_gen_andi_tl(t0, t0, 0xF);
     gen_helper_store_sr(cpu_env, t0, cpu_gpr[rD(ctx->opcode)]);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 #if defined(TARGET_PPC64)
@@ -4730,115 +4758,101 @@ static void gen_mtsrin(DisasContext *ctx)
 static void gen_mfsr_64b(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_const_tl(SR(ctx->opcode));
     gen_helper_load_sr(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mfsrin */
 static void gen_mfsrin_64b(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     tcg_gen_shri_tl(t0, cpu_gpr[rB(ctx->opcode)], 28);
     tcg_gen_andi_tl(t0, t0, 0xF);
     gen_helper_load_sr(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mtsr */
 static void gen_mtsr_64b(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_const_tl(SR(ctx->opcode));
     gen_helper_store_sr(cpu_env, t0, cpu_gpr[rS(ctx->opcode)]);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mtsrin */
 static void gen_mtsrin_64b(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     tcg_gen_shri_tl(t0, cpu_gpr[rB(ctx->opcode)], 28);
     tcg_gen_andi_tl(t0, t0, 0xF);
     gen_helper_store_sr(cpu_env, t0, cpu_gpr[rS(ctx->opcode)]);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* slbmte */
 static void gen_slbmte(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_store_slb(cpu_env, cpu_gpr[rB(ctx->opcode)],
                          cpu_gpr[rS(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_slbmfee(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_load_slb_esid(cpu_gpr[rS(ctx->opcode)], cpu_env,
                              cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_slbmfev(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_load_slb_vsid(cpu_gpr[rS(ctx->opcode)], cpu_env,
                              cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 #endif /* defined(TARGET_PPC64) */
 
@@ -4849,40 +4863,34 @@ static void gen_slbmfev(DisasContext *ctx)
 static void gen_tlbia(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr || !ctx->hv)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_HV;
+
     gen_helper_tlbia(cpu_env);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbiel */
 static void gen_tlbiel(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr || !ctx->hv)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbie */
 static void gen_tlbie(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr || !ctx->hv)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_HV;
+
     if (NARROW_MODE(ctx)) {
         TCGv t0 = tcg_temp_new();
         tcg_gen_ext32u_tl(t0, cpu_gpr[rB(ctx->opcode)]);
@@ -4891,56 +4899,52 @@ static void gen_tlbie(DisasContext *ctx)
     } else {
         gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbsync */
 static void gen_tlbsync(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    GEN_PRIV;
+#else    
+    CHK_HV;
+
     /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
      * embedded however needs to deal with tlbsync. We don't try to be
      * fancy and swallow the overhead of checking for both.
      */
     gen_check_tlb_flush(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 #if defined(TARGET_PPC64)
+
 /* slbia */
 static void gen_slbia(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_slbia(cpu_env);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* slbie */
 static void gen_slbie(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_slbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
-#endif
+
+#endif /* defined(TARGET_PPC64) */
 
 /***                              External control                         ***/
 /* Optional: */
@@ -5639,14 +5643,11 @@ static void gen_esa(DisasContext *ctx)
 static void gen_mfrom(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_helper_602_mfrom(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* 602 - 603 - G2 TLB management */
@@ -5655,28 +5656,22 @@ static void gen_mfrom(DisasContext *ctx)
 static void gen_tlbld_6xx(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_helper_6xx_tlbd(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbli */
 static void gen_tlbli_6xx(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_helper_6xx_tlbi(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* 74xx TLB management */
@@ -5685,28 +5680,22 @@ static void gen_tlbli_6xx(DisasContext *ctx)
 static void gen_tlbld_74xx(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_helper_74xx_tlbd(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbli */
 static void gen_tlbli_74xx(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_helper_74xx_tlbi(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* POWER instructions not in PowerPC 601 */
@@ -5720,15 +5709,12 @@ static void gen_clf(DisasContext *ctx)
 /* cli */
 static void gen_cli(DisasContext *ctx)
 {
-    /* Cache line invalidate: privileged and treated as no-op */
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
-#endif
+    /* Cache line invalidate: privileged and treated as no-op */
+    CHK_SV;
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* dclst */
@@ -5740,15 +5726,13 @@ static void gen_dclst(DisasContext *ctx)
 static void gen_mfsri(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     int ra = rA(ctx->opcode);
     int rd = rD(ctx->opcode);
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
     tcg_gen_shri_tl(t0, t0, 28);
@@ -5757,38 +5741,34 @@ static void gen_mfsri(DisasContext *ctx)
     tcg_temp_free(t0);
     if (ra != 0 && ra != rd)
         tcg_gen_mov_tl(cpu_gpr[ra], cpu_gpr[rd]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_rac(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
     gen_helper_rac(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_rfsvc(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
+
     gen_helper_rfsvc(cpu_env);
     gen_sync_exception(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* svc is not implemented for now */
@@ -5941,18 +5921,16 @@ static void gen_mfapidi(DisasContext *ctx)
 static void gen_tlbiva(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
     gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* All 405 MAC instructions are translated here */
@@ -6174,38 +6152,34 @@ GEN_MAC_HANDLER(mullhwu, 0x08, 0x0C);
 static void gen_mfdcr(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv dcrn;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     /* NIP cannot be restored if the memory exception comes from an helper */
     gen_update_nip(ctx, ctx->nip - 4);
     dcrn = tcg_const_tl(SPR(ctx->opcode));
     gen_helper_load_dcr(cpu_gpr[rD(ctx->opcode)], cpu_env, dcrn);
     tcg_temp_free(dcrn);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mtdcr */
 static void gen_mtdcr(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
     TCGv dcrn;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+
+    CHK_SV;
     /* NIP cannot be restored if the memory exception comes from an helper */
     gen_update_nip(ctx, ctx->nip - 4);
     dcrn = tcg_const_tl(SPR(ctx->opcode));
     gen_helper_store_dcr(cpu_env, dcrn, cpu_gpr[rS(ctx->opcode)]);
     tcg_temp_free(dcrn);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mfdcrx */
@@ -6213,18 +6187,15 @@ static void gen_mtdcr(DisasContext *ctx)
 static void gen_mfdcrx(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
     /* NIP cannot be restored if the memory exception comes from an helper */
     gen_update_nip(ctx, ctx->nip - 4);
     gen_helper_load_dcr(cpu_gpr[rD(ctx->opcode)], cpu_env,
                         cpu_gpr[rA(ctx->opcode)]);
     /* Note: Rc update flag set leads to undefined state of Rc0 */
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mtdcrx */
@@ -6232,18 +6203,15 @@ static void gen_mfdcrx(DisasContext *ctx)
 static void gen_mtdcrx(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
+    CHK_SV;
     /* NIP cannot be restored if the memory exception comes from an helper */
     gen_update_nip(ctx, ctx->nip - 4);
     gen_helper_store_dcr(cpu_env, cpu_gpr[rA(ctx->opcode)],
                          cpu_gpr[rS(ctx->opcode)]);
     /* Note: Rc update flag set leads to undefined state of Rc0 */
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* mfdcrux (PPC 460) : user-mode access to DCR */
@@ -6269,28 +6237,19 @@ static void gen_mtdcrux(DisasContext *ctx)
 /* dccci */
 static void gen_dccci(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* interpreted as no-op */
-#endif
 }
 
 /* dcread */
 static void gen_dcread(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv EA, val;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     gen_set_access_type(ctx, ACCESS_CACHE);
     EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
@@ -6299,7 +6258,7 @@ static void gen_dcread(DisasContext *ctx)
     tcg_temp_free(val);
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], EA);
     tcg_temp_free(EA);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* icbt */
@@ -6314,60 +6273,40 @@ static void gen_icbt_40x(DisasContext *ctx)
 /* iccci */
 static void gen_iccci(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* interpreted as no-op */
-#endif
 }
 
 /* icread */
 static void gen_icread(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-#else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* interpreted as no-op */
-#endif
 }
 
 /* rfci (supervisor only) */
 static void gen_rfci_40x(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* Restore CPU state */
     gen_helper_40x_rfci(cpu_env);
     gen_sync_exception(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_rfci(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* Restore CPU state */
     gen_helper_rfci(cpu_env);
     gen_sync_exception(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* BookE specific */
@@ -6376,32 +6315,26 @@ static void gen_rfci(DisasContext *ctx)
 static void gen_rfdi(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* Restore CPU state */
     gen_helper_rfdi(cpu_env);
     gen_sync_exception(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* XXX: not implemented on 440 ? */
 static void gen_rfmci(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     /* Restore CPU state */
     gen_helper_rfmci(cpu_env);
     gen_sync_exception(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* TLB management - PowerPC 405 implementation */
@@ -6410,12 +6343,9 @@ static void gen_rfmci(DisasContext *ctx)
 static void gen_tlbre_40x(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     switch (rB(ctx->opcode)) {
     case 0:
         gen_helper_4xx_tlbre_hi(cpu_gpr[rD(ctx->opcode)], cpu_env,
@@ -6429,20 +6359,18 @@ static void gen_tlbre_40x(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         break;
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbsx - tlbsx. */
 static void gen_tlbsx_40x(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
     gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
@@ -6454,19 +6382,17 @@ static void gen_tlbsx_40x(DisasContext *ctx)
         tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
         gen_set_label(l1);
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbwe */
 static void gen_tlbwe_40x(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
+
     switch (rB(ctx->opcode)) {
     case 0:
         gen_helper_4xx_tlbwe_hi(cpu_env, cpu_gpr[rA(ctx->opcode)],
@@ -6480,7 +6406,7 @@ static void gen_tlbwe_40x(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         break;
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* TLB management - PowerPC 440 implementation */
@@ -6489,12 +6415,10 @@ static void gen_tlbwe_40x(DisasContext *ctx)
 static void gen_tlbre_440(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
+
     switch (rB(ctx->opcode)) {
     case 0:
     case 1:
@@ -6510,20 +6434,18 @@ static void gen_tlbre_440(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         break;
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbsx - tlbsx. */
 static void gen_tlbsx_440(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
     gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
@@ -6535,19 +6457,16 @@ static void gen_tlbsx_440(DisasContext *ctx)
         tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
         gen_set_label(l1);
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbwe */
 static void gen_tlbwe_440(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     switch (rB(ctx->opcode)) {
     case 0:
     case 1:
@@ -6563,7 +6482,7 @@ static void gen_tlbwe_440(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
         break;
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* TLB management - PowerPC BookE 2.06 implementation */
@@ -6571,30 +6490,23 @@ static void gen_tlbwe_440(DisasContext *ctx)
 /* tlbre */
 static void gen_tlbre_booke206(DisasContext *ctx)
 {
-#if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+ #if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
-
+   CHK_SV;
     gen_helper_booke206_tlbre(cpu_env);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbsx - tlbsx. */
 static void gen_tlbsx_booke206(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
 
+    CHK_SV;
     if (rA(ctx->opcode)) {
         t0 = tcg_temp_new();
         tcg_gen_mov_tl(t0, cpu_gpr[rD(ctx->opcode)]);
@@ -6605,54 +6517,44 @@ static void gen_tlbsx_booke206(DisasContext *ctx)
     tcg_gen_add_tl(t0, t0, cpu_gpr[rB(ctx->opcode)]);
     gen_helper_booke206_tlbsx(cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* tlbwe */
 static void gen_tlbwe_booke206(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     gen_update_nip(ctx, ctx->nip - 4);
     gen_helper_booke206_tlbwe(cpu_env);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_tlbivax_booke206(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
 
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
-
     gen_helper_booke206_tlbivax(cpu_env, t0);
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_tlbilx_booke206(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
 
+    CHK_SV;
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
 
@@ -6672,7 +6574,7 @@ static void gen_tlbilx_booke206(DisasContext *ctx)
     }
 
     tcg_temp_free(t0);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 
@@ -6680,13 +6582,11 @@ static void gen_tlbilx_booke206(DisasContext *ctx)
 static void gen_wrtee(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
     TCGv t0;
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+
+    CHK_SV;
     t0 = tcg_temp_new();
     tcg_gen_andi_tl(t0, cpu_gpr[rD(ctx->opcode)], (1 << MSR_EE));
     tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE));
@@ -6696,19 +6596,16 @@ static void gen_wrtee(DisasContext *ctx)
      * if we just set msr_ee to 1
      */
     gen_stop_exception(ctx);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* wrteei */
 static void gen_wrteei(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
+    CHK_SV;
     if (ctx->opcode & 0x00008000) {
         tcg_gen_ori_tl(cpu_msr, cpu_msr, (1 << MSR_EE));
         /* Stop translation to have a chance to raise an exception */
@@ -6716,7 +6613,7 @@ static void gen_wrteei(DisasContext *ctx)
     } else {
         tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE));
     }
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /* PowerPC 440 specific instructions */
@@ -6756,29 +6653,21 @@ static void gen_icbt_440(DisasContext *ctx)
 static void gen_msgclr(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
-
+    CHK_SV;
     gen_helper_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 static void gen_msgsnd(DisasContext *ctx)
 {
 #if defined(CONFIG_USER_ONLY)
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
+    GEN_PRIV;
 #else
-    if (unlikely(ctx->pr)) {
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
-        return;
-    }
-
+    CHK_SV;
     gen_helper_msgsnd(cpu_gpr[rB(ctx->opcode)]);
-#endif
+#endif /* defined(CONFIG_USER_ONLY) */
 }
 
 /***                      Altivec vector extension                         ***/
@@ -9780,7 +9669,7 @@ static void gen_tcheck(DisasContext *ctx)
 #define GEN_TM_PRIV_NOOP(name)                                 \
 static inline void gen_##name(DisasContext *ctx)               \
 {                                                              \
-    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);           \
+    gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC);           \
 }
 
 #else
@@ -9788,10 +9677,7 @@ static inline void gen_##name(DisasContext *ctx)               \
 #define GEN_TM_PRIV_NOOP(name)                                 \
 static inline void gen_##name(DisasContext *ctx)               \
 {                                                              \
-    if (unlikely(ctx->pr)) {                                   \
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);       \
-        return;                                                \
-    }                                                          \
+    CHK_SV;                                                    \
     if (unlikely(!ctx->tm_enabled)) {                          \
         gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);   \
         return;                                                \
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (20 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-20  7:48   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool Benjamin Herrenschmidt
                   ` (57 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Those instructions are only available in hypervisor real mode and
allow cache-inhibited guarded access to devices in that mode.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  4 +++-
 target-ppc/translate.c      | 56 +++++++++++++++++++++++++++++++++++----------
 target-ppc/translate_init.c |  6 +++--
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 23479b1..3d22a4f 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1913,6 +1913,8 @@ enum {
     PPC_POPCNTB        = 0x0000000000001000ULL,
     /*   string load / store                                                 */
     PPC_STRING         = 0x0000000000002000ULL,
+    /*   real mode cache inhibited load / store                              */
+    PPC_CILDST         = 0x0000000000004000ULL,
 
     /* Floating-point unit extensions                                        */
     /*   Optional floating point instructions                                */
@@ -2027,7 +2029,7 @@ enum {
                         | PPC_MFAPIDI | PPC_TLBIVA | PPC_TLBIVAX \
                         | PPC_4xx_COMMON | PPC_40x_ICBT | PPC_RFMCI \
                         | PPC_RFDI | PPC_DCR | PPC_DCRX | PPC_DCRUX \
-                        | PPC_POPCNTWD)
+                        | PPC_POPCNTWD | PPC_CILDST)
 
     /* extended type values */
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 3f657b1..4d01fd0 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -189,7 +189,7 @@ struct DisasContext {
     uint32_t opcode;
     uint32_t exception;
     /* Routine used to access memory */
-    bool pr, hv;
+    bool pr, hv, dr;
     int mem_idx;
     int access_type;
     /* Translation flags */
@@ -380,9 +380,11 @@ typedef struct opcode_t {
 #if defined(CONFIG_USER_ONLY)
 #define CHK_HV GEN_PRIV
 #define CHK_SV GEN_PRIV
+#define CHK_HVDR GEN_PRIV
 #else
 #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
 #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
+#define CHK_HVRM do { if (unlikely(ctx->pr || !ctx->hv || ctx->dr)) GEN_PRIV; } while(0)
 #endif
 
 #define CHK_NONE
@@ -2887,7 +2889,7 @@ static void glue(gen_, name##u)(DisasContext *ctx)
 }
 
 #define GEN_LDUX(name, ldop, opc2, opc3, type)                                \
-static void glue(gen_, name##ux)(DisasContext *ctx)                                   \
+static void glue(gen_, name##ux)(DisasContext *ctx)                           \
 {                                                                             \
     TCGv EA;                                                                  \
     if (unlikely(rA(ctx->opcode) == 0 ||                                      \
@@ -2903,18 +2905,23 @@ static void glue(gen_, name##ux)(DisasContext *ctx)
     tcg_temp_free(EA);                                                        \
 }
 
-#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2)                        \
+#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2, chk)                   \
 static void glue(gen_, name##x)(DisasContext *ctx)                            \
 {                                                                             \
     TCGv EA;                                                                  \
+    chk;                                                                      \
     gen_set_access_type(ctx, ACCESS_INT);                                     \
     EA = tcg_temp_new();                                                      \
     gen_addr_reg_index(ctx, EA);                                              \
     gen_qemu_##ldop(ctx, cpu_gpr[rD(ctx->opcode)], EA);                       \
     tcg_temp_free(EA);                                                        \
 }
+
 #define GEN_LDX(name, ldop, opc2, opc3, type)                                 \
-    GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE)
+    GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE, CHK_NONE)
+
+#define GEN_LDX_HVRM(name, ldop, opc2, opc3, type)                            \
+    GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE, CHK_HVRM)
 
 #define GEN_LDS(name, ldop, op, type)                                         \
 GEN_LD(name, ldop, op | 0x20, type);                                          \
@@ -2940,6 +2947,12 @@ GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B);
 /* ldx */
 GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B);
 
+/* CI load/store variants */
+GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST)
+GEN_LDX_HVRM(lwzcix, ld32u, 0x15, 0x15, PPC_CILDST)
+GEN_LDX_HVRM(lhzcix, ld16u, 0x15, 0x19, PPC_CILDST)
+GEN_LDX_HVRM(lbzcix, ld8u, 0x15, 0x1a, PPC_CILDST)
+
 static void gen_ld(DisasContext *ctx)
 {
     TCGv EA;
@@ -3058,10 +3071,11 @@ static void glue(gen_, name##ux)(DisasContext *ctx)
     tcg_temp_free(EA);                                                        \
 }
 
-#define GEN_STX_E(name, stop, opc2, opc3, type, type2)                        \
+#define GEN_STX_E(name, stop, opc2, opc3, type, type2, chk)                   \
 static void glue(gen_, name##x)(DisasContext *ctx)                            \
 {                                                                             \
     TCGv EA;                                                                  \
+    chk;                                                                      \
     gen_set_access_type(ctx, ACCESS_INT);                                     \
     EA = tcg_temp_new();                                                      \
     gen_addr_reg_index(ctx, EA);                                              \
@@ -3069,7 +3083,10 @@ static void glue(gen_, name##x)(DisasContext *ctx)                            \
     tcg_temp_free(EA);                                                        \
 }
 #define GEN_STX(name, stop, opc2, opc3, type)                                 \
-    GEN_STX_E(name, stop, opc2, opc3, type, PPC_NONE)
+    GEN_STX_E(name, stop, opc2, opc3, type, PPC_NONE, CHK_NONE)
+
+#define GEN_STX_HVRM(name, stop, opc2, opc3, type)                            \
+    GEN_STX_E(name, stop, opc2, opc3, type, PPC_NONE, CHK_HVRM)
 
 #define GEN_STS(name, stop, op, type)                                         \
 GEN_ST(name, stop, op | 0x20, type);                                          \
@@ -3086,6 +3103,10 @@ GEN_STS(stw, st32, 0x04, PPC_INTEGER);
 #if defined(TARGET_PPC64)
 GEN_STUX(std, st64, 0x15, 0x05, PPC_64B);
 GEN_STX(std, st64, 0x15, 0x04, PPC_64B);
+GEN_STX_HVRM(stdcix, st64, 0x15, 0x1f, PPC_CILDST)
+GEN_STX_HVRM(stwcix, st32, 0x15, 0x1c, PPC_CILDST)
+GEN_STX_HVRM(sthcix, st16, 0x15, 0x1d, PPC_CILDST)
+GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST)
 
 static void gen_std(DisasContext *ctx)
 {
@@ -3171,7 +3192,7 @@ static inline void gen_qemu_ld64ur(DisasContext *ctx, TCGv arg1, TCGv arg2)
     TCGMemOp op = MO_Q | (ctx->default_tcg_memop_mask ^ MO_BSWAP);
     tcg_gen_qemu_ld_i64(arg1, arg2, ctx->mem_idx, op);
 }
-GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX);
+GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE);
 #endif  /* TARGET_PPC64 */
 
 /* sthbrx */
@@ -3197,7 +3218,7 @@ static inline void gen_qemu_st64r(DisasContext *ctx, TCGv arg1, TCGv arg2)
     TCGMemOp op = MO_Q | (ctx->default_tcg_memop_mask ^ MO_BSWAP);
     tcg_gen_qemu_st_i64(arg1, arg2, ctx->mem_idx, op);
 }
-GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX);
+GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE);
 #endif  /* TARGET_PPC64 */
 
 /***                    Integer load and store multiple                    ***/
@@ -10156,7 +10177,7 @@ GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type),
 GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x00000000, type),
 #define GEN_LDUX(name, ldop, opc2, opc3, type)                                \
 GEN_HANDLER(name##ux, 0x1F, opc2, opc3, 0x00000001, type),
-#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2)                        \
+#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2, chk)                   \
 GEN_HANDLER_E(name##x, 0x1F, opc2, opc3, 0x00000001, type, type2),
 #define GEN_LDS(name, ldop, op, type)                                         \
 GEN_LD(name, ldop, op | 0x20, type)                                           \
@@ -10173,7 +10194,13 @@ GEN_LDUX(lwa, ld32s, 0x15, 0x0B, PPC_64B)
 GEN_LDX(lwa, ld32s, 0x15, 0x0A, PPC_64B)
 GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B)
 GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B)
-GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX)
+GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE)
+
+/* HV/P7 and later only */
+GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST)
+GEN_LDX_HVRM(lwzcix, ld32u, 0x15, 0x18, PPC_CILDST)
+GEN_LDX_HVRM(lhzcix, ld16u, 0x15, 0x19, PPC_CILDST)
+GEN_LDX_HVRM(lbzcix, ld8u, 0x15, 0x1a, PPC_CILDST)
 #endif
 GEN_LDX(lhbr, ld16ur, 0x16, 0x18, PPC_INTEGER)
 GEN_LDX(lwbr, ld32ur, 0x16, 0x10, PPC_INTEGER)
@@ -10189,7 +10216,7 @@ GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type),
 GEN_HANDLER(stop##u, opc, 0xFF, 0xFF, 0x00000000, type),
 #define GEN_STUX(name, stop, opc2, opc3, type)                                \
 GEN_HANDLER(name##ux, 0x1F, opc2, opc3, 0x00000001, type),
-#define GEN_STX_E(name, stop, opc2, opc3, type, type2)                        \
+#define GEN_STX_E(name, stop, opc2, opc3, type, type2, chk)                   \
 GEN_HANDLER_E(name##x, 0x1F, opc2, opc3, 0x00000001, type, type2),
 #define GEN_STS(name, stop, op, type)                                         \
 GEN_ST(name, stop, op | 0x20, type)                                           \
@@ -10203,7 +10230,11 @@ GEN_STS(stw, st32, 0x04, PPC_INTEGER)
 #if defined(TARGET_PPC64)
 GEN_STUX(std, st64, 0x15, 0x05, PPC_64B)
 GEN_STX(std, st64, 0x15, 0x04, PPC_64B)
-GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX)
+GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE)
+GEN_STX_HVRM(stdcix, st64, 0x15, 0x1f, PPC_CILDST)
+GEN_STX_HVRM(stwcix, st32, 0x15, 0x1c, PPC_CILDST)
+GEN_STX_HVRM(sthcix, st16, 0x15, 0x1d, PPC_CILDST)
+GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST)
 #endif
 GEN_STX(sthbr, st16r, 0x16, 0x1C, PPC_INTEGER)
 GEN_STX(stwbr, st32r, 0x16, 0x14, PPC_INTEGER)
@@ -11369,6 +11400,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
     ctx.spr_cb = env->spr_cb;
     ctx.pr = msr_pr;
     ctx.mem_idx = env->dmmu_idx;
+    ctx.dr = msr_dr;
 #if !defined(CONFIG_USER_ONLY)
     ctx.hv = !msr_pr && (msr_hv || !env->has_hv_mode);
 #endif
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 5210b25..8d82bc8 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8320,7 +8320,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
                        PPC_64B | PPC_64H | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
-                       PPC_POPCNTB | PPC_POPCNTWD;
+                       PPC_POPCNTB | PPC_POPCNTWD |
+                       PPC_CILDST;
     pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
                         PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
                         PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
@@ -8397,7 +8398,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
                        PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
                        PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
-                       PPC_POPCNTB | PPC_POPCNTWD;
+                       PPC_POPCNTB | PPC_POPCNTWD |
+                       PPC_CILDST;
     pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |
                         PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
                         PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (21 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8 Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-20  7:49   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 24/77] ppc: Move exception generation code out of line Benjamin Herrenschmidt
                   ` (56 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 4d01fd0..a5ab2eb 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -189,21 +189,20 @@ struct DisasContext {
     uint32_t opcode;
     uint32_t exception;
     /* Routine used to access memory */
-    bool pr, hv, dr;
+    bool pr, hv, dr, le_mode;
     int mem_idx;
     int access_type;
     /* Translation flags */
-    int le_mode;
     TCGMemOp default_tcg_memop_mask;
 #if defined(TARGET_PPC64)
-    int sf_mode;
-    int has_cfar;
+    bool sf_mode;
+    bool has_cfar;
 #endif
-    int fpu_enabled;
-    int altivec_enabled;
-    int vsx_enabled;
-    int spe_enabled;
-    int tm_enabled;
+    bool fpu_enabled;
+    bool altivec_enabled;
+    bool vsx_enabled;
+    bool spe_enabled;
+    bool tm_enabled;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
     uint64_t insns_flags;
@@ -380,7 +379,7 @@ typedef struct opcode_t {
 #if defined(CONFIG_USER_ONLY)
 #define CHK_HV GEN_PRIV
 #define CHK_SV GEN_PRIV
-#define CHK_HVDR GEN_PRIV
+#define CHK_HVRM GEN_PRIV
 #else
 #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
 #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
@@ -11407,31 +11406,31 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
     ctx.insns_flags = env->insns_flags;
     ctx.insns_flags2 = env->insns_flags2;
     ctx.access_type = -1;
-    ctx.le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
+    ctx.le_mode = !!(env->hflags & (1 << MSR_LE));
     ctx.default_tcg_memop_mask = ctx.le_mode ? MO_LE : MO_BE;
 #if defined(TARGET_PPC64)
     ctx.sf_mode = msr_is_64bit(env, env->msr);
     ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
 #endif
-    ctx.fpu_enabled = msr_fp;
+    ctx.fpu_enabled = !!msr_fp;
     if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
-        ctx.spe_enabled = msr_spe;
+        ctx.spe_enabled = !!msr_spe;
     else
-        ctx.spe_enabled = 0;
+        ctx.spe_enabled = false;
     if ((env->flags & POWERPC_FLAG_VRE) && msr_vr)
-        ctx.altivec_enabled = msr_vr;
+        ctx.altivec_enabled = !!msr_vr;
     else
-        ctx.altivec_enabled = 0;
+        ctx.altivec_enabled = false;
     if ((env->flags & POWERPC_FLAG_VSX) && msr_vsx) {
-        ctx.vsx_enabled = msr_vsx;
+        ctx.vsx_enabled = !!msr_vsx;
     } else {
-        ctx.vsx_enabled = 0;
+        ctx.vsx_enabled = false;
     }
 #if defined(TARGET_PPC64)
     if ((env->flags & POWERPC_FLAG_TM) && msr_tm) {
-        ctx.tm_enabled = msr_tm;
+        ctx.tm_enabled = !!msr_tm;
     } else {
-        ctx.tm_enabled = 0;
+        ctx.tm_enabled = false;
     }
 #endif
     if ((env->flags & POWERPC_FLAG_SE) && msr_se)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 24/77] ppc: Move exception generation code out of line
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (22 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-20  7:53   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions Benjamin Herrenschmidt
                   ` (55 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

There's no point in inlining this: if you hit the exception case you exit
anyway, and not inlining saves about 100K of code size (and cache
footprint).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a5ab2eb..ac62942 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -279,7 +279,8 @@ void gen_update_current_nip(void *opaque)
     tcg_gen_movi_tl(cpu_nip, ctx->nip);
 }
 
-static inline void gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error)
+static void __attribute__((noinline))
+gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error)
 {
     TCGv_i32 t0, t1;
     if (ctx->exception == POWERPC_EXCP_NONE) {
@@ -293,7 +294,8 @@ static inline void gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t
     ctx->exception = (excp);
 }
 
-static inline void gen_exception(DisasContext *ctx, uint32_t excp)
+static void __attribute__((noinline))
+gen_exception(DisasContext *ctx, uint32_t excp)
 {
     TCGv_i32 t0;
     if (ctx->exception == POWERPC_EXCP_NONE) {
@@ -305,7 +307,8 @@ static inline void gen_exception(DisasContext *ctx, uint32_t excp)
     ctx->exception = (excp);
 }
 
-static inline void gen_debug_exception(DisasContext *ctx)
+static void __attribute__((noinline))
+gen_debug_exception(DisasContext *ctx)
 {
     TCGv_i32 t0;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (23 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 24/77] ppc: Move exception generation code out of line Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-20  8:06   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform Benjamin Herrenschmidt
                   ` (54 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This adds the ISA 2.06 and later power management instructions
(doze, nap, sleep and rvwinkle) and the associated wakeup cause testing
in LPCR.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            | 26 ++++++++++++-
 target-ppc/excp_helper.c    | 59 +++++++++++++++++++++++++++++
 target-ppc/helper.h         |  1 +
 target-ppc/translate.c      | 66 ++++++++++++++++++++++++++++++++
 target-ppc/translate_init.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 241 insertions(+), 3 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 3d22a4f..a7236cf 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -300,6 +300,15 @@ enum {
 };
 
 /*****************************************************************************/
+/* PM instructions */
+typedef enum {
+    PPC_PM_DOZE,
+    PPC_PM_NAP,
+    PPC_PM_SLEEP,
+    PPC_PM_RVWINKLE,
+} powerpc_pm_insn_t;
+
+/*****************************************************************************/
 /* Input pins model                                                          */
 typedef enum powerpc_input_t powerpc_input_t;
 enum powerpc_input_t {
@@ -490,6 +499,14 @@ struct ppc_slb_t {
 #define LPCR_LPES1        (1ull << (63-61))
 #define LPCR_AIL_SHIFT    (63-40)      /* Alternate interrupt location */
 #define LPCR_AIL          (3ull << LPCR_AIL_SHIFT)
+#define LPCR_P7_PECE0     (1ull << (63-49))
+#define LPCR_P7_PECE1     (1ull << (63-50))
+#define LPCR_P7_PECE2     (1ull << (63-51))
+#define LPCR_P8_PECE0     (1ull << (63-47))
+#define LPCR_P8_PECE1     (1ull << (63-48))
+#define LPCR_P8_PECE2     (1ull << (63-49))
+#define LPCR_P8_PECE3     (1ull << (63-50))
+#define LPCR_P8_PECE4     (1ull << (63-51))
 
 #define msr_sf   ((env->msr >> MSR_SF)   & 1)
 #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
@@ -1126,6 +1143,11 @@ struct CPUPPCState {
      * instructions and SPRs are diallowed if MSR:HV is 0
      */
     bool has_hv_mode;
+    /* On P7/P8, set when in PM state, we need to handle resume
+     * in a special way (such as routing some resume causes to
+     * 0x100), so flag this here.
+     */
+    bool in_pm_state;
 #endif
 
     /* Those resources are used only during code translation */
@@ -2069,6 +2091,8 @@ enum {
     PPC2_FP_CVT_S64    = 0x0000000000010000ULL,
     /* Transactional Memory (ISA 2.07, Book II)                              */
     PPC2_TM            = 0x0000000000020000ULL,
+    /* Server PM instructions (ISA 2.06, Book III)                          */
+    PPC2_PM_ISA206     = 0x0000000000040000ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
                         PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2076,7 +2100,7 @@ enum {
                         PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
                         PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
                         PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
-                        PPC2_FP_CVT_S64 | PPC2_TM)
+                        PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206)
 };
 
 /*****************************************************************************/
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 80a70f4..3f77df7 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -100,6 +100,44 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     asrr0 = -1;
     asrr1 = -1;
 
+    /* check for special resume at 0x100 from doze/nap/sleep/winkle on P7/P8 */
+    if (env->in_pm_state) {
+        env->in_pm_state = false;
+
+        /* Pretend to be returning from doze always as we don't lose state */
+        msr |= (0x1ull << (63 - 47));
+
+        /* Non-machine-check exceptions are routed to 0x100 with a wakeup cause
+         * encoded in SRR1
+         */
+        if (excp != POWERPC_EXCP_MCHECK) {
+            switch(excp) {
+            case POWERPC_EXCP_RESET:
+                msr |= 0x4ull << (63-45);
+                break;
+            case POWERPC_EXCP_EXTERNAL:
+                msr |= 0x8ull << (63-45);
+                break;
+            case POWERPC_EXCP_DECR:
+                msr |= 0x6ull << (63-45);
+                break;
+            case POWERPC_EXCP_SDOOR:
+                msr |= 0x5ull << (63-45);
+                break;
+            case POWERPC_EXCP_SDOOR_HV:
+                msr |= 0x3ull << (63-45);
+                break;
+            case POWERPC_EXCP_HV_MAINT:
+                msr |= 0xaull << (63-45);
+                break;
+            default:
+                cpu_abort(cs, "Unsupported exception %d in Power Save mode\n",
+                          excp);
+            }
+            excp = POWERPC_EXCP_RESET;
+        }
+    }
+
     /* Exception targetting modifiers
      *
      * LPES0 is supported on POWER7/8
@@ -898,6 +936,27 @@ void helper_store_msr(CPUPPCState *env, target_ulong val)
     }
 }
 
+#if defined(TARGET_PPC64)
+void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn)
+{
+    CPUState *cs;
+
+    cs = CPU(ppc_env_get_cpu(env));
+    cs->halted = 1;
+    env->in_pm_state = true;
+
+    /* Technically, nap doesn't set EE, but if we don't set it
+     * then ppc_hw_interrupt() won't deliver. We could add some
+     * other tests there based on LPCR but it's simpler to just
+     * whack EE in. It will be cleared by the 0x100 at wakeup
+     * anyway. It will still be observable by the guest in SRR1
+     * but this doesn't seem to be a problem.
+     */
+    env->msr |= (1ull << MSR_EE);
+    helper_raise_exception(env, EXCP_HLT);
+}
+#endif /* defined(TARGET_PPC64) */
+
 static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
                           target_ulong msrm, int keep_msrh)
 {
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index ff2d50b..8292dd8 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -13,6 +13,7 @@ DEF_HELPER_1(rfci, void, env)
 DEF_HELPER_1(rfdi, void, env)
 DEF_HELPER_1(rfmci, void, env)
 #if defined(TARGET_PPC64)
+DEF_HELPER_2(pminsn, void, env, i32)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
 #endif
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ac62942..f76a0c3 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3567,6 +3567,68 @@ static void gen_wait(DisasContext *ctx)
     gen_exception_err(ctx, EXCP_HLT, 1);
 }
 
+#if defined(TARGET_PPC64)
+static void gen_doze(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
+#else
+    TCGv_i32 t;
+
+    CHK_HV;
+    t = tcg_const_i32(PPC_PM_DOZE);
+    gen_helper_pminsn(cpu_env, t);
+    tcg_temp_free_i32(t);
+    gen_stop_exception(ctx);
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+
+static void gen_nap(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
+#else
+    TCGv_i32 t;
+
+    CHK_HV;
+    t = tcg_const_i32(PPC_PM_NAP);
+    gen_helper_pminsn(cpu_env, t);
+    tcg_temp_free_i32(t);
+    gen_stop_exception(ctx);
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+
+static void gen_sleep(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
+#else
+    TCGv_i32 t;
+
+    CHK_HV;
+    t = tcg_const_i32(PPC_PM_SLEEP);
+    gen_helper_pminsn(cpu_env, t);
+    tcg_temp_free_i32(t);
+    gen_stop_exception(ctx);
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+
+static void gen_rvwinkle(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
+#else
+    TCGv_i32 t;
+
+    CHK_HV;
+    t = tcg_const_i32(PPC_PM_RVWINKLE);
+    gen_helper_pminsn(cpu_env, t);
+    tcg_temp_free_i32(t);
+    gen_stop_exception(ctx);
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+#endif /* #if defined(TARGET_PPC64) */
+
 /***                         Floating-point load                           ***/
 #define GEN_LDF(name, ldop, opc, type)                                        \
 static void glue(gen_, name)(DisasContext *ctx)                                       \
@@ -9828,6 +9890,10 @@ GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER),
 GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B),
+GEN_HANDLER_E(doze, 0x13, 0x12, 0x0c, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
+GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
+GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
+GEN_HANDLER_E(rvwinkle, 0x13, 0x12, 0x0f, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
 GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H),
 #endif
 GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW),
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 8d82bc8..8a1ce85 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8297,10 +8297,45 @@ static bool ppc_pvr_match_power7(PowerPCCPUClass *pcc, uint32_t pvr)
     return false;
 }
 
+static bool cpu_has_work_POWER7(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    if (cs->halted) {
+        if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
+            return false;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) &&
+            (env->spr[SPR_LPCR] & LPCR_P7_PECE0)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) &&
+            (env->spr[SPR_LPCR] & LPCR_P7_PECE1)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK)) &&
+            (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HMI)) &&
+            (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) {
+            return true;
+        }
+        if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) {
+            return true;
+        }
+        return false;
+    } else {
+        return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
+    }
+}
+
 POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+    CPUClass *cc = CPU_CLASS(oc);
 
     dc->fw_name = "PowerPC,POWER7";
     dc->desc = "POWER7";
@@ -8309,6 +8344,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
     pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
     pcc->init_proc = init_proc_POWER7;
     pcc->check_pow = check_pow_nocheck;
+    cc->has_work = cpu_has_work_POWER7;
     pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
                        PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
                        PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
@@ -8325,7 +8361,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
     pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
                         PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
                         PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
-                        PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64;
+                        PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64 |
+                        PPC2_PM_ISA206;
     pcc->msr_mask = (1ull << MSR_SF) |
                     (1ull << MSR_VR) |
                     (1ull << MSR_VSX) |
@@ -8375,10 +8412,53 @@ static bool ppc_pvr_match_power8(PowerPCCPUClass *pcc, uint32_t pvr)
     return false;
 }
 
+static bool cpu_has_work_POWER8(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    if (cs->halted) {
+        if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
+            return false;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) &&
+            (env->spr[SPR_LPCR] & LPCR_P8_PECE2)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) &&
+            (env->spr[SPR_LPCR] & LPCR_P8_PECE3)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK)) &&
+            (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HMI)) &&
+            (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DOORBELL)) &&
+            (env->spr[SPR_LPCR] & LPCR_P8_PECE0)) {
+            return true;
+        }
+        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HDOORBELL)) &&
+            (env->spr[SPR_LPCR] & LPCR_P8_PECE1)) {
+            return true;
+        }
+        if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) {
+            return true;
+        }
+        return false;
+    } else {
+        return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
+    }
+}
+
 POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+    CPUClass *cc = CPU_CLASS(oc);
 
     dc->fw_name = "PowerPC,POWER8";
     dc->desc = "POWER8";
@@ -8387,6 +8467,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
     pcc->init_proc = init_proc_POWER8;
     pcc->check_pow = check_pow_nocheck;
+    cc->has_work = cpu_has_work_POWER8;
     pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
                        PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
                        PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
@@ -8406,7 +8487,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
                         PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
                         PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
                         PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-                        PPC2_TM;
+                        PPC2_TM | PPC2_PM_ISA206;
     pcc->msr_mask = (1ull << MSR_SF) |
                     (1ull << MSR_SHV) |
 		    (1ull << MSR_TM) |
@@ -8464,6 +8545,13 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
     lpcr->default_value &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV);
     lpcr->default_value |= LPCR_LPES0 | LPCR_LPES1;
 
+    /* P7 and P8 have slightly different PECE bits, mostly because P8 adds
+     * bits 47 and 48, which are reserved on P7. Here we set them all, which
+     * will work as expected for both implementations.
+     */
+    lpcr->default_value |= LPCR_P8_PECE0 | LPCR_P8_PECE1 | LPCR_P8_PECE2 |
+                           LPCR_P8_PECE3 | LPCR_P8_PECE4;
+
     /* We should be followed by a CPU reset but update the active value
      * just in case...
      */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (24 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-19  8:58   ` [Qemu-devel] [Qemu-ppc] " Stewart Smith
  2015-11-20  8:21   ` David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure Benjamin Herrenschmidt
                   ` (53 subsequent siblings)
  79 siblings, 2 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

No devices yet, not even an interrupt controller, just to get
started.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 default-configs/ppc64-softmmu.mak |   1 +
 hw/ppc/Makefile.objs              |   2 +
 hw/ppc/pnv.c                      | 600 ++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h              |  36 +++
 4 files changed, 639 insertions(+)
 create mode 100644 hw/ppc/pnv.c
 create mode 100644 include/hw/ppc/pnv.h

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index bb71b23..96574c8 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -40,6 +40,7 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_PSERIES=y
+CONFIG_POWERNV=y
 CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..cd74c96 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,8 @@ obj-y += ppc.o ppc_booke.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
+# IBM PowerNV
+obj-$(CONFIG_POWERNV) += pnv.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
new file mode 100644
index 0000000..e68c9b1
--- /dev/null
+++ b/hw/ppc/pnv.c
@@ -0,0 +1,600 @@
+/*
+ * QEMU PowerPC PowerNV model
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ * Copyright (c) 2010 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+#include "sysemu/sysemu.h"
+#include "hw/hw.h"
+#include "hw/fw-path-provider.h"
+#include "elf.h"
+#include "net/net.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "sysemu/numa.h"
+#include "kvm_ppc.h"
+#include "mmu-hash64.h"
+#include "qom/cpu.h"
+
+#include "hw/boards.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/loader.h"
+
+#include "exec/address-spaces.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "hw/nmi.h"
+
+#include "hw/compat.h"
+
+#include <libfdt.h>
+
+#define FDT_ADDR                0x01000000
+#define FDT_MAX_SIZE            0x00100000
+#define FW_MAX_SIZE             0x00400000
+#define FW_FILE_NAME            "skiboot.lid"
+#define KERNEL_FILE_NAME        "skiroot.lid"
+#define KERNEL_LOAD_ADDR        0x20000000
+
+#define TIMEBASE_FREQ           512000000ULL
+
+#define MAX_CPUS                255
+
+#define PHANDLE_XICP            0x00001111
+
+typedef struct sPowerNVMachineState sPowerNVMachineState;
+
+#define TYPE_POWERNV_MACHINE      "powernv-machine"
+#define POWERNV_MACHINE(obj) \
+    OBJECT_CHECK(sPowerNVMachineState, (obj), TYPE_POWERNV_MACHINE)
+
+/**
+ * sPowerNVMachineState:
+ */
+struct sPowerNVMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+    PnvSystem sys;
+};
+
+static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
+                                     size_t maxsize)
+{
+    size_t maxcells = maxsize / sizeof(uint32_t);
+    int i, j, count;
+    uint32_t *p = prop;
+
+    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
+        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
+
+        if (!sps->page_shift) {
+            break;
+        }
+        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
+            if (sps->enc[count].page_shift == 0) {
+                break;
+            }
+        }
+        if ((p - prop) >= (maxcells - 3 - count * 2)) {
+            break;
+        }
+        *(p++) = cpu_to_be32(sps->page_shift);
+        *(p++) = cpu_to_be32(sps->slb_enc);
+        *(p++) = cpu_to_be32(count);
+        for (j = 0; j < count; j++) {
+            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
+            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
+        }
+    }
+
+    return (p - prop) * sizeof(uint32_t);
+}
+
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+static void powernv_populate_memory_node(void *fdt, int nodeid, hwaddr start,
+                                         hwaddr size)
+{
+    /* Probably bogus, need to match with what's going on in CPU nodes */
+    uint32_t chip_id[] = {
+        cpu_to_be32(0x0), cpu_to_be32(nodeid)
+    };
+    char *mem_name;
+    uint64_t mem_reg_property[2];
+
+    mem_reg_property[0] = cpu_to_be64(start);
+    mem_reg_property[1] = cpu_to_be64(size);
+
+    mem_name = g_strdup_printf("memory@"TARGET_FMT_lx, start);
+    _FDT((fdt_begin_node(fdt, mem_name)));
+    g_free(mem_name);
+    _FDT((fdt_property_string(fdt, "device_type", "memory")));
+    _FDT((fdt_property(fdt, "reg", mem_reg_property,
+                       sizeof(mem_reg_property))));
+    _FDT((fdt_property(fdt, "ibm,chip-id", chip_id, sizeof(chip_id))));
+    _FDT((fdt_end_node(fdt)));
+}
+
+static int powernv_populate_memory(void *fdt)
+{
+    hwaddr mem_start, node_size;
+    int i, nb_nodes = nb_numa_nodes;
+    NodeInfo *nodes = numa_info;
+    NodeInfo ramnode;
+
+    /* No NUMA nodes, assume there is just one node with whole RAM */
+    if (!nb_numa_nodes) {
+        nb_nodes = 1;
+        ramnode.node_mem = ram_size;
+        nodes = &ramnode;
+    }
+
+    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
+        if (!nodes[i].node_mem) {
+            continue;
+        }
+        if (mem_start >= ram_size) {
+            node_size = 0;
+        } else {
+            node_size = nodes[i].node_mem;
+            if (node_size > ram_size - mem_start) {
+                node_size = ram_size - mem_start;
+            }
+        }
+        for ( ; node_size; ) {
+            hwaddr sizetmp = pow2floor(node_size);
+
+            /* NOTE(review): mem_start is 0 on the first pass; check ctzl(0) */
+            if (ctzl(mem_start) < ctzl(sizetmp)) {
+                sizetmp = 1ULL << ctzl(mem_start);
+            }
+
+            powernv_populate_memory_node(fdt, i, mem_start, sizetmp);
+            node_size -= sizetmp;
+            mem_start += sizetmp;
+        }
+    }
+
+    return 0;
+}
+
+static void powernv_create_cpu_node(void *fdt, CPUState *cs, int smt_threads)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    uint32_t servers_prop[smt_threads];
+    uint32_t gservers_prop[smt_threads * 2];
+    int i, index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    char *nodename;
+
+    if ((index % smt_threads) != 0) {
+        return;
+    }
+
+    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+
+    _FDT((fdt_begin_node(fdt, nodename)));
+
+    g_free(nodename);
+
+    _FDT((fdt_property_cell(fdt, "reg", index)));
+    _FDT((fdt_property_string(fdt, "device_type", "cpu")));
+
+    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
+                            env->icache_line_size)));
+    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
+                            env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
+    }
+
+    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
+    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
+    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
+    _FDT((fdt_property_string(fdt, "status", "okay")));
+    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
+    }
+
+    if (env->mmu_model & POWERPC_MMU_1TSEG) {
+        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
+                           segs, sizeof(segs))));
+    }
+
+    /* Advertise VMX/VSX (vector extensions) if available
+     *   0 / no property == no vector extensions
+     *   1               == VMX / Altivec available
+     *   2               == VSX available */
+    if (env->insns_flags & PPC_ALTIVEC) {
+        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (env->insns_flags2 & PPC2_DFP) {
+        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
+                                                  sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
+                           page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    /* XXX Just a hack for now */
+    _FDT((fdt_property_cell(fdt, "ibm,chip-id", 0)));
+
+    if (cpu->cpu_version) {
+        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
+    }
+
+    /* Build interrupt servers and gservers properties */
+    for (i = 0; i < smt_threads; i++) {
+        servers_prop[i] = cpu_to_be32(index + i);
+        /* Hack, direct the group queues back to cpu 0 */
+        gservers_prop[i*2] = cpu_to_be32(index + i);
+        gservers_prop[i*2 + 1] = 0;
+    }
+    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+                       servers_prop, sizeof(servers_prop))));
+    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+                       gservers_prop, sizeof(gservers_prop))));
+
+    _FDT((fdt_end_node(fdt)));
+}
+
+/*
+ * Build the flattened device tree for the machine.
+ *
+ * The tree is written with libfdt's sequential-write API into a newly
+ * allocated, zeroed buffer of FDT_MAX_SIZE bytes.  It contains the root
+ * properties, /chosen (initrd start/end), /cpus with one node per CPU,
+ * whatever powernv_populate_memory() adds and, when KVM is enabled,
+ * /hypervisor.  Every libfdt call is wrapped in _FDT().
+ *
+ * @sys:         platform state
+ * @initrd_base: guest physical address of the initrd
+ * @initrd_size: size of the initrd in bytes
+ *
+ * Returns the finished blob, allocated with g_malloc0(); the caller owns it.
+ */
+static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t initrd_size)
+{
+    void *fdt;
+    CPUState *cs;
+    int smt = kvmppc_smt_threads();
+    uint32_t start_prop = cpu_to_be32(initrd_base);
+    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
+    char *buf;
+    const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+
+    fdt = g_malloc0(FDT_MAX_SIZE);
+    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
+    _FDT((fdt_finish_reservemap(fdt)));
+
+    /* Root node */
+    _FDT((fdt_begin_node(fdt, "")));
+    _FDT((fdt_property_string(fdt, "model", "IBM PowerNV (emulated by qemu)")));
+    _FDT((fdt_property(fdt, "compatible", plat_compat, sizeof(plat_compat))));
+
+    /*
+     * Add info to guest to identify which host it is being run on
+     * and what the uuid of the guest is
+     */
+    if (kvmppc_get_host_model(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-model", buf)));
+        g_free(buf);
+    }
+    if (kvmppc_get_host_serial(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-serial", buf)));
+        g_free(buf);
+    }
+
+    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
+                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
+                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
+                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
+                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
+                          qemu_uuid[14], qemu_uuid[15]);
+
+    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
+    g_free(buf);
+
+    /*
+     * In the flattened format all properties of a node have to precede
+     * its first subnode, so the remaining root properties are written
+     * before /chosen is opened.
+     */
+    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
+    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
+
+    /* /chosen */
+    _FDT((fdt_begin_node(fdt, "chosen")));
+    _FDT((fdt_property(fdt, "linux,initrd-start",
+                       &start_prop, sizeof(start_prop))));
+    _FDT((fdt_property(fdt, "linux,initrd-end",
+                       &end_prop, sizeof(end_prop))));
+    _FDT((fdt_end_node(fdt)));
+
+    /* cpus */
+    _FDT((fdt_begin_node(fdt, "cpus")));
+    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
+    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
+
+    CPU_FOREACH(cs) {
+        powernv_create_cpu_node(fdt, cs, smt);
+    }
+
+    _FDT((fdt_end_node(fdt)));
+
+    /* Memory */
+    _FDT((powernv_populate_memory(fdt)));
+
+    /* /hypervisor node */
+    if (kvm_enabled()) {
+        uint8_t hypercall[16];
+
+        /* indicate KVM hypercall interface */
+        _FDT((fdt_begin_node(fdt, "hypervisor")));
+        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
+        if (kvmppc_has_cap_fixup_hcalls()) {
+            /*
+             * Older KVM versions with older guest kernels were broken with the
+             * magic page, don't allow the guest to map it.
+             */
+            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
+                                 sizeof(hypercall));
+            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
+                              sizeof(hypercall))));
+        }
+        _FDT((fdt_end_node(fdt)));
+    }
+
+    _FDT((fdt_end_node(fdt))); /* close root node */
+    _FDT((fdt_finish(fdt)));
+
+    return fdt;
+}
+
+/*
+ * Reset handler registered for every CPU; @opaque is the PowerPCCPU.
+ * After the generic cpu_reset() it loads PIR from ppc_get_vcpu_dt_id(),
+ * clears HIOR, puts FDT_ADDR in r3, sets NIP to 0x10 and ORs MSR_HVB
+ * into the MSR.
+ */
+static void powernv_cpu_reset(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    cpu_reset(cs);
+
+    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
+    env->spr[SPR_HIOR] = 0;
+    env->gpr[3] = FDT_ADDR;
+    env->nip = 0x10;
+    env->msr |= MSR_HVB;
+}
+
+/*
+ * VMState description named "powernv", version 1; it declares no fields.
+ * NOTE(review): nothing in the visible part of this file references it.
+ */
+static const VMStateDescription vmstate_powernv = {
+    .name = "powernv",
+    .version_id = 1,
+    .minimum_version_id = 1,
+};
+
+/*
+ * Initialise chip number @chip_no of @sys.
+ * A @chip_no outside sys->chips[] is silently ignored.
+ */
+static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
+{
+    PnvChip *chip;
+
+    /* Validate the index before forming a pointer into sys->chips[] */
+    if (chip_no >= PNV_MAX_CHIPS) {
+        return;
+    }
+    chip = &sys->chips[chip_no];
+
+    /* XXX Improve chip numbering to better match HW */
+    chip->chip_id = chip_no;
+}
+
+/*
+ * Machine init hook.
+ *
+ * Creates smp_cpus CPUs and registers their reset handler, maps RAM at
+ * guest address 0, creates chip 0, loads the firmware at 0, the kernel at
+ * 0x20000000 and, if requested, the initrd at 0x40000000, then builds the
+ * device tree and copies it to FDT_ADDR.
+ *
+ * A CPU model that cannot be instantiated or an image that cannot be
+ * found or loaded is fatal.
+ */
+static void ppc_powernv_init(MachineState *machine)
+{
+    ram_addr_t ram_size = machine->ram_size;
+    const char *cpu_model = machine->cpu_model;
+    const char *kernel_filename = machine->kernel_filename;
+    const char *initrd_filename = machine->initrd_filename;
+    uint32_t initrd_base = 0;
+    long initrd_size = 0;
+    PowerPCCPU *cpu;
+    CPUPPCState *env;
+    MemoryRegion *sysmem = get_system_memory();
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
+    sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
+    PnvSystem *sys = &pnv_machine->sys;
+    long fw_size;
+    char *filename;
+    void *fdt;
+    int i;
+
+    /* init CPUs */
+    if (cpu_model == NULL) {
+        cpu_model = kvm_enabled() ? "host" : "POWER8";
+    }
+
+    for (i = 0; i < smp_cpus; i++) {
+        cpu = cpu_ppc_init(cpu_model);
+        if (cpu == NULL) {
+            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
+            exit(1);
+        }
+        env = &cpu->env;
+
+        /* Set time-base frequency to 512 MHz */
+        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
+
+        /* MSR[IP] doesn't exist nowadays */
+        env->msr_mask &= ~(1 << 6);
+
+        qemu_register_reset(powernv_cpu_reset, cpu);
+    }
+
+    /* allocate RAM */
+    memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", ram_size);
+    memory_region_add_subregion(sysmem, 0, ram);
+
+    /* XXX We should decide how many chips to create based on #cores and
+     * Venice vs. Murano vs. Naples chip type etc..., for now, just create
+     * one chip. Also creation of the CPUs should be done per-chip
+     */
+    sys->num_chips = 1;
+
+    /* Create only one PHB for now until I figure out what's wrong
+     * when I create more (resource assignment failures in Linux)
+     */
+    pnv_create_chip(sys, 0);
+
+    /* load firmware */
+    if (bios_name == NULL) {
+        bios_name = FW_FILE_NAME;
+    }
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (filename == NULL) {
+        /* Don't hand a NULL path to the loader or to a "%s" format */
+        fprintf(stderr, "qemu: could not find OPAL '%s'\n", bios_name);
+        exit(1);
+    }
+    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
+    if (fw_size < 0) {
+        hw_error("qemu: could not load OPAL '%s'\n", filename);
+        exit(1);
+    }
+    g_free(filename);
+
+    /* load kernel */
+    if (kernel_filename == NULL) {
+        kernel_filename = KERNEL_FILE_NAME;
+    }
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, kernel_filename);
+    if (filename == NULL) {
+        fprintf(stderr, "qemu: could not find kernel '%s'\n", kernel_filename);
+        exit(1);
+    }
+    fw_size = load_image_targphys(filename, 0x20000000, 0x2000000);
+    if (fw_size < 0) {
+        hw_error("qemu: could not load kernel '%s'\n", filename);
+        exit(1);
+    }
+    g_free(filename);
+
+    /* load initrd; initrd_base and initrd_size stay 0 when there is none */
+    if (initrd_filename) {
+        /* Fixed load address, above the area the kernel is loaded into */
+        initrd_base = 0x40000000;
+        initrd_size = load_image_targphys(initrd_filename, initrd_base,
+                                          0x10000000); /* 256MB max */
+        if (initrd_size < 0) {
+            fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+                    initrd_filename);
+            exit(1);
+        }
+    }
+
+    fdt = powernv_create_fdt(sys, initrd_base, initrd_size);
+    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
+    /* The blob has been copied into guest memory; release the host copy */
+    g_free(fdt);
+}
+
+/*
+ * MachineClass kvm_type hook: @vm_type is ignored and the constant 2 is
+ * always returned.
+ */
+static int powernv_kvm_type(const char *vm_type)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
+/*
+ * Worker queued by powernv_nmi(); @arg is the CPUState.  Synchronises the
+ * CPU state and then calls ppc_cpu_do_system_reset() on that CPU.
+ */
+static void ppc_cpu_do_nmi_on_cpu(void *arg)
+{
+    CPUState *cs = arg;
+
+    cpu_synchronize_state(cs);
+    ppc_cpu_do_system_reset(cs);
+}
+
+/*
+ * NMI monitor handler: queues ppc_cpu_do_nmi_on_cpu() on every CPU.
+ * @cpu_index and @errp are not used.
+ */
+static void powernv_nmi(NMIState *n, int cpu_index, Error **errp)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
+    }
+}
+
+/*
+ * Class init for the abstract PowerNV machine type: installs the init and
+ * kvm_type hooks, the machine defaults and the NMI monitor handler.
+ */
+static void powernv_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    NMIClass *nc = NMI_CLASS(oc);
+
+    mc->init = ppc_powernv_init;
+    mc->block_default_type = IF_SCSI;
+    mc->max_cpus = MAX_CPUS;
+    mc->no_parallel = 1;
+    mc->default_boot_order = NULL;
+    mc->kvm_type = powernv_kvm_type;
+
+    nc->nmi_monitor_handler = powernv_nmi;
+}
+
+/* Abstract base machine type; implements the TYPE_NMI interface. */
+static const TypeInfo powernv_machine_info = {
+    .name          = TYPE_POWERNV_MACHINE,
+    .parent        = TYPE_MACHINE,
+    .abstract      = true,
+    .instance_size = sizeof(sPowerNVMachineState),
+    .class_init    = powernv_machine_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_NMI },
+        { }
+    },
+};
+
+/* Class init for the concrete "powernv-2.5" machine, aliased as "powernv". */
+static void powernv_machine_2_5_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->name = "powernv-2.5";
+    mc->desc = "PowerNV v2.5";
+    mc->alias = "powernv";
+}
+
+/* Concrete machine type derived from TYPE_POWERNV_MACHINE. */
+static const TypeInfo powernv_machine_2_5_info = {
+    .name          = MACHINE_TYPE_NAME("powernv-2.5"),
+    .parent        = TYPE_POWERNV_MACHINE,
+    .class_init    = powernv_machine_2_5_class_init,
+};
+
+/* Registers the abstract base machine type and the 2.5 machine type. */
+static void powernv_machine_register_types(void)
+{
+    type_register_static(&powernv_machine_info);
+    type_register_static(&powernv_machine_2_5_info);
+}
+
+type_init(powernv_machine_register_types)
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
new file mode 100644
index 0000000..9a48c16
--- /dev/null
+++ b/include/hw/ppc/pnv.h
@@ -0,0 +1,36 @@
+#ifndef _HW_PNV_H
+#define _HW_PNV_H
+/*
+ * QEMU PowerNV various definitions
+ *
+ * Copyright (c) 2014 BenH
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+
+/* Should we turn that into a QOjb of some sort ? */
+typedef struct PnvChip {
+    uint32_t         chip_id;
+} PnvChip;
+
+typedef struct PnvSystem {
+    uint32_t  num_chips;            /* number of entries of chips[] in use */
+#define PNV_MAX_CHIPS		1
+    PnvChip   chips[PNV_MAX_CHIPS]; /* per-chip state, indexed by chip number */
+} PnvSystem;
+
+#endif /* _HW_PNV_H */
+
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (25 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-24  3:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 28/77] ppc/xics: Rename existing XICS classe to XICS_SPAPR Benjamin Herrenschmidt
                   ` (52 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

XSCOM is an interface to a sideband bus provided by the POWER8 chip
pervasive unit, which gives access to a number of facilities in the
chip that are needed by the OPAL firmware and, to a lesser extent,
Linux. Among other things, this is how the PCI host bridges get
configured at boot and how the LPC bus is accessed.

This provides a simple bus and device type for devices sitting on
XSCOM, along with some facilities to optionally generate the
corresponding device-tree nodes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/Makefile.objs       |   2 +-
 hw/ppc/pnv.c               |  11 ++
 hw/ppc/pnv_xscom.c         | 415 +++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h       |   2 +
 include/hw/ppc/pnv_xscom.h |  73 ++++++++
 5 files changed, 502 insertions(+), 1 deletion(-)
 create mode 100644 hw/ppc/pnv_xscom.c
 create mode 100644 include/hw/ppc/pnv_xscom.h

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index cd74c96..2a7dd42 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -5,7 +5,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 # IBM PowerNV
-obj-$(CONFIG_POWERNV) += pnv.o
+obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index e68c9b1..2eac877 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -41,6 +41,7 @@
 #include "hw/ppc/ppc.h"
 #include "hw/ppc/pnv.h"
 #include "hw/loader.h"
+#include "hw/ppc/pnv_xscom.h"
 
 #include "exec/address-spaces.h"
 #include "qemu/config-file.h"
@@ -310,6 +311,7 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
     char *buf;
     const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+    unsigned int i;
 
     fdt = g_malloc0(FDT_MAX_SIZE);
     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
@@ -367,6 +369,12 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
     /* Memory */
     _FDT((powernv_populate_memory(fdt)));
 
+    /* For each chip */
+    for (i = 0; i < sys->num_chips; i++) {
+        /* Populate XSCOM */
+        _FDT((xscom_populate_fdt(sys->chips[i].xscom, fdt)));
+    }
+
     /* /hypervisor node */
     if (kvm_enabled()) {
         uint8_t hypercall[16];
@@ -424,6 +432,9 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
 
     /* XXX Improve chip numbering to better match HW */
     chip->chip_id = chip_no;
+
+    /* Set up XSCOM bus */
+    xscom_create(chip);
 }
 
 static void ppc_powernv_init(MachineState *machine)
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
new file mode 100644
index 0000000..bb35422
--- /dev/null
+++ b/hw/ppc/pnv_xscom.c
@@ -0,0 +1,415 @@
+
+/*
+ * QEMU PowerNV XSCOM bus definitions
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Based on the s390 virtio bus code:
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* TODO: Add some infrastructure for "random stuff" and FIRs that
+ * various units might want to deal with without creating actual
+ * XSCOM devices.
+ *
+ * For example, HB LPC XSCOM in the PIBAM
+ */
+#include "hw/hw.h"
+#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+#include "monitor/monitor.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+#include "sysemu/device_tree.h"
+#include "kvm_ppc.h"
+
+#include "hw/ppc/pnv_xscom.h"
+
+#include <libfdt.h>
+
+#define TYPE_XSCOM "xscom"
+#define XSCOM(obj) OBJECT_CHECK(XScomState, (obj), TYPE_XSCOM)
+
+#define XSCOM_SIZE        0x800000000ull
+#define XSCOM_BASE(chip)  (0x3fc0000000000ull + ((uint64_t)(chip)) * XSCOM_SIZE)
+
+//#define TRACE_SCOMS
+
+/* State of the sysbus device that exposes one chip's XSCOM window. */
+typedef struct XScomState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion mem;   /* MMIO region backed by xscom_ops */
+    int32_t chip_id;    /* "chip_id" property; selects the XSCOM_BASE() mapping */
+    XScomBus *bus;      /* bus whose children are searched for SCOM targets */
+} XScomState;
+
+/*
+ * Convert an address inside a chip's XSCOM window into a PCB address.
+ * Only the offset within the window is used: the offset shifted right by
+ * four with its low nibble cleared supplies the upper bits, and bits 3..6
+ * of the offset supply the low nibble.
+ */
+static uint32_t xscom_to_pcb_addr(uint64_t addr)
+{
+    uint64_t offset = addr & (XSCOM_SIZE - 1);
+    uint64_t upper = (offset >> 4) & ~0xfull;
+    uint64_t nibble = (offset >> 3) & 0xf;
+
+    return upper | nibble;
+}
+
+/*
+ * Report the outcome of an XSCOM access by ORing @hmer_bits into the HMER
+ * SPR of current_cpu, after synchronising that CPU's state.
+ */
+static void xscom_complete(uint64_t hmer_bits)
+{
+    CPUState *cs = current_cpu;
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    cpu_synchronize_state(cs);
+    env->spr[SPR_HMER] |= hmer_bits;
+
+    /* XXX Need a CPU helper to set HMER, also handle generation
+     * of HMIs
+     */
+}
+
+/*
+ * Find the device on @s's bus that claims PCB address @pcb_addr.
+ *
+ * On success returns the device and stores the index of the matching
+ * entry of its ranges[] in *@range.  Returns NULL, leaving *@range
+ * untouched, when no device claims the address.
+ */
+static XScomDevice *xscom_find_target(XScomState *s, uint32_t pcb_addr, uint32_t *range)
+{
+    BusChild *bc;
+
+    QTAILQ_FOREACH(bc, &s->bus->bus.children, sibling) {
+        DeviceState *qd = bc->child;
+        XScomDevice *xd = XSCOM_DEVICE(qd);
+        unsigned int i;
+
+        for (i = 0; i < MAX_XSCOM_RANGES; i++) {
+            const XScomRange *r = &xd->ranges[i];
+
+            /*
+             * Compare the offset with the size instead of computing
+             * addr + size, which wraps around in 32 bits for a range
+             * that ends at the top of the address space.
+             */
+            if (pcb_addr >= r->addr && pcb_addr - r->addr < r->size) {
+                *range = i;
+                return xd;
+            }
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Route a read of PCB address @pcb_addr to the device claiming it.
+ * Returns false when no device claims the address or its class has no
+ * read handler; otherwise returns what the handler returns.  The handler
+ * is given the range index, the offset into that range and @out_val.
+ */
+static bool xscom_dispatch_read(XScomState *s, uint32_t pcb_addr, uint64_t *out_val)
+{
+    uint32_t idx;
+    XScomDevice *target = xscom_find_target(s, pcb_addr, &idx);
+    XScomDeviceClass *klass;
+
+    if (target == NULL) {
+        return false;
+    }
+
+    klass = XSCOM_DEVICE_GET_CLASS(target);
+    if (klass->read == NULL) {
+        return false;
+    }
+
+    return klass->read(target, idx, pcb_addr - target->ranges[idx].addr,
+                       out_val);
+}
+
+/*
+ * Route a write of @val to PCB address @pcb_addr to the device claiming it.
+ * Returns false when no device claims the address or its class has no
+ * write handler; otherwise returns what the handler returns.  The handler
+ * is given the range index, the offset into that range and @val.
+ */
+static bool xscom_dispatch_write(XScomState *s, uint32_t pcb_addr, uint64_t val)
+{
+    uint32_t idx;
+    XScomDevice *target = xscom_find_target(s, pcb_addr, &idx);
+    XScomDeviceClass *klass;
+
+    if (target == NULL) {
+        return false;
+    }
+
+    klass = XSCOM_DEVICE_GET_CLASS(target);
+    if (klass->write == NULL) {
+        return false;
+    }
+
+    return klass->write(target, idx, pcb_addr - target->ranges[idx].addr,
+                        val);
+}
+
+/*
+ * MMIO read callback for the XSCOM window; @opaque is the XScomState.
+ * Only 8-byte accesses are accepted.  A few PCB addresses are answered
+ * here with fixed values, everything else is dispatched to the devices
+ * on the bus.  The outcome is reported through xscom_complete(): DONE on
+ * success, FAIL | DONE (with 0 returned) when the dispatch fails.
+ */
+static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width)
+{
+    XScomState *s = opaque;
+    uint32_t pcba = xscom_to_pcb_addr(addr);
+    uint64_t val;
+    bool ok = true;
+
+    assert(width == 8);
+
+#ifdef TRACE_SCOMS
+    printf("XSCOM_READ(0x%x:0x%x)\n", s->chip_id, pcba);
+#endif
+
+    /* Handle some SCOMs here before dispatch */
+    switch (pcba) {
+    case 0xf000f:
+        val = 0x221EF04980000000;
+        break;
+    /* All of the following read back as zero */
+    case 0x1010c00:     /* PIBAM FIR */
+    case 0x1010c03:     /* PIBAM FIR MASK */
+    case 0x2020007:     /* ADU stuff */
+    case 0x2020009:     /* ADU stuff */
+    case 0x202000f:     /* ADU stuff */
+    case 0x2013f00:     /* PBA stuff */
+    case 0x2013f01:     /* PBA stuff */
+    case 0x2013f02:     /* PBA stuff */
+    case 0x2013f03:     /* PBA stuff */
+    case 0x2013f04:     /* PBA stuff */
+    case 0x2013f05:     /* PBA stuff */
+    case 0x2013f06:     /* PBA stuff */
+    case 0x2013f07:     /* PBA stuff */
+        val = 0;
+        break;
+    default:
+        ok = xscom_dispatch_read(s, pcba, &val);
+        break;
+    }
+
+    if (!ok) {
+        xscom_complete(HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
+        return 0;
+    }
+
+    xscom_complete(HMER_XSCOM_DONE);
+    return val;
+}
+
+/*
+ * MMIO write callback for the XSCOM window; @opaque is the XScomState.
+ * Only 8-byte accesses are accepted.  Writes to the PCB addresses listed
+ * in the switch are dropped, everything else is dispatched to the devices
+ * on the bus.  The outcome is reported through xscom_complete(): DONE on
+ * success, FAIL | DONE when the dispatch fails.
+ */
+static void xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                        unsigned width)
+{
+    XScomState *s = opaque;
+    uint32_t pcba = xscom_to_pcb_addr(addr);
+
+    assert(width == 8);
+
+#ifdef TRACE_SCOMS
+    printf("XSCOM_WRITE(0x%x:0x%x, 0x%016llx)\n",
+           s->chip_id, pcba, (unsigned long long)val);
+#endif
+    /* Handle some SCOMs here before dispatch */
+    switch(pcba) {
+        /* We ignore writes to these */
+    case 0xf000f:       /* chip id is RO */
+    case 0x1010c00:     /* PIBAM FIR */
+    case 0x1010c01:     /* PIBAM FIR */
+    case 0x1010c02:     /* PIBAM FIR */
+    case 0x1010c03:     /* PIBAM FIR MASK */
+    case 0x1010c04:     /* PIBAM FIR MASK */
+    case 0x1010c05:     /* PIBAM FIR MASK */
+    case 0x2020007:     /* ADU stuff */
+    case 0x2020009:     /* ADU stuff */
+    case 0x202000f:     /* ADU stuff */
+        break;
+    default:
+        if (!xscom_dispatch_write(s, pcba, val)) {
+            xscom_complete(HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
+            return;
+        }
+    }
+
+    xscom_complete(HMER_XSCOM_DONE);
+}
+
+/* Access callbacks for the XSCOM window: big endian, 8-byte accesses only. */
+static const MemoryRegionOps xscom_ops = {
+    .read = xscom_read,
+    .write = xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * SysBusDeviceClass init hook: sets chip_id to -1 and returns 0.
+ * NOTE(review): xscom_realize() asserts chip_id >= 0, and the "chip_id"
+ * property defaults to 0 - confirm when (or whether) this hook runs.
+ */
+static int xscom_init(SysBusDevice *dev)
+{
+    XScomState *s = XSCOM(dev);
+
+    s->chip_id = -1;
+    return 0;
+}
+
+/*
+ * Realize the XSCOM bridge: create the "xscom-<chip_id>" MMIO region of
+ * XSCOM_SIZE bytes and map it at XSCOM_BASE(chip_id).
+ * chip_id must be non-negative by the time this runs.  @errp is not used.
+ */
+static void xscom_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    XScomState *s = XSCOM(dev);
+    char *name;
+
+    assert(s->chip_id >= 0);
+    name = g_strdup_printf("xscom-%x", s->chip_id);
+    memory_region_init_io(&s->mem, OBJECT(s), &xscom_ops, s, name, XSCOM_SIZE);
+    /* The string was only needed for the call above; don't leak it */
+    g_free(name);
+    sysbus_init_mmio(sbd, &s->mem);
+    sysbus_mmio_map(sbd, 0, XSCOM_BASE(s->chip_id));
+}
+
+/* qdev properties of the XSCOM bridge: "chip_id", a 32-bit int defaulting to 0. */
+static Property xscom_properties[] = {
+        DEFINE_PROP_INT32("chip_id", XScomState, chip_id, 0),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class init for the XSCOM bridge: installs properties, realize and init hooks. */
+static void xscom_class_init(ObjectClass *klass, void *data)
+{
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->props = xscom_properties;
+    dc->realize = xscom_realize;
+    k->init = xscom_init;
+}
+
+/* QOM type of the XSCOM bridge, a sysbus device. */
+static const TypeInfo xscom_info = {
+    .name          = TYPE_XSCOM,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XScomState),
+    .class_init    = xscom_class_init,
+};
+
+/* Class init for the XSCOM bus type; nothing to set up. */
+static void xscom_bus_class_init(ObjectClass *klass, void *data)
+{
+}
+
+/* QOM type of the XSCOM bus. */
+static const TypeInfo xscom_bus_info = {
+    .name = TYPE_XSCOM_BUS,
+    .parent = TYPE_BUS,
+    .class_init = xscom_bus_class_init,
+    .instance_size = sizeof(XScomBus),
+};
+
+/*
+ * Create the XSCOM bridge device and its bus for @chip.
+ *
+ * The bridge gets chip->chip_id as its "chip_id" property and is
+ * initialised with qdev_init_nofail(); a TYPE_XSCOM_BUS named "xscom" is
+ * then created on it and recorded in both the bridge state and
+ * chip->xscom.
+ */
+void xscom_create(PnvChip *chip)
+{
+    DeviceState *dev;
+    XScomState *xdev;
+    BusState *qbus;
+    XScomBus *xb;
+
+    dev = qdev_create(NULL, TYPE_XSCOM);
+    /* "chip_id" is declared with DEFINE_PROP_INT32, so use the int32 setter */
+    qdev_prop_set_int32(dev, "chip_id", chip->chip_id);
+    qdev_init_nofail(dev);
+
+    /* Create bus on bridge device */
+    qbus = qbus_create(TYPE_XSCOM_BUS, dev, "xscom");
+    xb = DO_UPCAST(XScomBus, bus, qbus);
+    xb->chip_id = chip->chip_id;
+    xdev = XSCOM(dev);
+    xdev->bus = xb;
+    chip->xscom = xb;
+}
+
+/*
+ * Evaluate the libfdt expression @exp once; if the result is negative,
+ * print the expression text and fdt_strerror() of the result to stderr
+ * and exit(1).
+ */
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+
+/*
+ * Write the "xscom@<base>" node for bus @xb into @fdt, which must be a
+ * tree under construction with libfdt's sequential-write API.
+ *
+ * The node carries ibm,chip-id, #address-cells, #size-cells, reg,
+ * compatible and scom-controller.  Each child device whose class has a
+ * dt_name gets a "<dt_name>@<first range address>" subnode with a "reg"
+ * built from its non-empty leading ranges, whatever its devnode() hook
+ * adds, and a "compatible" string list built from dt_compatible.
+ *
+ * Returns 0; libfdt failures are fatal (see _FDT()).
+ */
+int xscom_populate_fdt(XScomBus *xb, void *fdt)
+{
+    BusChild *bc;
+    char *name;
+    const char compat[] = "ibm,power8-xscom\0ibm,xscom";
+    uint64_t reg[] = { cpu_to_be64(XSCOM_BASE(xb->chip_id)),
+                       cpu_to_be64(XSCOM_SIZE) };
+
+    name = g_strdup_printf("xscom@%llx", (unsigned long long)be64_to_cpu(reg[0]));
+    _FDT((fdt_begin_node(fdt, name)));
+    g_free(name);
+    _FDT((fdt_property_cell(fdt, "ibm,chip-id", xb->chip_id)));
+    _FDT((fdt_property_cell(fdt, "#address-cells", 1)));
+    _FDT((fdt_property_cell(fdt, "#size-cells", 1)));
+    _FDT((fdt_property(fdt, "reg", reg, sizeof(reg))));
+    _FDT((fdt_property(fdt, "compatible", compat, sizeof(compat))));
+    _FDT((fdt_property(fdt, "scom-controller", NULL, 0)));
+
+    QTAILQ_FOREACH(bc, &xb->bus.children, sibling) {
+        DeviceState *qd = bc->child;
+        XScomDevice *xd = XSCOM_DEVICE(qd);
+        XScomDeviceClass *xc = XSCOM_DEVICE_GET_CLASS(xd);
+        uint32_t child_reg[MAX_XSCOM_RANGES * 2];
+        unsigned int i, sz = 0;
+        char *cp, *p;
+
+        /* Some XSCOM slaves may not be represented in the DT */
+        if (!xc->dt_name) {
+            continue;
+        }
+        name = g_strdup_printf("%s@%x", xc->dt_name, xd->ranges[0].addr);
+        _FDT((fdt_begin_node(fdt, name)));
+        g_free(name);
+        for (i = 0; i < MAX_XSCOM_RANGES; i++) {
+            if (xd->ranges[i].size == 0) {
+                break;
+            }
+            child_reg[sz++] = cpu_to_be32(xd->ranges[i].addr);
+            child_reg[sz++] = cpu_to_be32(xd->ranges[i].size);
+        }
+        _FDT((fdt_property(fdt, "reg", child_reg, sz * 4)));
+        if (xc->devnode) {
+            _FDT((xc->devnode(xd, fdt)));
+        }
+#define MAX_COMPATIBLE_PROP     1024
+        /*
+         * Concatenate the NULL-terminated dt_compatible list into one
+         * buffer of NUL-separated strings, stopping at the first entry
+         * that no longer fits.
+         */
+        cp = p = g_malloc0(MAX_COMPATIBLE_PROP);
+        for (i = 0; xc->dt_compatible && xc->dt_compatible[i]; i++) {
+            size_t len = strlen(xc->dt_compatible[i]) + 1; /* with NUL */
+
+            /* Check against the bytes still free, not the string index */
+            if (len > (size_t)(MAX_COMPATIBLE_PROP - (p - cp))) {
+                break;
+            }
+            memcpy(p, xc->dt_compatible[i], len);
+            p += len;
+        }
+        _FDT((fdt_property(fdt, "compatible", cp, p - cp)));
+        /* fdt_property() copied the data into the tree */
+        g_free(cp);
+        _FDT((fdt_end_node(fdt)));
+    }
+
+    _FDT((fdt_end_node(fdt)));
+
+    return 0;
+}
+
+/*
+ * DeviceClass init hook shared by all XSCOM devices: forwards to the
+ * class's own init() when it has one and returns its result, otherwise
+ * returns 0.
+ */
+static int xscom_qdev_init(DeviceState *qdev)
+{
+    /* Checked QOM cast, as used everywhere else in this file */
+    XScomDevice *xdev = XSCOM_DEVICE(qdev);
+    XScomDeviceClass *xc = XSCOM_DEVICE_GET_CLASS(xdev);
+
+    if (xc->init) {
+        return xc->init(xdev);
+    }
+    return 0;
+}
+
+/* Class init for the abstract XSCOM device: sets the init hook and bus type. */
+static void xscom_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *k = DEVICE_CLASS(klass);
+    k->init = xscom_qdev_init;
+    k->bus_type = TYPE_XSCOM_BUS;
+}
+
+/* Abstract QOM base type for devices that sit on the XSCOM bus. */
+static const TypeInfo xscom_dev_info = {
+    .name = TYPE_XSCOM_DEVICE,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(XScomDevice),
+    .abstract = true,
+    .class_size = sizeof(XScomDeviceClass),
+    .class_init = xscom_device_class_init,
+};
+
+/* Registers the bridge, bus and abstract device types. */
+static void xscom_register_types(void)
+{
+    type_register_static(&xscom_info);
+    type_register_static(&xscom_bus_info);
+    type_register_static(&xscom_dev_info);
+}
+
+type_init(xscom_register_types)
+
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 9a48c16..cb157eb 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -20,10 +20,12 @@
  */
 
 #include "hw/hw.h"
+typedef struct XScomBus XScomBus;
 
 /* Should we turn that into a QOjb of some sort ? */
 typedef struct PnvChip {
     uint32_t         chip_id;
+    XScomBus         *xscom;
 } PnvChip;
 
 typedef struct PnvSystem {
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
new file mode 100644
index 0000000..99de078
--- /dev/null
+++ b/include/hw/ppc/pnv_xscom.h
@@ -0,0 +1,73 @@
+#ifndef _HW_XSCOM_H
+#define _HW_XSCOM_H
+/*
+ * QEMU PowerNV XSCOM bus definitions
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au>
+ * Based on the s390 virtio bus definitions:
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/pnv.h"
+
+#define TYPE_XSCOM_DEVICE "xscom-device"
+#define XSCOM_DEVICE(obj) \
+     OBJECT_CHECK(XScomDevice, (obj), TYPE_XSCOM_DEVICE)
+#define XSCOM_DEVICE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(XScomDeviceClass, (klass), TYPE_XSCOM_DEVICE)
+#define XSCOM_DEVICE_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(XScomDeviceClass, (obj), TYPE_XSCOM_DEVICE)
+
+#define TYPE_XSCOM_BUS "xscom-bus"
+#define XSCOM_BUS(obj) OBJECT_CHECK(XScomBus, (obj), TYPE_XSCOM_BUS)
+
+typedef struct XScomDevice XScomDevice;
+/* XScomBus is already typedef'd by hw/ppc/pnv.h; don't declare it twice */
+
+/* Class of a device sitting on the XSCOM bus */
+typedef struct XScomDeviceClass {
+    DeviceClass parent_class;
+
+    /* Device-tree node name; devices without one get no node */
+    const char *dt_name;
+    /* NULL-terminated list of "compatible" strings for the node */
+    const char **dt_compatible;
+    /* Optional; called from the common qdev init hook */
+    int (*init)(XScomDevice *dev);
+    /* Optional; called while the device's node is being written to @fdt */
+    int (*devnode)(XScomDevice *dev, void *fdt);
+
+    /*
+     * Actual XScom accesses: @range is the index into the device's
+     * ranges[] and @offset is relative to that range's addr.  Either
+     * may be NULL; the bus treats a false return as a failed access.
+     */
+    bool (*read)(XScomDevice *dev, uint32_t range, uint32_t offset, uint64_t *out_val);
+    bool (*write)(XScomDevice *dev, uint32_t range, uint32_t offset, uint64_t val);
+} XScomDeviceClass;
+
+/* One window of PCB addresses claimed by a device */
+typedef struct XScomRange {
+    uint32_t addr;
+    uint32_t size;
+} XScomRange;
+
+struct XScomDevice {
+    DeviceState qdev;
+#define MAX_XSCOM_RANGES	4
+    /* A range with size 0 claims no addresses */
+    struct XScomRange ranges[MAX_XSCOM_RANGES];
+};
+
+struct XScomBus {
+    BusState bus;
+    uint32_t chip_id;
+};
+
+/* Create the XSCOM bridge and bus for @chip and store the bus in chip->xscom */
+extern void xscom_create(PnvChip *chip);
+/* Write the xscom node for @xscom and its children into @fdt; returns 0 */
+extern int xscom_populate_fdt(XScomBus *xscom, void *fdt);
+
+
+#endif /* _HW_XSCOM_H */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 28/77] ppc/xics: Rename existing XICS classe to XICS_SPAPR
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (26 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-24  3:25   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 29/77] ppc/xics: Move SPAPR specific code to a separate file Benjamin Herrenschmidt
                   ` (51 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The common class doesn't change; the KVM one is SPAPR specific. This
is a preliminary change to make it easier to support "native" XICS.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 26 +++++++++++++-------------
 hw/intc/xics_kvm.c    |  6 +++---
 hw/ppc/spapr.c        |  6 +++---
 hw/ppc/spapr_events.c |  2 +-
 hw/ppc/spapr_pci.c    |  8 ++++----
 hw/ppc/spapr_vio.c    |  2 +-
 include/hw/ppc/xics.h | 25 +++++++++++++------------
 7 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 9ff5796..bcea1f0 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -711,7 +711,7 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
     return -1;
 }
 
-int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
 {
     ICSState *ics = &icp->ics[src];
     int irq;
@@ -742,7 +742,7 @@ int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
  * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
  * If align==true, aligns the first IRQ number to num.
  */
-int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
 {
     int i, first = -1;
     ICSState *ics = &icp->ics[src];
@@ -787,7 +787,7 @@ static void ics_free(ICSState *ics, int srcno, int num)
     }
 }
 
-void xics_free(XICSState *icp, int irq, int num)
+void xics_spapr_free(XICSState *icp, int irq, int num)
 {
     int src = xics_find_source(icp, irq);
 
@@ -1006,7 +1006,7 @@ static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
     }
 }
 
-static void xics_realize(DeviceState *dev, Error **errp)
+static void xics_spapr_realize(DeviceState *dev, Error **errp)
 {
     XICSState *icp = XICS(dev);
     Error *error = NULL;
@@ -1045,7 +1045,7 @@ static void xics_realize(DeviceState *dev, Error **errp)
     }
 }
 
-static void xics_initfn(Object *obj)
+static void xics_spapr_initfn(Object *obj)
 {
     XICSState *xics = XICS(obj);
 
@@ -1054,29 +1054,29 @@ static void xics_initfn(Object *obj)
     xics->ics->icp = xics;
 }
 
-static void xics_class_init(ObjectClass *oc, void *data)
+static void xics_spapr_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
-    XICSStateClass *xsc = XICS_CLASS(oc);
+    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
 
-    dc->realize = xics_realize;
+    dc->realize = xics_spapr_realize;
     xsc->set_nr_irqs = xics_set_nr_irqs;
     xsc->set_nr_servers = xics_set_nr_servers;
 }
 
-static const TypeInfo xics_info = {
-    .name          = TYPE_XICS,
+static const TypeInfo xics_spapr_info = {
+    .name          = TYPE_XICS_SPAPR,
     .parent        = TYPE_XICS_COMMON,
     .instance_size = sizeof(XICSState),
     .class_size = sizeof(XICSStateClass),
-    .class_init    = xics_class_init,
-    .instance_init = xics_initfn,
+    .class_init    = xics_spapr_class_init,
+    .instance_init = xics_spapr_initfn,
 };
 
 static void xics_register_types(void)
 {
     type_register_static(&xics_common_info);
-    type_register_static(&xics_info);
+    type_register_static(&xics_spapr_info);
     type_register_static(&ics_info);
     type_register_static(&icp_info);
 }
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index d58729c..03ae801 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -490,8 +490,8 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
     xsc->set_nr_servers = xics_kvm_set_nr_servers;
 }
 
-static const TypeInfo xics_kvm_info = {
-    .name          = TYPE_KVM_XICS,
+static const TypeInfo xics_spapr_kvm_info = {
+    .name          = TYPE_XICS_SPAPR_KVM,
     .parent        = TYPE_XICS_COMMON,
     .instance_size = sizeof(KVMXICSState),
     .class_init    = xics_kvm_class_init,
@@ -500,7 +500,7 @@ static const TypeInfo xics_kvm_info = {
 
 static void xics_kvm_register_types(void)
 {
-    type_register_static(&xics_kvm_info);
+    type_register_static(&xics_spapr_kvm_info);
     type_register_static(&ics_kvm_info);
     type_register_static(&icp_kvm_info);
 }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 610629e..bf94426 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -119,7 +119,7 @@ static XICSState *xics_system_init(MachineState *machine,
         Error *err = NULL;
 
         if (machine_kernel_irqchip_allowed(machine)) {
-            icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err);
+            icp = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, &err);
         }
         if (machine_kernel_irqchip_required(machine) && !icp) {
             error_report("kernel_irqchip requested but unavailable: %s",
@@ -128,7 +128,7 @@ static XICSState *xics_system_init(MachineState *machine,
     }
 
     if (!icp) {
-        icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, &error_abort);
+        icp = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, &error_abort);
     }
 
     return icp;
@@ -1768,7 +1768,7 @@ static void ppc_spapr_init(MachineState *machine)
     spapr->icp = xics_system_init(machine,
                                   DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
                                                smp_threads),
-                                  XICS_IRQS);
+                                  XICS_IRQS_SPAPR);
 
     if (smc->dr_lmb_enabled) {
         spapr_validate_node_memory(machine);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 744ea62..3b3663e 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -587,7 +587,7 @@ out_no_events:
 void spapr_events_init(sPAPRMachineState *spapr)
 {
     QTAILQ_INIT(&spapr->pending_events);
-    spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false);
+    spapr->check_exception_irq = xics_spapr_alloc(spapr->icp, 0, 0, false);
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
     spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 55fa8db..8b613a8 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -313,7 +313,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             return;
         }
 
-        xics_free(spapr->icp, msi->first_irq, msi->num);
+        xics_spapr_free(spapr->icp, msi->first_irq, msi->num);
         if (msi_present(pdev)) {
             spapr_msi_setmsg(pdev, 0, false, 0, num);
         }
@@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = xics_alloc_block(spapr->icp, 0, req_num, false,
+    irq = xics_spapr_alloc_block(spapr->icp, 0, req_num, false,
                            ret_intr_type == RTAS_TYPE_MSI);
     if (!irq) {
         error_report("Cannot allocate MSIs for device %x", config_addr);
@@ -1360,7 +1360,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < PCI_NUM_PINS; i++) {
         uint32_t irq;
 
-        irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
+        irq = xics_spapr_alloc_block(spapr->icp, 0, 1, true, false);
         if (!irq) {
             error_setg(errp, "spapr_allocate_lsi failed");
             return;
@@ -1717,7 +1717,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
-    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
 
     /* Build the interrupt-map, this must matches what is done
      * in pci_spapr_map_irq
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index c51eb8e..7b718cc 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -462,7 +462,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
         dev->qdev.id = id;
     }
 
-    dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false);
+    dev->irq = xics_spapr_alloc(spapr->icp, 0, dev->irq, false);
     if (!dev->irq) {
         error_setg(errp, "can't allocate IRQ");
         return;
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 355a966..8d33dfa 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -32,20 +32,20 @@
 #define TYPE_XICS_COMMON "xics-common"
 #define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON)
 
-#define TYPE_XICS "xics"
-#define XICS(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS)
+#define TYPE_XICS_SPAPR "xics-spapr"
+#define XICS(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR)
 
-#define TYPE_KVM_XICS "xics-kvm"
-#define KVM_XICS(obj) OBJECT_CHECK(KVMXICSState, (obj), TYPE_KVM_XICS)
+#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
+#define KVM_XICS(obj) OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
 
 #define XICS_COMMON_CLASS(klass) \
      OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
-#define XICS_CLASS(klass) \
-     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS)
+#define XICS_SPAPR_CLASS(klass) \
+     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR)
 #define XICS_COMMON_GET_CLASS(obj) \
      OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
-#define XICS_GET_CLASS(obj) \
-     OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS)
+#define XICS_SPAPR_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
 
 #define XICS_IPI        0x2
 #define XICS_BUID       0x1
@@ -157,13 +157,14 @@ struct ICSIRQState {
     uint8_t flags;
 };
 
-#define XICS_IRQS               1024
+#define XICS_IRQS_SPAPR               1024
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
 void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
-int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
-int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
-void xics_free(XICSState *icp, int irq, int num);
+
+int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
+int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
+void xics_spapr_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 29/77] ppc/xics: Move SPAPR specific code to a separate file
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (27 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 28/77] ppc/xics: Rename existing XICS classe to XICS_SPAPR Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-24  3:32   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 30/77] ppc/xics: Implement H_IPOLL using an accessor Benjamin Herrenschmidt
                   ` (50 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Leave the core ICP/ICS logic in xics.c and move the top-level
class wrapper, the hypercall handlers and the RTAS handlers to
xics_spapr.c.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 default-configs/ppc64-softmmu.mak |   1 +
 hw/intc/Makefile.objs             |   1 +
 hw/intc/xics.c                    | 390 ++----------------------------------
 hw/intc/xics_spapr.c              | 401 ++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/xics.h             |  23 +++
 5 files changed, 437 insertions(+), 379 deletions(-)
 create mode 100644 hw/intc/xics_spapr.c

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 96574c8..516a6e2 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -50,6 +50,7 @@ CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
+CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
 # For PReP
 CONFIG_MC146818RTC=y
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 004b0c2..e24cb03 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -26,6 +26,7 @@ obj-$(CONFIG_OMAP) += omap_intc.o
 obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
 obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
+obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index bcea1f0..38cacd8 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -33,7 +33,7 @@
 #include "qemu/error-report.h"
 #include "qapi/visitor.h"
 
-static int get_cpu_index_by_dt_id(int cpu_dt_id)
+int get_cpu_index_by_dt_id(int cpu_dt_id)
 {
     PowerPCCPU *cpu = ppc_get_vcpu_by_dt_id(cpu_dt_id);
 
@@ -224,7 +224,7 @@ static void icp_resend(XICSState *icp, int server)
     ics_resend(icp->ics);
 }
 
-static void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
+void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
 {
     ICPState *ss = icp->ss + server;
     uint8_t old_cppr;
@@ -248,7 +248,7 @@ static void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
     }
 }
 
-static void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
+void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
 {
     ICPState *ss = icp->ss + server;
 
@@ -258,7 +258,7 @@ static void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
     }
 }
 
-static uint32_t icp_accept(ICPState *ss)
+uint32_t icp_accept(ICPState *ss)
 {
     uint32_t xirr = ss->xirr;
 
@@ -271,7 +271,7 @@ static uint32_t icp_accept(ICPState *ss)
     return xirr;
 }
 
-static void icp_eoi(XICSState *icp, int server, uint32_t xirr)
+void icp_eoi(XICSState *icp, int server, uint32_t xirr)
 {
     ICPState *ss = icp->ss + server;
 
@@ -372,12 +372,6 @@ static const TypeInfo icp_info = {
 /*
  * ICS: Source layer
  */
-static int ics_valid_irq(ICSState *ics, uint32_t nr)
-{
-    return (nr >= ics->offset)
-        && (nr < (ics->offset + ics->nr_irqs));
-}
-
 static void resend_msi(ICSState *ics, int srcno)
 {
     ICSIRQState *irq = ics->irqs + srcno;
@@ -462,8 +456,8 @@ static void write_xive_lsi(ICSState *ics, int srcno)
     resend_lsi(ics, srcno);
 }
 
-static void ics_write_xive(ICSState *ics, int nr, int server,
-                           uint8_t priority, uint8_t saved_priority)
+void ics_write_xive(ICSState *ics, int nr, int server,
+		    uint8_t priority, uint8_t saved_priority)
 {
     int srcno = nr - ics->offset;
     ICSIRQState *irq = ics->irqs + srcno;
@@ -640,7 +634,7 @@ static const TypeInfo ics_info = {
 /*
  * Exported functions
  */
-static int xics_find_source(XICSState *icp, int irq)
+int xics_find_source(XICSState *icp, int irq)
 {
     int sources = 1;
     int src;
@@ -668,7 +662,7 @@ qemu_irq xics_get_qirq(XICSState *icp, int irq)
     return NULL;
 }
 
-static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
 {
     assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
 
@@ -687,310 +681,16 @@ void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
     ics_set_irq_type(ics, irq - ics->offset, lsi);
 }
 
-#define ICS_IRQ_FREE(ics, srcno)   \
-    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
-
-static int ics_find_free_block(ICSState *ics, int num, int alignnum)
-{
-    int first, i;
-
-    for (first = 0; first < ics->nr_irqs; first += alignnum) {
-        if (num > (ics->nr_irqs - first)) {
-            return -1;
-        }
-        for (i = first; i < first + num; ++i) {
-            if (!ICS_IRQ_FREE(ics, i)) {
-                break;
-            }
-        }
-        if (i == (first + num)) {
-            return first;
-        }
-    }
-
-    return -1;
-}
-
-int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
-{
-    ICSState *ics = &icp->ics[src];
-    int irq;
-
-    if (irq_hint) {
-        assert(src == xics_find_source(icp, irq_hint));
-        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
-            trace_xics_alloc_failed_hint(src, irq_hint);
-            return -1;
-        }
-        irq = irq_hint;
-    } else {
-        irq = ics_find_free_block(ics, 1, 1);
-        if (irq < 0) {
-            trace_xics_alloc_failed_no_left(src);
-            return -1;
-        }
-        irq += ics->offset;
-    }
-
-    ics_set_irq_type(ics, irq - ics->offset, lsi);
-    trace_xics_alloc(src, irq);
-
-    return irq;
-}
-
-/*
- * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
- * If align==true, aligns the first IRQ number to num.
- */
-int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
-{
-    int i, first = -1;
-    ICSState *ics = &icp->ics[src];
-
-    assert(src == 0);
-    /*
-     * MSIMesage::data is used for storing VIRQ so
-     * it has to be aligned to num to support multiple
-     * MSI vectors. MSI-X is not affected by this.
-     * The hint is used for the first IRQ, the rest should
-     * be allocated continuously.
-     */
-    if (align) {
-        assert((num == 1) || (num == 2) || (num == 4) ||
-               (num == 8) || (num == 16) || (num == 32));
-        first = ics_find_free_block(ics, num, num);
-    } else {
-        first = ics_find_free_block(ics, num, 1);
-    }
-
-    if (first >= 0) {
-        for (i = first; i < first + num; ++i) {
-            ics_set_irq_type(ics, i, lsi);
-        }
-    }
-    first += ics->offset;
-
-    trace_xics_alloc_block(src, first, num, lsi, align);
-
-    return first;
-}
-
-static void ics_free(ICSState *ics, int srcno, int num)
-{
-    int i;
-
-    for (i = srcno; i < srcno + num; ++i) {
-        if (ICS_IRQ_FREE(ics, i)) {
-            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
-        }
-        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
-    }
-}
-
-void xics_spapr_free(XICSState *icp, int irq, int num)
-{
-    int src = xics_find_source(icp, irq);
-
-    if (src >= 0) {
-        ICSState *ics = &icp->ics[src];
-
-        /* FIXME: implement multiple sources */
-        assert(src == 0);
-
-        trace_xics_ics_free(ics - icp->ics, irq, num);
-        ics_free(ics, irq - ics->offset, num);
-    }
-}
-
-/*
- * Guest interfaces
- */
-
-static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                           target_ulong opcode, target_ulong *args)
-{
-    CPUState *cs = CPU(cpu);
-    target_ulong cppr = args[0];
-
-    icp_set_cppr(spapr->icp, cs->cpu_index, cppr);
-    return H_SUCCESS;
-}
-
-static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                          target_ulong opcode, target_ulong *args)
-{
-    target_ulong server = get_cpu_index_by_dt_id(args[0]);
-    target_ulong mfrr = args[1];
-
-    if (server >= spapr->icp->nr_servers) {
-        return H_PARAMETER;
-    }
-
-    icp_set_mfrr(spapr->icp, server, mfrr);
-    return H_SUCCESS;
-}
-
-static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                           target_ulong opcode, target_ulong *args)
-{
-    CPUState *cs = CPU(cpu);
-    uint32_t xirr = icp_accept(spapr->icp->ss + cs->cpu_index);
-
-    args[0] = xirr;
-    return H_SUCCESS;
-}
-
-static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                             target_ulong opcode, target_ulong *args)
-{
-    CPUState *cs = CPU(cpu);
-    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
-    uint32_t xirr = icp_accept(ss);
-
-    args[0] = xirr;
-    args[1] = cpu_get_host_ticks();
-    return H_SUCCESS;
-}
-
-static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                          target_ulong opcode, target_ulong *args)
-{
-    CPUState *cs = CPU(cpu);
-    target_ulong xirr = args[0];
-
-    icp_eoi(spapr->icp, cs->cpu_index, xirr);
-    return H_SUCCESS;
-}
-
-static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                            target_ulong opcode, target_ulong *args)
-{
-    CPUState *cs = CPU(cpu);
-    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
-
-    args[0] = ss->xirr;
-    args[1] = ss->mfrr;
-
-    return H_SUCCESS;
-}
-
-static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                          uint32_t token,
-                          uint32_t nargs, target_ulong args,
-                          uint32_t nret, target_ulong rets)
-{
-    ICSState *ics = spapr->icp->ics;
-    uint32_t nr, server, priority;
-
-    if ((nargs != 3) || (nret != 1)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    nr = rtas_ld(args, 0);
-    server = get_cpu_index_by_dt_id(rtas_ld(args, 1));
-    priority = rtas_ld(args, 2);
-
-    if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
-        || (priority > 0xff)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    ics_write_xive(ics, nr, server, priority, priority);
-
-    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-}
-
-static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                          uint32_t token,
-                          uint32_t nargs, target_ulong args,
-                          uint32_t nret, target_ulong rets)
-{
-    ICSState *ics = spapr->icp->ics;
-    uint32_t nr;
-
-    if ((nargs != 1) || (nret != 3)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    nr = rtas_ld(args, 0);
-
-    if (!ics_valid_irq(ics, nr)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-    rtas_st(rets, 1, ics->irqs[nr - ics->offset].server);
-    rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
-}
-
-static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                         uint32_t token,
-                         uint32_t nargs, target_ulong args,
-                         uint32_t nret, target_ulong rets)
-{
-    ICSState *ics = spapr->icp->ics;
-    uint32_t nr;
-
-    if ((nargs != 1) || (nret != 1)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    nr = rtas_ld(args, 0);
-
-    if (!ics_valid_irq(ics, nr)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
-                   ics->irqs[nr - ics->offset].priority);
-
-    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-}
-
-static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-                        uint32_t token,
-                        uint32_t nargs, target_ulong args,
-                        uint32_t nret, target_ulong rets)
-{
-    ICSState *ics = spapr->icp->ics;
-    uint32_t nr;
-
-    if ((nargs != 1) || (nret != 1)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    nr = rtas_ld(args, 0);
-
-    if (!ics_valid_irq(ics, nr)) {
-        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
-        return;
-    }
-
-    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
-                   ics->irqs[nr - ics->offset].saved_priority,
-                   ics->irqs[nr - ics->offset].saved_priority);
-
-    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-}
-
 /*
  * XICS
  */
 
-static void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp)
+void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp)
 {
     icp->nr_irqs = icp->ics->nr_irqs = nr_irqs;
 }
 
-static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
-                                Error **errp)
+void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp)
 {
     int i;
 
@@ -1006,77 +706,9 @@ static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
     }
 }
 
-static void xics_spapr_realize(DeviceState *dev, Error **errp)
-{
-    XICSState *icp = XICS(dev);
-    Error *error = NULL;
-    int i;
-
-    if (!icp->nr_servers) {
-        error_setg(errp, "Number of servers needs to be greater 0");
-        return;
-    }
-
-    /* Registration of global state belongs into realize */
-    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
-    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
-    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
-    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
-
-    spapr_register_hypercall(H_CPPR, h_cppr);
-    spapr_register_hypercall(H_IPI, h_ipi);
-    spapr_register_hypercall(H_XIRR, h_xirr);
-    spapr_register_hypercall(H_XIRR_X, h_xirr_x);
-    spapr_register_hypercall(H_EOI, h_eoi);
-    spapr_register_hypercall(H_IPOLL, h_ipoll);
-
-    object_property_set_bool(OBJECT(icp->ics), true, "realized", &error);
-    if (error) {
-        error_propagate(errp, error);
-        return;
-    }
-
-    for (i = 0; i < icp->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&icp->ss[i]), true, "realized", &error);
-        if (error) {
-            error_propagate(errp, error);
-            return;
-        }
-    }
-}
-
-static void xics_spapr_initfn(Object *obj)
-{
-    XICSState *xics = XICS(obj);
-
-    xics->ics = ICS(object_new(TYPE_ICS));
-    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
-    xics->ics->icp = xics;
-}
-
-static void xics_spapr_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
-
-    dc->realize = xics_spapr_realize;
-    xsc->set_nr_irqs = xics_set_nr_irqs;
-    xsc->set_nr_servers = xics_set_nr_servers;
-}
-
-static const TypeInfo xics_spapr_info = {
-    .name          = TYPE_XICS_SPAPR,
-    .parent        = TYPE_XICS_COMMON,
-    .instance_size = sizeof(XICSState),
-    .class_size = sizeof(XICSStateClass),
-    .class_init    = xics_spapr_class_init,
-    .instance_init = xics_spapr_initfn,
-};
-
 static void xics_register_types(void)
 {
     type_register_static(&xics_common_info);
-    type_register_static(&xics_spapr_info);
     type_register_static(&ics_info);
     type_register_static(&icp_info);
 }
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
new file mode 100644
index 0000000..820fe79
--- /dev/null
+++ b/hw/intc/xics_spapr.c
@@ -0,0 +1,401 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+#include "hw/hw.h"
+#include "trace.h"
+#include "qemu/timer.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/xics.h"
+#include "qemu/error-report.h"
+#include "qapi/visitor.h"
+
+/*
+ * Guest interfaces
+ */
+
+static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                           target_ulong opcode, target_ulong *args)
+{
+    CPUState *cs = CPU(cpu);
+    target_ulong cppr = args[0];
+
+    icp_set_cppr(spapr->icp, cs->cpu_index, cppr);
+    return H_SUCCESS;
+}
+
+static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                          target_ulong opcode, target_ulong *args)
+{
+    target_ulong server = get_cpu_index_by_dt_id(args[0]);
+    target_ulong mfrr = args[1];
+
+    if (server >= spapr->icp->nr_servers) {
+        return H_PARAMETER;
+    }
+
+    icp_set_mfrr(spapr->icp, server, mfrr);
+    return H_SUCCESS;
+}
+
+static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                           target_ulong opcode, target_ulong *args)
+{
+    CPUState *cs = CPU(cpu);
+    uint32_t xirr = icp_accept(spapr->icp->ss + cs->cpu_index);
+
+    args[0] = xirr;
+    return H_SUCCESS;
+}
+
+static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                             target_ulong opcode, target_ulong *args)
+{
+    CPUState *cs = CPU(cpu);
+    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
+    uint32_t xirr = icp_accept(ss);
+
+    args[0] = xirr;
+    args[1] = cpu_get_host_ticks();
+    return H_SUCCESS;
+}
+
+static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                          target_ulong opcode, target_ulong *args)
+{
+    CPUState *cs = CPU(cpu);
+    target_ulong xirr = args[0];
+
+    icp_eoi(spapr->icp, cs->cpu_index, xirr);
+    return H_SUCCESS;
+}
+
+static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                            target_ulong opcode, target_ulong *args)
+{
+    CPUState *cs = CPU(cpu);
+    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
+
+    args[0] = ss->xirr;
+    args[1] = ss->mfrr;
+
+    return H_SUCCESS;
+}
+
+static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                          uint32_t token,
+                          uint32_t nargs, target_ulong args,
+                          uint32_t nret, target_ulong rets)
+{
+    ICSState *ics = spapr->icp->ics;
+    uint32_t nr, server, priority;
+
+    if ((nargs != 3) || (nret != 1)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    nr = rtas_ld(args, 0);
+    server = get_cpu_index_by_dt_id(rtas_ld(args, 1));
+    priority = rtas_ld(args, 2);
+
+    if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
+        || (priority > 0xff)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    ics_write_xive(ics, nr, server, priority, priority);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                          uint32_t token,
+                          uint32_t nargs, target_ulong args,
+                          uint32_t nret, target_ulong rets)
+{
+    ICSState *ics = spapr->icp->ics;
+    uint32_t nr;
+
+    if ((nargs != 1) || (nret != 3)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    nr = rtas_ld(args, 0);
+
+    if (!ics_valid_irq(ics, nr)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    rtas_st(rets, 1, ics->irqs[nr - ics->offset].server);
+    rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
+}
+
+static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                         uint32_t token,
+                         uint32_t nargs, target_ulong args,
+                         uint32_t nret, target_ulong rets)
+{
+    ICSState *ics = spapr->icp->ics;
+    uint32_t nr;
+
+    if ((nargs != 1) || (nret != 1)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    nr = rtas_ld(args, 0);
+
+    if (!ics_valid_irq(ics, nr)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
+                   ics->irqs[nr - ics->offset].priority);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+                        uint32_t token,
+                        uint32_t nargs, target_ulong args,
+                        uint32_t nret, target_ulong rets)
+{
+    ICSState *ics = spapr->icp->ics;
+    uint32_t nr;
+
+    if ((nargs != 1) || (nret != 1)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    nr = rtas_ld(args, 0);
+
+    if (!ics_valid_irq(ics, nr)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
+                   ics->irqs[nr - ics->offset].saved_priority,
+                   ics->irqs[nr - ics->offset].saved_priority);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void xics_spapr_realize(DeviceState *dev, Error **errp)
+{
+    XICSState *icp = XICS(dev);
+    Error *error = NULL;
+    int i;
+
+    if (!icp->nr_servers) {
+        error_setg(errp, "Number of servers needs to be greater 0");
+        return;
+    }
+
+    /* Registration of global state belongs into realize */
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
+
+    spapr_register_hypercall(H_CPPR, h_cppr);
+    spapr_register_hypercall(H_IPI, h_ipi);
+    spapr_register_hypercall(H_XIRR, h_xirr);
+    spapr_register_hypercall(H_XIRR_X, h_xirr_x);
+    spapr_register_hypercall(H_EOI, h_eoi);
+    spapr_register_hypercall(H_IPOLL, h_ipoll);
+
+    object_property_set_bool(OBJECT(icp->ics), true, "realized", &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+
+    for (i = 0; i < icp->nr_servers; i++) {
+        object_property_set_bool(OBJECT(&icp->ss[i]), true, "realized", &error);
+        if (error) {
+            error_propagate(errp, error);
+            return;
+        }
+    }
+}
+
+static void xics_spapr_initfn(Object *obj)
+{
+    XICSState *xics = XICS(obj);
+
+    xics->ics = ICS(object_new(TYPE_ICS));
+    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
+    xics->ics->icp = xics;
+}
+
+static void xics_spapr_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
+
+    dc->realize = xics_spapr_realize;
+    xsc->set_nr_irqs = xics_set_nr_irqs;
+    xsc->set_nr_servers = xics_set_nr_servers;
+}
+
+static const TypeInfo xics_spapr_info = {
+    .name          = TYPE_XICS_SPAPR,
+    .parent        = TYPE_XICS_COMMON,
+    .instance_size = sizeof(XICSState),
+    .class_size = sizeof(XICSStateClass),
+    .class_init    = xics_spapr_class_init,
+    .instance_init = xics_spapr_initfn,
+};
+
+#define ICS_IRQ_FREE(ics, srcno)   \
+    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
+
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+    int first, i;
+
+    for (first = 0; first < ics->nr_irqs; first += alignnum) {
+        if (num > (ics->nr_irqs - first)) {
+            return -1;
+        }
+        for (i = first; i < first + num; ++i) {
+            if (!ICS_IRQ_FREE(ics, i)) {
+                break;
+            }
+        }
+        if (i == (first + num)) {
+            return first;
+        }
+    }
+
+    return -1;
+}
+
+int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+{
+    ICSState *ics = &icp->ics[src];
+    int irq;
+
+    if (irq_hint) {
+        assert(src == xics_find_source(icp, irq_hint));
+        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+            trace_xics_alloc_failed_hint(src, irq_hint);
+            return -1;
+        }
+        irq = irq_hint;
+    } else {
+        irq = ics_find_free_block(ics, 1, 1);
+        if (irq < 0) {
+            trace_xics_alloc_failed_no_left(src);
+            return -1;
+        }
+        irq += ics->offset;
+    }
+
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+    trace_xics_alloc(src, irq);
+
+    return irq;
+}
+
+/*
+ * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
+ * If align==true, aligns the first IRQ number to num.
+ */
+int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+{
+    int i, first = -1;
+    ICSState *ics = &icp->ics[src];
+
+    assert(src == 0);
+    /*
+     * MSIMessage::data is used for storing VIRQ so
+     * it has to be aligned to num to support multiple
+     * MSI vectors. MSI-X is not affected by this.
+     * The hint is used for the first IRQ, the rest should
+     * be allocated continuously.
+     */
+    if (align) {
+        assert((num == 1) || (num == 2) || (num == 4) ||
+               (num == 8) || (num == 16) || (num == 32));
+        first = ics_find_free_block(ics, num, num);
+    } else {
+        first = ics_find_free_block(ics, num, 1);
+    }
+
+    if (first >= 0) {
+        for (i = first; i < first + num; ++i) {
+            ics_set_irq_type(ics, i, lsi);
+        }
+    }
+    first += ics->offset;
+
+    trace_xics_alloc_block(src, first, num, lsi, align);
+
+    return first;
+}
+
+static void ics_free(ICSState *ics, int srcno, int num)
+{
+    int i;
+
+    for (i = srcno; i < srcno + num; ++i) {
+        if (ICS_IRQ_FREE(ics, i)) {
+            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+        }
+        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+    }
+}
+
+void xics_spapr_free(XICSState *icp, int irq, int num)
+{
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+
+        /* FIXME: implement multiple sources */
+        assert(src == 0);
+
+        trace_xics_ics_free(ics - icp->ics, irq, num);
+        ics_free(ics, irq - ics->offset, num);
+    }
+}
+
+static void xics_spapr_register_types(void)
+{
+    type_register_static(&xics_spapr_info);
+}
+
+type_init(xics_spapr_register_types)
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 8d33dfa..8efff94 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -141,6 +141,12 @@ struct ICSState {
     XICSState *icp;
 };
 
+static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
+{
+    return (nr >= ics->offset)
+        && (nr < (ics->offset + ics->nr_irqs));
+}
+
 struct ICSIRQState {
     uint32_t server;
     uint8_t priority;
@@ -168,4 +174,21 @@ void xics_spapr_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
 
+/* Internal XICS interfaces */
+int get_cpu_index_by_dt_id(int cpu_dt_id);
+
+void icp_set_cppr(XICSState *icp, int server, uint8_t cppr);
+void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr);
+uint32_t icp_accept(ICPState *ss);
+void icp_eoi(XICSState *icp, int server, uint32_t xirr);
+
+void ics_write_xive(ICSState *ics, int nr, int server,
+                    uint8_t priority, uint8_t saved_priority);
+
+void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
+
+void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp);
+void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
+int xics_find_source(XICSState *icp, int irq);
+
 #endif /* __XICS_H__ */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 30/77] ppc/xics: Implement H_IPOLL using an accessor
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (28 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 29/77] ppc/xics: Move SPAPR specific code to a separate file Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 31/77] ppc/xics: Remove unused xics_set_irq_type() Benjamin Herrenschmidt
                   ` (49 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

None of the other presenter functions directly muck with the
internal state, so don't do it in h_ipoll() either.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 8 ++++++++
 hw/intc/xics_spapr.c  | 7 ++++---
 include/hw/ppc/xics.h | 1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 38cacd8..165ff0b 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -271,6 +271,14 @@ uint32_t icp_accept(ICPState *ss)
     return xirr;
 }
 
+uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
+{
+    if (mfrr) {
+        *mfrr = ss->mfrr;
+    }
+    return ss->xirr;
+}
+
 void icp_eoi(XICSState *icp, int server, uint32_t xirr)
 {
     ICPState *ss = icp->ss + server;
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 820fe79..aae1665 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -97,10 +97,11 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
+    uint32_t mfrr;
+    uint32_t xirr = icp_ipoll(spapr->icp->ss + cs->cpu_index, &mfrr);
 
-    args[0] = ss->xirr;
-    args[1] = ss->mfrr;
+    args[0] = xirr;
+    args[1] = mfrr;
 
     return H_SUCCESS;
 }
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 8efff94..ad39c8c 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -180,6 +180,7 @@ int get_cpu_index_by_dt_id(int cpu_dt_id);
 void icp_set_cppr(XICSState *icp, int server, uint8_t cppr);
 void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr);
 uint32_t icp_accept(ICPState *ss);
+uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr);
 void icp_eoi(XICSState *icp, int server, uint32_t xirr);
 
 void ics_write_xive(ICSState *ics, int nr, int server,
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 31/77] ppc/xics: Remove unused xics_set_irq_type()
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (29 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 30/77] ppc/xics: Implement H_IPOLL using an accessor Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-24  3:34   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 32/77] ppc/xics: Replace "icp" with "xics" in most places Benjamin Herrenschmidt
                   ` (48 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 11 -----------
 include/hw/ppc/xics.h |  1 -
 2 files changed, 12 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 165ff0b..197df33 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -678,17 +678,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
         lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
 }
 
-void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
-{
-    int src = xics_find_source(icp, irq);
-    ICSState *ics;
-
-    assert(src >= 0);
-
-    ics = &icp->ics[src];
-    ics_set_irq_type(ics, irq - ics->offset, lsi);
-}
-
 /*
  * XICS
  */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index ad39c8c..8e7998f 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -166,7 +166,6 @@ struct ICSIRQState {
 #define XICS_IRQS_SPAPR               1024
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
-void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
 
 int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
 int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 32/77] ppc/xics: Replace "icp" with "xics" in most places
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (30 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 31/77] ppc/xics: Remove unused xics_set_irq_type() Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-24  3:36   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 33/77] ppc/xics: Make the ICSState a list Benjamin Herrenschmidt
                   ` (47 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The "ICP" is a different object than the "XICS". For historical reasons,
we have a number of places where we name a variable "icp" while it contains
an XICSState pointer. There *is* an ICPState structure too, so this makes
the code really confusing.

This is a mechanical replacement of all those instances to use the name
"xics" instead. There should be no functional change.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c              | 132 ++++++++++++++++++++++----------------------
 hw/intc/xics_kvm.c          |  54 +++++++++---------
 hw/intc/xics_spapr.c        |  56 +++++++++----------
 hw/ppc/spapr.c              |  22 ++++----
 hw/ppc/spapr_events.c       |   8 +--
 hw/ppc/spapr_pci.c          |   8 +--
 hw/ppc/spapr_vio.c          |   2 +-
 include/hw/pci-host/spapr.h |   2 +-
 include/hw/ppc/spapr.h      |   2 +-
 include/hw/ppc/spapr_vio.h  |   2 +-
 include/hw/ppc/xics.h       |   2 +-
 11 files changed, 145 insertions(+), 145 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 197df33..d21471f 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -44,17 +44,17 @@ int get_cpu_index_by_dt_id(int cpu_dt_id)
     return -1;
 }
 
-void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
+void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    ICPState *ss = &icp->ss[cs->cpu_index];
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(icp);
+    ICPState *ss = &xics->ss[cs->cpu_index];
+    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
 
-    assert(cs->cpu_index < icp->nr_servers);
+    assert(cs->cpu_index < xics->nr_servers);
 
     if (info->cpu_setup) {
-        info->cpu_setup(icp, cpu);
+        info->cpu_setup(xics, cpu);
     }
 
     switch (PPC_INPUT(env)) {
@@ -78,21 +78,21 @@ void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
  */
 static void xics_common_reset(DeviceState *d)
 {
-    XICSState *icp = XICS_COMMON(d);
+    XICSState *xics = XICS_COMMON(d);
     int i;
 
-    for (i = 0; i < icp->nr_servers; i++) {
-        device_reset(DEVICE(&icp->ss[i]));
+    for (i = 0; i < xics->nr_servers; i++) {
+        device_reset(DEVICE(&xics->ss[i]));
     }
 
-    device_reset(DEVICE(icp->ics));
+    device_reset(DEVICE(xics->ics));
 }
 
 static void xics_prop_get_nr_irqs(Object *obj, Visitor *v,
                                   void *opaque, const char *name, Error **errp)
 {
-    XICSState *icp = XICS_COMMON(obj);
-    int64_t value = icp->nr_irqs;
+    XICSState *xics = XICS_COMMON(obj);
+    int64_t value = xics->nr_irqs;
 
     visit_type_int(v, &value, name, errp);
 }
@@ -100,8 +100,8 @@ static void xics_prop_get_nr_irqs(Object *obj, Visitor *v,
 static void xics_prop_set_nr_irqs(Object *obj, Visitor *v,
                                   void *opaque, const char *name, Error **errp)
 {
-    XICSState *icp = XICS_COMMON(obj);
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(icp);
+    XICSState *xics = XICS_COMMON(obj);
+    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
     Error *error = NULL;
     int64_t value;
 
@@ -110,23 +110,23 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v,
         error_propagate(errp, error);
         return;
     }
-    if (icp->nr_irqs) {
+    if (xics->nr_irqs) {
         error_setg(errp, "Number of interrupts is already set to %u",
-                   icp->nr_irqs);
+                   xics->nr_irqs);
         return;
     }
 
     assert(info->set_nr_irqs);
-    assert(icp->ics);
-    info->set_nr_irqs(icp, value, errp);
+    assert(xics->ics);
+    info->set_nr_irqs(xics, value, errp);
 }
 
 static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
                                      void *opaque, const char *name,
                                      Error **errp)
 {
-    XICSState *icp = XICS_COMMON(obj);
-    int64_t value = icp->nr_servers;
+    XICSState *xics = XICS_COMMON(obj);
+    int64_t value = xics->nr_servers;
 
     visit_type_int(v, &value, name, errp);
 }
@@ -135,8 +135,8 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
                                      void *opaque, const char *name,
                                      Error **errp)
 {
-    XICSState *icp = XICS_COMMON(obj);
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(icp);
+    XICSState *xics = XICS_COMMON(obj);
+    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
     Error *error = NULL;
     int64_t value;
 
@@ -145,14 +145,14 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
         error_propagate(errp, error);
         return;
     }
-    if (icp->nr_servers) {
+    if (xics->nr_servers) {
         error_setg(errp, "Number of servers is already set to %u",
-                   icp->nr_servers);
+                   xics->nr_servers);
         return;
     }
 
     assert(info->set_nr_servers);
-    info->set_nr_servers(icp, value, errp);
+    info->set_nr_servers(xics, value, errp);
 }
 
 static void xics_common_initfn(Object *obj)
@@ -195,9 +195,9 @@ static void ics_reject(ICSState *ics, int nr);
 static void ics_resend(ICSState *ics);
 static void ics_eoi(ICSState *ics, int nr);
 
-static void icp_check_ipi(XICSState *icp, int server)
+static void icp_check_ipi(XICSState *xics, int server)
 {
-    ICPState *ss = icp->ss + server;
+    ICPState *ss = xics->ss + server;
 
     if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
         return;
@@ -206,7 +206,7 @@ static void icp_check_ipi(XICSState *icp, int server)
     trace_xics_icp_check_ipi(server, ss->mfrr);
 
     if (XISR(ss)) {
-        ics_reject(icp->ics, XISR(ss));
+        ics_reject(xics->ics, XISR(ss));
     }
 
     ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
@@ -214,19 +214,19 @@ static void icp_check_ipi(XICSState *icp, int server)
     qemu_irq_raise(ss->output);
 }
 
-static void icp_resend(XICSState *icp, int server)
+static void icp_resend(XICSState *xics, int server)
 {
-    ICPState *ss = icp->ss + server;
+    ICPState *ss = xics->ss + server;
 
     if (ss->mfrr < CPPR(ss)) {
-        icp_check_ipi(icp, server);
+        icp_check_ipi(xics, server);
     }
-    ics_resend(icp->ics);
+    ics_resend(xics->ics);
 }
 
-void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
+void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
 {
-    ICPState *ss = icp->ss + server;
+    ICPState *ss = xics->ss + server;
     uint8_t old_cppr;
     uint32_t old_xisr;
 
@@ -239,22 +239,22 @@ void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
             ss->xirr &= ~XISR_MASK; /* Clear XISR */
             ss->pending_priority = 0xff;
             qemu_irq_lower(ss->output);
-            ics_reject(icp->ics, old_xisr);
+            ics_reject(xics->ics, old_xisr);
         }
     } else {
         if (!XISR(ss)) {
-            icp_resend(icp, server);
+            icp_resend(xics, server);
         }
     }
 }
 
-void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
+void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr)
 {
-    ICPState *ss = icp->ss + server;
+    ICPState *ss = xics->ss + server;
 
     ss->mfrr = mfrr;
     if (mfrr < CPPR(ss)) {
-        icp_check_ipi(icp, server);
+        icp_check_ipi(xics, server);
     }
 }
 
@@ -279,31 +279,31 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
     return ss->xirr;
 }
 
-void icp_eoi(XICSState *icp, int server, uint32_t xirr)
+void icp_eoi(XICSState *xics, int server, uint32_t xirr)
 {
-    ICPState *ss = icp->ss + server;
+    ICPState *ss = xics->ss + server;
 
     /* Send EOI -> ICS */
     ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
     trace_xics_icp_eoi(server, xirr, ss->xirr);
-    ics_eoi(icp->ics, xirr & XISR_MASK);
+    ics_eoi(xics->ics, xirr & XISR_MASK);
     if (!XISR(ss)) {
-        icp_resend(icp, server);
+        icp_resend(xics, server);
     }
 }
 
-static void icp_irq(XICSState *icp, int server, int nr, uint8_t priority)
+static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority)
 {
-    ICPState *ss = icp->ss + server;
+    ICPState *ss = xics->ss + server;
 
     trace_xics_icp_irq(server, nr, priority);
 
     if ((priority >= CPPR(ss))
         || (XISR(ss) && (ss->pending_priority <= priority))) {
-        ics_reject(icp->ics, nr);
+        ics_reject(xics->ics, nr);
     } else {
         if (XISR(ss)) {
-            ics_reject(icp->ics, XISR(ss));
+            ics_reject(xics->ics, XISR(ss));
         }
         ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
         ss->pending_priority = priority;
@@ -388,7 +388,7 @@ static void resend_msi(ICSState *ics, int srcno)
     if (irq->status & XICS_STATUS_REJECTED) {
         irq->status &= ~XICS_STATUS_REJECTED;
         if (irq->priority != 0xff) {
-            icp_irq(ics->icp, irq->server, srcno + ics->offset,
+            icp_irq(ics->xics, irq->server, srcno + ics->offset,
                     irq->priority);
         }
     }
@@ -402,7 +402,7 @@ static void resend_lsi(ICSState *ics, int srcno)
         && (irq->status & XICS_STATUS_ASSERTED)
         && !(irq->status & XICS_STATUS_SENT)) {
         irq->status |= XICS_STATUS_SENT;
-        icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+        icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
     }
 }
 
@@ -417,7 +417,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int val)
             irq->status |= XICS_STATUS_MASKED_PENDING;
             trace_xics_masked_pending();
         } else  {
-            icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+            icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
         }
     }
 }
@@ -456,7 +456,7 @@ static void write_xive_msi(ICSState *ics, int srcno)
     }
 
     irq->status &= ~XICS_STATUS_MASKED_PENDING;
-    icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+    icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
 }
 
 static void write_xive_lsi(ICSState *ics, int srcno)
@@ -465,7 +465,7 @@ static void write_xive_lsi(ICSState *ics, int srcno)
 }
 
 void ics_write_xive(ICSState *ics, int nr, int server,
-		    uint8_t priority, uint8_t saved_priority)
+                    uint8_t priority, uint8_t saved_priority)
 {
     int srcno = nr - ics->offset;
     ICSIRQState *irq = ics->irqs + srcno;
@@ -541,8 +541,8 @@ static int ics_post_load(ICSState *ics, int version_id)
 {
     int i;
 
-    for (i = 0; i < ics->icp->nr_servers; i++) {
-        icp_resend(ics->icp, i);
+    for (i = 0; i < ics->xics->nr_servers; i++) {
+        icp_resend(ics->xics, i);
     }
 
     return 0;
@@ -642,14 +642,14 @@ static const TypeInfo ics_info = {
 /*
  * Exported functions
  */
-int xics_find_source(XICSState *icp, int irq)
+int xics_find_source(XICSState *xics, int irq)
 {
     int sources = 1;
     int src;
 
     /* FIXME: implement multiple sources */
     for (src = 0; src < sources; ++src) {
-        ICSState *ics = &icp->ics[src];
+        ICSState *ics = &xics->ics[src];
         if (ics_valid_irq(ics, irq)) {
             return src;
         }
@@ -658,12 +658,12 @@ int xics_find_source(XICSState *icp, int irq)
     return -1;
 }
 
-qemu_irq xics_get_qirq(XICSState *icp, int irq)
+qemu_irq xics_get_qirq(XICSState *xics, int irq)
 {
-    int src = xics_find_source(icp, irq);
+    int src = xics_find_source(xics, irq);
 
     if (src >= 0) {
-        ICSState *ics = &icp->ics[src];
+        ICSState *ics = &xics->ics[src];
         return ics->qirqs[irq - ics->offset];
     }
 
@@ -682,23 +682,23 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
  * XICS
  */
 
-void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp)
+void xics_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
 {
-    icp->nr_irqs = icp->ics->nr_irqs = nr_irqs;
+    xics->nr_irqs = xics->ics->nr_irqs = nr_irqs;
 }
 
-void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp)
+void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp)
 {
     int i;
 
-    icp->nr_servers = nr_servers;
+    xics->nr_servers = nr_servers;
 
-    icp->ss = g_malloc0(icp->nr_servers*sizeof(ICPState));
-    for (i = 0; i < icp->nr_servers; i++) {
+    xics->ss = g_malloc0(xics->nr_servers*sizeof(ICPState));
+    for (i = 0; i < xics->nr_servers; i++) {
         char buffer[32];
-        object_initialize(&icp->ss[i], sizeof(icp->ss[i]), TYPE_ICP);
+        object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_ICP);
         snprintf(buffer, sizeof(buffer), "icp[%d]", i);
-        object_property_add_child(OBJECT(icp), buffer, OBJECT(&icp->ss[i]),
+        object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]),
                                   errp);
     }
 }
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 03ae801..7d86157 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -138,7 +138,7 @@ static const TypeInfo icp_kvm_info = {
  */
 static void ics_get_kvm_state(ICSState *ics)
 {
-    KVMXICSState *icpkvm = KVM_XICS(ics->icp);
+    KVMXICSState *xicskvm = KVM_XICS(ics->xics);
     uint64_t state;
     struct kvm_device_attr attr = {
         .flags = 0,
@@ -153,7 +153,7 @@ static void ics_get_kvm_state(ICSState *ics)
 
         attr.attr = i + ics->offset;
 
-        ret = ioctl(icpkvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+        ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
         if (ret != 0) {
             error_report("Unable to retrieve KVM interrupt controller state"
                     " for IRQ %d: %s", i + ics->offset, strerror(errno));
@@ -197,7 +197,7 @@ static void ics_get_kvm_state(ICSState *ics)
 
 static int ics_set_kvm_state(ICSState *ics, int version_id)
 {
-    KVMXICSState *icpkvm = KVM_XICS(ics->icp);
+    KVMXICSState *xicskvm = KVM_XICS(ics->xics);
     uint64_t state;
     struct kvm_device_attr attr = {
         .flags = 0,
@@ -231,7 +231,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
             }
         }
 
-        ret = ioctl(icpkvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
+        ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
         if (ret != 0) {
             error_report("Unable to restore KVM interrupt controller state"
                     " for IRQs %d: %s", i + ics->offset, strerror(errno));
@@ -317,17 +317,17 @@ static const TypeInfo ics_kvm_info = {
 /*
  * XICS-KVM
  */
-static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
+static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
 {
     CPUState *cs;
     ICPState *ss;
-    KVMXICSState *icpkvm = KVM_XICS(icp);
+    KVMXICSState *xicskvm = KVM_XICS(xics);
 
     cs = CPU(cpu);
-    ss = &icp->ss[cs->cpu_index];
+    ss = &xics->ss[cs->cpu_index];
 
-    assert(cs->cpu_index < icp->nr_servers);
-    if (icpkvm->kernel_xics_fd == -1) {
+    assert(cs->cpu_index < xics->nr_servers);
+    if (xicskvm->kernel_xics_fd == -1) {
         abort();
     }
 
@@ -340,13 +340,13 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
         return;
     }
 
-    if (icpkvm->kernel_xics_fd != -1) {
+    if (xicskvm->kernel_xics_fd != -1) {
         int ret;
 
         ss->cs = cs;
 
         ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0,
-                                  icpkvm->kernel_xics_fd, kvm_arch_vcpu_id(cs));
+                                  xicskvm->kernel_xics_fd, kvm_arch_vcpu_id(cs));
         if (ret < 0) {
             error_report("Unable to connect CPU%ld to kernel XICS: %s",
                     kvm_arch_vcpu_id(cs), strerror(errno));
@@ -356,24 +356,24 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
     }
 }
 
-static void xics_kvm_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp)
+static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
 {
-    icp->nr_irqs = icp->ics->nr_irqs = nr_irqs;
+    xics->nr_irqs = xics->ics->nr_irqs = nr_irqs;
 }
 
-static void xics_kvm_set_nr_servers(XICSState *icp, uint32_t nr_servers,
+static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
                                     Error **errp)
 {
     int i;
 
-    icp->nr_servers = nr_servers;
+    xics->nr_servers = nr_servers;
 
-    icp->ss = g_malloc0(icp->nr_servers*sizeof(ICPState));
-    for (i = 0; i < icp->nr_servers; i++) {
+    xics->ss = g_malloc0(xics->nr_servers*sizeof(ICPState));
+    for (i = 0; i < xics->nr_servers; i++) {
         char buffer[32];
-        object_initialize(&icp->ss[i], sizeof(icp->ss[i]), TYPE_KVM_ICP);
+        object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_KVM_ICP);
         snprintf(buffer, sizeof(buffer), "icp[%d]", i);
-        object_property_add_child(OBJECT(icp), buffer, OBJECT(&icp->ss[i]),
+        object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]),
                                   errp);
     }
 }
@@ -389,8 +389,8 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
 static void xics_kvm_realize(DeviceState *dev, Error **errp)
 {
-    KVMXICSState *icpkvm = KVM_XICS(dev);
-    XICSState *icp = XICS_COMMON(dev);
+    KVMXICSState *xicskvm = KVM_XICS(dev);
+    XICSState *xics = XICS_COMMON(dev);
     int i, rc;
     Error *error = NULL;
     struct kvm_create_device xics_create_device = {
@@ -440,17 +440,17 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
         goto fail;
     }
 
-    icpkvm->kernel_xics_fd = xics_create_device.fd;
+    xicskvm->kernel_xics_fd = xics_create_device.fd;
 
-    object_property_set_bool(OBJECT(icp->ics), true, "realized", &error);
+    object_property_set_bool(OBJECT(xics->ics), true, "realized", &error);
     if (error) {
         error_propagate(errp, error);
         goto fail;
     }
 
-    assert(icp->nr_servers);
-    for (i = 0; i < icp->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&icp->ss[i]), true, "realized", &error);
+    assert(xics->nr_servers);
+    for (i = 0; i < xics->nr_servers; i++) {
+        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", &error);
         if (error) {
             error_propagate(errp, error);
             goto fail;
@@ -476,7 +476,7 @@ static void xics_kvm_initfn(Object *obj)
 
     xics->ics = ICS(object_new(TYPE_KVM_ICS));
     object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
-    xics->ics->icp = xics;
+    xics->ics->xics = xics;
 }
 
 static void xics_kvm_class_init(ObjectClass *oc, void *data)
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index aae1665..fb508cd 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -43,7 +43,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     CPUState *cs = CPU(cpu);
     target_ulong cppr = args[0];
 
-    icp_set_cppr(spapr->icp, cs->cpu_index, cppr);
+    icp_set_cppr(spapr->xics, cs->cpu_index, cppr);
     return H_SUCCESS;
 }
 
@@ -53,11 +53,11 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     target_ulong server = get_cpu_index_by_dt_id(args[0]);
     target_ulong mfrr = args[1];
 
-    if (server >= spapr->icp->nr_servers) {
+    if (server >= spapr->xics->nr_servers) {
         return H_PARAMETER;
     }
 
-    icp_set_mfrr(spapr->icp, server, mfrr);
+    icp_set_mfrr(spapr->xics, server, mfrr);
     return H_SUCCESS;
 }
 
@@ -65,7 +65,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    uint32_t xirr = icp_accept(spapr->icp->ss + cs->cpu_index);
+    uint32_t xirr = icp_accept(spapr->xics->ss + cs->cpu_index);
 
     args[0] = xirr;
     return H_SUCCESS;
@@ -75,7 +75,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
+    ICPState *ss = &spapr->xics->ss[cs->cpu_index];
     uint32_t xirr = icp_accept(ss);
 
     args[0] = xirr;
@@ -89,7 +89,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     CPUState *cs = CPU(cpu);
     target_ulong xirr = args[0];
 
-    icp_eoi(spapr->icp, cs->cpu_index, xirr);
+    icp_eoi(spapr->xics, cs->cpu_index, xirr);
     return H_SUCCESS;
 }
 
@@ -98,7 +98,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     CPUState *cs = CPU(cpu);
     uint32_t mfrr;
-    uint32_t xirr = icp_ipoll(spapr->icp->ss + cs->cpu_index, &mfrr);
+    uint32_t xirr = icp_ipoll(spapr->xics->ss + cs->cpu_index, &mfrr);
 
     args[0] = xirr;
     args[1] = mfrr;
@@ -111,7 +111,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->icp->ics;
+    ICSState *ics = spapr->xics->ics;
     uint32_t nr, server, priority;
 
     if ((nargs != 3) || (nret != 1)) {
@@ -123,7 +123,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     server = get_cpu_index_by_dt_id(rtas_ld(args, 1));
     priority = rtas_ld(args, 2);
 
-    if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
+    if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers)
         || (priority > 0xff)) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
@@ -139,7 +139,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->icp->ics;
+    ICSState *ics = spapr->xics->ics;
     uint32_t nr;
 
     if ((nargs != 1) || (nret != 3)) {
@@ -164,7 +164,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t nargs, target_ulong args,
                          uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->icp->ics;
+    ICSState *ics = spapr->xics->ics;
     uint32_t nr;
 
     if ((nargs != 1) || (nret != 1)) {
@@ -190,7 +190,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                         uint32_t nargs, target_ulong args,
                         uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->icp->ics;
+    ICSState *ics = spapr->xics->ics;
     uint32_t nr;
 
     if ((nargs != 1) || (nret != 1)) {
@@ -214,11 +214,11 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
 static void xics_spapr_realize(DeviceState *dev, Error **errp)
 {
-    XICSState *icp = XICS(dev);
+    XICSState *xics = XICS(dev);
     Error *error = NULL;
     int i;
 
-    if (!icp->nr_servers) {
+    if (!xics->nr_servers) {
         error_setg(errp, "Number of servers needs to be greater 0");
         return;
     }
@@ -236,14 +236,14 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
     spapr_register_hypercall(H_EOI, h_eoi);
     spapr_register_hypercall(H_IPOLL, h_ipoll);
 
-    object_property_set_bool(OBJECT(icp->ics), true, "realized", &error);
+    object_property_set_bool(OBJECT(xics->ics), true, "realized", &error);
     if (error) {
         error_propagate(errp, error);
         return;
     }
 
-    for (i = 0; i < icp->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&icp->ss[i]), true, "realized", &error);
+    for (i = 0; i < xics->nr_servers; i++) {
+        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", &error);
         if (error) {
             error_propagate(errp, error);
             return;
@@ -257,7 +257,7 @@ static void xics_spapr_initfn(Object *obj)
 
     xics->ics = ICS(object_new(TYPE_ICS));
     object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
-    xics->ics->icp = xics;
+    xics->ics->xics = xics;
 }
 
 static void xics_spapr_class_init(ObjectClass *oc, void *data)
@@ -303,13 +303,13 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
     return -1;
 }
 
-int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi)
 {
-    ICSState *ics = &icp->ics[src];
+    ICSState *ics = &xics->ics[src];
     int irq;
 
     if (irq_hint) {
-        assert(src == xics_find_source(icp, irq_hint));
+        assert(src == xics_find_source(xics, irq_hint));
         if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
             trace_xics_alloc_failed_hint(src, irq_hint);
             return -1;
@@ -334,10 +334,10 @@ int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
  * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
  * If align==true, aligns the first IRQ number to num.
  */
-int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, bool align)
 {
     int i, first = -1;
-    ICSState *ics = &icp->ics[src];
+    ICSState *ics = &xics->ics[src];
 
     assert(src == 0);
     /*
@@ -373,23 +373,23 @@ static void ics_free(ICSState *ics, int srcno, int num)
 
     for (i = srcno; i < srcno + num; ++i) {
         if (ICS_IRQ_FREE(ics, i)) {
-            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+            trace_xics_ics_free_warn(ics - ics->xics->ics, i + ics->offset);
         }
         memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
     }
 }
 
-void xics_spapr_free(XICSState *icp, int irq, int num)
+void xics_spapr_free(XICSState *xics, int irq, int num)
 {
-    int src = xics_find_source(icp, irq);
+    int src = xics_find_source(xics, irq);
 
     if (src >= 0) {
-        ICSState *ics = &icp->ics[src];
+        ICSState *ics = &xics->ics[src];
 
         /* FIXME: implement multiple sources */
         assert(src == 0);
 
-        trace_xics_ics_free(ics - icp->ics, irq, num);
+        trace_xics_ics_free(ics - xics->ics, irq, num);
         ics_free(ics, irq - ics->offset, num);
     }
 }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index bf94426..d8a84ca 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -113,25 +113,25 @@ static XICSState *try_create_xics(const char *type, int nr_servers,
 static XICSState *xics_system_init(MachineState *machine,
                                    int nr_servers, int nr_irqs)
 {
-    XICSState *icp = NULL;
+    XICSState *xics = NULL;
 
     if (kvm_enabled()) {
         Error *err = NULL;
 
         if (machine_kernel_irqchip_allowed(machine)) {
-            icp = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, &err);
+            xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, &err);
         }
-        if (machine_kernel_irqchip_required(machine) && !icp) {
+        if (machine_kernel_irqchip_required(machine) && !xics) {
             error_report("kernel_irqchip requested but unavailable: %s",
                          error_get_pretty(err));
         }
     }
 
-    if (!icp) {
-        icp = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, &error_abort);
+    if (!xics) {
+        xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, &error_abort);
     }
 
-    return icp;
+    return xics;
 }
 
 static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -1615,7 +1615,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu)
         }
     }
 
-    xics_cpu_setup(spapr->icp, cpu);
+    xics_cpu_setup(spapr->xics, cpu);
 
     qemu_register_reset(spapr_cpu_reset, cpu);
 }
@@ -1765,10 +1765,10 @@ static void ppc_spapr_init(MachineState *machine)
     spapr_alloc_htab(spapr);
 
     /* Set up Interrupt Controller before we create the VCPUs */
-    spapr->icp = xics_system_init(machine,
-                                  DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
-                                               smp_threads),
-                                  XICS_IRQS_SPAPR);
+    spapr->xics = xics_system_init(machine,
+                                   DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
+                                                smp_threads),
+                                   XICS_IRQS_SPAPR);
 
     if (smc->dr_lmb_enabled) {
         spapr_validate_node_memory(machine);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 3b3663e..c06deea 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -383,7 +383,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
 
     rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
 }
 
 static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
@@ -452,7 +452,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
 
     rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
 }
 
 void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
@@ -535,7 +535,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
      * interrupts.
      */
     if (rtas_event_log_contains(mask, true)) {
-        qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
+        qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
     }
 
     return;
@@ -587,7 +587,7 @@ out_no_events:
 void spapr_events_init(sPAPRMachineState *spapr)
 {
     QTAILQ_INIT(&spapr->pending_events);
-    spapr->check_exception_irq = xics_spapr_alloc(spapr->icp, 0, 0, false);
+    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false);
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
     spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 8b613a8..cf3192e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -313,7 +313,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             return;
         }
 
-        xics_spapr_free(spapr->icp, msi->first_irq, msi->num);
+        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
         if (msi_present(pdev)) {
             spapr_msi_setmsg(pdev, 0, false, 0, num);
         }
@@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = xics_spapr_alloc_block(spapr->icp, 0, req_num, false,
+    irq = xics_spapr_alloc_block(spapr->xics, 0, req_num, false,
                            ret_intr_type == RTAS_TYPE_MSI);
     if (!irq) {
         error_report("Cannot allocate MSIs for device %x", config_addr);
@@ -729,7 +729,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
 
     trace_spapr_pci_msi_write(addr, data, irq);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
 }
 
 static const MemoryRegionOps spapr_msi_ops = {
@@ -1360,7 +1360,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < PCI_NUM_PINS; i++) {
         uint32_t irq;
 
-        irq = xics_spapr_alloc_block(spapr->icp, 0, 1, true, false);
+        irq = xics_spapr_alloc_block(spapr->xics, 0, 1, true, false);
         if (!irq) {
             error_setg(errp, "spapr_allocate_lsi failed");
             return;
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 7b718cc..fc731eb 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -462,7 +462,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
         dev->qdev.id = id;
     }
 
-    dev->irq = xics_spapr_alloc(spapr->icp, 0, dev->irq, false);
+    dev->irq = xics_spapr_alloc(spapr->xics, 0, dev->irq, false);
     if (!dev->irq) {
         error_setg(errp, "can't allocate IRQ");
         return;
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 7de5e02..117c1f8 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -120,7 +120,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 
-    return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
+    return xics_get_qirq(spapr->xics, phb->lsi_table[pin].irq);
 }
 
 PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5baa906..c6a0136 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -48,7 +48,7 @@ struct sPAPRMachineState {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
     struct sPAPRNVRAM *nvram;
-    XICSState *icp;
+    XICSState *xics;
     DeviceState *rtc;
 
     void *htab;
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index 2299a54..e10fd52 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -90,7 +90,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 
-    return xics_get_qirq(spapr->icp, dev->irq);
+    return xics_get_qirq(spapr->xics, dev->irq);
 }
 
 static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 8e7998f..e670e89 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -138,7 +138,7 @@ struct ICSState {
     uint32_t offset;
     qemu_irq *qirqs;
     ICSIRQState *irqs;
-    XICSState *icp;
+    XICSState *xics;
 };
 
 static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 33/77] ppc/xics: Make the ICSState a list
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (31 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 32/77] ppc/xics: Replace "icp" with "xics" in most places Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  4:30   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 34/77] ppc/xics: An ICS with offset 0 is assumed to be uninitialized Benjamin Herrenschmidt
                   ` (46 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Instead of an array of fixed-size blocks, use a list, as we will need
to have sources with a variable number of interrupts. SPAPR only uses
a single entry; the native platform will create more. If performance
becomes an issue we can add some hashed lookup, but for now this will
do fine.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 86 +++++++++++++++++++++++++++++++--------------------
 hw/intc/xics_kvm.c    | 28 +++++++++--------
 hw/intc/xics_spapr.c  | 75 ++++++++++++++++++++++++--------------------
 hw/ppc/spapr_events.c |  2 +-
 hw/ppc/spapr_pci.c    |  4 +--
 hw/ppc/spapr_vio.c    |  2 +-
 include/hw/ppc/xics.h | 10 +++---
 7 files changed, 118 insertions(+), 89 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index d21471f..c4ac057 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -79,13 +79,16 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
 static void xics_common_reset(DeviceState *d)
 {
     XICSState *xics = XICS_COMMON(d);
+    ICSState *ics;
     int i;
 
     for (i = 0; i < xics->nr_servers; i++) {
         device_reset(DEVICE(&xics->ss[i]));
     }
 
-    device_reset(DEVICE(xics->ics));
+    QLIST_FOREACH(ics, &xics->ics, list) {
+        device_reset(DEVICE(ics));
+    }
 }
 
 static void xics_prop_get_nr_irqs(Object *obj, Visitor *v,
@@ -117,7 +120,6 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v,
     }
 
     assert(info->set_nr_irqs);
-    assert(xics->ics);
     info->set_nr_irqs(xics, value, errp);
 }
 
@@ -195,33 +197,35 @@ static void ics_reject(ICSState *ics, int nr);
 static void ics_resend(ICSState *ics);
 static void ics_eoi(ICSState *ics, int nr);
 
-static void icp_check_ipi(XICSState *xics, int server)
+static void icp_check_ipi(ICPState *ss, int server)
 {
-    ICPState *ss = xics->ss + server;
-
     if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
         return;
     }
 
     trace_xics_icp_check_ipi(server, ss->mfrr);
 
-    if (XISR(ss)) {
-        ics_reject(xics->ics, XISR(ss));
+    if (XISR(ss) && ss->xirr_owner) {
+        ics_reject(ss->xirr_owner, XISR(ss));
     }
 
     ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
     ss->pending_priority = ss->mfrr;
+    ss->xirr_owner = NULL;
     qemu_irq_raise(ss->output);
 }
 
 static void icp_resend(XICSState *xics, int server)
 {
     ICPState *ss = xics->ss + server;
+    ICSState *ics;
 
     if (ss->mfrr < CPPR(ss)) {
-        icp_check_ipi(xics, server);
+        icp_check_ipi(ss, server);
+    }
+    QLIST_FOREACH(ics, &xics->ics, list) {
+        ics_resend(ics);
     }
-    ics_resend(xics->ics);
 }
 
 void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
@@ -239,7 +243,10 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
             ss->xirr &= ~XISR_MASK; /* Clear XISR */
             ss->pending_priority = 0xff;
             qemu_irq_lower(ss->output);
-            ics_reject(xics->ics, old_xisr);
+            if (ss->xirr_owner) {
+                ics_reject(ss->xirr_owner, old_xisr);
+                ss->xirr_owner = NULL;
+            }
         }
     } else {
         if (!XISR(ss)) {
@@ -254,7 +261,7 @@ void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr)
 
     ss->mfrr = mfrr;
     if (mfrr < CPPR(ss)) {
-        icp_check_ipi(xics, server);
+        icp_check_ipi(ss, server);
     }
 }
 
@@ -265,6 +272,7 @@ uint32_t icp_accept(ICPState *ss)
     qemu_irq_lower(ss->output);
     ss->xirr = ss->pending_priority << 24;
     ss->pending_priority = 0xff;
+    ss->xirr_owner = NULL;
 
     trace_xics_icp_accept(xirr, ss->xirr);
 
@@ -282,30 +290,40 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
 void icp_eoi(XICSState *xics, int server, uint32_t xirr)
 {
     ICPState *ss = xics->ss + server;
+    ICSState *ics;
+    uint32_t irq;
 
     /* Send EOI -> ICS */
     ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
     trace_xics_icp_eoi(server, xirr, ss->xirr);
-    ics_eoi(xics->ics, xirr & XISR_MASK);
+    irq = xirr & XISR_MASK;
+    QLIST_FOREACH(ics, &xics->ics, list) {
+        if (ics_valid_irq(ics, irq)) {
+            ics_eoi(ics, irq);
+        }
+    }
     if (!XISR(ss)) {
         icp_resend(xics, server);
     }
 }
 
-static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority)
+static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
 {
+    XICSState *xics = ics->xics;
     ICPState *ss = xics->ss + server;
 
     trace_xics_icp_irq(server, nr, priority);
 
     if ((priority >= CPPR(ss))
         || (XISR(ss) && (ss->pending_priority <= priority))) {
-        ics_reject(xics->ics, nr);
+        ics_reject(ics, nr);
     } else {
-        if (XISR(ss)) {
-            ics_reject(xics->ics, XISR(ss));
+        if (XISR(ss) && ss->xirr_owner) {
+            ics_reject(ss->xirr_owner, XISR(ss));
+            ss->xirr_owner = NULL;
         }
         ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+        ss->xirr_owner = ics;
         ss->pending_priority = priority;
         trace_xics_icp_raise(ss->xirr, ss->pending_priority);
         qemu_irq_raise(ss->output);
@@ -388,8 +406,7 @@ static void resend_msi(ICSState *ics, int srcno)
     if (irq->status & XICS_STATUS_REJECTED) {
         irq->status &= ~XICS_STATUS_REJECTED;
         if (irq->priority != 0xff) {
-            icp_irq(ics->xics, irq->server, srcno + ics->offset,
-                    irq->priority);
+            icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
         }
     }
 }
@@ -402,7 +419,7 @@ static void resend_lsi(ICSState *ics, int srcno)
         && (irq->status & XICS_STATUS_ASSERTED)
         && !(irq->status & XICS_STATUS_SENT)) {
         irq->status |= XICS_STATUS_SENT;
-        icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
+        icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
     }
 }
 
@@ -417,7 +434,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int val)
             irq->status |= XICS_STATUS_MASKED_PENDING;
             trace_xics_masked_pending();
         } else  {
-            icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
+            icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
         }
     }
 }
@@ -456,7 +473,7 @@ static void write_xive_msi(ICSState *ics, int srcno)
     }
 
     irq->status &= ~XICS_STATUS_MASKED_PENDING;
-    icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
+    icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
 }
 
 static void write_xive_lsi(ICSState *ics, int srcno)
@@ -642,28 +659,23 @@ static const TypeInfo ics_info = {
 /*
  * Exported functions
  */
-int xics_find_source(XICSState *xics, int irq)
+ICSState *xics_find_source(XICSState *xics, int irq)
 {
-    int sources = 1;
-    int src;
+    ICSState *ics;
 
-    /* FIXME: implement multiple sources */
-    for (src = 0; src < sources; ++src) {
-        ICSState *ics = &xics->ics[src];
+    QLIST_FOREACH(ics, &xics->ics, list) {
         if (ics_valid_irq(ics, irq)) {
-            return src;
+            return ics;
         }
     }
-
-    return -1;
+    return NULL;
 }
 
 qemu_irq xics_get_qirq(XICSState *xics, int irq)
 {
-    int src = xics_find_source(xics, irq);
+    ICSState *ics = xics_find_source(xics, irq);
 
-    if (src >= 0) {
-        ICSState *ics = &xics->ics[src];
+    if (ics) {
         return ics->qirqs[irq - ics->offset];
     }
 
@@ -684,7 +696,13 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
 
 void xics_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
 {
-    xics->nr_irqs = xics->ics->nr_irqs = nr_irqs;
+    ICSState *ics = QLIST_FIRST(&xics->ics);
+
+    /* This needs to be deprecated ... */
+    xics->nr_irqs = nr_irqs;
+    if (ics) {
+        ics->nr_irqs = nr_irqs;
+    }
 }
 
 void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 7d86157..a478d25 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -356,11 +356,6 @@ static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
     }
 }
 
-static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
-{
-    xics->nr_irqs = xics->ics->nr_irqs = nr_irqs;
-}
-
 static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
                                     Error **errp)
 {
@@ -391,6 +386,7 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
 {
     KVMXICSState *xicskvm = KVM_XICS(dev);
     XICSState *xics = XICS_COMMON(dev);
+    ICSState *ics;
     int i, rc;
     Error *error = NULL;
     struct kvm_create_device xics_create_device = {
@@ -442,10 +438,12 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
 
     xicskvm->kernel_xics_fd = xics_create_device.fd;
 
-    object_property_set_bool(OBJECT(xics->ics), true, "realized", &error);
-    if (error) {
-        error_propagate(errp, error);
-        goto fail;
+    QLIST_FOREACH(ics, &xics->ics, list) {
+        object_property_set_bool(OBJECT(ics), true, "realized", &error);
+        if (error) {
+            error_propagate(errp, error);
+            goto fail;
+        }
     }
 
     assert(xics->nr_servers);
@@ -473,10 +471,14 @@ fail:
 static void xics_kvm_initfn(Object *obj)
 {
     XICSState *xics = XICS_COMMON(obj);
+    ICSState *ics;
+
+    QLIST_INIT(&xics->ics);
 
-    xics->ics = ICS(object_new(TYPE_KVM_ICS));
-    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
-    xics->ics->xics = xics;
+    ics = ICS(object_new(TYPE_KVM_ICS));
+    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
+    ics->xics = xics;
+    QLIST_INSERT_HEAD(&xics->ics, ics, list);
 }
 
 static void xics_kvm_class_init(ObjectClass *oc, void *data)
@@ -486,7 +488,7 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
 
     dc->realize = xics_kvm_realize;
     xsc->cpu_setup = xics_kvm_cpu_setup;
-    xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
+    xsc->set_nr_irqs = xics_set_nr_irqs;
     xsc->set_nr_servers = xics_kvm_set_nr_servers;
 }
 
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index fb508cd..d75fcf0 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -111,10 +111,10 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->xics->ics;
+    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
     uint32_t nr, server, priority;
 
-    if ((nargs != 3) || (nret != 1)) {
+    if ((nargs != 3) || (nret != 1) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
@@ -139,10 +139,10 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->xics->ics;
+    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
     uint32_t nr;
 
-    if ((nargs != 1) || (nret != 3)) {
+    if ((nargs != 1) || (nret != 3) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
@@ -164,10 +164,10 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t nargs, target_ulong args,
                          uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->xics->ics;
+    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
     uint32_t nr;
 
-    if ((nargs != 1) || (nret != 1)) {
+    if ((nargs != 1) || (nret != 1) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
@@ -190,10 +190,10 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                         uint32_t nargs, target_ulong args,
                         uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = spapr->xics->ics;
+    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
     uint32_t nr;
 
-    if ((nargs != 1) || (nret != 1)) {
+    if ((nargs != 1) || (nret != 1) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
@@ -215,6 +215,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 static void xics_spapr_realize(DeviceState *dev, Error **errp)
 {
     XICSState *xics = XICS(dev);
+    ICSState *ics;
     Error *error = NULL;
     int i;
 
@@ -236,10 +237,12 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
     spapr_register_hypercall(H_EOI, h_eoi);
     spapr_register_hypercall(H_IPOLL, h_ipoll);
 
-    object_property_set_bool(OBJECT(xics->ics), true, "realized", &error);
-    if (error) {
-        error_propagate(errp, error);
-        return;
+    QLIST_FOREACH(ics, &xics->ics, list) {
+        object_property_set_bool(OBJECT(ics), true, "realized", &error);
+        if (error) {
+            error_propagate(errp, error);
+            return;
+        }
     }
 
     for (i = 0; i < xics->nr_servers; i++) {
@@ -254,10 +257,14 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
 static void xics_spapr_initfn(Object *obj)
 {
     XICSState *xics = XICS(obj);
+    ICSState *ics;
+
+    QLIST_INIT(&xics->ics);
 
-    xics->ics = ICS(object_new(TYPE_ICS));
-    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
-    xics->ics->xics = xics;
+    ics = ICS(object_new(TYPE_ICS));    
+    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
+    ics->xics = xics;
+    QLIST_INSERT_HEAD(&xics->ics, ics, list);
 }
 
 static void xics_spapr_class_init(ObjectClass *oc, void *data)
@@ -303,29 +310,31 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
     return -1;
 }
 
-int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi)
+int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi)
 {
-    ICSState *ics = &xics->ics[src];
+    ICSState *ics = QLIST_FIRST(&xics->ics);
     int irq;
 
+    if (!ics) {
+        return -1;
+    }
     if (irq_hint) {
-        assert(src == xics_find_source(xics, irq_hint));
         if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
-            trace_xics_alloc_failed_hint(src, irq_hint);
+            trace_xics_alloc_failed_hint(0, irq_hint);
             return -1;
         }
         irq = irq_hint;
     } else {
         irq = ics_find_free_block(ics, 1, 1);
         if (irq < 0) {
-            trace_xics_alloc_failed_no_left(src);
+            trace_xics_alloc_failed_no_left(0);
             return -1;
         }
         irq += ics->offset;
     }
 
     ics_set_irq_type(ics, irq - ics->offset, lsi);
-    trace_xics_alloc(src, irq);
+    trace_xics_alloc(0, irq);
 
     return irq;
 }
@@ -334,12 +343,15 @@ int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi)
  * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
  * If align==true, aligns the first IRQ number to num.
  */
-int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, bool align)
+int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align)
 {
+    ICSState *ics = QLIST_FIRST(&xics->ics);
     int i, first = -1;
-    ICSState *ics = &xics->ics[src];
 
-    assert(src == 0);
+    if (!ics) {
+        return -1;
+    }
+
     /*
      * MSIMesage::data is used for storing VIRQ so
      * it has to be aligned to num to support multiple
@@ -362,7 +374,7 @@ int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, bool ali
     }
     first += ics->offset;
 
-    trace_xics_alloc_block(src, first, num, lsi, align);
+    trace_xics_alloc_block(0, first, num, lsi, align);
 
     return first;
 }
@@ -373,7 +385,7 @@ static void ics_free(ICSState *ics, int srcno, int num)
 
     for (i = srcno; i < srcno + num; ++i) {
         if (ICS_IRQ_FREE(ics, i)) {
-            trace_xics_ics_free_warn(ics - ics->xics->ics, i + ics->offset);
+            trace_xics_ics_free_warn(0, i + ics->offset);
         }
         memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
     }
@@ -381,15 +393,10 @@ static void ics_free(ICSState *ics, int srcno, int num)
 
 void xics_spapr_free(XICSState *xics, int irq, int num)
 {
-    int src = xics_find_source(xics, irq);
-
-    if (src >= 0) {
-        ICSState *ics = &xics->ics[src];
-
-        /* FIXME: implement multiple sources */
-        assert(src == 0);
+    ICSState *ics = xics_find_source(xics, irq);
 
-        trace_xics_ics_free(ics - xics->ics, irq, num);
+    if (ics) {
+        trace_xics_ics_free(0, irq, num);
         ics_free(ics, irq - ics->offset, num);
     }
 }
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index c06deea..6335ead 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -587,7 +587,7 @@ out_no_events:
 void spapr_events_init(sPAPRMachineState *spapr)
 {
     QTAILQ_INIT(&spapr->pending_events);
-    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false);
+    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, false);
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
     spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index cf3192e..9b13f85 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = xics_spapr_alloc_block(spapr->xics, 0, req_num, false,
+    irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
                            ret_intr_type == RTAS_TYPE_MSI);
     if (!irq) {
         error_report("Cannot allocate MSIs for device %x", config_addr);
@@ -1360,7 +1360,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < PCI_NUM_PINS; i++) {
         uint32_t irq;
 
-        irq = xics_spapr_alloc_block(spapr->xics, 0, 1, true, false);
+        irq = xics_spapr_alloc_block(spapr->xics, 1, true, false);
         if (!irq) {
             error_setg(errp, "spapr_allocate_lsi failed");
             return;
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index fc731eb..1a84815 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -462,7 +462,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
         dev->qdev.id = id;
     }
 
-    dev->irq = xics_spapr_alloc(spapr->xics, 0, dev->irq, false);
+    dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false);
     if (!dev->irq) {
         error_setg(errp, "can't allocate IRQ");
         return;
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index e670e89..12fc584 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -79,7 +79,7 @@ struct XICSState {
     uint32_t nr_servers;
     uint32_t nr_irqs;
     ICPState *ss;
-    ICSState *ics;
+    QLIST_HEAD(, ICSState) ics;
 };
 
 #define TYPE_ICP "icp"
@@ -105,6 +105,7 @@ struct ICPState {
     DeviceState parent_obj;
     /*< public >*/
     CPUState *cs;
+    ICSState *xirr_owner;
     uint32_t xirr;
     uint8_t pending_priority;
     uint8_t mfrr;
@@ -139,6 +140,7 @@ struct ICSState {
     qemu_irq *qirqs;
     ICSIRQState *irqs;
     XICSState *xics;
+    QLIST_ENTRY(ICSState) list;
 };
 
 static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
@@ -167,8 +169,8 @@ struct ICSIRQState {
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
 
-int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
-int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
+int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi);
+int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align);
 void xics_spapr_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
@@ -189,6 +191,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
 
 void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp);
 void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
-int xics_find_source(XICSState *icp, int irq);
+ICSState *xics_find_source(XICSState *icp, int irq);
 
 #endif /* __XICS_H__ */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 34/77] ppc/xics: An ICS with offset 0 is assumed to be uninitialized
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (32 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 33/77] ppc/xics: Make the ICSState a list Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  4:40   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 35/77] ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c Benjamin Herrenschmidt
                   ` (45 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This will make life easier when dealing with dynamically configured
ICSes such as PHB3.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/hw/ppc/xics.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 12fc584..51a63cb 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -145,7 +145,7 @@ struct ICSState {
 
 static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
 {
-    return (nr >= ics->offset)
+    return (ics->offset != 0) && (nr >= ics->offset)
         && (nr < (ics->offset + ics->nr_irqs));
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 35/77] ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (33 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 34/77] ppc/xics: An ICS with offset 0 is assumed to be uninitialized Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  4:46   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 36/77] ppc/xics: Use a helper to add a new ICS Benjamin Herrenschmidt
                   ` (44 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

It will not be used by the native implementation. This also allows us
to remove the include of spapr.h from the common code.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 12 ------------
 hw/intc/xics_kvm.c    | 13 ++++++++++++-
 hw/intc/xics_spapr.c  | 13 ++++++++++++-
 include/hw/ppc/xics.h |  1 -
 4 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index c4ac057..0c355f4 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -28,7 +28,6 @@
 #include "hw/hw.h"
 #include "trace.h"
 #include "qemu/timer.h"
-#include "hw/ppc/spapr.h"
 #include "hw/ppc/xics.h"
 #include "qemu/error-report.h"
 #include "qapi/visitor.h"
@@ -694,17 +693,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
  * XICS
  */
 
-void xics_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
-{
-    ICSState *ics = QLIST_FIRST(&xics->ics);
-
-    /* This needs to be deprecated ... */
-    xics->nr_irqs = nr_irqs;
-    if (ics) {
-        ics->nr_irqs = nr_irqs;
-    }
-}
-
 void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp)
 {
     int i;
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index a478d25..6fbc715 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -468,6 +468,17 @@ fail:
     kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
 }
 
+static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
+{
+    ICSState *ics = QLIST_FIRST(&xics->ics);
+
+    /* This needs to be deprecated ... */
+    xics->nr_irqs = nr_irqs;
+    if (ics) {
+        ics->nr_irqs = nr_irqs;
+    }
+}
+
 static void xics_kvm_initfn(Object *obj)
 {
     XICSState *xics = XICS_COMMON(obj);
@@ -488,7 +499,7 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
 
     dc->realize = xics_kvm_realize;
     xsc->cpu_setup = xics_kvm_cpu_setup;
-    xsc->set_nr_irqs = xics_set_nr_irqs;
+    xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
     xsc->set_nr_servers = xics_kvm_set_nr_servers;
 }
 
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index d75fcf0..3092f8d 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -267,13 +267,24 @@ static void xics_spapr_initfn(Object *obj)
     QLIST_INSERT_HEAD(&xics->ics, ics, list);
 }
 
+static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
+{
+    ICSState *ics = QLIST_FIRST(&xics->ics);
+
+    /* This needs to be deprecated ... */
+    xics->nr_irqs = nr_irqs;
+    if (ics) {
+        ics->nr_irqs = nr_irqs;
+    }
+}
+
 static void xics_spapr_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
 
     dc->realize = xics_spapr_realize;
-    xsc->set_nr_irqs = xics_set_nr_irqs;
+    xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
     xsc->set_nr_servers = xics_set_nr_servers;
 }
 
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 51a63cb..9e5b751 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -189,7 +189,6 @@ void ics_write_xive(ICSState *ics, int nr, int server,
 
 void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
 
-void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp);
 void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
 ICSState *xics_find_source(XICSState *icp, int irq);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 36/77] ppc/xics: Use a helper to add a new ICS
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (34 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 35/77] ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  4:47   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 37/77] ppc/xics: Split ICS into base class and "simple" implementation Benjamin Herrenschmidt
                   ` (43 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 6 ++++++
 hw/intc/xics_spapr.c  | 3 +--
 include/hw/ppc/xics.h | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 0c355f4..3cd696f 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -90,6 +90,12 @@ static void xics_common_reset(DeviceState *d)
     }
 }
 
+void xics_add_ics(XICSState *xics, ICSState *ics)
+{
+    ics->xics = xics;
+    QLIST_INSERT_HEAD(&xics->ics, ics, list);
+}
+
 static void xics_prop_get_nr_irqs(Object *obj, Visitor *v,
                                   void *opaque, const char *name, Error **errp)
 {
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 3092f8d..fc331d8 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -263,8 +263,7 @@ static void xics_spapr_initfn(Object *obj)
 
     ics = ICS(object_new(TYPE_ICS));    
     object_property_add_child(obj, "ics", OBJECT(ics), NULL);
-    ics->xics = xics;
-    QLIST_INSERT_HEAD(&xics->ics, ics, list);
+    xics_add_ics(xics, ics);
 }
 
 static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 9e5b751..5acb329 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -191,5 +191,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
 
 void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
 ICSState *xics_find_source(XICSState *icp, int irq);
+void xics_add_ics(XICSState *xics, ICSState *ics);
 
 #endif /* __XICS_H__ */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 37/77] ppc/xics: Split ICS into base class and "simple" implementation
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (35 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 36/77] ppc/xics: Use a helper to add a new ICS Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  5:13   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 38/77] ppc/xics: Add "native" XICS subclass Benjamin Herrenschmidt
                   ` (42 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The existing implementation becomes the "ics-simple" subclass of ICS,
so there should be no change in behaviour for SPAPR.

This will allow different implementations for the source controllers,
for example the MSI support of PHB3 on POWER8, which uses in-memory
state tables.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c        | 123 ++++++++++++++++++++++++++++++++------------------
 hw/intc/xics_kvm.c    |   2 +-
 hw/intc/xics_spapr.c  |  34 ++++++++------
 include/hw/ppc/xics.h |  12 +++--
 4 files changed, 108 insertions(+), 63 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 3cd696f..4b33e6d 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -198,9 +198,32 @@ static const TypeInfo xics_common_info = {
 #define XISR(ss)   (((ss)->xirr) & XISR_MASK)
 #define CPPR(ss)   (((ss)->xirr) >> 24)
 
-static void ics_reject(ICSState *ics, int nr);
-static void ics_resend(ICSState *ics);
-static void ics_eoi(ICSState *ics, int nr);
+static void ics_reject(ICSState *ics, uint32_t nr)
+{
+    ICSStateClass *k = ICS_GET_CLASS(ics);
+
+    if (k->reject) {
+        k->reject(ics, nr);
+    }
+}
+
+static void ics_resend(ICSState *ics)
+{
+    ICSStateClass *k = ICS_GET_CLASS(ics);
+
+    if (k->resend) {
+        k->resend(ics);
+    }
+}
+
+static void ics_eoi(ICSState *ics, int nr)
+{
+    ICSStateClass *k = ICS_GET_CLASS(ics);
+
+    if (k->eoi) {
+        k->eoi(ics, nr);
+    }
+}
 
 static void icp_check_ipi(ICPState *ss, int server)
 {
@@ -403,7 +426,7 @@ static const TypeInfo icp_info = {
 /*
  * ICS: Source layer
  */
-static void resend_msi(ICSState *ics, int srcno)
+static void ics_simple_resend_msi(ICSState *ics, int srcno)
 {
     ICSIRQState *irq = ics->irqs + srcno;
 
@@ -416,7 +439,7 @@ static void resend_msi(ICSState *ics, int srcno)
     }
 }
 
-static void resend_lsi(ICSState *ics, int srcno)
+static void ics_simple_resend_lsi(ICSState *ics, int srcno)
 {
     ICSIRQState *irq = ics->irqs + srcno;
 
@@ -428,7 +451,7 @@ static void resend_lsi(ICSState *ics, int srcno)
     }
 }
 
-static void set_irq_msi(ICSState *ics, int srcno, int val)
+static void ics_simple_set_irq_msi(ICSState *ics, int srcno, int val)
 {
     ICSIRQState *irq = ics->irqs + srcno;
 
@@ -444,7 +467,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int val)
     }
 }
 
-static void set_irq_lsi(ICSState *ics, int srcno, int val)
+static void ics_simple_set_irq_lsi(ICSState *ics, int srcno, int val)
 {
     ICSIRQState *irq = ics->irqs + srcno;
 
@@ -454,21 +477,21 @@ static void set_irq_lsi(ICSState *ics, int srcno, int val)
     } else {
         irq->status &= ~XICS_STATUS_ASSERTED;
     }
-    resend_lsi(ics, srcno);
+    ics_simple_resend_lsi(ics, srcno);
 }
 
-static void ics_set_irq(void *opaque, int srcno, int val)
+static void ics_simple_set_irq(void *opaque, int srcno, int val)
 {
     ICSState *ics = (ICSState *)opaque;
 
     if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
-        set_irq_lsi(ics, srcno, val);
+        ics_simple_set_irq_lsi(ics, srcno, val);
     } else {
-        set_irq_msi(ics, srcno, val);
+        ics_simple_set_irq_msi(ics, srcno, val);
     }
 }
 
-static void write_xive_msi(ICSState *ics, int srcno)
+static void ics_simple_write_xive_msi(ICSState *ics, int srcno)
 {
     ICSIRQState *irq = ics->irqs + srcno;
 
@@ -481,31 +504,30 @@ static void write_xive_msi(ICSState *ics, int srcno)
     icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
 }
 
-static void write_xive_lsi(ICSState *ics, int srcno)
+static void ics_simple_write_xive_lsi(ICSState *ics, int srcno)
 {
-    resend_lsi(ics, srcno);
+    ics_simple_resend_lsi(ics, srcno);
 }
 
-void ics_write_xive(ICSState *ics, int nr, int server,
-                    uint8_t priority, uint8_t saved_priority)
+void ics_simple_write_xive(ICSState *ics, int srcno, int server,
+                           uint8_t priority, uint8_t saved_priority)
 {
-    int srcno = nr - ics->offset;
     ICSIRQState *irq = ics->irqs + srcno;
 
     irq->server = server;
     irq->priority = priority;
     irq->saved_priority = saved_priority;
 
-    trace_xics_ics_write_xive(nr, srcno, server, priority);
+    trace_xics_ics_write_xive(ics->offset + srcno, srcno, server, priority);
 
     if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
-        write_xive_lsi(ics, srcno);
+        ics_simple_write_xive_lsi(ics, srcno);
     } else {
-        write_xive_msi(ics, srcno);
+        ics_simple_write_xive_msi(ics, srcno);
     }
 }
 
-static void ics_reject(ICSState *ics, int nr)
+static void ics_simple_reject(ICSState *ics, uint32_t nr)
 {
     ICSIRQState *irq = ics->irqs + nr - ics->offset;
 
@@ -514,21 +536,21 @@ static void ics_reject(ICSState *ics, int nr)
     irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
 }
 
-static void ics_resend(ICSState *ics)
+static void ics_simple_resend(ICSState *ics)
 {
     int i;
 
     for (i = 0; i < ics->nr_irqs; i++) {
         /* FIXME: filter by server#? */
         if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
-            resend_lsi(ics, i);
+            ics_simple_resend_lsi(ics, i);
         } else {
-            resend_msi(ics, i);
+            ics_simple_resend_msi(ics, i);
         }
     }
 }
 
-static void ics_eoi(ICSState *ics, int nr)
+static void ics_simple_eoi(ICSState *ics, uint32_t nr)
 {
     int srcno = nr - ics->offset;
     ICSIRQState *irq = ics->irqs + srcno;
@@ -540,9 +562,9 @@ static void ics_eoi(ICSState *ics, int nr)
     }
 }
 
-static void ics_reset(DeviceState *dev)
+static void ics_simple_reset(DeviceState *dev)
 {
-    ICSState *ics = ICS(dev);
+    ICSState *ics = ICS_SIMPLE(dev);
     int i;
     uint8_t flags[ics->nr_irqs];
 
@@ -559,7 +581,7 @@ static void ics_reset(DeviceState *dev)
     }
 }
 
-static int ics_post_load(ICSState *ics, int version_id)
+static int ics_simple_post_load(ICSState *ics, int version_id)
 {
     int i;
 
@@ -592,7 +614,7 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static const VMStateDescription vmstate_ics_irq = {
+static const VMStateDescription vmstate_ics_simple_irq = {
     .name = "ics/irq",
     .version_id = 2,
     .minimum_version_id = 1,
@@ -606,7 +628,7 @@ static const VMStateDescription vmstate_ics_irq = {
     },
 };
 
-static const VMStateDescription vmstate_ics = {
+static const VMStateDescription vmstate_ics_simple = {
     .name = "ics",
     .version_id = 1,
     .minimum_version_id = 1,
@@ -617,48 +639,60 @@ static const VMStateDescription vmstate_ics = {
         VMSTATE_UINT32_EQUAL(nr_irqs, ICSState),
 
         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(irqs, ICSState, nr_irqs,
-                                             vmstate_ics_irq, ICSIRQState),
+                                             vmstate_ics_simple_irq,
+                                             ICSIRQState),
         VMSTATE_END_OF_LIST()
     },
 };
 
-static void ics_initfn(Object *obj)
+static void ics_simple_initfn(Object *obj)
 {
-    ICSState *ics = ICS(obj);
+    ICSState *ics = ICS_SIMPLE(obj);
 
     ics->offset = XICS_IRQ_BASE;
 }
 
-static void ics_realize(DeviceState *dev, Error **errp)
+static void ics_simple_realize(DeviceState *dev, Error **errp)
 {
-    ICSState *ics = ICS(dev);
+    ICSState *ics = ICS_SIMPLE(dev);
 
     if (!ics->nr_irqs) {
         error_setg(errp, "Number of interrupts needs to be greater 0");
         return;
     }
     ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
+    ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
 }
 
-static void ics_class_init(ObjectClass *klass, void *data)
+static void ics_simple_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICSStateClass *isc = ICS_CLASS(klass);
 
-    dc->realize = ics_realize;
-    dc->vmsd = &vmstate_ics;
-    dc->reset = ics_reset;
-    isc->post_load = ics_post_load;
+    dc->realize = ics_simple_realize;
+    dc->vmsd = &vmstate_ics_simple;
+    dc->reset = ics_simple_reset;
+    isc->post_load = ics_simple_post_load;
+    isc->reject = ics_simple_reject;
+    isc->resend = ics_simple_resend;
+    isc->eoi = ics_simple_eoi;
 }
 
+static const TypeInfo ics_simple_info = {
+    .name = TYPE_ICS_SIMPLE,
+    .parent = TYPE_ICS,
+    .instance_size = sizeof(ICSState),
+    .class_init = ics_simple_class_init,
+    .class_size = sizeof(ICSStateClass),
+    .instance_init = ics_simple_initfn,
+};
+
 static const TypeInfo ics_info = {
     .name = TYPE_ICS,
     .parent = TYPE_DEVICE,
+    .abstract = true,
     .instance_size = sizeof(ICSState),
-    .class_init = ics_class_init,
     .class_size = sizeof(ICSStateClass),
-    .instance_init = ics_initfn,
 };
 
 /*
@@ -687,7 +721,7 @@ qemu_irq xics_get_qirq(XICSState *xics, int irq)
     return NULL;
 }
 
-void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+void ics_simple_set_irq_type(ICSState *ics, int srcno, bool lsi)
 {
     assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
 
@@ -718,6 +752,7 @@ void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp)
 static void xics_register_types(void)
 {
     type_register_static(&xics_common_info);
+    type_register_static(&ics_simple_info);
     type_register_static(&ics_info);
     type_register_static(&icp_info);
 }
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 6fbc715..aeae68a 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -309,7 +309,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
 
 static const TypeInfo ics_kvm_info = {
     .name = TYPE_KVM_ICS,
-    .parent = TYPE_ICS,
+    .parent = TYPE_ICS_SIMPLE,
     .instance_size = sizeof(ICSState),
     .class_init = ics_kvm_class_init,
 };
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index fc331d8..f7d444a 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -112,7 +112,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nret, target_ulong rets)
 {
     ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
-    uint32_t nr, server, priority;
+    uint32_t nr, src_no, server, priority;
 
     if ((nargs != 3) || (nret != 1) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -129,7 +129,8 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         return;
     }
 
-    ics_write_xive(ics, nr, server, priority, priority);
+    src_no = nr - ics->offset;
+    ics_simple_write_xive(ics, src_no, server, priority, priority);
 
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
@@ -140,7 +141,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nret, target_ulong rets)
 {
     ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
-    uint32_t nr;
+    uint32_t nr, src_no;
 
     if ((nargs != 1) || (nret != 3) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -155,8 +156,9 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-    rtas_st(rets, 1, ics->irqs[nr - ics->offset].server);
-    rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
+    src_no = nr - ics->offset;
+    rtas_st(rets, 1, ics->irqs[src_no].server);
+    rtas_st(rets, 2, ics->irqs[src_no].priority);
 }
 
 static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
@@ -165,7 +167,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t nret, target_ulong rets)
 {
     ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
-    uint32_t nr;
+    uint32_t nr, src_no;
 
     if ((nargs != 1) || (nret != 1) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -179,8 +181,9 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         return;
     }
 
-    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
-                   ics->irqs[nr - ics->offset].priority);
+    src_no = nr - ics->offset;
+    ics_simple_write_xive(ics, src_no, ics->irqs[src_no].server, 0xff,
+                          ics->irqs[src_no].priority);
 
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
@@ -191,7 +194,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                         uint32_t nret, target_ulong rets)
 {
     ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
-    uint32_t nr;
+    uint32_t nr, src_no;
 
     if ((nargs != 1) || (nret != 1) || !ics) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -205,9 +208,10 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         return;
     }
 
-    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
-                   ics->irqs[nr - ics->offset].saved_priority,
-                   ics->irqs[nr - ics->offset].saved_priority);
+    src_no = nr - ics->offset;
+    ics_simple_write_xive(ics, src_no, ics->irqs[src_no].server,
+                          ics->irqs[src_no].saved_priority,
+                          ics->irqs[src_no].saved_priority);
 
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
@@ -261,7 +265,7 @@ static void xics_spapr_initfn(Object *obj)
 
     QLIST_INIT(&xics->ics);
 
-    ics = ICS(object_new(TYPE_ICS));    
+    ics = ICS(object_new(TYPE_ICS_SIMPLE));
     object_property_add_child(obj, "ics", OBJECT(ics), NULL);
     xics_add_ics(xics, ics);
 }
@@ -343,7 +347,7 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi)
         irq += ics->offset;
     }
 
-    ics_set_irq_type(ics, irq - ics->offset, lsi);
+    ics_simple_set_irq_type(ics, irq - ics->offset, lsi);
     trace_xics_alloc(0, irq);
 
     return irq;
@@ -379,7 +383,7 @@ int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align)
 
     if (first >= 0) {
         for (i = first; i < first + num; ++i) {
-            ics_set_irq_type(ics, i, lsi);
+            ics_simple_set_irq_type(ics, i, lsi);
         }
     }
     first += ics->offset;
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 5acb329..93a627b 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -116,6 +116,9 @@ struct ICPState {
 #define TYPE_ICS "ics"
 #define ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS)
 
+#define TYPE_ICS_SIMPLE "ics-simple"
+#define ICS_SIMPLE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_SIMPLE)
+
 #define TYPE_KVM_ICS "icskvm"
 #define KVM_ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_KVM_ICS)
 
@@ -129,6 +132,9 @@ struct ICSStateClass {
 
     void (*pre_save)(ICSState *s);
     int (*post_load)(ICSState *s, int version_id);
+    void (*reject)(ICSState *s, uint32_t irq);
+    void (*resend)(ICSState *s);
+    void (*eoi)(ICSState *s, uint32_t irq);
 };
 
 struct ICSState {
@@ -184,10 +190,10 @@ uint32_t icp_accept(ICPState *ss);
 uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr);
 void icp_eoi(XICSState *icp, int server, uint32_t xirr);
 
-void ics_write_xive(ICSState *ics, int nr, int server,
-                    uint8_t priority, uint8_t saved_priority);
+void ics_simple_write_xive(ICSState *ics, int nr, int server,
+                           uint8_t priority, uint8_t saved_priority);
 
-void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
+void ics_simple_set_irq_type(ICSState *ics, int srcno, bool lsi);
 
 void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
 ICSState *xics_find_source(XICSState *icp, int irq);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 38/77] ppc/xics: Add "native" XICS subclass
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (36 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 37/77] ppc/xics: Split ICS into base class and "simple" implementation Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  6:28   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-12-01  6:39   ` David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 39/77] ppc/xics: Add xics to the monitor "info pic" command Benjamin Herrenschmidt
                   ` (41 subsequent siblings)
  79 siblings, 2 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This provides MMIO-based ICP access as found on POWER8.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 default-configs/ppc64-softmmu.mak |   3 +-
 hw/intc/Makefile.objs             |   1 +
 hw/intc/xics_native.c             | 294 ++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/xics.h             |  14 ++
 4 files changed, 311 insertions(+), 1 deletion(-)
 create mode 100644 hw/intc/xics_native.c

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 516a6e2..d30176e 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -49,8 +49,9 @@ CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
 # For pSeries
-CONFIG_XICS=$(CONFIG_PSERIES)
+CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV))
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
+CONFIG_XICS_NATIVE=$(CONFIG_POWERNV)
 CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
 # For PReP
 CONFIG_MC146818RTC=y
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index e24cb03..104a169 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -27,6 +27,7 @@ obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
 obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
+obj-$(CONFIG_XICS_NATIVE) += xics_native.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c
new file mode 100644
index 0000000..3f488f3
--- /dev/null
+++ b/hw/intc/xics_native.c
@@ -0,0 +1,294 @@
+/*
+ * QEMU PowerPC hardware System Emulator
+ *
+ * Native version of ICS/ICP
+ *
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+#include "hw/hw.h"
+#include "trace.h"
+#include "qemu/timer.h"
+#include "hw/ppc/xics.h"
+#include "qemu/error-report.h"
+#include "qapi/visitor.h"
+
+#include <libfdt.h>
+
+//#define DEBUG_MM(fmt...)      printf(fmt)
+#define DEBUG_MM(fmt...)        do { } while(0)
+
+static void xics_native_initfn(Object *obj)
+{
+    XICSState *xics = XICS_NATIVE(obj);
+
+    QLIST_INIT(&xics->ics);
+}
+
+static uint64_t icp_mm_read(void *opaque, hwaddr addr, unsigned width)
+{
+    XICSState *s = opaque;
+    int32_t cpu_id, server;
+    uint32_t val;
+    ICPState *ss;
+    bool byte0 = (width == 1 && (addr & 0x3) == 0);
+
+    cpu_id = (addr & (ICP_MM_SIZE - 1)) >> 12;
+    server = get_cpu_index_by_dt_id(cpu_id);
+    if (server < 0) {
+        fprintf(stderr, "XICS: Bad ICP server %d\n", server);
+        goto bad_access;
+    }
+    ss = &s->ss[server];
+
+    switch(addr & 0xffc) {
+    case 0: /* poll */
+        val = icp_ipoll(ss, NULL);
+        if (byte0) {
+            val >>= 24;
+        } else if (width != 4) {
+            goto bad_access;
+        }
+        break;
+    case 4: /* xirr */
+        if (byte0) {
+            val = icp_ipoll(ss, NULL) >> 24;
+        } else if (width == 4) {
+            val = icp_accept(ss);
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 12:
+        if (byte0) {
+            val = ss->mfrr;
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 16:
+        if (width == 4) {
+            val = ss->links[0];
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 20:
+        if (width == 4) {
+            val = ss->links[1];
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 24:
+        if (width == 4) {
+            val = ss->links[2];
+        } else {
+            goto bad_access;
+        }
+        break;
+    default:
+bad_access:
+        fprintf(stderr, "XICS: Bad ICP access %llx/%d\n",
+                (unsigned long long)addr, width);
+        val = 0xffffffff;
+    }
+    DEBUG_MM("icp_mm_read(addr=%016llx,serv=0x%x/%d,off=%d,w=%d,val=0x%08x)\n",
+             (unsigned long long)addr, cpu_id, server, (int)(addr & 0xffc),
+             width, val);
+
+    return val;
+}
+
+static void icp_mm_write(void *opaque, hwaddr addr, uint64_t val,
+                        unsigned width)
+{
+    XICSState *s = opaque;
+    int32_t cpu_id, server;
+    ICPState *ss;
+    bool byte0 = (width == 1 && (addr & 0x3) == 0);
+
+    cpu_id = (addr & (ICP_MM_SIZE - 1)) >> 12;
+    server = get_cpu_index_by_dt_id(cpu_id);
+    if (server < 0) {
+        fprintf(stderr, "XICS: Bad ICP server %d\n", server);
+        goto bad_access;
+    }
+    ss = &s->ss[server];
+
+    DEBUG_MM("icp_mm_write(addr=%016llx,serv=0x%x/%d,off=%d,w=%d,val=0x%08x)\n",
+             (unsigned long long)addr, cpu_id, server,
+             (int)(addr & 0xffc), width, (uint32_t)val);
+
+    switch(addr & 0xffc) {
+    case 4: /* xirr */
+        if (byte0) {
+            icp_set_cppr(s, server, val);
+        } else if (width == 4) {
+            icp_eoi(s, server, val);
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 12:
+        if (byte0) {
+            icp_set_mfrr(s, server, val);
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 16:
+        if (width == 4) {
+            ss->links[0] = val;
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 20:
+        if (width == 4) {
+            ss->links[1] = val;
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 24:
+        if (width == 4) {
+            ss->links[2] = val;
+        } else {
+            goto bad_access;
+        }
+        break;
+    default:
+ bad_access:
+        val = 0xffffffff;
+    }
+}
+
+static const MemoryRegionOps icp_mm_ops = {
+    .read = icp_mm_read,
+    .write = icp_mm_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 4,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+/* Add an ICP node for servers base..base+count-1, one 4K page per server. */
+void xics_create_native_icp_node(XICSState *s, void *fdt,
+                                 uint32_t base, uint32_t count)
+{
+    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
+    uint32_t irange[2], i, rsize;
+    uint64_t addr, *reg;
+    char *name;
+
+    /* Widen before shifting so a large server number cannot wrap at 32 bits */
+    addr = ICP_MM_BASE | ((uint64_t)base << 12);
+    irange[0] = cpu_to_be32(base);
+    irange[1] = cpu_to_be32(count);
+
+    rsize = sizeof(uint64_t) * 2 * count;
+    reg = g_malloc(rsize);
+    for (i = 0; i < count; i++) {
+        reg[i * 2] = cpu_to_be64(addr | ((uint64_t)(base + i) * 0x1000));
+        reg[i * 2 + 1] = cpu_to_be64(0x1000);
+    }
+
+    name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
+    /* interrupt controller */
+    _FDT((fdt_begin_node(fdt, name)));
+    g_free(name);
+
+    _FDT((fdt_property(fdt, "compatible", compat, sizeof(compat))));
+    _FDT((fdt_property(fdt, "reg", reg, rsize)));
+    g_free(reg); /* the tree holds its own copy; this buffer used to leak */
+    _FDT((fdt_property_string(fdt, "device_type",
+                              "PowerPC-External-Interrupt-Presentation")));
+    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
+                       irange, sizeof(irange))));
+    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 1)));
+    _FDT((fdt_property_cell(fdt, "#address-cells", 0)));
+    _FDT((fdt_end_node(fdt)));
+}
+
+static void xics_native_realize(DeviceState *dev, Error **errp)
+{
+    XICSState *s = XICS_NATIVE(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    Error *error = NULL;
+    int i;
+
+    if (!s->nr_servers) {
+        error_setg(errp, "Number of servers needs to be greater 0");
+        return;
+    }
+
+    /* Register MMIO regions */
+    memory_region_init_io(&s->icp_mmio, OBJECT(s), &icp_mm_ops, s, "icp",
+                          ICP_MM_SIZE);
+    sysbus_init_mmio(sbd, &s->icp_mmio);
+    sysbus_mmio_map(sbd, 0, ICP_MM_BASE);
+
+    for (i = 0; i < s->nr_servers; i++) {
+        object_property_set_bool(OBJECT(&s->ss[i]), true, "realized", &error);
+        if (error) {
+            error_propagate(errp, error);
+            return;
+        }
+    }
+}
+
+static void xics_native_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    XICSStateClass *xsc = XICS_NATIVE_CLASS(oc);
+
+    dc->realize = xics_native_realize;
+    xsc->set_nr_servers = xics_set_nr_servers;
+}
+
+static const TypeInfo xics_native_info = {
+    .name          = TYPE_XICS_NATIVE,
+    .parent        = TYPE_XICS_COMMON,
+    .instance_size = sizeof(XICSState),
+    .class_size = sizeof(XICSStateClass),
+    .class_init    = xics_native_class_init,
+    .instance_init = xics_native_initfn,
+};
+
+static void xics_native_register_types(void)
+{
+    type_register_static(&xics_native_info);
+}
+type_init(xics_native_register_types)
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 93a627b..f32f409 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -38,6 +38,9 @@
 #define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
 #define KVM_XICS(obj) OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
 
+#define TYPE_XICS_NATIVE "xics-native"
+#define XICS_NATIVE(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_NATIVE)
+
 #define XICS_COMMON_CLASS(klass) \
      OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
 #define XICS_SPAPR_CLASS(klass) \
@@ -46,6 +49,8 @@
      OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
 #define XICS_SPAPR_GET_CLASS(obj) \
      OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
+#define XICS_NATIVE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE)
 
 #define XICS_IPI        0x2
 #define XICS_BUID       0x1
@@ -80,6 +85,7 @@ struct XICSState {
     uint32_t nr_irqs;
     ICPState *ss;
     QLIST_HEAD(, ICSState) ics;
+    MemoryRegion icp_mmio;
 };
 
 #define TYPE_ICP "icp"
@@ -111,8 +117,13 @@ struct ICPState {
     uint8_t mfrr;
     qemu_irq output;
     bool cap_irq_xics_enabled;
+    uint32_t links[3];
 };
 
+/* This should be an XSCOM BAR ... the size is arbitrary as well */
+#define ICP_MM_BASE     0x0003FFFF80000000
+#define ICP_MM_SIZE     0x0000000010000000
+
 #define TYPE_ICS "ics"
 #define ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS)
 
@@ -181,6 +192,9 @@ void xics_spapr_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
 
+void xics_create_native_icp_node(XICSState *s, void *fdt,
+                                 uint32_t base, uint32_t count);
+
 /* Internal XICS interfaces */
 int get_cpu_index_by_dt_id(int cpu_dt_id);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 39/77] ppc/xics: Add xics to the monitor "info pic" command
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (37 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 38/77] ppc/xics: Add "native" XICS subclass Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  6:32   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 40/77] ppc/pnv: Wire up XICS native with PowerNV platform Benjamin Herrenschmidt
                   ` (40 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Useful to debug interrupt problems.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hmp-commands-info.hx  |  2 ++
 hw/intc/xics.c        | 38 ++++++++++++++++++++++++++++++++++++++
 hw/ppc/ppc.c          | 14 ++++++++++++++
 include/hw/ppc/ppc.h  |  2 ++
 include/hw/ppc/xics.h |  2 ++
 monitor.c             |  3 +++
 6 files changed, 61 insertions(+)

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 9b71351..2f1dc86 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -203,6 +203,8 @@ ETEXI
         .mhandler.cmd = sun4m_hmp_info_pic,
 #elif defined(TARGET_LM32)
         .mhandler.cmd = lm32_hmp_info_pic,
+#elif defined(TARGET_PPC)
+        .mhandler.cmd = ppc_hmp_info_pic,
 #else
         .mhandler.cmd = hmp_info_pic,
 #endif
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 4b33e6d..d027a24 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -31,6 +31,9 @@
 #include "hw/ppc/xics.h"
 #include "qemu/error-report.h"
 #include "qapi/visitor.h"
+#include "monitor/monitor.h"
+
+static XICSState *g_xics;
 
 int get_cpu_index_by_dt_id(int cpu_dt_id)
 {
@@ -170,6 +173,9 @@ static void xics_common_initfn(Object *obj)
     object_property_add(obj, "nr_servers", "int",
                         xics_prop_get_nr_servers, xics_prop_set_nr_servers,
                         NULL, NULL, NULL);
+
+    /* For exclusive use of monitor command */
+    g_xics = XICS_COMMON(obj);
 }
 
 static void xics_common_class_init(ObjectClass *oc, void *data)
@@ -614,6 +620,38 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
     return 0;
 }
 
+/* HMP "info pic": dump each ICP that has an output, then each ICS' IRQs. */
+void xics_hmp_info_pic(Monitor *mon, const QDict *qdict)
+{
+    ICSState *ics;
+    uint32_t i;
+
+    if (!g_xics) { /* ppc_hmp_info_pic() calls us even when no XICS exists */
+        return;
+    }
+    for (i = 0; i < g_xics->nr_servers; i++) {
+        ICPState *icp = &g_xics->ss[i];
+        if (icp->output) {
+            monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
+                           i, icp->xirr, icp->xirr_owner,
+                           icp->pending_priority, icp->mfrr);
+        }
+    }
+    QLIST_FOREACH(ics, &g_xics->ics, list) {
+        monitor_printf(mon, "ICS %4x..%4x %p\n",
+                       ics->offset, ics->offset + ics->nr_irqs - 1, ics);
+        for (i = 0; i < ics->nr_irqs; i++) {
+            ICSIRQState *irq = ics->irqs + i;
+            if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+                continue;
+            }
+            monitor_printf(mon, "  %4x %s %02x %02x\n", ics->offset + i,
+                           (irq->flags & XICS_FLAGS_IRQ_LSI) ? "LSI" : "MSI",
+                           irq->priority, irq->status);
+        }
+    }
+}
+
 static const VMStateDescription vmstate_ics_simple_irq = {
     .name = "ics/irq",
     .version_id = 2,
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 2c604ef..3b14f09 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -24,6 +24,7 @@
 #include "hw/hw.h"
 #include "hw/ppc/ppc.h"
 #include "hw/ppc/ppc_e500.h"
+#include "hw/i386/pc.h"
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/cpus.h"
@@ -35,6 +36,10 @@
 #include "kvm_ppc.h"
 #include "trace.h"
 
+#if defined(TARGET_PPC64)
+#include "hw/ppc/xics.h"
+#endif
+
 //#define PPC_DEBUG_IRQ
 //#define PPC_DEBUG_TB
 
@@ -1337,3 +1342,12 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
 
     return NULL;
 }
+
+void ppc_hmp_info_pic(Monitor *mon, const QDict *qdict)
+{
+    /* Call in turn every PIC around. OpenPIC doesn't have one yet */
+#ifdef TARGET_PPC64
+    xics_hmp_info_pic(mon, qdict);
+#endif
+    hmp_info_pic(mon, qdict);
+}
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 14efd0c..d5c648d 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -1,6 +1,8 @@
 #ifndef HW_PPC_H
 #define HW_PPC_H 1
 
+void ppc_hmp_info_pic(Monitor *mon, const QDict *qdict);
+
 void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level);
 
 /* PowerPC hardware exceptions management helpers */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index f32f409..1cf7037 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -213,4 +213,6 @@ void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
 ICSState *xics_find_source(XICSState *icp, int irq);
 void xics_add_ics(XICSState *xics, ICSState *ics);
 
+void xics_hmp_info_pic(Monitor *mon, const QDict *qdict);
+
 #endif /* __XICS_H__ */
diff --git a/monitor.c b/monitor.c
index 3295840..988477e 100644
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,9 @@
 #include "qapi-event.h"
 #include "qmp-introspect.h"
 #include "sysemu/block-backend.h"
+#if defined(TARGET_PPC)
+#include "hw/ppc/ppc.h"
+#endif
 
 /* for hmp_info_irq/pic */
 #if defined(TARGET_SPARC)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 40/77] ppc/pnv: Wire up XICS native with PowerNV platform
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (38 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 39/77] ppc/xics: Add xics to the monitor "info pic" command Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-12-01  6:41   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC Benjamin Herrenschmidt
                   ` (39 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/pnv.c          | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h  |  2 ++
 include/hw/ppc/xics.h |  2 ++
 3 files changed, 73 insertions(+)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 2eac877..a7a9b0f 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -41,6 +41,7 @@
 #include "hw/ppc/ppc.h"
 #include "hw/ppc/pnv.h"
 #include "hw/loader.h"
+#include "hw/ppc/xics.h"
 #include "hw/ppc/pnv_xscom.h"
 
 #include "exec/address-spaces.h"
@@ -81,6 +82,59 @@ struct sPowerNVMachineState {
     PnvSystem sys;
 };
 
+static XICSState *try_create_xics(const char *type, int nr_servers,
+                                  int nr_irqs, Error **errp)
+{
+    Error *err = NULL;
+    DeviceState *dev;
+
+    dev = qdev_create(NULL, type);
+    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
+    object_property_set_bool(OBJECT(dev), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        object_unparent(OBJECT(dev));
+        return NULL;
+    }
+
+    return XICS_COMMON(dev);
+}
+
+/* Create the machine's XICS (only the emulated native model for now). */
+static XICSState *xics_system_init(int nr_servers, int nr_irqs)
+{
+    XICSState *xics = NULL;
+
+#if 0 /* Some fixing needed to handle native ICS in KVM mode */
+    if (kvm_enabled()) {
+        QemuOpts *machine_opts = qemu_get_machine_opts();
+        bool irqchip_allowed = qemu_opt_get_bool(machine_opts,
+                                                "kernel_irqchip", true);
+        bool irqchip_required = qemu_opt_get_bool(machine_opts,
+                                                  "kernel_irqchip", false);
+        if (irqchip_allowed) {
+            xics = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs,
+                                   &error_abort);
+        }
+        if (irqchip_required && !xics) {
+            error_report("Failed to create in-kernel XICS");
+            abort();
+        }
+    }
+#endif
+
+    if (!xics) {
+        xics = try_create_xics(TYPE_XICS_NATIVE, nr_servers, nr_irqs,
+                               &error_abort);
+    }
+    if (!xics) {
+        /* error_report(), not perror(): errno is unrelated to this failure */
+        error_report("Failed to create XICS");
+        abort();
+    }
+    return xics;
+}
+
 static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                      size_t maxsize)
 {
@@ -366,6 +420,13 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
 
     _FDT((fdt_end_node(fdt)));
 
+    /* ICPs */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        uint32_t base_server = ppc_get_vcpu_dt_id(cpu);
+        xics_create_native_icp_node(sys->xics, fdt, base_server, smt);
+    }
+
     /* Memory */
     _FDT((powernv_populate_memory(fdt)));
 
@@ -451,11 +512,17 @@ static void ppc_powernv_init(MachineState *machine)
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
     PnvSystem *sys = &pnv_machine->sys;
+    XICSState *xics;
     long fw_size;
     char *filename;
     void *fdt;
     int i;
 
+    /* Set up Interrupt Controller before we create the VCPUs */
+    xics = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
+                            XICS_IRQS_POWERNV);
+    sys->xics = xics;
+
     /* init CPUs */
     if (cpu_model == NULL) {
         cpu_model = kvm_enabled() ? "host" : "POWER8";
@@ -475,6 +542,8 @@ static void ppc_powernv_init(MachineState *machine)
         /* MSR[IP] doesn't exist nowadays */
         env->msr_mask &= ~(1 << 6);
 
+        xics_cpu_setup(xics, cpu);
+
         qemu_register_reset(powernv_cpu_reset, cpu);
     }
 
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index cb157eb..80617b4 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -21,6 +21,7 @@
 
 #include "hw/hw.h"
 typedef struct XScomBus XScomBus;
+typedef struct XICSState XICSState;
 
 /* Should we turn that into a QOjb of some sort ? */
 typedef struct PnvChip {
@@ -29,6 +30,7 @@ typedef struct PnvChip {
 } PnvChip;
 
 typedef struct PnvSystem {
+    XICSState *xics;
     uint32_t  num_chips;
 #define PNV_MAX_CHIPS		1
     PnvChip   chips[PNV_MAX_CHIPS];
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 1cf7037..85d2fb9 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -183,6 +183,8 @@ struct ICSIRQState {
 };
 
 #define XICS_IRQS_SPAPR               1024
+#define XICS_IRQS_POWERNV             (1 << 19)
+
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (39 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 40/77] ppc/pnv: Wire up XICS native with PowerNV platform Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-17  0:32   ` Alexey Kardashevskiy
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 42/77] ppc/pnv: Add cut down PSI bridge model and hookup external interrupt Benjamin Herrenschmidt
                   ` (38 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This adds a model of the POWER8 LPC controller. It is then used
by the PowerNV code to attach a UART and RTC, which, with the right
version of OPAL firmware, will provide a working console.

This version of the LPC controller model doesn't yet implement
support for the SerIRQ deserializer present in the Naples version
of the chip, although some preliminary work is already there.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/Makefile.objs |   2 +-
 hw/ppc/pnv.c         |  49 ++++-
 hw/ppc/pnv_lpc.c     | 527 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h |   5 +
 4 files changed, 578 insertions(+), 5 deletions(-)
 create mode 100644 hw/ppc/pnv_lpc.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 2a7dd42..5ebf0e0 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -5,7 +5,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 # IBM PowerNV
-obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o
+obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_lpc.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index a7a9b0f..b4c6dd4 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -44,6 +44,9 @@
 #include "hw/ppc/xics.h"
 #include "hw/ppc/pnv_xscom.h"
 
+#include "hw/isa/isa.h"
+#include "hw/char/serial.h"
+#include "hw/timer/mc146818rtc.h"
 #include "exec/address-spaces.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
@@ -483,7 +486,15 @@ static const VMStateDescription vmstate_powernv = {
     .minimum_version_id = 1,
 };
 
-static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
+static void pnv_lpc_irq_handler_cpld(void *opaque, int n, int level)
+{
+    /* We don't yet emulate the PSI bridge which provides the external
+     * interrupt, so just drop interrupts on the floor
+     */
+}
+
+static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
+                            bool has_lpc, bool has_lpc_irq)
 {
     PnvChip *chip = &sys->chips[chip_no];
 
@@ -496,6 +507,27 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
 
     /* Set up XSCOM bus */
     xscom_create(chip);
+
+    /* Create LPC controller */
+    if (has_lpc) {
+        pnv_lpc_create(chip, has_lpc_irq);
+
+        /* If we don't use the built-in LPC interrupt deserializer, we need
+         * to provide a set of qirqs for the ISA bus or things will go bad.
+         *
+         * Most machines using pre-Naples chips (without said deserializer)
+         * have a CPLD that will collect the SerIRQ and shoot them as a
+         * single level interrupt to the P8 chip. So let's setup a hook
+         * for doing just that.
+         *
+         * Note: The actual interrupt input isn't emulated yet, this will
+         * come with the PSI bridge model.
+         */
+        if (!has_lpc_irq) {
+            isa_bus_irqs(chip->lpc_bus,
+                         qemu_allocate_irqs(pnv_lpc_irq_handler_cpld, NULL, 16));
+        }
+    }
 }
 
 static void ppc_powernv_init(MachineState *machine)
@@ -513,6 +545,7 @@ static void ppc_powernv_init(MachineState *machine)
     sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
     PnvSystem *sys = &pnv_machine->sys;
     XICSState *xics;
+    ISABus *isa_bus;
     long fw_size;
     char *filename;
     void *fdt;
@@ -557,10 +590,18 @@ static void ppc_powernv_init(MachineState *machine)
      */
     sys->num_chips = 1;
 
-    /* Create only one PHB for now until I figure out what's wrong
-     * when I create more (resource assignment failures in Linux)
+    /* Create only one chip for now with an LPC bus
      */
-    pnv_create_chip(sys, 0);
+    pnv_create_chip(sys, 0, true, false);
+
+    /* Grab chip 0's ISA bus */
+    isa_bus = sys->chips[0].lpc_bus;
+
+     /* Create serial port */
+    serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS);
+
+    /* Create an RTC ISA device too */
+    rtc_init(isa_bus, 2000, NULL);
 
     if (bios_name == NULL) {
         bios_name = FW_FILE_NAME;
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
new file mode 100644
index 0000000..2165f24
--- /dev/null
+++ b/hw/ppc/pnv_lpc.c
@@ -0,0 +1,527 @@
+
+/*
+ * QEMU PowerNV LPC bus definitions
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Based on the s390 virtio bus code:
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+#include "monitor/monitor.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+#include "sysemu/device_tree.h"
+#include "kvm_ppc.h"
+
+#include "hw/isa/isa.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/pnv.h"
+
+#include <libfdt.h>
+
+/*
+ * Register selectors of the ECCB XSCOM interface. The XSCOM read and
+ * write handlers of this file dispatch on the low two bits of the XSCOM
+ * offset using these values.
+ */
+enum {
+    ECCB_CTL    = 0,
+    ECCB_RESET  = 1,
+    ECCB_STAT   = 2,
+    ECCB_DATA   = 3,
+};
+
+#define LPCDBG(fmt...) do { } while(0)
+//#define LPCDBG(fmt...) do { printf(fmt); } while(0)
+#define OPBDBG(fmt...) do { } while(0)
+//#define OPBDBG(fmt...) do { printf(fmt); } while(0)
+
+/*
+ * State of one LPC controller instance.
+ *
+ * The device sits on the XSCOM bus (xd) and exposes the ECCB registers
+ * there. ECCB accesses are forwarded to a private OPB address space
+ * (opb_mr/opb_as) which contains windows onto the ISA IO and memory
+ * spaces as well as the OPB master and LPC host controller registers.
+ * The register offset and bit definitions are kept next to the field
+ * that backs them.
+ */
+typedef struct PnvLpcController {
+    XScomDevice xd;
+    /* ECCB status (returned and cleared on read) and data registers */
+    uint64_t eccb_stat_reg;
+    uint32_t eccb_data_reg;
+    /* When set, the controller installs its own ISA IRQ handler at realize */
+    bool has_serirq;
+
+    /* OPB bus */
+    MemoryRegion opb_mr;
+    AddressSpace opb_as;
+    /* ISA IO and Memory space */
+    MemoryRegion isa_io;
+    MemoryRegion isa_mem;
+    ISABus *isa_bus;
+    /* Windows from OPB to ISA (aliases) */
+    MemoryRegion opb_isa_io;
+    MemoryRegion opb_isa_mem;
+    MemoryRegion opb_isa_fw;
+    /* Registers */
+    MemoryRegion lpc_hc_regs;
+    MemoryRegion opb_master_regs;
+
+    /* OPB Master LS registers */
+#define OPB_MASTER_LS_IRQ_STAT  0x50
+#define   OPB_MASTER_IRQ_LPC            0x00000800
+    uint32_t opb_irq_stat;
+#define OPB_MASTER_LS_IRQ_MASK  0x54
+    uint32_t opb_irq_mask;
+#define OPB_MASTER_LS_IRQ_POL   0x58
+    uint32_t opb_irq_pol;
+
+    /* LPC HC registers */
+#define LPC_HC_FW_SEG_IDSEL     0x24
+    uint32_t lpc_hc_fw_seg_idsel;
+#define LPC_HC_FW_RD_ACC_SIZE   0x28
+#define   LPC_HC_FW_RD_1B               0x00000000
+#define   LPC_HC_FW_RD_2B               0x01000000
+#define   LPC_HC_FW_RD_4B               0x02000000
+#define   LPC_HC_FW_RD_16B              0x04000000
+#define   LPC_HC_FW_RD_128B             0x07000000
+    uint32_t lpc_hc_fw_rd_acc_size;
+#define LPC_HC_IRQSER_CTRL      0x30
+#define   LPC_HC_IRQSER_EN              0x80000000
+#define   LPC_HC_IRQSER_QMODE           0x40000000
+#define   LPC_HC_IRQSER_START_MASK      0x03000000
+#define   LPC_HC_IRQSER_START_4CLK      0x00000000
+#define   LPC_HC_IRQSER_START_6CLK      0x01000000
+#define   LPC_HC_IRQSER_START_8CLK      0x02000000
+    uint32_t lpc_hc_irqser_ctrl;
+#define LPC_HC_IRQMASK          0x34    /* same bit defs as LPC_HC_IRQSTAT */
+    uint32_t lpc_hc_irqmask;
+#define LPC_HC_IRQSTAT          0x38
+#define   LPC_HC_IRQ_SERIRQ0            0x80000000 /* all bits down to ... */
+#define   LPC_HC_IRQ_SERIRQ16           0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */
+#define   LPC_HC_IRQ_SERIRQ_ALL         0xffff8000
+#define   LPC_HC_IRQ_LRESET             0x00000400
+#define   LPC_HC_IRQ_SYNC_ABNORM_ERR    0x00000080
+#define   LPC_HC_IRQ_SYNC_NORESP_ERR    0x00000040
+#define   LPC_HC_IRQ_SYNC_NORM_ERR      0x00000020
+#define   LPC_HC_IRQ_SYNC_TIMEOUT_ERR   0x00000010
+#define   LPC_HC_IRQ_SYNC_TARG_TAR_ERR  0x00000008
+#define   LPC_HC_IRQ_SYNC_BM_TAR_ERR    0x00000004
+#define   LPC_HC_IRQ_SYNC_BM0_REQ       0x00000002
+#define   LPC_HC_IRQ_SYNC_BM1_REQ       0x00000001
+    uint32_t lpc_hc_irqstat;
+#define LPC_HC_ERROR_ADDRESS    0x40
+    uint32_t lpc_hc_error_addr;
+
+} PnvLpcController;
+
+#define ISA_IO_SIZE             0x00010000
+#define ISA_MEM_SIZE            0x10000000
+#define LPC_IO_OPB_ADDR         0xd0010000
+#define LPC_IO_OPB_SIZE         0x00010000
+#define LPC_MEM_OPB_ADDR        0xe0010000
+#define LPC_MEM_OPB_SIZE        0x10000000
+#define LPC_FW_OPB_ADDR         0xf0000000
+#define LPC_FW_OPB_SIZE         0x10000000
+
+#define LPC_OPB_REGS_OPB_ADDR   0xc0010000
+#define LPC_OPB_REGS_OPB_SIZE   0x00002000
+#define LPC_HC_REGS_OPB_ADDR    0xc0012000
+#define LPC_HC_REGS_OPB_SIZE    0x00001000
+
+#define TYPE_PNV_LPC_CONTROLLER "pnv-lpc"
+#define PNV_LPC_CONTROLLER(obj) \
+     OBJECT_CHECK(PnvLpcController, (obj), TYPE_PNV_LPC_CONTROLLER)
+
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+/*
+ * Device tree hook of the LPC controller: adds the "#address-cells" (2),
+ * "#size-cells" (1) and an empty "primary" property to the node being
+ * built in fdt.
+ *
+ * Always returns 0: a libfdt failure makes _FDT() print the failing
+ * expression and exit.
+ */
+static int pnv_lpc_devnode(XScomDevice *dev, void *fdt)
+{
+    _FDT((fdt_property_cell(fdt, "#address-cells", 2)));
+    _FDT((fdt_property_cell(fdt, "#size-cells", 1)));
+    _FDT((fdt_property(fdt, "primary", NULL, 0)));
+    return 0;
+}
+
+/*
+ * Read sz bytes at OPB address addr into data.
+ *
+ * Returns true when address_space_rw() returned zero, false otherwise.
+ */
+static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data, int sz)
+{
+    /* XXX Handle access size limits and FW read caching here */
+    const bool ok = !address_space_rw(&lpc->opb_as, addr,
+                                      MEMTXATTRS_UNSPECIFIED, data, sz, false);
+
+    LPCDBG("OPB read @0x%08x, sz=%d data=%02x %02x %02x %02x ok=%d\n",
+           addr, sz, data[0], data[1], data[2], data[3], ok);
+
+    return ok;
+}
+
+/*
+ * Write the sz bytes held in data to OPB address addr.
+ *
+ * Returns true when address_space_rw() returned zero, false otherwise.
+ */
+static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data, int sz)
+{
+    /* XXX Handle access size limits here */
+    const bool ok = !address_space_rw(&lpc->opb_as, addr,
+                                      MEMTXATTRS_UNSPECIFIED, data, sz, true);
+
+    LPCDBG("OPB write @0x%08x, sz=%d data=%02x %02x %02x %02x ok=%d\n",
+           addr, sz, data[0], data[1], data[2], data[3], ok);
+
+    return ok;
+}
+
+/* ECCB control register fields, written as 63 minus the bit number */
+#define ECCB_CTL_READ           (1ull << (63-15))
+#define ECCB_CTL_SZ_LSH         (63-7)
+#define ECCB_CTL_SZ_MASK        (0xfull << ECCB_CTL_SZ_LSH)
+/* No trailing semicolon, so the mask can be used inside any expression */
+#define ECCB_CTL_ADDR_MASK      0xffffffffu
+
+/* ECCB status register fields */
+#define ECCB_STAT_OP_DONE       (1ull << (63-52))
+/* NOTE(review): same bit as ECCB_STAT_OP_DONE - confirm against the spec */
+#define ECCB_STAT_OP_ERR        (1ull << (63-52))
+#define ECCB_STAT_RD_DATA_LSH   (63-37)
+/* 64-bit constant: a 32-bit one loses its upper bits when shifted by 26 */
+#define ECCB_STAT_RD_DATA_MASK  (0xffffffffull << ECCB_STAT_RD_DATA_LSH)
+
+/*
+ * Execute the OPB access described by a write to the ECCB control
+ * register.
+ *
+ * cmd holds the access size, the read/write direction and the 32-bit OPB
+ * address. A read places its data, assembled most significant byte
+ * first, in the read-data field of the status register; a failed read
+ * reports all ones. A write takes its data from the ECCB data register.
+ * ECCB_STAT_OP_DONE is set in every case.
+ *
+ * The size field is 4 bits wide while the bounce buffer holds 4 bytes,
+ * so larger sizes are rejected and handled like a failed access instead
+ * of overrunning the buffer.
+ */
+static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd)
+{
+    /* XXX Check for magic bits at the top, addr size etc... */
+    unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH;
+    uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK;
+    /* Zeroed so that short reads never fold stack garbage into the status */
+    uint8_t data[4] = { 0 };
+    bool valid = sz <= sizeof(data);
+
+    LPCDBG("ECCB cmd: %016llx data: %08x\n",
+           (unsigned long long)cmd, lpc->eccb_data_reg);
+
+    if (!valid) {
+        fprintf(stderr, "ECCB: invalid access size %u @0x%08x\n",
+                sz, (unsigned int)opb_addr);
+    }
+
+    if (cmd & ECCB_CTL_READ) {
+        bool success = valid && opb_read(lpc, opb_addr, data, sz);
+
+        if (success) {
+            lpc->eccb_stat_reg = ECCB_STAT_OP_DONE |
+                    (((uint64_t)data[0]) << 24 |
+                     ((uint64_t)data[1]) << 16 |
+                     ((uint64_t)data[2]) <<  8 |
+                     ((uint64_t)data[3])) << ECCB_STAT_RD_DATA_LSH;
+        } else {
+            lpc->eccb_stat_reg = ECCB_STAT_OP_DONE |
+                    (0xffffffffull << ECCB_STAT_RD_DATA_LSH);
+        }
+    } else {
+        data[0] = lpc->eccb_data_reg >> 24;
+        data[1] = lpc->eccb_data_reg >> 16;
+        data[2] = lpc->eccb_data_reg >>  8;
+        data[3] = lpc->eccb_data_reg;
+
+        LPCDBG("OPB write @0x%08x, sz=%d data=%02x %02x %02x %02x\n",
+               opb_addr, sz, data[0], data[1], data[2], data[3]);
+
+        /* XXX Which error bit (if any) to signal OPB error ? */
+        if (valid) {
+            opb_write(lpc, opb_addr, data, sz);
+        }
+        lpc->eccb_stat_reg = ECCB_STAT_OP_DONE;
+    }
+}
+
+/*
+ * XSCOM read handler of the LPC controller.
+ *
+ * The register is selected by the low two bits of offset. The status
+ * register is cleared once it has been read, the data register is
+ * returned in the upper 32 bits, and the control and reset registers
+ * read as zero. Always returns true.
+ */
+static bool pnv_lpc_xscom_read(XScomDevice *dev, uint32_t range,
+                               uint32_t offset, uint64_t *out_val)
+{
+    PnvLpcController *lpc = PNV_LPC_CONTROLLER(dev);
+    const uint32_t reg = offset & 3;
+
+    if (reg == ECCB_STAT) {
+        *out_val = lpc->eccb_stat_reg;
+        lpc->eccb_stat_reg = 0;
+    } else if (reg == ECCB_DATA) {
+        *out_val = ((uint64_t)lpc->eccb_data_reg) << 32;
+    } else {
+        /* ECCB_CTL and ECCB_RESET */
+        *out_val = 0;
+    }
+    return true;
+}
+
+/*
+ * XSCOM write handler of the LPC controller.
+ *
+ * The register is selected by the low two bits of offset. A write to
+ * the control register runs the ECCB command it describes, a write to
+ * the data register latches the upper 32 bits of val, and writes to the
+ * reset and status registers are ignored. Always returns true.
+ */
+static bool pnv_lpc_xscom_write(XScomDevice *dev, uint32_t range,
+                                uint32_t offset, uint64_t val)
+{
+    PnvLpcController *lpc = PNV_LPC_CONTROLLER(dev);
+    const uint32_t reg = offset & 3;
+
+    if (reg == ECCB_CTL) {
+        pnv_lpc_do_eccb(lpc, val);
+    } else if (reg == ECCB_DATA) {
+        lpc->eccb_data_reg = val >> 32;
+    }
+    /* ECCB_RESET (XXXX) and ECCB_STAT: nothing to do */
+
+    return true;
+}
+
+/*
+ * qemu_irq handler installed on the ISA bus when the controller has a
+ * serial IRQ decoder (has_serirq). opaque is the PnvLpcController, n the
+ * ISA interrupt number and level its new state.
+ *
+ * Not implemented yet: the interrupt is silently dropped.
+ */
+static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
+{
+     /* XXX TODO */
+}
+
+/*
+ * MMIO read handler for the LPC host controller registers.
+ *
+ * Only 4-byte accesses are accepted. An access of any other size, or to
+ * a register that is not implemented, returns all ones.
+ */
+static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+    uint64_t val;
+
+    if (size != 4) {
+        fprintf(stderr, "lpc_hc_read: Invalid size %d\n", size);
+        return 0xfffffffffffffffful;
+    }
+
+    OPBDBG("LPC HC read @0x%08x\n", (unsigned int)addr);
+
+    switch (addr) {
+    case LPC_HC_FW_SEG_IDSEL:
+        val = lpc->lpc_hc_fw_seg_idsel;
+        break;
+    case LPC_HC_FW_RD_ACC_SIZE:
+        val = lpc->lpc_hc_fw_rd_acc_size;
+        break;
+    case LPC_HC_IRQSER_CTRL:
+        val = lpc->lpc_hc_irqser_ctrl;
+        break;
+    case LPC_HC_IRQMASK:
+        val = lpc->lpc_hc_irqmask;
+        break;
+    case LPC_HC_IRQSTAT:
+        val = lpc->lpc_hc_irqstat;
+        break;
+    case LPC_HC_ERROR_ADDRESS:
+        val = lpc->lpc_hc_error_addr;
+        break;
+    default:
+        OPBDBG("LPC HC Unimplemented register !\n");
+        val = 0xfffffffffffffffful;
+        break;
+    }
+    return val;
+}
+
+/*
+ * MMIO write handler for the LPC host controller registers.
+ *
+ * Only 4-byte accesses are accepted; others are reported and dropped.
+ * IRQSTAT clears the bits that are written as one, the error address
+ * register ignores writes, and the other implemented registers latch
+ * the written value. Each case ends with a break so that a write to one
+ * register never modifies the registers handled further down.
+ */
+static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val,
+                         unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    if (size != 4) {
+        fprintf(stderr, "lpc_hc_write: Invalid size %d\n", size);
+        return;
+    }
+
+    OPBDBG("LPC HC write @0x%08x\n", (unsigned int)addr);
+
+    /* XXX Filter out reserved bits */
+
+    switch(addr) {
+    case LPC_HC_FW_SEG_IDSEL:
+        /* XXX Actually figure out how that works as this impacts
+         * memory regions/aliases
+         */
+        lpc->lpc_hc_fw_seg_idsel = val;
+        break;
+    case LPC_HC_FW_RD_ACC_SIZE:
+        lpc->lpc_hc_fw_rd_acc_size = val;
+        break;
+    case LPC_HC_IRQSER_CTRL:
+        lpc->lpc_hc_irqser_ctrl = val;
+        break;
+    case LPC_HC_IRQMASK:
+        lpc->lpc_hc_irqmask = val;
+        break;
+    case LPC_HC_IRQSTAT:
+        /* Write one to clear */
+        lpc->lpc_hc_irqstat &= ~val;
+        break;
+    case LPC_HC_ERROR_ADDRESS:
+        /* Writes are ignored */
+        break;
+    default:
+        OPBDBG("LPC HC Unimplemented register !\n");
+        break;
+    }
+}
+
+/*
+ * Access callbacks of the LPC host controller register region: big
+ * endian, with both the valid and the implemented access sizes fixed to
+ * 4 bytes.
+ */
+static const MemoryRegionOps lpc_hc_ops = {
+    .read = lpc_hc_read,
+    .write = lpc_hc_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+/*
+ * MMIO read handler for the OPB master registers.
+ *
+ * Only 4-byte accesses are accepted. An access of any other size, or to
+ * a register that is not implemented, returns all ones.
+ */
+static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+    uint64_t val;
+
+    if (size != 4) {
+        fprintf(stderr, "opb_master_read: Invalid size %d\n", size);
+        return 0xfffffffffffffffful;
+    }
+
+    OPBDBG("OPB MASTER read @0x%08x\n", (unsigned int)addr);
+
+    if (addr == OPB_MASTER_LS_IRQ_STAT) {
+        val = lpc->opb_irq_stat;
+    } else if (addr == OPB_MASTER_LS_IRQ_MASK) {
+        val = lpc->opb_irq_mask;
+    } else if (addr == OPB_MASTER_LS_IRQ_POL) {
+        val = lpc->opb_irq_pol;
+    } else {
+        OPBDBG("OPB MASTER Unimplemented register !\n");
+        val = 0xfffffffffffffffful;
+    }
+    return val;
+}
+
+/*
+ * MMIO write handler for the OPB master registers.
+ *
+ * Only 4-byte accesses are accepted; others are reported and dropped.
+ * The interrupt status register clears the bits that are written as
+ * one, the mask and polarity registers latch the written value.
+ */
+static void opb_master_write(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    if (size != 4) {
+        fprintf(stderr, "opb_master_write: Invalid size %d\n", size);
+        return;
+    }
+
+    OPBDBG("OPB MASTER write @0x%08x\n", (unsigned int)addr);
+
+    if (addr == OPB_MASTER_LS_IRQ_STAT) {
+        lpc->opb_irq_stat &= ~val;
+    } else if (addr == OPB_MASTER_LS_IRQ_MASK) {
+        /* XXX Filter out reserved bits */
+        lpc->opb_irq_mask = val;
+    } else if (addr == OPB_MASTER_LS_IRQ_POL) {
+        /* XXX Filter out reserved bits */
+        lpc->opb_irq_pol = val;
+    } else {
+        OPBDBG("OPB MASTER Unimplemented register !\n");
+    }
+}
+
+/*
+ * Access callbacks of the OPB master register region: big endian, with
+ * both the valid and the implemented access sizes fixed to 4 bytes.
+ */
+static const MemoryRegionOps opb_master_ops = {
+    .read = opb_master_read,
+    .write = opb_master_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+/*
+ * Realize hook of the LPC controller.
+ *
+ * Sets the fixed XSCOM range, initialises the firmware read access size
+ * register, builds the 4GB OPB address space with its ISA IO, ISA memory
+ * and firmware windows plus the two register regions, and finally
+ * creates the ISA bus. The ISA interrupts are only wired here when the
+ * controller has a serial IRQ decoder (has_serirq).
+ */
+static void pnv_lpc_realize(DeviceState *dev, Error **errp)
+{
+    PnvLpcController *lpc = PNV_LPC_CONTROLLER(dev);
+
+    /* LPC XSCOM address is fixed */
+    lpc->xd.ranges[0].addr = 0xb0020;
+    lpc->xd.ranges[0].size = 4;
+
+    /* Reg inits */
+    lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B;
+
+    /* Create address space and backing MR for the OPB bus */
+    memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull);
+    address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb");
+
+    /* Create ISA IO and Mem space regions which are the root of
+     * the ISA bus (ie, ISA address spaces). We don't create a
+     * separate one for FW which we alias to memory.
+     */
+    memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE);
+    memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE);
+
+    /* Create windows from the OPB space to the ISA space */
+    memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io",
+                             &lpc->isa_io, 0, LPC_IO_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR,
+                                &lpc->opb_isa_io);
+    memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem",
+                             &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR,
+                                &lpc->opb_isa_mem);
+    /* The firmware window is a second alias of the ISA memory space */
+    memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw",
+                             &lpc->isa_mem, 0, LPC_FW_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR,
+                                &lpc->opb_isa_fw);
+
+
+    /* Create MMIO regions for LPC HC and OPB registers */
+    memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops,
+                          lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR,
+                                &lpc->opb_master_regs);
+    memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc,
+                          "lpc-hc", LPC_HC_REGS_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR,
+                                &lpc->lpc_hc_regs);
+
+    /* Instantiate ISA bus */
+    lpc->isa_bus = isa_bus_new(dev, &lpc->isa_mem, &lpc->isa_io);
+
+    /* Not all variants have a working serial irq decoder. If not,
+     * handling of LPC interrupts becomes a platform issue (some
+     * platforms have a CPLD to do it).
+     */
+    if (lpc->has_serirq) {
+        isa_bus_irqs(lpc->isa_bus,
+                     qemu_allocate_irqs(pnv_lpc_isa_irq_handler, lpc, 16));
+    }
+}
+
+/*
+ * Create the LPC controller of a chip on its XSCOM bus and record both
+ * the controller and its ISA bus in the chip.
+ *
+ * has_serirq is stored in the device before qdev_init_nofail() is
+ * called, as the realize hook of this file reads it to decide whether to
+ * wire the ISA interrupts itself.
+ */
+void pnv_lpc_create(PnvChip *chip, bool has_serirq)
+{
+    struct DeviceState *qdev = qdev_create(&chip->xscom->bus,
+                                           TYPE_PNV_LPC_CONTROLLER);
+    PnvLpcController *ctrl = PNV_LPC_CONTROLLER(qdev);
+
+    ctrl->has_serirq = has_serirq;
+    qdev_init_nofail(qdev);
+
+    chip->lpc = ctrl;
+    chip->lpc_bus = ctrl->isa_bus;
+}
+
+/*
+ * Class initialiser: installs the realize hook and the XSCOM callbacks
+ * and sets the device tree name ("isa") and compatible list of the LPC
+ * controller.
+ */
+static void pnv_lpc_class_init(ObjectClass *klass, void *data)
+{
+    static const char *compat[] = { "ibm,power8-lpc", NULL };
+    DeviceClass *devc = DEVICE_CLASS(klass);
+    XScomDeviceClass *xdc = XSCOM_DEVICE_CLASS(klass);
+
+    devc->realize = pnv_lpc_realize;
+
+    /* Device tree description */
+    xdc->dt_name = "isa";
+    xdc->dt_compatible = compat;
+    xdc->devnode = pnv_lpc_devnode;
+
+    /* XSCOM register accessors */
+    xdc->read = pnv_lpc_xscom_read;
+    xdc->write = pnv_lpc_xscom_write;
+}
+
+/* QOM type description: "pnv-lpc" derives from the XSCOM device type */
+static const TypeInfo pnv_lpc_info = {
+    .name          = TYPE_PNV_LPC_CONTROLLER,
+    .parent        = TYPE_XSCOM_DEVICE,
+    .instance_size = sizeof(PnvLpcController),
+    .class_init    = pnv_lpc_class_init,
+};
+
+/* Registers the LPC controller type; hooked up through type_init() below */
+static void pnv_lpc_register_types(void)
+{
+    type_register_static(&pnv_lpc_info);
+}
+
+type_init(pnv_lpc_register_types)
+
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 80617b4..77b809a 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -21,12 +21,16 @@
 
 #include "hw/hw.h"
 typedef struct XScomBus XScomBus;
+typedef struct ISABus ISABus;
+typedef struct PnvLpcController PnvLpcController;
 typedef struct XICSState XICSState;
 
 /* Should we turn that into a QOjb of some sort ? */
 typedef struct PnvChip {
     uint32_t         chip_id;
     XScomBus         *xscom;
+    PnvLpcController *lpc;
+    ISABus           *lpc_bus;
 } PnvChip;
 
 typedef struct PnvSystem {
@@ -36,5 +40,6 @@ typedef struct PnvSystem {
     PnvChip   chips[PNV_MAX_CHIPS];
 } PnvSystem;
 
+extern void pnv_lpc_create(PnvChip *chip, bool has_serirq);
 #endif /* _HW_PNV_LPC_H */
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 42/77] ppc/pnv: Add cut down PSI bridge model and hookup external interrupt
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (40 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 43/77] ppc/pnv: Add OCC model stub with interrupt support Benjamin Herrenschmidt
                   ` (37 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This adds just enough of the PSI bridge to handle the various on-chip
interrupts and the one external interrupt. The rest of PSI has to do with
the link to the IBM FSP service processor, which we don't plan to
emulate (it is not used on OpenPower machines).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/Makefile.objs |   2 +-
 hw/ppc/pnv.c         |  28 ++-
 hw/ppc/pnv_psi.c     | 594 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h |  16 ++
 4 files changed, 632 insertions(+), 8 deletions(-)
 create mode 100644 hw/ppc/pnv_psi.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 5ebf0e0..a5b3ce6 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -5,7 +5,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 # IBM PowerNV
-obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_lpc.o
+obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_lpc.o pnv_psi.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index b4c6dd4..a10fa60 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -488,9 +488,22 @@ static const VMStateDescription vmstate_powernv = {
 
 static void pnv_lpc_irq_handler_cpld(void *opaque, int n, int level)
 {
-    /* We don't yet emulate the PSI bridge which provides the external
-     * interrupt, so just drop interrupts on the floor
-     */
+#define MAX_ISA_IRQ 16
+    static uint32_t irqstate;
+    uint32_t old_state = irqstate;
+    PnvPsiController *psi = opaque;
+
+    if (n >= MAX_ISA_IRQ) {
+        return;
+    }
+    if (level) {
+        irqstate |= 1u << n;
+    } else {
+        irqstate &= ~(1u << n);
+    }
+    if (irqstate != old_state) {
+        pnv_psi_irq_set(psi, PSIHB_IRQ_EXTERNAL, irqstate != 0);
+    }
 }
 
 static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
@@ -508,6 +521,9 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
     /* Set up XSCOM bus */
     xscom_create(chip);
 
+    /* Create PSI */
+    pnv_psi_create(chip, sys->xics);
+
     /* Create LPC controller */
     if (has_lpc) {
         pnv_lpc_create(chip, has_lpc_irq);
@@ -519,13 +535,11 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
          * have a CPLD that will collect the SerIRQ and shoot them as a
          * single level interrupt to the P8 chip. So let's setup a hook
          * for doing just that.
-         *
-         * Note: The actual interrupt input isn't emulated yet, this will
-         * come with the PSI bridge model.
          */
         if (!has_lpc_irq) {
             isa_bus_irqs(chip->lpc_bus,
-                         qemu_allocate_irqs(pnv_lpc_irq_handler_cpld, NULL, 16));
+                         qemu_allocate_irqs(pnv_lpc_irq_handler_cpld,
+                                            chip->psi, 16));
         }
     }
 }
diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
new file mode 100644
index 0000000..5c7ae42
--- /dev/null
+++ b/hw/ppc/pnv_psi.c
@@ -0,0 +1,594 @@
+
+/*
+ * QEMU PowerNV Limited PSI interface
+ *
+ * Copyright 2015 IBM Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+#include "monitor/monitor.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+#include "sysemu/device_tree.h"
+#include "kvm_ppc.h"
+#include "exec/address-spaces.h"
+
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/xics.h"
+#include "hw/ppc/pnv.h"
+
+#include <libfdt.h>
+
+//#define PSIDBG(fmt...) printf("PSI "fmt)
+#define PSIDBG(fmt...) do { } while(0)
+
+#define PSIHB_BAR_SIZE          0x100000ull
+
+#define PSIHB_XSCOM_FIR_RW      0x00
+#define PSIHB_XSCOM_FIR_AND     0x01
+#define PSIHB_XSCOM_FIR_OR      0x02
+#define PSIHB_XSCOM_FIRMASK_RW  0x03
+#define PSIHB_XSCOM_FIRMASK_AND 0x04
+#define PSIHB_XSCOM_FIRMASK_OR  0x05
+#define PSIHB_XSCOM_FIRACT0     0x06
+#define PSIHB_XSCOM_FIRACT1     0x07
+#define PSIHB_XSCOM_BAR         0x0a
+#define   PSIHB_BAR_EN                  0x0000000000000001ull
+#define PSIHB_XSCOM_FSPBAR      0x0b
+#define PSIHB_XSCOM_CR          0x0e
+#define   PSIHB_CR_FSP_CMD_ENABLE       0x8000000000000000ull
+#define   PSIHB_CR_FSP_MMIO_ENABLE      0x4000000000000000ull
+#define   PSIHB_CR_FSP_IRQ_ENABLE       0x1000000000000000ull
+#define   PSIHB_CR_FSP_ERR_RSP_ENABLE   0x0800000000000000ull
+#define   PSIHB_CR_PSI_LINK_ENABLE      0x0400000000000000ull
+#define   PSIHB_CR_FSP_RESET            0x0200000000000000ull
+#define   PSIHB_CR_PSIHB_RESET          0x0100000000000000ull
+#define   PSIHB_CR_PSI_IRQ              0x0000800000000000ull
+#define   PSIHB_CR_FSP_IRQ              0x0000400000000000ull
+#define   PSIHB_CR_FSP_LINK_ACTIVE      0x0000200000000000ull
+          /* and more ... */
+#define PSIHB_XSCOM_SEMR        0x0f
+#define PSIHB_XSCOM_XIVR_PSI    0x10
+#define   PSIHB_XIVR_SERVER_SH  40
+#define   PSIHB_XIVR_SERVER_MSK (0xffffull << PSIHB_XIVR_SERVER_SH)
+#define   PSIHB_XIVR_PRIO_SH    32
+#define   PSIHB_XIVR_PRIO_MSK   (0xffull << PSIHB_XIVR_PRIO_SH)
+#define   PSIHB_XIVR_SRC_SH             29
+#define   PSIHB_XIVR_SRC_MSK    (0x7ull << PSIHB_XIVR_SRC_SH)
+#define   PSIHB_XIVR_PENDING    0x01000000ull
+#define PSIHB_XSCOM_SCR         0x12
+#define PSIHB_XSCOM_CCR         0x13
+#define PSIHB_XSCOM_DMA_UPADD   0x14
+#define PSIHB_XSCOM_IRQ_STAT    0x15
+#define  PSIHB_IRQ_STAT_OCC             0x0000001000000000ull
+#define  PSIHB_IRQ_STAT_FSI             0x0000000800000000ull
+#define  PSIHB_IRQ_STAT_LPCI2C          0x0000000400000000ull
+#define  PSIHB_IRQ_STAT_LOCERR          0x0000000200000000ull
+#define  PSIHB_IRQ_STAT_EXT             0x0000000100000000ull
+#define PSIHB_XSCOM_XIVR_OCC    0x16
+#define PSIHB_XSCOM_XIVR_FSI    0x17
+#define PSIHB_XSCOM_XIVR_LPCI2C 0x18
+#define PSIHB_XSCOM_XIVR_LOCERR 0x19
+#define PSIHB_XSCOM_XIVR_EXT    0x1a
+#define PSIHB_XSCOM_IRSN        0x1b
+#define   PSIHB_IRSN_COMP_SH            45
+#define   PSIHB_IRSN_COMP_MSK           (0x7ffffull << PSIHB_IRSN_COMP_SH)
+#define   PSIHB_IRSN_IRQ_MUX            0x0000000800000000ull
+#define   PSIHB_IRSN_IRQ_RESET          0x0000000400000000ull
+#define   PSIHB_IRSN_DOWNSTREAM_EN      0x0000000200000000ull
+#define   PSIHB_IRSN_UPSTREAM_EN        0x0000000100000000ull
+#define   PSIHB_IRSN_COMPMASK_SH        13
+#define   PSIHB_IRSN_COMPMASK_MSK       (0x7ffffull << PSIHB_IRSN_COMPMASK_SH)
+#define PSIHB_XSCOM_MAX         0x20
+
+#define PSIHB_MMIO_BAR          0x00
+#define PSIHB_MMIO_FSPBAR       0x08
+#define PSIHB_MMIO_CR           0x20
+#define PSIHB_MMIO_SEMR         0x28
+#define PSIHB_MMIO_XIVR_PSI     0x30
+#define PSIHB_MMIO_SCR          0x40
+#define PSIHB_MMIO_CCR          0x48
+#define PSIHB_MMIO_DMA_UPADD    0x50
+#define PSIHB_MMIO_IRQ_STAT     0x58
+#define PSIHB_MMIO_XIVR_OCC     0x60
+#define PSIHB_MMIO_XIVR_FSI     0x68
+#define PSIHB_MMIO_XIVR_LPCI2C  0x70
+#define PSIHB_MMIO_XIVR_LOCERR  0x78
+#define PSIHB_MMIO_XIVR_EXT     0x80
+#define PSIHB_MMIO_IRSN         0x88
+#define PSIHB_MMIO_MAX          0x100
+
+/*
+ * State of the cut down PSI host bridge model: an XSCOM device with an
+ * MMIO register window (regs_mr) and a register file indexed by XSCOM
+ * register number. Interrupts are delivered through the qirqs of ics.
+ */
+struct PnvPsiController {
+    XScomDevice xd;    
+    MemoryRegion regs_mr;
+
+    /* FSP region not supported */
+    /* MemoryRegion fsp_mr; */
+
+    /* Interrupt generation */
+    XICSState *xics;
+    ICSState *ics;
+
+    /* Registers */    
+    uint64_t regs[PSIHB_XSCOM_MAX];
+};
+
+#define TYPE_PNV_PSI_CONTROLLER "pnv-psi"
+#define PNV_PSI_CONTROLLER(obj) \
+     OBJECT_CHECK(PnvPsiController, (obj), TYPE_PNV_PSI_CONTROLLER)
+
+/*
+ * Translation from an MMIO register offset, divided by 8, to the XSCOM
+ * number of the same register. Offsets without an entry are left at 0,
+ * which is also the value of PSIHB_XSCOM_FIR_RW.
+ */
+static const uint32_t psi_mmio_to_xscom[PSIHB_MMIO_MAX/8] = {
+        [PSIHB_MMIO_BAR/8]         = PSIHB_XSCOM_BAR,
+        [PSIHB_MMIO_FSPBAR/8]      = PSIHB_XSCOM_FSPBAR,
+        [PSIHB_MMIO_CR/8]          = PSIHB_XSCOM_CR,
+        [PSIHB_MMIO_SCR/8]         = PSIHB_XSCOM_SCR,
+        [PSIHB_MMIO_CCR/8]         = PSIHB_XSCOM_CCR,
+        [PSIHB_MMIO_SEMR/8]        = PSIHB_XSCOM_SEMR,
+        [PSIHB_MMIO_XIVR_PSI/8]    = PSIHB_XSCOM_XIVR_PSI,
+        [PSIHB_MMIO_XIVR_OCC/8]    = PSIHB_XSCOM_XIVR_OCC,
+        [PSIHB_MMIO_XIVR_FSI/8]    = PSIHB_XSCOM_XIVR_FSI,
+        [PSIHB_MMIO_XIVR_LPCI2C/8] = PSIHB_XSCOM_XIVR_LPCI2C,
+        [PSIHB_MMIO_XIVR_LOCERR/8] = PSIHB_XSCOM_XIVR_LOCERR,
+        [PSIHB_MMIO_XIVR_EXT/8]    = PSIHB_XSCOM_XIVR_EXT,
+        [PSIHB_MMIO_IRQ_STAT/8]    = PSIHB_XSCOM_IRQ_STAT,
+        [PSIHB_MMIO_DMA_UPADD/8]   = PSIHB_XSCOM_DMA_UPADD,
+        [PSIHB_MMIO_IRSN/8]        = PSIHB_XSCOM_IRSN,
+};
+
+/*
+ * Handle a write to the PSI host bridge BAR: latch the address and
+ * enable bits of the new value and move the register window in system
+ * memory accordingly.
+ */
+static void pnv_psi_set_bar(PnvPsiController *psi, uint64_t bar)
+{
+    MemoryRegion *root = get_system_memory();
+    const bool was_mapped = (psi->regs[PSIHB_XSCOM_BAR] & PSIHB_BAR_EN) != 0;
+    const bool want_mapped = (bar & PSIHB_BAR_EN) != 0;
+
+    psi->regs[PSIHB_XSCOM_BAR] = bar & 0x0003fffffff00001;
+
+    /* Always drop the previous mapping before installing the new one */
+    if (was_mapped) {
+        memory_region_del_subregion(root, &psi->regs_mr);
+    }
+    if (want_mapped) {
+        memory_region_add_subregion(root, bar & 0x0003fffffff00000,
+                                    &psi->regs_mr);
+    }
+}
+
+/*
+ * Called when the FSP MMIO enable bit of the control register changes.
+ * Does nothing for now as the FSP BAR is not supported by this model.
+ */
+static void pnv_psi_update_fsp_mr(PnvPsiController *psi)
+{
+    /* XXX Update FSP MR if/when we support FSP BAR */
+}
+
+/*
+ * Handle a write to the PSI control register: latch the masked value
+ * and refresh the FSP memory region when the FSP MMIO enable bit
+ * changed.
+ *
+ * NOTE(review): the write mask below does not include
+ * PSIHB_CR_FSP_MMIO_ENABLE, so the stored bit never changes and the
+ * refresh looks unreachable - confirm which bits are meant to be
+ * writable.
+ */
+static void pnv_psi_set_cr(PnvPsiController *psi, uint64_t cr)
+{
+    const uint64_t prev = psi->regs[PSIHB_XSCOM_CR];
+    const uint64_t next = cr & 0x0003ffff00000000;
+
+    psi->regs[PSIHB_XSCOM_CR] = next;
+
+    /* Check some bit changes */
+    if ((prev ^ next) & PSIHB_CR_FSP_MMIO_ENABLE) {
+        pnv_psi_update_fsp_mr(psi);
+    }
+}
+
+/*
+ * Handle a write to the interrupt source number register.
+ *
+ * The compare value and the control bits are latched so that they read
+ * back, and the compare value becomes the interrupt offset of the ICS.
+ */
+static void pnv_psi_set_irsn(PnvPsiController *psi, uint64_t val)
+{
+    uint32_t offset;
+
+    /* In this model we ignore the up/down enable bits for now
+     * as SW doesn't use them (other than setting them at boot).
+     * We ignore IRQ_MUX, its meaning isn't clear and we don't use
+     * it and finally we ignore reset (XXX fix that ?)
+     *
+     * All of these bits are still stored: each control bit is listed
+     * once, rather than DOWNSTREAM_EN three times.
+     */
+    psi->regs[PSIHB_XSCOM_IRSN] = val & (PSIHB_IRSN_COMP_MSK |
+                                         PSIHB_IRSN_IRQ_MUX |
+                                         PSIHB_IRSN_IRQ_RESET |
+                                         PSIHB_IRSN_DOWNSTREAM_EN |
+                                         PSIHB_IRSN_UPSTREAM_EN);
+
+    /* We ignore the compare mask as well, our ICS emulation is too
+     * simplistic to make any use of it, and we extract the offset
+     * from the compare value
+     */
+    offset = (val & PSIHB_IRSN_COMP_MSK) >> PSIHB_IRSN_COMP_SH;
+    psi->ics->offset = offset;
+    PSIDBG("Interrupt offset=0x%x\n", offset);
+}
+
+/*
+ * Look up the registers that describe an interrupt source.
+ *
+ * On success returns true and fills in the XIVR register number, the
+ * number of the register holding the status bit, and that status bit.
+ * For an unknown source returns false and leaves the outputs untouched.
+ */
+static bool pnv_psi_irq_bits(PnvPsiController *psi, PnvPsiIrq irq,
+                             uint32_t *out_xivr_reg,
+                             uint32_t *out_stat_reg,
+                             uint64_t *out_stat_bit)
+{
+    /* Every source but PSI and FSP reports through the IRQ status register */
+    uint32_t stat = PSIHB_XSCOM_IRQ_STAT;
+    uint32_t xivr;
+    uint64_t bit;
+
+    switch (irq) {
+    case PSIHB_IRQ_PSI:
+        xivr = PSIHB_XSCOM_XIVR_PSI;
+        stat = PSIHB_XSCOM_CR;
+        bit = PSIHB_CR_PSI_IRQ;
+        break;
+    case PSIHB_IRQ_FSP:
+        /* Shares the XIVR of the PSI source */
+        xivr = PSIHB_XSCOM_XIVR_PSI;
+        stat = PSIHB_XSCOM_CR;
+        bit = PSIHB_CR_FSP_IRQ;
+        break;
+    case PSIHB_IRQ_OCC:
+        xivr = PSIHB_XSCOM_XIVR_OCC;
+        bit = PSIHB_IRQ_STAT_OCC;
+        break;
+    case PSIHB_IRQ_FSI:
+        xivr = PSIHB_XSCOM_XIVR_FSI;
+        bit = PSIHB_IRQ_STAT_FSI;
+        break;
+    case PSIHB_IRQ_LPC_I2C:
+        xivr = PSIHB_XSCOM_XIVR_LPCI2C;
+        bit = PSIHB_IRQ_STAT_LPCI2C;
+        break;
+    case PSIHB_IRQ_LOCAL_ERR:
+        xivr = PSIHB_XSCOM_XIVR_LOCERR;
+        bit = PSIHB_IRQ_STAT_LOCERR;
+        break;
+    case PSIHB_IRQ_EXTERNAL:
+        xivr = PSIHB_XSCOM_XIVR_EXT;
+        bit = PSIHB_IRQ_STAT_EXT;
+        break;
+    default:
+        return false;
+    }
+
+    *out_xivr_reg = xivr;
+    *out_stat_reg = stat;
+    *out_stat_bit = bit;
+    return true;
+}
+
+/*
+ * Set or clear an interrupt source of the PSI bridge.
+ *
+ * Updates the status bit of the source, raises or lowers the qirq
+ * selected by the source field of its XIVR, and recomputes the pending
+ * bit of that XIVR. An unsupported source is reported on stderr and
+ * otherwise ignored.
+ */
+void pnv_psi_irq_set(PnvPsiController *psi, PnvPsiIrq irq, bool state)
+{
+    uint32_t xivr_reg, stat_reg, src;
+    uint64_t stat_bit;
+    bool masked;
+
+    if (!pnv_psi_irq_bits(psi, irq, &xivr_reg, &stat_reg, &stat_bit)) {
+        /* XXX Generate an error ? */
+        fprintf(stderr, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    src = (psi->regs[xivr_reg] & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH;
+    /* A priority field of all ones means the source is masked */
+    masked = (psi->regs[xivr_reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK;
+
+    if (state) {
+        psi->regs[stat_reg] |= stat_bit;
+        /* XXX optimization: check mask here. That means re-evaluating
+         * when unmasking, thus TODO
+         */
+        qemu_irq_raise(psi->ics->qirqs[src]);
+    } else {
+        bool mux_still_set;
+
+        psi->regs[stat_reg] &= ~stat_bit;
+
+        /* FSP and PSI are muxed so don't lower if either still set */
+        mux_still_set = stat_reg == PSIHB_XSCOM_CR &&
+            (psi->regs[stat_reg] & (PSIHB_CR_PSI_IRQ | PSIHB_CR_FSP_IRQ));
+        if (mux_still_set) {
+            state = true;
+        } else {
+            qemu_irq_lower(psi->ics->qirqs[src]);
+        }
+    }
+
+    /* XXX Note about the emulation of the pending bit: This isn't
+     * entirely correct. The pending bit should be cleared when the
+     * EOI has been received. However, we don't have callbacks on
+     * EOI (especially not under KVM) so no way to emulate that
+     * properly, so instead we just set that bit as the logical
+     * "output" of the XIVR (ie pending & !masked)
+     * XXX TODO: Also update it on set_xivr
+     */
+    if (!state || masked) {
+        psi->regs[xivr_reg] &= ~PSIHB_XIVR_PENDING;
+    } else {
+        psi->regs[xivr_reg] |= PSIHB_XIVR_PENDING;
+    }
+}
+
+/* Handle a write to XIVR register @reg: latch the server, priority and
+ * source fields (PENDING is preserved) and program the matching ICS entry.
+ */
+static void pnv_psi_set_xivr(PnvPsiController *psi, uint32_t reg, uint64_t val)
+{
+    uint16_t server;
+    uint8_t prio, src;
+
+    psi->regs[reg] = (psi->regs[reg] & PSIHB_XIVR_PENDING) |
+            (val & (PSIHB_XIVR_SERVER_MSK |
+                    PSIHB_XIVR_PRIO_MSK |
+                    PSIHB_XIVR_SRC_MSK));
+    val = psi->regs[reg];
+    server = (val & PSIHB_XIVR_SERVER_MSK) >> PSIHB_XIVR_SERVER_SH;
+    prio = (val & PSIHB_XIVR_PRIO_MSK) >> PSIHB_XIVR_PRIO_SH;
+    src = (val & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH;
+    if (src >= psi->ics->nr_irqs) {
+        /* XXX Generate error ? Valid ICS sources are 0..nr_irqs-1 */
+        return;
+    }
+    /* Because of source remapping, weird things can happen if the source
+     * number is changed dynamically: our simple ICS doesn't remap, so we
+     * just poke the ICS entry matching the source number written. A more
+     * accurate model would use a fixed server/prio and remap the irq.
+     */
+    PSIDBG("IRQ %d server 0x%x prio %x\n", src, server, prio);
+    ics_simple_write_xive(psi->ics, src, server, prio, prio);
+}
+
+static bool pnv_psi_reg_read(PnvPsiController *psi, uint32_t offset,
+                             uint64_t *out_val, bool mmio)
+{
+    switch(offset) { /* whitelist of registers that can be read back */
+    case PSIHB_XSCOM_FIR_RW:
+    case PSIHB_XSCOM_FIRACT0:
+    case PSIHB_XSCOM_FIRACT1:
+    case PSIHB_XSCOM_BAR:
+    case PSIHB_XSCOM_FSPBAR:
+    case PSIHB_XSCOM_CR:
+    case PSIHB_XSCOM_XIVR_PSI:
+    case PSIHB_XSCOM_XIVR_OCC:
+    case PSIHB_XSCOM_XIVR_FSI:
+    case PSIHB_XSCOM_XIVR_LPCI2C:
+    case PSIHB_XSCOM_XIVR_LOCERR:
+    case PSIHB_XSCOM_XIVR_EXT:
+    case PSIHB_XSCOM_IRQ_STAT:
+    case PSIHB_XSCOM_SEMR:
+    case PSIHB_XSCOM_DMA_UPADD:
+    case PSIHB_XSCOM_IRSN:
+        *out_val = psi->regs[offset]; /* plain read of the stored value */
+        return true;
+    }
+    return false; /* not readable: *out_val is left untouched, @mmio unused */
+}
+
+static bool pnv_psi_reg_write(PnvPsiController *psi, uint32_t offset,
+                              uint64_t val, bool mmio)
+{
+    switch (offset) { /* returns false when @offset has no write handler */
+    case PSIHB_XSCOM_FIR_RW:
+    case PSIHB_XSCOM_FIRACT0:
+    case PSIHB_XSCOM_FIRACT1:
+    case PSIHB_XSCOM_SEMR:
+    case PSIHB_XSCOM_DMA_UPADD:
+        psi->regs[offset] = val;
+        return true;
+    case PSIHB_XSCOM_FIR_OR: /* OR alias of FIR_RW */
+        psi->regs[PSIHB_XSCOM_FIR_RW] |= val;
+        return true;
+    case PSIHB_XSCOM_FIR_AND: /* AND alias of FIR_RW */
+        psi->regs[PSIHB_XSCOM_FIR_RW] &= val;
+        return true;
+    case PSIHB_XSCOM_BAR:
+        /* Only XSCOM can write this one */
+        if (!mmio) {
+            pnv_psi_set_bar(psi, val);
+        }
+        return true;
+    case PSIHB_XSCOM_FSPBAR:
+        /* Keep this in FSPBAR: storing it in BAR would corrupt the PSI BAR */
+        psi->regs[PSIHB_XSCOM_FSPBAR] = val & 0x0003ffff00000000;
+        pnv_psi_update_fsp_mr(psi);
+        return true;
+    case PSIHB_XSCOM_CR:
+        pnv_psi_set_cr(psi, val);
+        return true;
+    case PSIHB_XSCOM_SCR: /* set the given bits in CR */
+        pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] | val);
+        return true;
+    case PSIHB_XSCOM_CCR: /* clear the given bits in CR */
+        pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] & ~val);
+        return true;
+    case PSIHB_XSCOM_XIVR_PSI:
+    case PSIHB_XSCOM_XIVR_OCC:
+    case PSIHB_XSCOM_XIVR_FSI:
+    case PSIHB_XSCOM_XIVR_LPCI2C:
+    case PSIHB_XSCOM_XIVR_LOCERR:
+    case PSIHB_XSCOM_XIVR_EXT:
+        pnv_psi_set_xivr(psi, offset, val);
+        return true;
+    case PSIHB_XSCOM_IRQ_STAT: /* Read only, should we generate an error ? */
+        return true;
+    case PSIHB_XSCOM_IRSN:
+        pnv_psi_set_irsn(psi, val);
+        return true;
+    }
+    return false;
+}
+
+/* MMIO read handler: only an 8-byte access to a register that has an XSCOM
+ * equivalent and is readable succeeds, anything else reads as all-ones.
+ */
+static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    const uint64_t all_ones = 0xffffffffffffffffull;
+    PnvPsiController *psi = opaque;
+    uint32_t reg;
+    uint64_t val;
+
+    if (size != 8) {
+        return all_ones;
+    }
+
+    addr &= (PSIHB_BAR_SIZE - 1);
+
+    PSIDBG("MMIO read 0x%x\n", (unsigned int)addr);
+
+    if (addr >= PSIHB_MMIO_MAX) {
+        return all_ones;
+    }
+    reg = psi_mmio_to_xscom[addr / 8];
+    if (reg != 0 && pnv_psi_reg_read(psi, reg, &val, true)) {
+        return val;
+    }
+    return all_ones;
+}
+
+/* MMIO write handler: ignores anything but an 8-byte access to a register
+ * that has an XSCOM equivalent.
+ */
+static void pnv_psi_mmio_write(void *opaque, hwaddr addr,
+                              uint64_t val, unsigned size)
+{
+    PnvPsiController *psi = opaque;
+    uint32_t reg;
+
+    if (size != 8) {
+        return;
+    }
+
+    addr &= (PSIHB_BAR_SIZE - 1);
+    PSIDBG("MMIO write 0x%x val 0x%016llx\n",
+           (unsigned int)addr, (unsigned long long)val);
+
+    if (addr < PSIHB_MMIO_MAX) {
+        reg = psi_mmio_to_xscom[addr / 8];
+        if (reg != 0) {
+            pnv_psi_reg_write(psi, reg, val, true);
+        }
+    }
+}
+
+static const MemoryRegionOps psi_mmio_ops = {
+    .read = pnv_psi_mmio_read,
+    .write = pnv_psi_mmio_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = { /* only 8-byte accesses are declared valid */
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = { /* the callbacks above also reject any size other than 8 */
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static bool pnv_psi_xscom_read(XScomDevice *dev, uint32_t range,
+                               uint32_t offset, uint64_t *out_val)
+{
+    PnvPsiController *psi = PNV_PSI_CONTROLLER(dev);
+
+    PSIDBG("XSCOM read 0x%x\n", offset);
+
+    return pnv_psi_reg_read(psi, offset, out_val, false); /* mmio = false */
+}
+
+static bool pnv_psi_xscom_write(XScomDevice *dev, uint32_t range,
+                                uint32_t offset, uint64_t val)
+{
+    PnvPsiController *psi = PNV_PSI_CONTROLLER(dev);
+
+    PSIDBG("XSCOM write 0x%x val 0x%016llx\n",
+           offset, (unsigned long long)val);
+
+    return pnv_psi_reg_write(psi, offset, val, false); /* mmio = false */
+}
+
+#define PSI_NUM_INTERRUPTS 6
+
+/* Realize the PSI bridge: fixed XSCOM range, MMIO region, default BAR and
+ * XIVR values, plus a child ICS with PSI_NUM_INTERRUPTS sources.
+ */
+static void pnv_psi_realize(DeviceState *dev, Error **errp)
+{
+    static const uint32_t xivr_regs[PSI_NUM_INTERRUPTS] = {
+        PSIHB_XSCOM_XIVR_PSI, PSIHB_XSCOM_XIVR_OCC, PSIHB_XSCOM_XIVR_FSI,
+        PSIHB_XSCOM_XIVR_LPCI2C, PSIHB_XSCOM_XIVR_LOCERR, PSIHB_XSCOM_XIVR_EXT,
+    };
+    PnvPsiController *psi = PNV_PSI_CONTROLLER(dev);
+    Error *error = NULL;
+    unsigned int i;
+
+    /* PSI XSCOM address is fixed */
+    psi->xd.ranges[0].addr = 0x02010900;
+    psi->xd.ranges[0].size = 0x20;
+
+    /* Initialize MMIO region */
+    memory_region_init_io(&psi->regs_mr, OBJECT(dev), &psi_mmio_ops, psi,
+                          "psihb", PSIHB_BAR_SIZE);
+
+    /* Default BAR. Use object properties ? */
+    pnv_psi_set_bar(psi, 0x0003fffe80000001);
+
+    /* Default sources in XIVR: all priority bits set, source number = index */
+    for (i = 0; i < PSI_NUM_INTERRUPTS; i++) {
+        psi->regs[xivr_regs[i]] = PSIHB_XIVR_PRIO_MSK |
+                ((uint64_t)i << PSIHB_XIVR_SRC_SH);
+    }
+
+    /* Create ICS object */
+    psi->ics = ICS(object_new(TYPE_ICS_SIMPLE));
+    object_property_add_child(OBJECT(psi), "ics", OBJECT(psi->ics), NULL);
+    psi->ics->offset = 0;
+    psi->ics->nr_irqs = PSI_NUM_INTERRUPTS;
+    xics_add_ics(psi->xics, psi->ics);
+    object_property_set_bool(OBJECT(psi->ics), true, "realized", &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+    for (i = 0; i < PSI_NUM_INTERRUPTS; i++) {
+        ics_simple_set_irq_type(psi->ics, i, true);
+    }
+}
+
+/* Create the PSI bridge on @chip's XSCOM bus and record it in chip->psi */
+void pnv_psi_create(PnvChip *chip, XICSState *xics)
+{
+    DeviceState *dev = qdev_create(&chip->xscom->bus,
+                                   TYPE_PNV_PSI_CONTROLLER);
+    PnvPsiController *psi = PNV_PSI_CONTROLLER(dev);
+
+    psi->xics = xics; /* read by pnv_psi_realize() to register the ICS */
+    qdev_init_nofail(dev);
+    chip->psi = psi;
+}
+
+static void pnv_psi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XScomDeviceClass *k = XSCOM_DEVICE_CLASS(klass);
+    static const char *compat[] = { "ibm,power8-psihb-x",
+                                    "ibm,psihb-x", NULL };
+
+    k->read = pnv_psi_xscom_read; /* XSCOM accessors for the PSI registers */
+    k->write = pnv_psi_xscom_write;
+    k->dt_name = "psihb"; /* presumably the device-tree node name - confirm */
+    k->dt_compatible = compat; /* NULL-terminated string list */
+
+    dc->realize = pnv_psi_realize;
+}
+
+static const TypeInfo pnv_psi_info = { /* type description of the PSI bridge */
+    .name          = TYPE_PNV_PSI_CONTROLLER,
+    .parent        = TYPE_XSCOM_DEVICE,
+    .instance_size = sizeof(PnvPsiController),
+    .class_init    = pnv_psi_class_init,
+};
+
+static void pnv_psi_register_types(void)
+{
+    type_register_static(&pnv_psi_info); /* registers the PSI bridge type */
+}
+
+type_init(pnv_psi_register_types)
+
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 77b809a..73bbef9 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -23,6 +23,7 @@
 typedef struct XScomBus XScomBus;
 typedef struct ISABus ISABus;
 typedef struct PnvLpcController PnvLpcController;
+typedef struct PnvPsiController PnvPsiController;
 typedef struct XICSState XICSState;
 
 /* Should we turn that into a QOjb of some sort ? */
@@ -31,6 +32,7 @@ typedef struct PnvChip {
     XScomBus         *xscom;
     PnvLpcController *lpc;
     ISABus           *lpc_bus;
+    PnvPsiController *psi;
 } PnvChip;
 
 typedef struct PnvSystem {
@@ -41,5 +43,19 @@ typedef struct PnvSystem {
 } PnvSystem;
 
 extern void pnv_lpc_create(PnvChip *chip, bool has_serirq);
+extern void pnv_psi_create(PnvChip *chip, XICSState *xics);
+
+typedef enum PnvPsiIrq { /* interrupt inputs accepted by pnv_psi_irq_set() */
+    PSIHB_IRQ_PSI, /* internal use only */
+    PSIHB_IRQ_FSP, /* internal use only */
+    PSIHB_IRQ_OCC,
+    PSIHB_IRQ_FSI,
+    PSIHB_IRQ_LPC_I2C,
+    PSIHB_IRQ_LOCAL_ERR,
+    PSIHB_IRQ_EXTERNAL,
+} PnvPsiIrq;
+
+extern void pnv_psi_irq_set(PnvPsiController *psi, PnvPsiIrq irq, bool state);
+
 #endif /* _HW_PNV_LPC_H */
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 43/77] ppc/pnv: Add OCC model stub with interrupt support
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (41 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 42/77] ppc/pnv: Add cut down PSI bridge model and hookup external interrupt Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 44/77] pci-bridge: Set a supported devfn_min for bridge Benjamin Herrenschmidt
                   ` (36 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The OCC is an on-chip microcontroller based on a ppc405 core used
for various power management tasks. It comes with a pile of additional
hardware sitting on the PIB (aka XSCOM bus). At this point we don't
emulate it (nor plan to do so). However there is one facility which
is provided by the surrounding hardware that we do need, which is the
interrupt generation facility. OPAL uses it to send itself interrupts
under some circumstances and there are other uses around the corner.

So this implements just enough to support it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/Makefile.objs |   2 +-
 hw/ppc/pnv.c         |   3 ++
 hw/ppc/pnv_occ.c     | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h |   3 ++
 4 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 hw/ppc/pnv_occ.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index a5b3ce6..a795b1c 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -5,7 +5,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 # IBM PowerNV
-obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_lpc.o pnv_psi.o
+obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_lpc.o pnv_psi.o pnv_occ.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index a10fa60..ae6efbd 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -542,6 +542,9 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
                                             chip->psi, 16));
         }
     }
+
+    /* Create the simplified OCC model */
+    pnv_occ_create(chip);
 }
 
 static void ppc_powernv_init(MachineState *machine)
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
new file mode 100644
index 0000000..a759c6d
--- /dev/null
+++ b/hw/ppc/pnv_occ.c
@@ -0,0 +1,125 @@
+/*
+ * Emulation of a few OCC related registers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright IBM Corp. 2014
+ */
+#include "hw/hw.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+
+struct PnvOCCState {
+    XScomDevice xd;
+    PnvPsiController *psi; /* PSI bridge that receives the OCC interrupt */
+
+    /* OCC Misc interrupt */
+    uint64_t occmisc; /* only the top 16 bits are kept by pnv_occ_set_misc() */
+};
+
+#define TYPE_PNV_OCC "pnv-occ"
+#define PNV_OCC(obj) OBJECT_CHECK(PnvOCCState, (obj), TYPE_PNV_OCC)
+
+/* Update the OCC misc register. Only the top 16 bits are kept, and bit 63
+ * is forwarded to the PSI bridge as the level of the OCC interrupt.
+ */
+static void pnv_occ_set_misc(PnvOCCState *occ, uint64_t val)
+{
+    const uint64_t misc = val & 0xffff000000000000ull;
+
+    occ->occmisc = misc;
+    pnv_psi_irq_set(occ->psi, PSIHB_IRQ_OCC, (misc >> 63) != 0);
+}
+
+/* XSCOM read: only the OCC misc register (PCB 0x6a020) is implemented */
+static bool pnv_occ_xscom_read(XScomDevice *dev, uint32_t range,
+                               uint32_t offset, uint64_t *out_val)
+{
+    PnvOCCState *occ = PNV_OCC(dev);
+    uint32_t pcb_addr = dev->ranges[range].addr + offset;
+
+    if (pcb_addr != 0x6a020) {
+        return false;
+    }
+    *out_val = occ->occmisc;
+    return true;
+}
+
+/* XSCOM write: OCC misc register plus its AND and OR aliases; others fail */
+static bool pnv_occ_xscom_write(XScomDevice *dev, uint32_t range,
+                                uint32_t offset, uint64_t val)
+{
+    PnvOCCState *occ = PNV_OCC(dev);
+    uint32_t pcb_addr = dev->ranges[range].addr + offset;
+
+    switch (pcb_addr) {
+    case 0x6a020: /* plain write */
+        pnv_occ_set_misc(occ, val);
+        return true;
+    case 0x6a021: /* AND alias: keeps only the bits set in @val */
+        pnv_occ_set_misc(occ, occ->occmisc & val);
+        return true;
+    case 0x6a022: /* OR alias: sets the bits set in @val */
+        pnv_occ_set_misc(occ, occ->occmisc | val);
+        return true;
+    }
+    return false; /* not emulated: must not touch occmisc or the OCC irq */
+}
+
+/* Realize: clear the misc register and claim XSCOM range 0x66000 + 0x6000 */
+static void pnv_occ_realize(DeviceState *dev, Error **errp)
+{
+    PnvOCCState *occ = PNV_OCC(dev);
+    XScomDevice *xd = XSCOM_DEVICE(dev);
+
+    occ->occmisc = 0;
+    xd->ranges[0].size = 0x6000;
+    xd->ranges[0].addr = 0x66000;
+}
+
+static void pnv_occ_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XScomDeviceClass *k = XSCOM_DEVICE_CLASS(klass);
+
+    k->read = pnv_occ_xscom_read; /* XSCOM accessors for the OCC registers */
+    k->write = pnv_occ_xscom_write;
+
+    dc->realize = pnv_occ_realize;
+}
+
+static const TypeInfo pnv_occ_type_info = { /* type description of the OCC */
+    .name          = TYPE_PNV_OCC,
+    .parent        = TYPE_XSCOM_DEVICE,
+    .instance_size = sizeof(PnvOCCState),
+    .class_init    = pnv_occ_class_init,
+};
+
+static void pnv_occ_register_types(void)
+{
+    type_register_static(&pnv_occ_type_info); /* registers the OCC type */
+}
+
+type_init(pnv_occ_register_types)
+
+/* Create the OCC stub on @chip's XSCOM bus and record it in chip->occ */
+void pnv_occ_create(PnvChip *chip)
+{
+    DeviceState *dev = qdev_create(&chip->xscom->bus, TYPE_PNV_OCC);
+    PnvOCCState *occ = PNV_OCC(dev);
+
+    /* chip->psi is sampled here, so the PSI bridge has to exist already */
+    occ->psi = chip->psi;
+    qdev_init_nofail(dev);
+    chip->occ = occ;
+}
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 73bbef9..c488f12 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -25,6 +25,7 @@ typedef struct ISABus ISABus;
 typedef struct PnvLpcController PnvLpcController;
 typedef struct PnvPsiController PnvPsiController;
 typedef struct XICSState XICSState;
+typedef struct PnvOCCState PnvOCCState;
 
 /* Should we turn that into a QOjb of some sort ? */
 typedef struct PnvChip {
@@ -33,6 +34,7 @@ typedef struct PnvChip {
     PnvLpcController *lpc;
     ISABus           *lpc_bus;
     PnvPsiController *psi;
+    PnvOCCState      *occ;
 } PnvChip;
 
 typedef struct PnvSystem {
@@ -44,6 +46,7 @@ typedef struct PnvSystem {
 
 extern void pnv_lpc_create(PnvChip *chip, bool has_serirq);
 extern void pnv_psi_create(PnvChip *chip, XICSState *xics);
+extern void pnv_occ_create(PnvChip *chip);
 
 typedef enum PnvPsiIrq {
     PSIHB_IRQ_PSI, /* internal use only */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 44/77] pci-bridge: Set a supported devfn_min for bridge
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (42 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 43/77] ppc/pnv: Add OCC model stub with interrupt support Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-18 12:31   ` Paolo Bonzini
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices Benjamin Herrenschmidt
                   ` (35 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Otherwise QEMU might add a device at slot 0 which isn't supported
by the SHPC controller.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/pci-bridge/pci_bridge_dev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 26aded9..c0355c2 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -1,3 +1,4 @@
+
 /*
  * Standard PCI Bridge Device
  *
@@ -57,6 +58,8 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
         goto bridge_error;
     }
     if (bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_SHPC_REQ)) {
+        /* SHPC gets upset if we try to use slot 0 */
+        br->sec_bus.devfn_min = PCI_FUNC_MAX;
         dev->config[PCI_INTERRUPT_PIN] = 0x1;
         memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
                            shpc_bar_size(dev));
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (43 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 44/77] pci-bridge: Set a supported devfn_min for bridge Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-18 12:34   ` Paolo Bonzini
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 46/77] pci: Use the new pci_can_add_device() to enforce devfn_min/max Benjamin Herrenschmidt
                   ` (34 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This allows a bus class to tell whether a given bus has room for
any new device. max_dev isn't sufficient as the rules can depend
on some arguments or can differ between instances of a bus. This
will be used by PCI in subsequent patches

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/hw/qdev-core.h |  1 +
 qdev-monitor.c         | 13 ++++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 8057aed..6f3dd8d 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -202,6 +202,7 @@ struct BusClass {
      */
     char *(*get_fw_dev_path)(DeviceState *dev);
     void (*reset)(BusState *bus);
+    bool (*can_add_device)(BusState *bus, QemuOpts *opts);
     BusRealize realize;
     BusUnrealize unrealize;
 
diff --git a/qdev-monitor.c b/qdev-monitor.c
index a35098f..4023357 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -384,7 +384,7 @@ static inline bool qbus_is_full(BusState *bus)
  * Return the bus if found, else %NULL.
  */
 static BusState *qbus_find_recursive(BusState *bus, const char *name,
-                                     const char *bus_typename)
+                                     const char *bus_typename, QemuOpts *opts)
 {
     BusChild *kid;
     BusState *pick, *child, *ret;
@@ -398,7 +398,10 @@ static BusState *qbus_find_recursive(BusState *bus, const char *name,
     }
 
     if (match && !qbus_is_full(bus)) {
-        return bus;             /* root matches and isn't full */
+        BusClass *bc = BUS_GET_CLASS(bus);
+        if (!bc->can_add_device || bc->can_add_device(bus, opts)) {
+            return bus;             /* root matches and isn't full */
+	}
     }
 
     pick = match ? bus : NULL;
@@ -406,7 +409,7 @@ static BusState *qbus_find_recursive(BusState *bus, const char *name,
     QTAILQ_FOREACH(kid, &bus->children, sibling) {
         DeviceState *dev = kid->child;
         QLIST_FOREACH(child, &dev->child_bus, sibling) {
-            ret = qbus_find_recursive(child, name, bus_typename);
+		ret = qbus_find_recursive(child, name, bus_typename, opts);
             if (ret && !qbus_is_full(ret)) {
                 return ret;     /* a descendant matches and isn't full */
             }
@@ -436,7 +439,7 @@ static BusState *qbus_find(const char *path, Error **errp)
             assert(!path[0]);
             elem[0] = len = 0;
         }
-        bus = qbus_find_recursive(sysbus_get_default(), elem, NULL);
+        bus = qbus_find_recursive(sysbus_get_default(), elem, NULL, NULL);
         if (!bus) {
             error_setg(errp, "Bus '%s' not found", elem);
             return NULL;
@@ -542,7 +545,7 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
             return NULL;
         }
     } else if (dc->bus_type != NULL) {
-        bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type);
+	    bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type, opts);
         if (!bus || qbus_is_full(bus)) {
             error_setg(errp, "No '%s' bus found for device '%s'",
                        dc->bus_type, driver);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 46/77] pci: Use the new pci_can_add_device() to enforce devfn_min/max
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (44 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices Benjamin Herrenschmidt
@ 2015-11-11  0:27 ` Benjamin Herrenschmidt
  2015-11-18 12:35   ` Paolo Bonzini
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 47/77] pci: Don't call pci_irq_handler() for a negative intx Benjamin Herrenschmidt
                   ` (33 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:27 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This adds a devfn_max field to PCIBus and adds a pci_can_add_device()
function which, if no "addr" (aka devfn) is specified, will tell whether
there is any slot free between devfn_min and devfn_max.

This will be used by some PCI root complex implementations that support
only one direct child to avoid having qemu put dumb devices at different
slot numbers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/pci/pci.c             | 22 ++++++++++++++++++++++
 include/hw/pci/pci_bus.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..7003f7c 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -108,6 +108,27 @@ static uint16_t pcibus_numa_node(PCIBus *bus)
     return NUMA_NODE_UNASSIGNED;
 }
 
+/* BusClass can_add_device hook: does @bus have room for one more device? */
+static bool pci_can_add_device(BusState *bus, QemuOpts *opts)
+{
+    PCIBus *pbus = PCI_BUS(bus);
+    unsigned int devfn, limit = ARRAY_SIZE(pbus->devices);
+
+    /* If address is specified, say yes and let it fail if that doesn't work */
+    if (qemu_opt_get(opts, "addr") != NULL) {
+        return true;
+    }
+    if (pbus->devfn_max && pbus->devfn_max < limit) {
+        limit = pbus->devfn_max; /* devfn_max == 0 means no extra limit */
+    }
+    for (devfn = pbus->devfn_min; devfn < limit; devfn += PCI_FUNC_MAX) {
+        if (!pbus->devices[devfn]) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static void pci_bus_class_init(ObjectClass *klass, void *data)
 {
     BusClass *k = BUS_CLASS(klass);
@@ -119,6 +140,7 @@ static void pci_bus_class_init(ObjectClass *klass, void *data)
     k->realize = pci_bus_realize;
     k->unrealize = pci_bus_unrealize;
     k->reset = pcibus_reset;
+    k->can_add_device = pci_can_add_device;
 
     pbc->is_root = pcibus_is_root;
     pbc->bus_num = pcibus_num;
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..02055d4 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -23,6 +23,7 @@ struct PCIBus {
     PCIIOMMUFunc iommu_fn;
     void *iommu_opaque;
     uint8_t devfn_min;
+    uint8_t devfn_max;
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
     pci_route_irq_fn route_intx_to_irq;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 47/77] pci: Don't call pci_irq_handler() for a negative intx
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (45 preceding siblings ...)
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 46/77] pci: Use the new pci_can_add_device() to enforce devfn_min/max Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge Benjamin Herrenschmidt
                   ` (32 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Under some circumstances, pci_intx() can return -1 (when the interrupt
pin in the config space is 0 which normally means no interrupt).

I have seen cases of pci_set_irq() being called on such devices, in
turn causing pci_irq_handler() to be called with "-1" as an argument
which doesn't seem like a terribly good idea.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/pci/pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 7003f7c..b364eff 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1298,7 +1298,9 @@ qemu_irq pci_allocate_irq(PCIDevice *pci_dev)
 void pci_set_irq(PCIDevice *pci_dev, int level)
 {
     int intx = pci_intx(pci_dev);
-    pci_irq_handler(pci_dev, intx, level);
+    if (intx >= 0) {
+        pci_irq_handler(pci_dev, intx, level);
+    }
 }
 
 /* Special hooks used by device assignment */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (46 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 47/77] pci: Don't call pci_irq_handler() for a negative intx Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2017-03-17  8:24   ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 49/77] ppc/pnv: Create a default PCI layout Benjamin Herrenschmidt
                   ` (31 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This is a model of the PCIe host bridge found on Power8 chips,
including IOMMU support, PCIe root complex etc...

This implementation doesn't emulate the EEH error handling (and
may never do).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/intc/xics.c                      |    2 +-
 hw/pci-host/Makefile.objs           |    2 +
 hw/pci-host/pnv_phb3.c              | 1083 +++++++++++++++++++++++++++++++++++
 hw/pci-host/pnv_phb3_msi.c          |  338 +++++++++++
 hw/pci-host/pnv_phb3_pbcq.c         |  314 ++++++++++
 hw/pci-host/pnv_phb3_rc.c           |  132 +++++
 hw/ppc/pnv.c                        |   22 +-
 include/hw/pci-host/pnv_phb3.h      |  145 +++++
 include/hw/pci-host/pnv_phb3_regs.h |  505 ++++++++++++++++
 include/hw/ppc/pnv.h                |    3 +
 include/hw/ppc/xics.h               |    1 +
 11 files changed, 2543 insertions(+), 4 deletions(-)
 create mode 100644 hw/pci-host/pnv_phb3.c
 create mode 100644 hw/pci-host/pnv_phb3_msi.c
 create mode 100644 hw/pci-host/pnv_phb3_pbcq.c
 create mode 100644 hw/pci-host/pnv_phb3_rc.c
 create mode 100644 include/hw/pci-host/pnv_phb3.h
 create mode 100644 include/hw/pci-host/pnv_phb3_regs.h

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index d027a24..eaeb1e1 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -341,7 +341,7 @@ void icp_eoi(XICSState *xics, int server, uint32_t xirr)
     }
 }
 
-static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
 {
     XICSState *xics = ics->xics;
     ICPState *ss = xics->ss + server;
diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
index 45f1f0e..961fe5b 100644
--- a/hw/pci-host/Makefile.objs
+++ b/hw/pci-host/Makefile.objs
@@ -16,3 +16,5 @@ common-obj-$(CONFIG_FULONG) += bonito.o
 common-obj-$(CONFIG_PCI_PIIX) += piix.o
 common-obj-$(CONFIG_PCI_Q35) += q35.o
 common-obj-$(CONFIG_PCI_GENERIC) += gpex.o
+
+obj-$(CONFIG_POWERNV) += pnv_phb3.o pnv_phb3_pbcq.o pnv_phb3_rc.o pnv_phb3_msi.o
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
new file mode 100644
index 0000000..1136003
--- /dev/null
+++ b/hw/pci-host/pnv_phb3.c
@@ -0,0 +1,1083 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright IBM Corp. 2014
+ */
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/ppc/xics.h"
+
+/*
+ * Debug print helpers, all writing to stderr with the calling function's
+ * name as a prefix.  The first argument (p) is accepted for symmetry at
+ * the call sites but is not used by the expansion.
+ *
+ * DBG_ERR always prints; DBG_DMA and DBG_MAP are compiled in (so their
+ * format strings stay type checked) but disabled by the constant "if (0)".
+ */
+#define DBG_ERR(p, fmt, ...) do { \
+    if (1) fprintf(stderr, "PHB3(%s): " fmt "\n", __func__, ## __VA_ARGS__); \
+    } while(0)
+
+#define DBG_DMA(p, fmt, ...) do { \
+    if (0) fprintf(stderr, "PHB3(%s): " fmt "\n", __func__, ## __VA_ARGS__); \
+    } while(0)
+
+#define DBG_MAP(p, fmt, ...) do { \
+    if (0) fprintf(stderr, "PHB3(%s): " fmt "\n", __func__, ## __VA_ARGS__); \
+    } while(0)
+
+//#define DISPLAY_UNIMPLENTED_REG
+
+/*
+ * Find the PCI device currently selected by the PHB_CONFIG_ADDRESS register.
+ *
+ * Returns NULL when the top bit (bit 63) of the register is clear.
+ * Otherwise the bus number is taken from bits 59:52 and the devfn from
+ * bits 51:44, and whatever pci_find_device() reports for that pair on the
+ * host bridge bus is returned.
+ */
+static PCIDevice *pnb_phb3_find_cfg_dev(PnvPhb3State *phb)
+{
+    PCIHostState *host = PCI_HOST_BRIDGE(phb);
+    uint64_t cfg_reg = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+
+    if ((cfg_reg >> 63) == 0) {
+        return NULL;
+    }
+
+    return pci_find_device(host->bus,
+                           (uint8_t)((cfg_reg >> 52) & 0xff),
+                           (uint8_t)((cfg_reg >> 44) & 0xff));
+}
+
+/*
+ * Handle a write to the PHB_CONFIG_DATA window.
+ *
+ * @phb:  the PHB whose PHB_CONFIG_ADDRESS register selects the target
+ * @off:  byte offset (0..3) of the access within the CONFIG_DATA register
+ * @size: access size in bytes; only 1, 2 and 4 are accepted
+ * @val:  value as seen on the (big endian) register interface
+ *
+ * The write is silently dropped when no device is selected, when the
+ * resulting offset lies beyond the device's config space, or when the
+ * access size is unsupported.
+ */
+static void pnv_phb3_config_write(PnvPhb3State *phb, unsigned off,
+                                  unsigned size, uint64_t val)
+{
+    uint32_t cfg_addr, limit;
+    PCIDevice *pdev;
+
+    pdev = pnb_phb3_find_cfg_dev(phb);
+    if (!pdev) {
+        return;
+    }
+    /*
+     * The address register supplies the dword-aligned register number
+     * and the byte lane comes from the offset within CONFIG_DATA.  Mask
+     * with 0xffc exactly like the read path does, so the two low bits
+     * are not OR-ed together with "off".
+     */
+    cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+    cfg_addr |= off;
+    limit = pci_config_size(pdev);
+    if (limit <= cfg_addr) {
+        /* conventional pci device can be behind pcie-to-pci bridge.
+           256 <= addr < 4K has no effects. */
+        return;
+    }
+    /* Multi-byte values are byte swapped before reaching the PCI core */
+    switch (size) {
+    case 1:
+        break;
+    case 2:
+        val = bswap16(val);
+        break;
+    case 4:
+        val = bswap32(val);
+        break;
+    default:
+        DBG_ERR(phb, "Unsupported config access size %u !", size);
+        return;
+    }
+    pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
+}
+
+/*
+ * Handle a read from the PHB_CONFIG_DATA window.
+ *
+ * @phb:  the PHB whose PHB_CONFIG_ADDRESS register selects the target
+ * @off:  byte offset (0..3) of the access within the CONFIG_DATA register
+ * @size: access size in bytes; only 1, 2 and 4 are accepted
+ *
+ * Returns the config space value (byte swapped for 2 and 4 byte accesses),
+ * or all-ones when no device is selected, the offset is beyond the
+ * device's config space, or the access size is unsupported.
+ */
+static uint64_t pnv_phb3_config_read(PnvPhb3State *phb, unsigned off,
+                                     unsigned size)
+{
+    PCIDevice *target = pnb_phb3_find_cfg_dev(phb);
+    uint32_t reg_off, cfg_size;
+    uint64_t raw;
+
+    if (target == NULL) {
+        return ~0ull;
+    }
+
+    reg_off = ((phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc) | off;
+    cfg_size = pci_config_size(target);
+    if (reg_off >= cfg_size) {
+        /* A conventional PCI device may sit behind a PCIe-to-PCI bridge:
+         * offsets from 256 up to 4K then have no effect.
+         */
+        return ~0ull;
+    }
+
+    raw = pci_host_config_read_common(target, reg_off, cfg_size, size);
+    if (size == 1) {
+        return raw;
+    }
+    if (size == 2) {
+        return bswap16(raw);
+    }
+    if (size == 4) {
+        return bswap32(raw);
+    }
+
+    DBG_ERR(phb, "Unsupported config access size %d !", size);
+    return ~0ull;
+}
+
+/*
+ * Re-evaluate the mapping of the M32 (32-bit MMIO) window.
+ *
+ * Any existing mapping is removed first.  If the M32 enable bit is set in
+ * PHB_PHB3_CONFIG, the window geometry is read from the M32 base, start
+ * and mask registers, and the window is mapped as an alias of the PCI
+ * MMIO region underneath whichever PBCQ MMIO BAR (0 or 1) fully contains
+ * it.  If neither BAR contains it the window stays unmapped.
+ */
+static void pnv_phb3_check_m32(PnvPhb3State *phb)
+{
+    uint64_t base, start, size;
+    MemoryRegion *parent;
+
+    if (phb->m32_mapped) {
+        // Should we destroy it in RCU friendly way... ?
+        memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32);
+        phb->m32_mapped = false;
+    }
+
+    /* Disabled ? move on with life ... */
+    if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) {
+        return;
+    }
+
+    /* Grab geometry from registers: "base" is the address matched against
+     * the PBCQ windows, "start" is the offset into the PCI MMIO region,
+     * and the size is the two's complement of the mask register with its
+     * top 14 bits forced to one.
+     */
+    base = phb->regs[PHB_M32_BASE_ADDR >> 3];
+    start = phb->regs[PHB_M32_START_ADDR >> 3];
+    size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1;
+
+    DBG_MAP(phb, "PHB3: M32 enabled, base=%016llx/%016llx",
+           (unsigned long long)base, (unsigned long long)size);
+    DBG_MAP(phb, "PHB3: MMIO0            =%016llx/%016llx [%d]",
+           (unsigned long long)phb->pbcq->mmio0_base,
+           (unsigned long long)phb->pbcq->mmio0_size, phb->pbcq->mmio0_mapped);
+    DBG_MAP(phb, "PHB3: MMIO1            =%016llx/%016llx [%d]",
+           (unsigned long long)phb->pbcq->mmio1_base,
+           (unsigned long long)phb->pbcq->mmio1_size, phb->pbcq->mmio1_mapped);
+    /* Check if it matches an enabled MMIO region in the PBCQ; on a match
+     * "base" is rebased to an offset inside that region.
+     */
+    if (phb->pbcq->mmio0_mapped && base >= phb->pbcq->mmio0_base &&
+        (base + size) <= (phb->pbcq->mmio0_base + phb->pbcq->mmio0_size)) {
+        parent = &phb->pbcq->mmbar0;
+        base -= phb->pbcq->mmio0_base;
+        DBG_MAP(phb, "M32: Mapping under MMIO0");
+    } else if (phb->pbcq->mmio1_mapped && base >= phb->pbcq->mmio1_base &&
+        (base + size) <= (phb->pbcq->mmio1_base + phb->pbcq->mmio1_size)) {
+        parent = &phb->pbcq->mmbar1;
+        base -= phb->pbcq->mmio1_base;
+        DBG_MAP(phb, "M32: Mapping under MMIO1");
+    } else {
+        DBG_MAP(phb, "M32 not matching either PBCQ MMIO region !");
+        return;
+    }
+
+    /* Create alias of the PCI MMIO space at "start" and map it at the
+     * rebased offset inside the selected PBCQ region.
+     */
+    memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32",
+                             &phb->pci_mmio, start, size);
+    memory_region_add_subregion(parent, base, &phb->mr_m32);
+    phb->m32_mapped = true;
+}
+
+/*
+ * Re-evaluate the mapping of one M64 (64-bit MMIO) window.
+ *
+ * @index selects the entry of the M64 BAR table (phb->ioda_M64BT).  Any
+ * existing mapping for that entry is removed first.  If the entry has its
+ * enable bit set, the window is mapped as an alias of the PCI MMIO region
+ * underneath whichever PBCQ MMIO BAR (0 or 1) fully contains it; otherwise
+ * it stays unmapped.
+ */
+static void pnv_phb3_check_m64(PnvPhb3State *phb, uint32_t index)
+{
+    uint64_t base, start, size, m64;
+    MemoryRegion *parent;
+
+    if (phb->m64_mapped[index]) {
+        // Should we destroy it in RCU friendly way... ?
+        memory_region_del_subregion(phb->mr_m64[index].container,
+                                    &phb->mr_m64[index]);
+        phb->m64_mapped[index] = false;
+    }
+
+    /* Get table entry */
+    m64 = phb->ioda_M64BT[index];
+
+    /* Disabled ? move on with life ... */
+    if (!(m64 & IODA2_M64BT_ENABLE)) {
+        return;
+    }
+
+    /* Grab geometry from registers: base and mask fields are shifted
+     * left by 20 bits; in single-PE mode the low 25 bits of the base are
+     * cleared.  The size is the two's complement of the mask with the
+     * top 14 bits forced to one.
+     */
+    base = GETFIELD(IODA2_M64BT_BASE, m64) << 20;
+    if (m64 & IODA2_M64BT_SINGLE_PE) {
+        base &= ~0x1ffffffull;
+    }
+    size = GETFIELD(IODA2_M64BT_MASK, m64) << 20;
+    size |= 0xfffc000000000000ull;
+    size = ~size + 1;
+    /* The offset into PCI MMIO space gets the M64 upper bits OR-ed in */
+    start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+
+    DBG_MAP(phb, "PHB3: M64[%d] enabled, base=%016llx/%016llx", index,
+           (unsigned long long)base, (unsigned long long)size);
+    DBG_MAP(phb, "PHB3: MMIO0            =%016llx/%016llx [%d]",
+           (unsigned long long)phb->pbcq->mmio0_base,
+           (unsigned long long)phb->pbcq->mmio0_size, phb->pbcq->mmio0_mapped);
+    DBG_MAP(phb, "PHB3: MMIO1            =%016llx/%016llx [%d]",
+           (unsigned long long)phb->pbcq->mmio1_base,
+           (unsigned long long)phb->pbcq->mmio1_size, phb->pbcq->mmio1_mapped);
+    /* Check if it matches an enabled MMIO region in the PBCQ; on a match
+     * "base" is rebased to an offset inside that region.
+     */
+    if (phb->pbcq->mmio0_mapped && base >= phb->pbcq->mmio0_base &&
+        (base + size) <= (phb->pbcq->mmio0_base + phb->pbcq->mmio0_size)) {
+        parent = &phb->pbcq->mmbar0;
+        base -= phb->pbcq->mmio0_base;
+        DBG_MAP(phb, "M64: Mapping under MMIO0");
+    } else if (phb->pbcq->mmio1_mapped && base >= phb->pbcq->mmio1_base &&
+        (base + size) <= (phb->pbcq->mmio1_base + phb->pbcq->mmio1_size)) {
+        parent = &phb->pbcq->mmbar1;
+        base -= phb->pbcq->mmio1_base;
+        DBG_MAP(phb, "M64: Mapping under MMIO1");
+    } else {
+        DBG_MAP(phb, "M64 not matching either PBCQ MMIO region !");
+        return;
+    }
+
+    /* Create alias (note: every entry uses the same region name) */
+    memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64",
+                             &phb->pci_mmio, start, size);
+    memory_region_add_subregion(parent, base, &phb->mr_m64[index]);
+    phb->m64_mapped[index] = true;
+}
+
+/* Re-evaluate the mapping of every M64 window, in index order. */
+static void pnv_phb3_check_all_m64s(PnvPhb3State *phb)
+{
+    uint64_t win = 0;
+
+    while (win < PHB_NUM_M64) {
+        pnv_phb3_check_m64(phb, win);
+        win++;
+    }
+}
+
+/*
+ * Store an LXIVT entry and forward it to the LSI interrupt source.
+ *
+ * Only the server, priority and node-id fields of @val are kept in the
+ * table.  The server and priority fields are then handed to
+ * ics_simple_write_xive() for LSI number @idx, with the priority passed
+ * for both priority arguments.
+ */
+static void pnv_phb3_lxivt_write(PnvPhb3State *phb, unsigned idx, uint64_t val)
+{
+    uint8_t srv = GETFIELD(IODA2_LXIVT_SERVER, val);
+    uint8_t pri = GETFIELD(IODA2_LXIVT_PRIORITY, val);
+
+    phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER_MASK |
+                                  IODA2_LXIVT_PRIORITY_MASK |
+                                  IODA2_LXIVT_NODE_ID_MASK);
+
+    ics_simple_write_xive(phb->lsi_ics, idx, srv, pri, pri);
+}
+
+/*
+ * Decode the PHB_IODA_ADDR register for one access through PHB_IODA_DATA0.
+ *
+ * @phb:       the PHB
+ * @out_table: if not NULL, receives the selected table number
+ * @out_idx:   if not NULL, receives the (wrapped) index being accessed
+ *
+ * Returns a pointer to the backing storage of the selected entry, or NULL
+ * when the table is unknown or has no backing storage in this model.
+ * When the auto-increment bit is set, the index field of PHB_IODA_ADDR is
+ * advanced (wrapping at the table size) for the next access.
+ */
+static uint64_t *pnv_phb3_ioda_access(PnvPhb3State *phb,
+                                      unsigned *out_table, unsigned *out_idx)
+{
+    uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+    unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+    unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+    unsigned int mask;
+    uint64_t *tptr = NULL;
+
+    /* Pick the backing array (if any) and the index wrap mask */
+    switch(table) {
+    case IODA2_TBL_LIST:
+        tptr = phb->ioda_LIST;
+        mask = 7;
+        break;
+    case IODA2_TBL_LXIVT:
+        tptr = phb->ioda_LXIVT;
+        mask = 7;
+        break;
+    case IODA2_TBL_IVC_CAM:
+    case IODA2_TBL_RBA:
+        mask = 31;
+        break;
+    case IODA2_TBL_RCAM:
+        mask = 63;
+        break;
+    case IODA2_TBL_MRT:
+        mask = 7;
+        break;
+    case IODA2_TBL_PESTA:
+    case IODA2_TBL_PESTB:
+        mask = 255;
+        break;
+    case IODA2_TBL_TVT:
+        tptr = phb->ioda_TVT;
+        mask = 255;
+        break;
+    case IODA2_TBL_TCAM:
+    case IODA2_TBL_TDR:
+        mask = 63;
+        break;
+    case IODA2_TBL_M64BT:
+        tptr = phb->ioda_M64BT;
+        mask = 15;
+        break;
+    case IODA2_TBL_M32DT:
+        tptr = phb->ioda_MDT;
+        mask = 255;
+        break;
+    case IODA2_TBL_PEEV:
+        tptr = phb->ioda_PEEV;
+        mask = 3;
+        break;
+    default:
+        DBG_ERR(phb, "Unknown IODA table idx %d", table);
+        return NULL;
+    }
+    index &= mask;
+    if (out_idx) {
+        *out_idx = index;
+    }
+    if (out_table) {
+        *out_table = table;
+    }
+    /*
+     * Resolve the entry pointer before applying auto-increment: the
+     * current access must hit the entry reported through out_idx, and
+     * only the next access sees the advanced index.
+     */
+    if (tptr) {
+        tptr += index;
+    }
+    if (adreg & PHB_IODA_AD_AUTOINC) {
+        index = (index + 1) & mask;
+        adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+    }
+    phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+    return tptr;
+}
+
+/*
+ * Read the IODA table entry currently selected by PHB_IODA_ADDR.
+ *
+ * Tables without backing storage read as 0 (not all-ones).
+ */
+static uint64_t pnv_phb3_ioda_read(PnvPhb3State *phb)
+{
+    unsigned tbl;
+    uint64_t *slot = pnv_phb3_ioda_access(phb, &tbl, NULL);
+
+    return slot ? *slot : 0;
+}
+
+/*
+ * Write @val to the IODA table entry currently selected by PHB_IODA_ADDR.
+ *
+ * Writes to tables without backing storage are dropped.  LXIVT writes go
+ * through pnv_phb3_lxivt_write(), which stores the entry itself; M64BT
+ * writes additionally re-evaluate the mapping of the affected window.
+ */
+static void pnv_phb3_ioda_write(PnvPhb3State *phb, uint64_t val)
+{
+    unsigned tbl, entry;
+    uint64_t *slot = pnv_phb3_ioda_access(phb, &tbl, &entry);
+
+    if (slot == NULL) {
+        return;
+    }
+
+    if (tbl == IODA2_TBL_LXIVT) {
+        pnv_phb3_lxivt_write(phb, entry, val);
+        return;
+    }
+
+    *slot = val;
+    if (tbl == IODA2_TBL_M64BT) {
+        pnv_phb3_check_m64(phb, entry);
+    }
+}
+
+/* This is called whenever the PHB LSI, MSI source ID register or
+ * the PBCQ irq filters are written.
+ *
+ * It recomputes the interrupt number offset of the LSI source
+ * (phb->lsi_ics->offset), the total interrupt count (phb->total_irq) and
+ * pushes the new base/count to the MSI source.  When the configuration is
+ * not usable yet, the LSI offset is reset to 0 and nothing else is
+ * updated.
+ */
+void pnv_phb3_remap_irqs(PnvPhb3State *phb)
+{
+    uint32_t local, global, count, mask, comp;
+    uint64_t baren;
+
+    /* First check if we are enabled. Unlike real HW we don't separate TX and RX
+     * so we enable if both are set
+     */
+    baren = phb->pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+    if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
+        !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
+        phb->lsi_ics->offset = 0;
+        return;
+    }
+
+    /* Grab local LSI source ID (field value scaled by 8) */
+    local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
+
+    /* Grab global one and compare */
+    global = GETFIELD(PBCQ_NEST_LSI_SRC,
+                      phb->pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
+    if (global != local) {
+        /* This happens during initialization, let's come back when we
+         * are properly configured
+         */
+        phb->lsi_ics->offset = 0;
+        return;
+    }
+
+    /* Get the base on the powerbus.  Both the compare and the mask
+     * registers are decoded with the same field definition; the number
+     * of interrupts is the two's complement of the mask, limited to 19
+     * bits.
+     */
+    comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
+                    phb->pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
+    mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
+                    phb->pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
+    count = ((~mask) + 1) & 0x7ffff;
+    phb->total_irq = count;
+
+    /* Sanity checks: these only log (when DBG_MAP is enabled); the only
+     * corrective action is clearing compare bits that fall outside the
+     * mask.
+     */
+    if ((global + 8) > count) {
+        DBG_MAP(phb, "LSIs out of reach: LSI base=%d total irq=%d",
+                global, count);
+    }
+    if (count > 2048) {
+        DBG_MAP(phb, "More interrupts than supported: %d", count);
+    }
+    if ((comp & mask) != comp) {
+        DBG_MAP(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
+                comp, mask);
+        comp &= mask;
+    }
+    /* Setup LSI offset */
+    phb->lsi_ics->offset = comp + global;
+
+    /* Setup MSI offset */
+    pnv_phb3_msi_update_config(phb->msis, comp, count);
+
+    DBG_MAP(phb, "Initialized for %d interrupts @0x%x, LSI off=%d",
+            count, comp, global);
+}
+
+/*
+ * Store a new LSI source ID, keeping only the bits covered by
+ * PHB_LSI_SRC_ID_MASK, then recompute the interrupt mapping.
+ */
+static void pnv_phb3_lsi_src_id_write(PnvPhb3State *phb, uint64_t val)
+{
+    phb->regs[PHB_LSI_SOURCE_ID >> 3] = val & PHB_LSI_SRC_ID_MASK;
+    pnv_phb3_remap_irqs(phb);
+}
+
+/*
+ * Handle a write to PHB_RTC_INVALIDATE.
+ *
+ * The written value is currently ignored: the cached PE number of every
+ * DMA space is dropped, so each one is looked up again on next use.
+ */
+static void pnv_phb3_rtc_invalidate(PnvPhb3State *phb, uint64_t val)
+{
+    PnvPhb3DMASpace *space;
+
+    QLIST_FOREACH(space, &phb->dma_spaces, list) {
+        space->pe_num = PHB_INVALID_PE;
+    }
+}
+
+
+/*
+ * Bring the MSI windows of one DMA space in line with PHB_PHB3_CONFIG.
+ *
+ * The 32-bit MSI region lives at 0xffff0000 and the 64-bit one at 1 << 60
+ * inside the DMA space; each is mapped or unmapped only when its enable
+ * bit disagrees with the current mapping state.
+ */
+static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds)
+{
+    uint64_t phb_cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+    bool want32 = (phb_cfg & PHB_PHB3C_32BIT_MSI_EN) != 0;
+    bool want64 = (phb_cfg & PHB_PHB3C_64BIT_MSI_EN) != 0;
+
+    if (want32 && !ds->msi32_mapped) {
+        memory_region_add_subregion(&ds->dma_mr, 0xffff0000, &ds->msi32_mr);
+        ds->msi32_mapped = true;
+    } else if (!want32 && ds->msi32_mapped) {
+        memory_region_del_subregion(&ds->dma_mr, &ds->msi32_mr);
+        ds->msi32_mapped = false;
+    }
+
+    if (want64 && !ds->msi64_mapped) {
+        memory_region_add_subregion(&ds->dma_mr,
+                                    (1ull << 60), &ds->msi64_mr);
+        ds->msi64_mapped = true;
+    } else if (!want64 && ds->msi64_mapped) {
+        memory_region_del_subregion(&ds->dma_mr, &ds->msi64_mr);
+        ds->msi64_mapped = false;
+    }
+}
+
+/* Refresh the MSI window mappings of every DMA space owned by this PHB. */
+static void pnv_phb3_update_all_msi_regions(PnvPhb3State *phb)
+{
+    PnvPhb3DMASpace *space;
+
+    QLIST_FOREACH(space, &phb->dma_spaces, list) {
+        pnv_phb3_update_msi_regions(space);
+    }
+}
+
+/*
+ * MMIO write handler for the PHB3 register space.
+ *
+ * @opaque: the PnvPhb3State
+ * @off:    byte offset of the access within the register space
+ * @val:    value written
+ * @size:   access size in bytes
+ *
+ * Accesses to the PHB_CONFIG_DATA register are forwarded to PCI config
+ * space and may be of any size.  All other registers must be accessed as
+ * aligned 64-bit quantities: the value is stored in the register cache
+ * and register specific side effects are then applied.
+ */
+void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)
+{
+    PnvPhb3State *phb = opaque;
+    bool changed;
+
+    /* Special case configuration data */
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        pnv_phb3_config_write(phb, off & 0x3, size, val);
+        return;
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || off & 0x7) {
+        DBG_ERR(phb, "Invalid register access, offset: 0x%x size: %d",
+                (unsigned int)off, size);
+        return;
+    }
+
+    /* Handle masking: only the top 14 bits of M64_UPPER_BITS are kept */
+    switch(off) {
+    case PHB_M64_UPPER_BITS:
+        val &= 0xfffc000000000000ull;
+        break;
+    }
+
+    /* Record whether it changed */
+    changed = phb->regs[off >> 3] != val;
+
+    /* Store in register cache first */
+    phb->regs[off >> 3] = val;
+
+    /* Handle side effects */
+    switch(off) {
+    case PHB_PHB3_CONFIG:
+        if (changed) {
+            pnv_phb3_update_all_msi_regions(phb);
+        }
+        /* fall through: the config register also holds the M32 enable */
+    case PHB_M32_BASE_ADDR:
+    case PHB_M32_BASE_MASK:
+    case PHB_M32_START_ADDR:
+        if (changed) {
+            pnv_phb3_check_m32(phb);
+        }
+        break;
+    case PHB_M64_UPPER_BITS:
+        if (changed) {
+            pnv_phb3_check_all_m64s(phb);
+        }
+        break;
+    case PHB_LSI_SOURCE_ID:
+        if (changed) {
+            pnv_phb3_lsi_src_id_write(phb, val);
+        }
+        break;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        pnv_phb3_ioda_write(phb, val);
+        break;
+
+    /* RTC invalidation */
+    case PHB_RTC_INVALIDATE:
+        pnv_phb3_rtc_invalidate(phb, val);
+        break;
+
+    /* FFI request */
+    case PHB_FFI_REQUEST:
+        pnv_phb3_msi_ffi(phb->msis, val);
+        break;
+
+    /* Silent simple writes */
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_RBA_BAR:
+    case PHB_IVT_BAR:
+    case PHB_FFI_LOCK:
+        break;
+
+    /* Without DISPLAY_UNIMPLENTED_REG there is no default case: writes to
+     * any other register are just kept in the cache.
+     */
+#ifdef DISPLAY_UNIMPLENTED_REG
+    /* Noise on anything else */
+    default:
+        DBG_ERR(phb, "reg_write 0x%x=%016llx",
+               (unsigned int)off, (unsigned long long)val);
+#endif
+    }
+}
+
+/*
+ * MMIO read handler for the PHB3 register space.
+ *
+ * @opaque: the PnvPhb3State
+ * @off:    byte offset of the access within the register space
+ * @size:   access size in bytes
+ *
+ * Reads of the PHB_CONFIG_DATA register are forwarded to PCI config space
+ * and may be of any size.  All other registers must be read as aligned
+ * 64-bit quantities, otherwise all-ones is returned.  Unless a register
+ * is special cased below, the value comes from the register cache.
+ */
+uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
+{
+    PnvPhb3State *phb = opaque;
+    uint64_t val;
+
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        return pnv_phb3_config_read(phb, off & 0x3, size);
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || off & 0x7) {
+        DBG_ERR(phb, "Invalid register access, offset: 0x%x size: %d",
+                (unsigned int)off, size);
+        return ~0ull;
+    }
+
+    /* Default read from cache */
+    val = phb->regs[off >> 3];
+
+    switch(off) {
+    /* Simulate venice DD2.0 */
+    case PHB_VERSION:
+        return 0x000000a300000005ull;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        return pnv_phb3_ioda_read(phb);
+
+    /* Link training always appears trained */
+    case PHB_PCIE_DLP_TRAIN_CTL:
+        return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT;
+
+    /* FFI Lock */
+    case PHB_FFI_LOCK:
+        /* Set lock and return previous value */
+        phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE;
+        return val;
+
+    /* Silent simple reads */
+    case PHB_PHB3_CONFIG:
+    case PHB_M32_BASE_ADDR:
+    case PHB_M32_BASE_MASK:
+    case PHB_M32_START_ADDR:
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_RTC_INVALIDATE:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_RBA_BAR:
+    case PHB_IVT_BAR:
+    case PHB_M64_UPPER_BITS:
+        break;
+
+    /* Without DISPLAY_UNIMPLENTED_REG there is no default case: any other
+     * register silently reads back its cached value.
+     */
+#ifdef DISPLAY_UNIMPLENTED_REG
+    /* Noise on anything else */
+    default:
+        DBG_ERR(phb, "reg_read 0x%x=%016llx",
+                (unsigned int)off, (unsigned long long)val);
+#endif
+    }
+    return val;
+}
+
+/*
+ * Access callbacks for the PHB3 register region.  Accesses of 1 to 8
+ * bytes are let through to the handlers, which do their own size and
+ * alignment checking (only PHB_CONFIG_DATA accepts sub-64-bit accesses).
+ */
+static const MemoryRegionOps pnv_phb3_reg_ops = {
+    .read = pnv_phb3_reg_read,
+    .write = pnv_phb3_reg_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * PCI core callback mapping an interrupt pin of @pci_dev to a PHB
+ * interrupt: the pin number is simply reduced modulo 4 (the device is
+ * not taken into account).  TODO: check that this is the right mapping.
+ */
+static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    int lsi = irq_num & 3;
+
+    return lsi;
+}
+
+/*
+ * PCI core callback raising or lowering one of the PHB's level-sensitive
+ * interrupts.
+ *
+ * @opaque:  the PnvPhb3State
+ * @irq_num: LSI number as produced by pnv_phb3_map_irq(), 0..3
+ * @level:   new line level
+ *
+ * Out-of-range interrupt numbers are reported and ignored rather than
+ * being used to index the qirqs array.
+ */
+static void pnv_phb3_set_irq(void *opaque, int irq_num, int level)
+{
+    PnvPhb3State *phb = opaque;
+
+    /* LSI only ... */
+    if (irq_num < 0 || irq_num > 3) {
+        DBG_ERR(phb, "Unknown IRQ to set %d", irq_num);
+        return;
+    }
+    qemu_set_irq(phb->lsi_ics->qirqs[irq_num], level);
+}
+
+/*
+ * Make sure the PE number of a DMA space is known.
+ *
+ * If ds->pe_num is not already cached, the 16-bit big endian RTT entry
+ * for the device (indexed by bus number and devfn) is read from guest
+ * memory at the address held in PHB_RTT_BAR and stored in ds->pe_num.
+ *
+ * Returns true when ds->pe_num is valid, false when the RTT BAR is
+ * disabled, the entry cannot be read, or it holds an out of range PE#.
+ */
+static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds)
+{
+    uint64_t rtt, addr;
+    uint16_t rte;
+    int bus_num;
+
+    /* Already resolved ? */
+    if (ds->pe_num != PHB_INVALID_PE) {
+        return true;
+    }
+
+    /* We need to lookup the RTT.
+     * NOTE(review): the enable test below uses the RBA BAR enable bit on
+     * the RTT BAR value - presumably both bits sit at the same position;
+     * confirm against the register definitions and switch to the RTT
+     * specific name if one exists.
+     */
+    rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+    if (!(rtt & PHB_RBA_BAR_ENABLE)) {
+        DBG_ERR(ds->phb, "DMA with RTT BAR disabled !");
+        // Set error bits ? fence ? ...
+        return false;
+    }
+
+    /* Read RTE: entries are 2 bytes each, indexed by (bus << 8) | devfn */
+    bus_num = pci_bus_num(ds->bus);
+    addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+    addr += 2 * ((bus_num << 8) | ds->devfn);
+    if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+        DBG_ERR(ds->phb, "Failed to read RTT entry at %016llx",
+                (unsigned long long)addr);
+        // Set error bits ? fence ? ...
+        return false;
+    }
+    rte = be16_to_cpu(rte);
+
+    /* Fail upon reading of invalid PE#.
+     * NOTE(review): the message labels the value "RID" but only the
+     * devfn is printed, not the bus number.
+     */
+    if (rte >= PHB_NUM_PE) {
+        DBG_ERR(ds->phb, "RTE for RID 0x%x invalid (%04x)", ds->devfn, rte);
+        // Set error bits ? fence ? ...
+        return false;
+    }
+    ds->pe_num = rte;
+    return true;
+}
+
+/*
+ * Translate one DMA address through a TVE (translation validation entry).
+ *
+ * @ds:       DMA space the access belongs to (used for logging only here)
+ * @addr:     DMA address to translate
+ * @is_write: true for a write access
+ * @tve:      the TVE selected by the caller
+ * @tlb:      entry to fill in; left untouched on failure, so the caller's
+ *            defaults apply
+ *
+ * A TVE with an IO page size of 0 describes an untranslated window and is
+ * mapped 1:1 in 4K pages.  Otherwise the TCE table (1 to 5 levels) is
+ * walked in guest memory and the final TCE provides the target page and
+ * the permissions.
+ */
+static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
+                                   bool is_write, uint64_t tve,
+                                   IOMMUTLBEntry *tlb)
+{
+    uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve);
+    int32_t  lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve);
+    uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve);
+    uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve);
+
+    DBG_DMA(ds->phb, "xlate %016llx:%c TVE=%016llx",
+            (unsigned long long)addr, is_write ? 'W' : 'R',
+            (unsigned long long)tve);
+
+    DBG_DMA(ds->phb, " tta=%016llx lev=%d tts=%d tps=%d",
+            (unsigned long long)tta, lev, tts, tps);
+
+    /* Invalid levels */
+    if (lev > 4) {
+        DBG_ERR(ds->phb, "Invalid #levels in TVE %d", lev);
+        return;
+    }
+
+    /* IO Page Size of 0 means untranslated, else use TCEs */
+    if (tps == 0) {
+        /* We only support non-translate in top window
+         * XXX FIX THAT, Venice/Murano support it on bottom window
+         * above 4G and Naples supports it on everything
+         */
+        if (!(tve & PPC_BIT(51))) {
+            DBG_ERR(ds->phb, "xlate for invalid non-translate TVE");
+            return;
+        }
+        /* XXX Handle boundaries */
+
+        /* XXX Use 4k pages like q35 ... for now */
+        DBG_DMA(ds->phb, " non-translate ok");
+        tlb->iova = addr & 0xfffffffffffff000ull;
+        tlb->translated_addr = addr & 0x0003fffffffff000ull;
+        tlb->addr_mask = 0xfffull;
+        tlb->perm = IOMMU_RW;
+    } else {
+        uint32_t tce_shift, tbl_shift, sh;
+        uint64_t base, taddr, tce, tce_mask, tbl_idx;
+
+        /* TVE disabled ? */
+        if (tts == 0) {
+            DBG_ERR(ds->phb, "xlate for invalid translated TVE");
+            return;
+        }
+
+        /* Address bits per bottom level TCE entry */
+        tce_shift = tps + 11;
+
+        /* Address bits per table level */
+        tbl_shift = tts + 8;
+
+        /* Top level table base address */
+        base = tta << 12;
+
+        /* Total shift to first level */
+        sh = tbl_shift * lev + tce_shift;
+
+        DBG_DMA(ds->phb, " tce_shift %d tbl_shift %d", tce_shift, tbl_shift);
+
+        /* XXX Multi-level untested */
+        while ((lev--) >= 0) {
+            /*
+             * Index into the current level.  A shift count of 64 or more
+             * is undefined in C; the address has no bits that high, so
+             * the index is 0 in that case.  The mask is built from a
+             * 64-bit constant so it is correct on hosts with 32-bit long.
+             */
+            tbl_idx = (sh < 64) ? (addr >> sh) : 0;
+            tbl_idx &= (1ull << tbl_shift) - 1;
+
+            /* Grab the TCE address (entries are 8 bytes each) */
+            taddr = base | (tbl_idx << 3);
+            if (dma_memory_read(&address_space_memory, taddr, &tce, sizeof(tce))) {
+                DBG_ERR(ds->phb, "Failed to read TCE at 0x%016llx",
+                        (unsigned long long)taddr);
+                return;
+            }
+            tce = be64_to_cpu(tce);
+            DBG_DMA(ds->phb, " lev %d taddr 0x%016llx tce 0x%016llx", lev + 1,
+                    (unsigned long long)taddr, (unsigned long long)tce);
+
+            /* Check permission for indirect TCE (lev was already
+             * decremented, so lev >= 0 means more levels follow)
+             */
+            if ((lev >= 0) && !(tce & 3)) {
+                DBG_ERR(ds->phb, "Invalid indirect TCE at 0x%016llx",
+                        (unsigned long long)taddr);
+                DBG_ERR(ds->phb, " xlate %016llx:%c TVE=%016llx",
+                        (unsigned long long)addr, is_write ? 'W' : 'R',
+                        (unsigned long long)tve);
+                DBG_ERR(ds->phb, " tta=%016llx lev=%d tts=%d tps=%d",
+                        (unsigned long long)tta, lev, tts, tps);
+                return;
+            }
+            sh -= tbl_shift;
+            base = tce & ~0xfffull;
+        }
+
+        /* We exit the loop with TCE being the final TCE */
+        tce_mask = ~((1ull << tce_shift) - 1);
+        tlb->iova = addr & tce_mask;
+        tlb->translated_addr = tce & tce_mask;
+        tlb->addr_mask = ~tce_mask;
+        tlb->perm = tce & 3;
+        /* Bit 1 of the TCE grants write access, bit 0 read access; a
+         * violation is only reported here, the permissions returned in
+         * the TLB entry are what denies the access.
+         */
+        if ((is_write && !(tce & 2)) || ((!is_write) && !(tce & 1))) {
+            DBG_ERR(ds->phb, "TCE access fault at 0x%016llx",
+                    (unsigned long long)taddr);
+            DBG_ERR(ds->phb, " xlate %016llx:%c TVE=%016llx",
+                    (unsigned long long)addr, is_write ? 'W' : 'R',
+                    (unsigned long long)tve);
+            DBG_ERR(ds->phb, " tta=%016llx lev=%d tts=%d tps=%d",
+                    (unsigned long long)tta, lev, tts, tps);
+        }
+    }
+}
+
+/*
+ * IOMMU translate callback for a PHB3 DMA space.
+ *
+ * The returned entry defaults to "no access" over the whole address
+ * space and is only filled in when the address falls in the DMA range
+ * (top four address bits zero), is not inside the enabled 32-bit MSI
+ * window, and the TVE selected by address bit 59 translates it.
+ */
+static IOMMUTLBEntry pnv_phb3_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+                                              bool is_write)
+{
+    PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr);
+    IOMMUTLBEntry entry = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE,
+    };
+    uint64_t phb_cfg, tve;
+    unsigned region;
+    int tve_sel;
+
+    /* The PE number is needed to pick the TVE */
+    if (!pnv_phb3_resolve_pe(ds)) {
+        DBG_ERR(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+                ds->bus, pci_bus_num(ds->bus), ds->devfn);
+        return entry;
+    }
+
+    DBG_DMA(ds->phb, "xlate bus @%p (%d) devfn 0x%x PE %d ds @%p",
+            ds->bus, pci_bus_num(ds->bus),ds->devfn, ds->pe_num, ds);
+
+    /* The top four address bits select the kind of access */
+    region = addr >> 60;
+    if (region == 0) {
+        /* DMA, unless it hits the enabled 32-bit MSI window */
+        phb_cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+        if ((phb_cfg & PHB_PHB3C_32BIT_MSI_EN) &&
+            ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+            DBG_ERR(ds->phb, "xlate on 32-bit MSI region");
+            return entry;
+        }
+
+        /* Two TVEs per PE, chosen by address bit 59.
+         * XXX Use PHB3 Control Register
+         */
+        tve_sel = (addr >> 59) & 1;
+        tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
+        DBG_DMA(ds->phb, " TVE_SEL=%d -> %d",
+                tve_sel, ds->pe_num * 2 + tve_sel);
+        pnv_phb3_translate_tve(ds, addr, is_write, tve, &entry);
+    } else if (region == 1) {
+        DBG_ERR(ds->phb, "xlate on 64-bit MSI region");
+    } else {
+        DBG_ERR(ds->phb, "xlate on unsupported address 0x%016llx",
+                (unsigned long long)addr);
+    }
+
+    return entry;
+}
+
+/* IOMMU callbacks installed on the dma_mr region of every DMA space */
+static const MemoryRegionIOMMUOps pnv_phb3_iommu_ops = {
+    .translate = pnv_phb3_translate_iommu,
+};
+
+/*
+ * MSI/MSI-X memory region write handler (shared by both kinds).
+ *
+ * @opaque is the PnvPhb3DMASpace of the device issuing the write.  Once
+ * its PE number is known, the write address and data are handed to the
+ * PHB's MSI source; the access size is not used.  The write is dropped
+ * if the PE number cannot be resolved.
+ */
+static void pnv_phb3_msi_write(void *opaque, hwaddr addr,
+                               uint64_t data, unsigned size)
+{
+    PnvPhb3DMASpace *space = opaque;
+
+    if (pnv_phb3_resolve_pe(space)) {
+        pnv_phb3_msi_send(space->phb->msis, addr, data, space->pe_num);
+        return;
+    }
+
+    DBG_ERR(space->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+            space->bus, pci_bus_num(space->bus), space->devfn);
+}
+
+/* Access callbacks for the 32-bit and 64-bit MSI windows (write only) */
+static const MemoryRegionOps pnv_phb3_msi_ops = {
+    /* There is no .read as the read result is undefined by PCI spec */
+    .read = NULL,
+    .write = pnv_phb3_msi_write,
+    .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+/*
+ * pci_setup_iommu() callback: return the DMA address space of the device
+ * identified by (@bus, @devfn).
+ *
+ * DMA spaces are created on first use and kept on phb->dma_spaces.  A new
+ * space gets an IOMMU region covering the whole 64-bit range plus the two
+ * MSI windows, which are mapped according to the current PHB
+ * configuration.
+ */
+static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    PnvPhb3State *phb = opaque;
+    PnvPhb3DMASpace *space;
+
+    DBG_DMA(phb, "get IOMMU for bus @%p devfn 0x%x", bus, devfn);
+
+    /* Reuse the existing space for this device if there is one */
+    QLIST_FOREACH(space, &phb->dma_spaces, list) {
+        if (space->bus == bus && space->devfn == devfn) {
+            DBG_DMA(phb, " found @%p", space);
+            return &space->dma_as;
+        }
+    }
+
+    /* First request for this device: build a fresh space */
+    space = g_malloc0(sizeof(*space));
+    space->bus = bus;
+    space->devfn = devfn;
+    space->pe_num = PHB_INVALID_PE;
+    space->phb = phb;
+
+    memory_region_init_iommu(&space->dma_mr, OBJECT(phb),
+                             &pnv_phb3_iommu_ops, "phb3_iommu", UINT64_MAX);
+    address_space_init(&space->dma_as, &space->dma_mr, "phb3_iommu");
+
+    memory_region_init_io(&space->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+                          space, "msi32", 0x10000);
+    memory_region_init_io(&space->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+                          space, "msi64", 0x100000);
+    pnv_phb3_update_msi_regions(space);
+
+    QLIST_INSERT_HEAD(&phb->dma_spaces, space, list);
+    DBG_DMA(phb, " created @%p", space);
+
+    return &space->dma_as;
+}
+
+/* Class init for the PHB3 root bus: it accepts a single device only. */
+static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *bus_class = BUS_CLASS(klass);
+
+    bus_class->max_dev = 1;
+}
+
+/* QOM type of the bus directly below the PHB3 (a PCIe bus subtype) */
+#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root-bus"
+
+static const TypeInfo pnv_phb3_root_bus_info = {
+    .name = TYPE_PNV_PHB3_ROOT_BUS,
+    .parent = TYPE_PCIE_BUS,
+    .class_init = pnv_phb3_root_bus_class_init,
+};
+
+/*
+ * Instance init: start with an empty DMA space list and create the LSI
+ * interrupt source as the "ics" child object.  The source starts at
+ * offset 0 with PHB_NUM_LSI interrupts; the offset is set up later when
+ * the interrupt registers get programmed.
+ */
+static void pnv_phb3_initfn(Object *obj)
+{
+    PnvPhb3State *phb = PNV_PHB3(obj);
+
+    QLIST_INIT(&phb->dma_spaces);
+
+    phb->lsi_ics = ICS(object_new(TYPE_ICS_SIMPLE));
+    object_property_add_child(OBJECT(phb), "ics", OBJECT(phb->lsi_ics), NULL);
+
+    /* Defaults only, to be overridden by the HW init sequence */
+    phb->lsi_ics->nr_irqs = PHB_NUM_LSI;
+    phb->lsi_ics->offset = 0;
+}
+
+/*
+ * Realize the PHB3: set up its memory regions, realize the LSI interrupt
+ * source and register the root PCI bus together with its IOMMU hook.
+ *
+ * On failure to realize the LSI source the error is propagated through
+ * @errp and the bus is not created.
+ */
+static void pnv_phb3_realize(DeviceState *dev, Error **errp)
+{
+    PnvPhb3State *phb = PNV_PHB3(dev);
+    PCIHostState *host = PCI_HOST_BRIDGE(dev);
+    Error *local_err = NULL;
+    int lsi;
+
+    memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio",
+                       PCI_MMIO_TOTAL_SIZE);
+
+    /* PHB3 has no IO space, but qemu wants an IO region to anchor IO
+     * BARs onto, so create one that is never hooked up to anything.
+     */
+    memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000);
+
+    memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb,
+                          "phb3-regs", 0x1000);
+
+    /* Attach the LSI source to the XICS and realize it */
+    xics_add_ics(phb->xics, phb->lsi_ics);
+    object_property_set_bool(OBJECT(phb->lsi_ics), true, "realized",
+                             &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Every interrupt of this source is level sensitive */
+    for (lsi = 0; lsi < PHB_NUM_LSI; lsi++) {
+        ics_simple_set_irq_type(phb->lsi_ics, lsi, true);
+    }
+
+    host->bus = pci_register_bus(dev, "phb3-root-bus",
+                                 pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
+                                 &phb->pci_mmio, &phb->pci_io,
+                                 0, 4, TYPE_PNV_PHB3_ROOT_BUS);
+    host->bus->devfn_max = 1;
+    pci_setup_iommu(host->bus, pnv_phb3_dma_iommu, phb);
+}
+
+/*
+ * Re-evaluate the mapping of the PHB register region inside the PBCQ
+ * PHB BAR, then refresh the M32 window if it is currently mapped.
+ */
+void pnv_phb3_update_regions(PnvPhb3State *phb)
+{
+    PnvPBCQState *pbcq = phb->pbcq;
+
+    /* Always start from the unmapped state */
+    if (phb->regs_mapped) {
+        memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs);
+        phb->regs_mapped = false;
+    }
+
+    /* Registers sit at offset 0 of the PHB BAR when it is enabled.
+     * XXX We should use the PHB BAR 2 register but we don't ...
+     */
+    if (pbcq->phb_mapped) {
+        memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs);
+        phb->regs_mapped = true;
+    }
+
+    /* An already mapped M32 window may need to move */
+    if (phb->m32_mapped) {
+        pnv_phb3_check_m32(phb);
+    }
+}
+
+/* Class init for the PHB3 device: only the realize hook is installed. */
+static void pnv_phb3_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->realize = pnv_phb3_realize;
+}
+
+/* QOM type description of the PHB3 device (a PCI host bridge subtype) */
+static const TypeInfo pnv_phb3_type_info = {
+    .name = TYPE_PNV_PHB3,
+    .parent = TYPE_PCI_HOST_BRIDGE,
+    .instance_size = sizeof(PnvPhb3State),
+    .class_init = pnv_phb3_class_init,
+    .instance_init = pnv_phb3_initfn,
+};
+
+/* Register the PHB3 device type followed by its root bus type. */
+static void pnv_phb3_register_types(void)
+{
+    static const TypeInfo *const phb3_types[] = {
+        &pnv_phb3_type_info,
+        &pnv_phb3_root_bus_info,
+    };
+    unsigned int t;
+
+    for (t = 0; t < sizeof(phb3_types) / sizeof(phb3_types[0]); t++) {
+        type_register_static(phb3_types[t]);
+    }
+}
+
+type_init(pnv_phb3_register_types)
+
+/*
+ * Create PHB number @idx of @chip.
+ *
+ * This instantiates, in order: the PBCQ bridge unit on the chip's XSCOM
+ * bus, the PHB3 host bridge itself (cross-linked with the PBCQ and
+ * given @xics), the MSI source, and the root complex device on the PHB
+ * bus.  The secondary bus of the root complex is recorded in
+ * chip->phb[idx].
+ */
+void pnv_phb3_create(PnvChip *chip, XICSState *xics, uint32_t idx)
+{
+    struct DeviceState *dev;
+    PnvPhb3State *phb;
+    PnvPBCQState *pbcq;
+    PCIHostState *pcih;
+    PCIDevice *pdev;
+    PCIBridge *bdev;
+    uint8_t chassis;
+
+    /* Chassis number for the root complex; note it is held in 8 bits */
+    chassis = chip->chip_id * 4 + idx;
+
+    /* Create PBCQ */
+    dev = qdev_create(&chip->xscom->bus, TYPE_PNV_PBCQ);
+    qdev_prop_set_uint32(dev, "phb_id", idx);
+    qdev_init_nofail(dev);
+    pbcq = PNV_PBCQ(dev);
+
+    /* Create PHB3; the links must be in place before it is realized
+     * since realize uses phb->xics
+     */
+    dev = qdev_create(NULL, TYPE_PNV_PHB3);
+    phb = PNV_PHB3(dev);
+    phb->pbcq = pbcq;
+    pbcq->phb = phb;
+    phb->xics = xics;
+    qdev_init_nofail(dev);
+    pcih = PCI_HOST_BRIDGE(phb);
+
+    /* Create MSI source */
+    phb->msis = pnv_phb3_msi_create(phb);
+
+    /* Add root complex */
+    pdev = pci_create_multifunction(pcih->bus, 0, false, TYPE_PNV_PHB3_RC);
+    qdev_prop_set_uint8(&pdev->qdev, "chassis", chassis);
+    qdev_prop_set_uint16(&pdev->qdev, "slot", 1);
+    qdev_init_nofail(&pdev->qdev);
+    bdev = PCI_BRIDGE(pdev);
+
+    /* Setup bus for that chip */
+    chip->phb[idx] = pci_bridge_get_sec_bus(bdev);
+}
diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c
new file mode 100644
index 0000000..fa6a2d1
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_msi.c
@@ -0,0 +1,338 @@
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/msi.h"
+#include "hw/ppc/xics.h"
+
+#define PHB3_MAX_MSI     2048
+
+//#define DEBUG_MSI       1
+#define DEBUG_MSI     0
+
+typedef struct Phb3MsiState {
+    struct ICSState ics;
+    PnvPhb3State *phb;             /* PHB this MSI source belongs to */
+    uint64_t rba[PHB3_MAX_MSI/64]; /* Rejected sources, one bit per MSI */
+    uint32_t rba_sum;              /* One bit per rba[] word holding rejects */
+} Phb3MsiState;
+
+#define TYPE_PHB3_MSI "phb3-msi"
+#define PHB3_MSI(obj) OBJECT_CHECK(Phb3MsiState, (obj), TYPE_PHB3_MSI)
+
+#define DBG_ERR(p, fmt, ...) do {                                       \
+    if (1) fprintf(stderr, "PHB3(%s): " fmt "\n", __func__, ## __VA_ARGS__); \
+    } while(0)
+
+#define DBG_MSI(p, fmt, ...) do {                                       \
+    if (DEBUG_MSI) fprintf(stderr, "PHB3(%s): " fmt "\n", __func__, ## __VA_ARGS__); \
+    } while(0)
+
+
+static void phb3_msi_initfn(Object *obj)
+{
+    Phb3MsiState *msis = PHB3_MSI(obj);
+
+    /* Will be overridden later (see pnv_phb3_msi_update_config()) */
+    msis->ics.offset = 0;
+
+    /* Hard wire 2048, we ignore the fact that 8 of them can be
+     * taken over by LSIs at this point
+     */
+    msis->ics.nr_irqs = PHB3_MAX_MSI;
+}
+
+static uint64_t phb3_msi_ive_addr(Phb3MsiState *msis, int srcno)
+{
+    /* Returns the address of the IVE for 'srcno', or 0 on failure */
+    uint64_t bar = msis->phb->regs[PHB_IVT_BAR >> 3];
+    uint64_t ctl = msis->phb->regs[PHB_CONTROL >> 3];
+    uint64_t nr_ives = bar & PHB_IVT_LENGTH_MASK;
+    int ive_size = (ctl & PHB_CTRL_IVE_128_BYTES) ? 128 : 16;
+
+    /* The table can only be used while the IVT BAR is enabled */
+    if ((bar & PHB_IVT_BAR_ENABLE) == 0) {
+        DBG_ERR(msis->phb, "Failed access to disable IVT BAR !");
+        return 0;
+    }
+
+    /* Refuse sources beyond the table length held in the BAR */
+    if (srcno >= nr_ives) {
+        DBG_ERR(msis->phb, "MSI out of bounds (%d vs %ld)",
+                srcno, (long)nr_ives);
+        return 0;
+    }
+
+    /* Entries are 128 or 16 bytes apart depending on PHB_CONTROL */
+    return (bar & PHB_IVT_BASE_ADDRESS_MASK) + ive_size * srcno;
+}
+
+static bool phb3_msi_read_ive(Phb3MsiState *msis, int srcno, uint64_t *out_ive)
+{
+    /* Fetch the first 8 bytes of the IVE of 'srcno', converted from BE */
+    uint64_t addr = phb3_msi_ive_addr(msis, srcno);
+    uint64_t raw;
+
+    if (addr == 0) {
+        return false;
+    }
+    if (dma_memory_read(&address_space_memory, addr, &raw, sizeof(raw)) != 0) {
+        DBG_ERR(msis->phb, "Failed to read IVE at 0x%016llx",
+                (unsigned long long)addr);
+        return false;
+    }
+    *out_ive = be64_to_cpu(raw);
+    return true;
+}
+
+static void phb3_msi_set_p(Phb3MsiState *msis, int srcno, uint8_t gen)
+{
+    uint64_t ive_addr;
+    uint8_t p = 0x01 | (gen << 1);
+    /* Set the P byte (IVE offset 4) of source 'srcno': P flag plus 'gen' */
+    ive_addr = phb3_msi_ive_addr(msis, srcno);
+    if (!ive_addr) {
+        return;
+    }
+    DBG_MSI(msis->phb, "MSI %d: setting P", srcno);
+    if (dma_memory_write(&address_space_memory, ive_addr + 4, &p, 1)) {
+        DBG_ERR(msis->phb, "Failed to write IVE (set P) at 0x%016llx",
+                (unsigned long long)ive_addr);
+    }
+#if DEBUG_MSI /* Always defined (0 or 1): test the value, not existence */
+    {
+        uint64_t ive = 0;
+        /* Debug aid: read the IVE back so the effect of the write shows */
+        if (dma_memory_read(&address_space_memory, ive_addr, &ive, 8)) {
+            DBG_ERR(msis->phb, "Failed to read IVE (set P) at 0x%016llx",
+                    (unsigned long long)ive_addr);
+        }
+        DBG_MSI(msis->phb, " IVE readback: %016llx",
+                (unsigned long long)be64_to_cpu(ive));
+    }
+#endif
+}
+
+static void phb3_msi_set_q(Phb3MsiState *msis, int srcno)
+{
+    uint64_t ive_addr;
+    uint8_t q = 0x01;
+    /* Set the Q byte (IVE offset 5) of source 'srcno' */
+    ive_addr = phb3_msi_ive_addr(msis, srcno);
+    if (!ive_addr) {
+        return;
+    }
+    DBG_MSI(msis->phb, "MSI %d: setting Q", srcno);
+    if (dma_memory_write(&address_space_memory, ive_addr + 5, &q, 1)) {
+        DBG_ERR(msis->phb, "Failed to write IVE (set Q) at 0x%016llx",
+                (unsigned long long)ive_addr);
+    }
+#if DEBUG_MSI /* Always defined (0 or 1): test the value, not existence */
+    {
+        uint64_t ive = 0;
+        /* Debug aid: read the IVE back so the effect of the write shows */
+        if (dma_memory_read(&address_space_memory, ive_addr, &ive, 8)) {
+            DBG_ERR(msis->phb, "Failed to read IVE (set Q) at 0x%016llx",
+                    (unsigned long long)ive_addr);
+        }
+        DBG_MSI(msis->phb, " IVE readback: %016llx",
+                (unsigned long long)be64_to_cpu(ive));
+    }
+#endif
+}
+
+static void phb3_msi_try_send(Phb3MsiState *msis, int srcno, bool ignore_p)
+{
+    /* Deliver source 'srcno' according to the P/Q bits of its IVE;
+     * with 'ignore_p' set, P is treated as clear.
+     */
+    uint64_t entry;
+    uint64_t target, priority, state, generation;
+
+    if (!phb3_msi_read_ive(msis, srcno, &entry)) {
+        return;
+    }
+
+    /* Effective state: Q in bit 0, P (unless ignored) in bit 1 */
+    state = GETFIELD(IODA2_IVT_Q, entry);
+    if (!ignore_p) {
+        state |= GETFIELD(IODA2_IVT_P, entry) << 1;
+    }
+    target = GETFIELD(IODA2_IVT_SERVER, entry);
+    priority = GETFIELD(IODA2_IVT_PRIORITY, entry);
+    generation = GETFIELD(IODA2_IVT_GEN, entry);
+
+    DBG_MSI(msis->phb, "MSI %d: try_send, ive=0x%016llx eff pq=%d", srcno,
+            (unsigned long long)entry, (int)state);
+
+    if (state == 2) {
+        /* P set, Q clear: already pending, set Q */
+        phb3_msi_set_q(msis, srcno);
+        return;
+    }
+    if (state != 0) {
+        /* Q already set (or any other state): just drop it */
+        return;
+    }
+    if (priority == 0xff) {
+        /* Masked: set Q only */
+        phb3_msi_set_q(msis, srcno);
+        return;
+    }
+    /* Enabled: set P, then send to the target server */
+    phb3_msi_set_p(msis, srcno, generation);
+    icp_irq(&msis->ics, target, srcno + msis->ics.offset, priority);
+}
+
+static void phb3_msi_set_irq(void *opaque, int srcno, int val)
+{
+    /* qemu_irq handler: only a non-zero level triggers a delivery attempt */
+    if (val == 0) {
+        return;
+    }
+    phb3_msi_try_send(opaque, srcno, false);
+}
+
+
+void pnv_phb3_msi_send(Phb3MsiState *msis, uint64_t addr, uint16_t data,
+                       int32_t dev_pe)
+{
+    uint64_t ive;
+    uint16_t pe;
+    uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
+    /* Source number: address bits 4..19 OR-ed with the low 5 bits of data */
+    if (src >= msis->ics.nr_irqs) {
+        DBG_ERR(msis->phb, "MSI %d out of bounds", src);
+        return;
+    }
+    if (dev_pe >= 0) { /* A negative dev_pe skips the PE ownership check */
+        if (!phb3_msi_read_ive(msis, src, &ive)) {
+            return;
+        }
+        pe = GETFIELD(IODA2_IVT_PE, ive);
+        if (pe != dev_pe) {
+            DBG_ERR(msis->phb, "MSI %d send by PE#%d but assigned to PE#%d",
+                    src, dev_pe, pe);
+            return;
+        }
+    }
+    qemu_irq_pulse(msis->ics.qirqs[src]);
+
+}
+
+void pnv_phb3_msi_ffi(Phb3MsiState *msis, uint64_t val)
+{
+    /* Emit the interrupt encoded in 'val'; dev_pe of -1 skips the PE check */
+    pnv_phb3_msi_send(msis, val, 0, -1);
+
+    /* Clear FFI lock */
+    msis->phb->regs[PHB_FFI_LOCK >> 3] = 0;
+}
+
+static void phb3_msi_reject(ICSState *ics, uint32_t nr)
+{
+    Phb3MsiState *msis = PHB3_MSI(ics);
+    unsigned int srcno = nr - ics->offset;
+    unsigned int idx = srcno >> 6;
+    uint64_t bit = 1ull << (srcno & 0x3f); /* rba[] words are 64 bits wide */
+
+    assert(srcno < PHB3_MAX_MSI);
+
+    DBG_MSI(msis->phb, "MSI %d rejected", srcno);
+    /* ICS reject hook: latch the source and flag its rba[] word for resend */
+    msis->rba[idx] |= bit;
+    msis->rba_sum |= (1u << idx);
+}
+
+static void phb3_msi_resend(ICSState *ics)
+{
+    Phb3MsiState *msis = PHB3_MSI(ics);
+    unsigned int i, j;
+    /* ICS resend hook: retry every source latched by phb3_msi_reject() */
+    if (msis->rba_sum == 0) {
+        return;
+    }
+
+    DBG_MSI(msis->phb, "MSI resend...");
+
+    for (i = 0; i < 32; i++) {
+        if ((msis->rba_sum & (1u << i)) == 0) {
+            continue;
+        }
+        msis->rba_sum &= ~(1u << i);
+        for (j = 0; j < 64; j++) {
+            if ((msis->rba[i] & (1ull << j)) == 0) {
+                continue;
+            }
+            msis->rba[i] &= ~(1ull << j); /* 64-bit mask keeps the other bits */
+            phb3_msi_try_send(msis, i * 64 + j, true);
+        }
+    }
+}
+
+static void phb3_msi_reset(DeviceState *dev)
+{
+    Phb3MsiState *s = PHB3_MSI(dev);
+    /* Forget every rejected source: clear the summary, then the bit array */
+    s->rba_sum = 0;
+    memset(s->rba, 0, sizeof(s->rba));
+}
+
+void pnv_phb3_msi_update_config(Phb3MsiState *msis, uint32_t base,
+                                uint32_t count)
+{
+    /* Rebase the MSI source range; the count is capped at PHB3_MAX_MSI */
+    uint32_t capped = (count < PHB3_MAX_MSI) ? count : PHB3_MAX_MSI;
+
+    msis->ics.offset = base;
+    msis->ics.nr_irqs = capped;
+}
+
+static void phb3_msi_realize(DeviceState *dev, Error **errp)
+{
+    Phb3MsiState *msis = PHB3_MSI(dev);
+    /* ics.irqs stays NULL; one qemu_irq per possible MSI feeds set_irq */
+    msis->ics.irqs = NULL;
+    msis->ics.qirqs = qemu_allocate_irqs(phb3_msi_set_irq, msis, PHB3_MAX_MSI);
+}
+
+static void phb3_msi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ICSStateClass *isc = ICS_CLASS(klass);
+    /* Install the device realize/reset and the ICS reject/resend callbacks */
+    dc->realize = phb3_msi_realize;
+    dc->reset = phb3_msi_reset;
+    isc->reject = phb3_msi_reject;
+    isc->resend = phb3_msi_resend;
+}
+
+Phb3MsiState *pnv_phb3_msi_create(PnvPhb3State *phb)
+{
+    Phb3MsiState *msis;
+    DeviceState *dev;
+    /* Create the MSI source, link it both ways with 'phb', add it to XICS */
+    dev = qdev_create(NULL, TYPE_PHB3_MSI);
+    msis = PHB3_MSI(dev);
+    msis->phb = phb;
+    phb->msis = msis;
+    xics_add_ics(phb->xics, &msis->ics);
+    qdev_init_nofail(dev);
+
+    return msis;
+}
+
+static const TypeInfo phb3_msi_info = {
+    .name = TYPE_PHB3_MSI,
+    .parent = TYPE_ICS,
+    .instance_size = sizeof(Phb3MsiState),
+    .class_init = phb3_msi_class_init,
+    .class_size = sizeof(ICSStateClass),
+    .instance_init = phb3_msi_initfn,
+};
+
+static void pnv_phb3_msi_register_types(void)
+{
+    type_register_static(&phb3_msi_info);
+}
+
+type_init(pnv_phb3_msi_register_types)
diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c
new file mode 100644
index 0000000..e7006ab
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_pbcq.c
@@ -0,0 +1,314 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright IBM Corp. 2014
+ */
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+
+#include <libfdt.h>
+
+static bool pnv_pbcq_xscom_read(XScomDevice *dev, uint32_t range,
+                                uint32_t offset, uint64_t *out_val)
+{
+    /* XSCOM read handler; 'range' picks the nest (0), PCI (1) or SPCI (2)
+     * register bank, any other range is refused.
+     */
+    PnvPBCQState *s = PNV_PBCQ(dev);
+
+    if (range == 0) {
+        *out_val = s->nest_regs[offset];
+        return true;
+    }
+    if (range == 1) {
+        *out_val = s->pci_regs[offset];
+        return true;
+    }
+    if (range != 2) {
+        return false;
+    }
+    if (offset != PBCQ_SPCI_ASB_DATA) {
+        *out_val = s->spci_regs[offset];
+    } else if (s->phb) {
+        *out_val = pnv_phb3_reg_read(s->phb,
+                                     s->spci_regs[PBCQ_SPCI_ASB_ADDR], 8);
+    } else {
+        *out_val = ~0ull; /* No PHB attached: read as all ones */
+    }
+    return true;
+}
+
+static void pnv_pbcq_update_map(PnvPBCQState *pbcq)
+{
+    uint64_t bar_en = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+    uint64_t bar, mask, size;
+
+    /*
+     * Sync the MMIO0, MMIO1 and PHB register windows in system memory
+     * with the BAR enable register. NOTE: This will really not work
+     * well if those are remapped after the PHB has created its sub
+     * regions. We could do better with resizable regions, but the
+     * stuff below really only happens once early during boot.
+     */
+
+    /* Handle unmaps: drop windows that are mapped but no longer enabled */
+    if (pbcq->mmio0_mapped && !(bar_en & PBCQ_NEST_BAR_EN_MMIO0)) {
+        memory_region_del_subregion(get_system_memory(), &pbcq->mmbar0);
+        pbcq->mmio0_mapped = false;
+    }
+    if (pbcq->mmio1_mapped && !(bar_en & PBCQ_NEST_BAR_EN_MMIO1)) {
+        memory_region_del_subregion(get_system_memory(), &pbcq->mmbar1);
+        pbcq->mmio1_mapped = false;
+    }
+    if (pbcq->phb_mapped && !(bar_en & PBCQ_NEST_BAR_EN_PHB)) {
+        memory_region_del_subregion(get_system_memory(), &pbcq->phbbar);
+        pbcq->phb_mapped = false;
+    }
+
+    /* Update PHB if it exists (first pass, after the unmaps) */
+    if (pbcq->phb) {
+        pnv_phb3_update_regions(pbcq->phb);
+    }
+
+    /* Handle maps: create windows that are enabled but not yet mapped */
+    if (!pbcq->mmio0_mapped && (bar_en & PBCQ_NEST_BAR_EN_MMIO0)) {
+        bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14;
+        mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0];
+        size = ((~mask) >> 14) + 1;
+        memory_region_init(&pbcq->mmbar0, OBJECT(pbcq), "pbcq-mmio0", size);
+        memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar0);
+        pbcq->mmio0_mapped = true;
+        pbcq->mmio0_base = bar;
+        pbcq->mmio0_size = size;
+    }
+    if (!pbcq->mmio1_mapped && (bar_en & PBCQ_NEST_BAR_EN_MMIO1)) {
+        bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] >> 14;
+        mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1];
+        size = ((~mask) >> 14) + 1;
+        memory_region_init(&pbcq->mmbar1, OBJECT(pbcq), "pbcq-mmio1", size);
+        memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar1);
+        pbcq->mmio1_mapped = true;
+        pbcq->mmio1_base = bar;
+        pbcq->mmio1_size = size;
+    }
+    if (!pbcq->phb_mapped && (bar_en & PBCQ_NEST_BAR_EN_PHB)) {
+        bar = pbcq->nest_regs[PBCQ_NEST_PHB_BAR] >> 14;
+        size = 0x1000;
+        memory_region_init(&pbcq->phbbar, OBJECT(pbcq), "pbcq-phb", size);
+        memory_region_add_subregion(get_system_memory(), bar, &pbcq->phbbar);
+        pbcq->phb_mapped = true;
+    }
+
+    /* Update PHB if it exists (second pass, after the maps) */
+    if (pbcq->phb) {
+        pnv_phb3_update_regions(pbcq->phb);
+    }
+}
+
+static bool pnv_pbcq_xnest_write(PnvPBCQState *pbcq, uint32_t reg, uint64_t val)
+{
+    switch(reg) { /* Mask and latch known nest registers, remap where needed */
+    case PBCQ_NEST_MMIO_BAR0:
+    case PBCQ_NEST_MMIO_BAR1:
+    case PBCQ_NEST_MMIO_MASK0:
+    case PBCQ_NEST_MMIO_MASK1:
+        if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] &
+            (PBCQ_NEST_BAR_EN_MMIO0 |
+             PBCQ_NEST_BAR_EN_MMIO1)) {
+            printf("WARNING: PH3: Changing enabled BAR unsupported\n");
+        }
+        pbcq->nest_regs[reg] = val & 0xffffffffc0000000ull;
+        return true;
+    case PBCQ_NEST_PHB_BAR:
+        if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & PBCQ_NEST_BAR_EN_PHB) {
+            printf("WARNING: PH3: Changing enabled BAR unsupported\n");
+        }
+        pbcq->nest_regs[reg] = val & 0xfffffffffc000000ull;
+        return true;
+    case PBCQ_NEST_BAR_EN:
+        pbcq->nest_regs[reg] = val & 0xf800000000000000ull;
+        pnv_pbcq_update_map(pbcq);
+        pnv_phb3_remap_irqs(pbcq->phb); /* NOTE(review): phb not NULL-checked */
+        return true;
+    case PBCQ_NEST_IRSN_COMPARE:
+    case PBCQ_NEST_IRSN_MASK:
+        pbcq->nest_regs[reg] = val & PBCQ_NEST_IRSN_COMP_MASK;
+        pnv_phb3_remap_irqs(pbcq->phb);
+        return true;
+    case PBCQ_NEST_LSI_SRC_ID:
+        pbcq->nest_regs[reg] = val & PBCQ_NEST_LSI_SRC_MASK;
+        pnv_phb3_remap_irqs(pbcq->phb);
+        return true;
+    }
+
+    /* XXX Don't error out on other regs for now ... */
+    return true;
+}
+
+static bool pnv_pbcq_xpci_write(PnvPBCQState *pbcq, uint32_t reg, uint64_t val)
+{
+    /* PCI-bank XSCOM write: only BAR2 is handled, its low 26 bits are
+     * forced to zero before the mappings are refreshed.
+     */
+    if (reg == PBCQ_PCI_BAR2) {
+        pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull;
+        pnv_pbcq_update_map(pbcq);
+    }
+    /* XXX Don't error out on other regs for now ... */
+    return true;
+}
+
+static bool pnv_pbcq_xspci_write(PnvPBCQState *pbcq, uint32_t reg, uint64_t val)
+{
+    /* SPCI-bank XSCOM write; every register is accepted, handled or not */
+    if (reg == PBCQ_SPCI_ASB_ADDR) {
+        /* Only the low 12 bits of the address are kept */
+        pbcq->spci_regs[reg] = val & 0xfff;
+    } else if (reg == PBCQ_SPCI_ASB_STATUS) {
+        /* Bits written as 1 are cleared in the status register */
+        pbcq->spci_regs[reg] &= ~val;
+    } else if (reg == PBCQ_SPCI_ASB_DATA && pbcq->phb) {
+        /* Indirect 8-byte write to the PHB register selected by ASB_ADDR */
+        pnv_phb3_reg_write(pbcq->phb, pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR],
+                           val, 8);
+    }
+    /* Not handled yet:
+     *   PBCQ_SPCI_AIB_CAPP_EN
+     *   PBCQ_SPCI_CAPP_SEC_TMR
+     */
+
+    /* XXX Don't error out on other regs for now ... */
+    return true;
+}
+
+static bool pnv_pbcq_xscom_write(XScomDevice *dev, uint32_t range,
+                                 uint32_t offset, uint64_t val)
+{
+    PnvPBCQState *s = PNV_PBCQ(dev);
+    /* XSCOM write handler: route to the bank selected by 'range' */
+    if (range == 0) {
+        return pnv_pbcq_xnest_write(s, offset, val);
+    }
+    if (range == 1) {
+        return pnv_pbcq_xpci_write(s, offset, val);
+    }
+    if (range == 2) {
+        return pnv_pbcq_xspci_write(s, offset, val);
+    }
+    return false;
+}
+
+static void pnv_pbcq_default_bars(PnvPBCQState *pbcq)
+{
+    uint64_t mm0, mm1, reg;
+    /* Default window addresses, offset by chip ID and PHB index */
+    mm0 = 0x3d00000000000ull +
+            0x4000000000ull * pbcq->chip_id +
+            0x1000000000ull * pbcq->phb_id;
+    mm1 = 0x3ff8000000000ull +
+            0x0200000000ull * pbcq->chip_id +
+            0x0080000000ull * pbcq->phb_id;
+    reg = 0x3fffe40000000ull +
+            0x0000400000ull * pbcq->chip_id +
+            0x0000100000ull * pbcq->phb_id;
+    /* BAR and mask registers hold their values shifted left by 14 */
+    pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] = mm0 << 14;
+    pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] = mm1 << 14;
+    pbcq->nest_regs[PBCQ_NEST_PHB_BAR] = reg << 14;
+    pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14;
+    pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1] = 0x3ffff80000000ull << 14;
+    pbcq->pci_regs[PBCQ_PCI_BAR2] = reg << 14;
+}
+
+static void pnv_pbcq_realize(DeviceState *dev, Error **errp)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(dev);
+    XScomBus *xb = XSCOM_BUS(dev->parent_bus);
+    XScomDevice *xd = XSCOM_DEVICE(dev);
+    /* Only PHB indexes 0..3 are accepted (set via the "phb_id" property) */
+    assert(pbcq->phb_id < 4);
+
+    /* Copy chip ID over for ease of access */
+    pbcq->chip_id = xb->chip_id;
+
+    /* Calculate the XSCOM base of each register bank from the PHB index */
+    pbcq->nest_xbase = 0x02012000 + 0x400 * pbcq->phb_id;
+    pbcq->pci_xbase  = 0x09012000 + 0x400 * pbcq->phb_id;
+    pbcq->spci_xbase = 0x09013c00 + 0x040 * pbcq->phb_id;
+    xd->ranges[0].addr = pbcq->nest_xbase;
+    xd->ranges[0].size = PBCQ_NEST_REGS_COUNT;
+    xd->ranges[1].addr = pbcq->pci_xbase;
+    xd->ranges[1].size = PBCQ_PCI_REGS_COUNT;
+    xd->ranges[2].addr = pbcq->spci_xbase;
+    xd->ranges[2].size = PBCQ_SPCI_REGS_COUNT;
+
+    /* XXX Fix OPAL to do that: establish default BAR values */
+    pnv_pbcq_default_bars(pbcq);
+}
+
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+
+static int pnv_pbcq_devnode(XScomDevice *dev, void *fdt)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(dev);
+    /* Device tree hook: publish the PHB index as "ibm,phb-index" */
+    _FDT((fdt_property_cell(fdt, "ibm,phb-index", pbcq->phb_id)));
+
+    return 0;
+}
+
+static Property pnv_pbcq_properties[] = {
+        /* Index of the PHB behind this PBCQ; realize asserts it is below 4 */
+        DEFINE_PROP_UINT32("phb_id", PnvPBCQState, phb_id, 0),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_pbcq_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XScomDeviceClass *k = XSCOM_DEVICE_CLASS(klass);
+    static const char *compat[] = { "ibm,power8-pbcq", NULL };
+    /* XSCOM access hooks plus the dt_name/dt_compatible strings of the node */
+    k->devnode = pnv_pbcq_devnode;
+    k->read = pnv_pbcq_xscom_read;
+    k->write = pnv_pbcq_xscom_write;
+    k->dt_name = "pbcq";
+    k->dt_compatible = compat;
+
+    dc->realize = pnv_pbcq_realize;
+    dc->props = pnv_pbcq_properties;
+}
+
+static const TypeInfo pnv_pbcq_type_info = {
+    .name          = TYPE_PNV_PBCQ,
+    .parent        = TYPE_XSCOM_DEVICE,
+    .instance_size = sizeof(PnvPBCQState),
+    .class_init    = pnv_pbcq_class_init,
+};
+
+static void pnv_pbcq_register_types(void)
+{
+    type_register_static(&pnv_pbcq_type_info);
+}
+
+type_init(pnv_pbcq_register_types)
diff --git a/hw/pci-host/pnv_phb3_rc.c b/hw/pci-host/pnv_phb3_rc.c
new file mode 100644
index 0000000..27c33fb
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_rc.c
@@ -0,0 +1,132 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright IBM Corp. 2014
+ */
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pcie_port.h"
+
+static void pnv_phb3_rc_write_config(PCIDevice *d,
+                                     uint32_t address, uint32_t val, int len)
+{
+    uint32_t root_cmd =
+        pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND);
+    /* root_cmd holds the pre-write value, as passed to the AER root hook */
+    pci_bridge_write_config(d, address, val, len);
+    pcie_cap_slot_write_config(d, address, val, len);
+    pcie_aer_write_config(d, address, val, len);
+    pcie_aer_root_write_config(d, address, val, len, root_cmd);
+}
+
+static void pnv_phb3_rc_reset(DeviceState *qdev)
+{
+    PCIDevice *d = PCI_DEVICE(qdev);
+    /* Reset the PCIe capability pieces and AER root state, then the bridge */
+    pcie_cap_root_reset(d);
+    pcie_cap_deverr_reset(d);
+    pcie_cap_slot_reset(d);
+    pcie_cap_arifwd_reset(d);
+    pcie_aer_root_reset(d);
+    pci_bridge_reset(qdev);
+    pci_bridge_disable_base_limit(d);
+}
+
+static int pnv_phb3_rc_initfn(PCIDevice *d)
+{
+    PCIEPort *p = PCIE_PORT(d);
+    PCIESlot *s = PCIE_SLOT(d);
+    int rc;
+    /* Set up the bridge, PCIe capability, slot and AER; unwinds on failure */
+    DEVICE(d)->id = "pcie";
+    rc = pci_bridge_initfn(d, TYPE_PCIE_BUS);
+    if (rc < 0) {
+        printf("phb3-rc: pci_bridge_init() error %d !\n", rc);
+        return rc;
+    }
+    /* XXX Make that a property ? Allow for only one device (8 functions) */
+    pci_bridge_get_sec_bus(PCI_BRIDGE(d))->devfn_max = 8;
+
+    pcie_port_init_reg(d);
+
+    rc = pcie_cap_init(d, 0x48, PCI_EXP_TYPE_ROOT_PORT, p->port);
+    if (rc < 0) {
+        printf("phb3-rc: pcie_cap_init() error %d !\n", rc);
+        goto err_bridge;
+    }
+    pcie_cap_arifwd_init(d);
+    pcie_cap_deverr_init(d);
+    pcie_cap_slot_init(d, s->slot);
+    pcie_chassis_create(s->chassis);
+    rc = pcie_chassis_add_slot(s);
+    if (rc < 0) {
+        printf("phb3-rc: pcie_chassis_add_slot() error %d !\n", rc);
+        goto err_pcie_cap;
+    }
+    pcie_cap_root_init(d);
+    rc = pcie_aer_init(d, 0x100);
+    if (rc < 0) {
+        printf("phb3-rc: pcie_aer_init() error %d !\n", rc);
+        goto err_slot;
+    }
+    pcie_aer_root_init(d);
+    return 0;
+    /* Error unwinding: each label undoes the steps completed before it */
+err_slot:
+    pcie_chassis_del_slot(s);
+err_pcie_cap:
+    pcie_cap_exit(d);
+err_bridge:
+    pci_bridge_exitfn(d);
+    return rc;
+}
+
+static void pnv_phb3_rc_exitfn(PCIDevice *d)
+{
+    PCIESlot *s = PCIE_SLOT(d);
+    /* Tear down in the reverse order of pnv_phb3_rc_initfn() */
+    pcie_aer_exit(d);
+    pcie_chassis_del_slot(s);
+    pcie_cap_exit(d);
+    pci_bridge_exitfn(d);
+}
+
+static void pnv_phb3_rc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    /* PCI Express bridge identity and callbacks of the PHB3 root port */
+    k->is_express = 1;
+    k->is_bridge = 1;
+    k->init = pnv_phb3_rc_initfn;
+    k->exit = pnv_phb3_rc_exitfn;
+    k->config_write = pnv_phb3_rc_write_config;
+    k->vendor_id = PCI_VENDOR_ID_IBM;
+    k->device_id = 0x03dc;
+    k->revision = 0;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->desc = "IBM PHB3 PCIE Root Port";
+    dc->reset = pnv_phb3_rc_reset;
+}
+
+static const TypeInfo pnv_phb3_rc_info = {
+    .name          = TYPE_PNV_PHB3_RC,
+    .parent        = TYPE_PCIE_SLOT,
+    .class_init    = pnv_phb3_rc_class_init,
+};
+
+static void pnv_phb3_rc_register_types(void)
+{
+    type_register_static(&pnv_phb3_rc_info);
+}
+
+type_init(pnv_phb3_rc_register_types)
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index ae6efbd..d808802 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -44,9 +44,15 @@
 #include "hw/ppc/xics.h"
 #include "hw/ppc/pnv_xscom.h"
 
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/msi.h"
 #include "hw/isa/isa.h"
 #include "hw/char/serial.h"
 #include "hw/timer/mc146818rtc.h"
+#include "hw/pci-host/pnv_phb3.h"
+
 #include "exec/address-spaces.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
@@ -507,9 +513,11 @@ static void pnv_lpc_irq_handler_cpld(void *opaque, int n, int level)
 }
 
 static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
-                            bool has_lpc, bool has_lpc_irq)
+                            bool has_lpc, bool has_lpc_irq,
+                            unsigned int num_phbs)
 {
     PnvChip *chip = &sys->chips[chip_no];
+    unsigned int i;
 
     if (chip_no >= PNV_MAX_CHIPS) {
             return;
@@ -545,6 +553,11 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no,
 
     /* Create the simplified OCC model */
     pnv_occ_create(chip);
+
+    /* Create the chip's PCI host bridges (PHBs), one per requested index */
+    for (i = 0; i < num_phbs; i++) {
+        pnv_phb3_create(chip, sys->xics, i);
+    }
 }
 
 static void ppc_powernv_init(MachineState *machine)
@@ -568,6 +581,9 @@ static void ppc_powernv_init(MachineState *machine)
     void *fdt;
     int i;
 
+    /* MSIs are supported on this platform */
+    msi_supported = true;
+
     /* Set up Interrupt Controller before we create the VCPUs */
     xics = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
                             XICS_IRQS_POWERNV);
@@ -607,9 +623,9 @@ static void ppc_powernv_init(MachineState *machine)
      */
     sys->num_chips = 1;
 
-    /* Create only one chip for now with an LPC bus
+    /* Create only one chip for now with an LPC bus and one PHB
      */
-    pnv_create_chip(sys, 0, true, false);
+    pnv_create_chip(sys, 0, true, false, 1);
 
     /* Grab chip 0's ISA bus */
     isa_bus = sys->chips[0].lpc_bus;
diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h
new file mode 100644
index 0000000..d70bf8b
--- /dev/null
+++ b/include/hw/pci-host/pnv_phb3.h
@@ -0,0 +1,145 @@
+#ifndef _HW_PNV_PHB3_H
+#define _HW_PNV_PHB3_H
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright IBM Corp. 2014
+ */
+#include "hw/hw.h"
+#include "hw/ppc/pnv.h"
+#include "hw/pci/pci_host.h"
+#include "exec/address-spaces.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "sysemu/cpus.h"
+#include "qom/cpu.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/xics.h"
+
+#define PHB_NUM_M64	16
+#define PHB_NUM_REGS	(0x1000 >> 3)
+#define PHB_NUM_LSI	8
+#define PHB_NUM_PE	256
+
+#define PCI_MMIO_TOTAL_SIZE	(0x1ull << 60)
+
+#define IODA2_PCI_BUS_MAX 256
+
+typedef struct PnvPBCQState PnvPBCQState;
+typedef struct PnvPhb3State PnvPhb3State;
+typedef struct PnvPhb3DMASpace PnvPhb3DMASpace;
+
+/* We don't want to include xics.h here */
+typedef struct XICSState XICSState;
+typedef struct ICSState ICSState;
+
+/* Similarly with pnv_phb3_msi */
+typedef struct Phb3MsiState Phb3MsiState;
+
+/* We have one such address space wrapper per possible device
+ * under the PHB since they need to be assigned statically at
+ * qemu device creation time. The relationship to a PE is done
+ * later dynamically. This means we can potentially create a lot
+ * of these guys. Q35 stores them as some kind of radix tree but
+ * we never really need to do fast lookups so instead we simply
+ * keep a QLIST of them for now, we can add the radix if needed
+ * later on.
+ *
+ * We do cache the PE number to speed things up a bit though.
+ */
+struct PnvPhb3DMASpace {
+    PCIBus *bus;
+    uint8_t devfn;
+    int pe_num;		/* Cached PE number */
+#define PHB_INVALID_PE	(-1)
+    PnvPhb3State *phb;
+    AddressSpace dma_as;
+    MemoryRegion dma_mr;
+    MemoryRegion msi32_mr;
+    MemoryRegion msi64_mr;
+    bool msi32_mapped;
+    bool msi64_mapped;
+    QLIST_ENTRY(PnvPhb3DMASpace) list;
+};
+
+struct PnvPhb3State {
+    PCIHostState parent_obj;
+    MemoryRegion mr_m32;
+    MemoryRegion mr_m64[PHB_NUM_M64];
+    MemoryRegion mr_regs;
+    bool regs_mapped;
+    bool m32_mapped;
+    bool m64_mapped[PHB_NUM_M64];
+    MemoryRegion pci_mmio;
+    MemoryRegion pci_io;
+    uint64_t regs[PHB_NUM_REGS];
+    PnvPBCQState *pbcq;
+    uint64_t ioda_LIST[8];
+    uint64_t ioda_LXIVT[8];
+    uint64_t ioda_TVT[512];
+    uint64_t ioda_M64BT[16];
+    uint64_t ioda_MDT[256];
+    uint64_t ioda_PEEV[4];
+    uint32_t total_irq;
+    XICSState *xics;
+    ICSState *lsi_ics;
+    Phb3MsiState *msis;
+    QLIST_HEAD(, PnvPhb3DMASpace) dma_spaces;
+};
+
+struct PnvPBCQState {
+    XScomDevice xd;
+    uint32_t chip_id;       /* Copied from the XSCOM bus at realize time */
+    uint32_t phb_id;        /* Set through the "phb_id" property */
+    uint32_t nest_xbase;    /* XSCOM base of the nest register bank */
+    uint32_t spci_xbase;    /* XSCOM base of the SPCI register bank */
+    uint32_t pci_xbase;     /* XSCOM base of the PCI register bank */
+    uint64_t nest_regs[PBCQ_NEST_REGS_COUNT];
+    uint64_t spci_regs[PBCQ_SPCI_REGS_COUNT];
+    uint64_t pci_regs[PBCQ_PCI_REGS_COUNT];
+    MemoryRegion mmbar0;    /* Windows controlled by the BAR enable register */
+    MemoryRegion mmbar1;
+    MemoryRegion phbbar;
+    bool mmio0_mapped;      /* Whether the matching window is mapped */
+    bool mmio1_mapped;
+    bool phb_mapped;
+    uint64_t mmio0_base;    /* Base/size recorded when the window is mapped */
+    uint64_t mmio0_size;
+    uint64_t mmio1_base;
+    uint64_t mmio1_size;
+    PnvPhb3State *phb;      /* Attached PHB, may be NULL */
+};
+
+#define TYPE_PNV_PBCQ "pnv-pbcq"
+#define PNV_PBCQ(obj) \
+     OBJECT_CHECK(PnvPBCQState, (obj), TYPE_PNV_PBCQ)
+
+
+#define TYPE_PNV_PHB3 "pnv-phb3"
+#define PNV_PHB3(obj) \
+     OBJECT_CHECK(PnvPhb3State, (obj), TYPE_PNV_PHB3)
+
+#define TYPE_PNV_PHB3_RC "pnv-phb3-rc"
+
+uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size);
+void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size);
+void pnv_phb3_update_regions(PnvPhb3State *phb);
+void pnv_phb3_remap_irqs(PnvPhb3State *phb);
+void pnv_phb3_create(PnvChip *chip, XICSState *xics, uint32_t idx);
+Phb3MsiState *pnv_phb3_msi_create(PnvPhb3State *phb);
+void pnv_phb3_msi_update_config(Phb3MsiState *msis, uint32_t base,
+                                uint32_t count);
+void pnv_phb3_msi_send(Phb3MsiState *msis, uint64_t addr, uint16_t data,
+                       int32_t dev_pe);
+void pnv_phb3_msi_ffi(Phb3MsiState *msis, uint64_t val);
+
+#endif /* _HW_PNV_PHB3_H */
diff --git a/include/hw/pci-host/pnv_phb3_regs.h b/include/hw/pci-host/pnv_phb3_regs.h
new file mode 100644
index 0000000..daad8fc
--- /dev/null
+++ b/include/hw/pci-host/pnv_phb3_regs.h
@@ -0,0 +1,505 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PHB3_REGS_H
+#define __PHB3_REGS_H
+
+#define PPC_BIT(bit)		(0x8000000000000000UL >> (bit))
+#define PPC_BIT32(bit)		(0x80000000UL >> (bit))
+#define PPC_BIT8(bit)		(0x80UL >> (bit))
+#define PPC_BITMASK(bs,be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+#define PPC_BITMASK32(bs,be)	((PPC_BIT32(bs) - PPC_BIT32(be))|PPC_BIT32(bs))
+#define PPC_BITLSHIFT(be)	(63 - (be))
+#define PPC_BITLSHIFT32(be)	(31 - (be))
+
+/* Extract field fname from val */
+#define GETFIELD(fname, val)			\
+	(((val) & fname##_MASK) >> fname##_LSH)
+
+/* Set field fname of oval to fval
+ * NOTE: oval isn't modified, the combined result is returned
+ */
+#define SETFIELD(fname, oval, fval)			\
+	(((oval) & ~fname##_MASK) | \
+	 ((((typeof(oval))(fval)) << fname##_LSH) & fname##_MASK))
+
+/*
+ * PBCQ XSCOM registers
+ */
+
+#define PBCQ_NEST_IRSN_COMPARE	0x1a
+#define PBCQ_NEST_IRSN_COMP_MASK      PPC_BITMASK(0,18)
+#define PBCQ_NEST_IRSN_COMP_LSH       PPC_BITLSHIFT(18)
+#define PBCQ_NEST_IRSN_MASK	0x1b
+#define PBCQ_NEST_LSI_SRC_ID	0x1f
+#define   PBCQ_NEST_LSI_SRC_MASK     PPC_BITMASK(0,7)
+#define   PBCQ_NEST_LSI_SRC_LSH      PPC_BITLSHIFT(7)
+#define PBCQ_NEST_REGS_COUNT	0x46
+#define PBCQ_NEST_MMIO_BAR0	0x40
+#define PBCQ_NEST_MMIO_BAR1	0x41
+#define PBCQ_NEST_PHB_BAR	0x42
+#define PBCQ_NEST_MMIO_MASK0	0x43
+#define PBCQ_NEST_MMIO_MASK1	0x44
+#define PBCQ_NEST_BAR_EN	0x45
+#define   PBCQ_NEST_BAR_EN_MMIO0    PPC_BIT(0)
+#define   PBCQ_NEST_BAR_EN_MMIO1    PPC_BIT(1)
+#define   PBCQ_NEST_BAR_EN_PHB      PPC_BIT(2)
+#define   PBCQ_NEST_BAR_EN_IRSN_RX  PPC_BIT(3)
+#define   PBCQ_NEST_BAR_EN_IRSN_TX  PPC_BIT(4)
+
+#define PBCQ_PCI_REGS_COUNT	0x15
+#define PBCQ_PCI_BAR2		0x0b
+
+#define PBCQ_SPCI_REGS_COUNT	0x5
+#define PBCQ_SPCI_ASB_ADDR	0x0
+#define PBCQ_SPCI_ASB_STATUS	0x1
+#define PBCQ_SPCI_ASB_DATA	0x2
+#define PBCQ_SPCI_AIB_CAPP_EN	0x3
+#define PBCQ_SPCI_CAPP_SEC_TMR	0x4
+
+/*
+ * PHB MMIO registers
+ */
+
+/* PHB Fundamental register set A */
+#define PHB_LSI_SOURCE_ID		0x100
+#define   PHB_LSI_SRC_ID_MASK		PPC_BITMASK(5,12)
+#define   PHB_LSI_SRC_ID_LSH		PPC_BITLSHIFT(12)
+#define PHB_DMA_CHAN_STATUS		0x110
+#define   PHB_DMA_CHAN_ANY_ERR		PPC_BIT(27)
+#define   PHB_DMA_CHAN_ANY_ERR1		PPC_BIT(28)
+#define   PHB_DMA_CHAN_ANY_FREEZE	PPC_BIT(29)
+#define PHB_CPU_LOADSTORE_STATUS	0x120
+#define   PHB_CPU_LS_ANY_ERR		PPC_BIT(27)
+#define   PHB_CPU_LS_ANY_ERR1		PPC_BIT(28)
+#define   PHB_CPU_LS_ANY_FREEZE		PPC_BIT(29)
+#define PHB_DMA_MSI_NODE_ID		0x128
+#define   PHB_DMAMSI_NID_FIXED		PPC_BIT(0)
+#define   PHB_DMAMSI_NID_MASK		PPC_BITMASK(24,31)
+#define   PHB_DMAMSI_NID_LSH		PPC_BITLSHIFT(31)
+#define PHB_CONFIG_DATA			0x130
+#define PHB_LOCK0			0x138
+#define PHB_CONFIG_ADDRESS		0x140
+#define   PHB_CA_ENABLE			PPC_BIT(0)
+#define	  PHB_CA_BUS_MASK		PPC_BITMASK(4,11)
+#define   PHB_CA_BUS_LSH		PPC_BITLSHIFT(11)
+#define   PHB_CA_DEV_MASK		PPC_BITMASK(12,16)
+#define   PHB_CA_DEV_LSH		PPC_BITLSHIFT(16)
+#define   PHB_CA_FUNC_MASK		PPC_BITMASK(17,19)
+#define   PHB_CA_FUNC_LSH		PPC_BITLSHIFT(19)
+#define   PHB_CA_REG_MASK		PPC_BITMASK(20,31)
+#define   PHB_CA_REG_LSH		PPC_BITLSHIFT(31)
+#define   PHB_CA_PE_MASK		PPC_BITMASK(40,47)
+#define   PHB_CA_PE_LSH			PPC_BITLSHIFT(47)
+#define PHB_LOCK1			0x148
+#define PHB_IVT_BAR			0x150
+#define   PHB_IVT_BAR_ENABLE		PPC_BIT(0)
+#define   PHB_IVT_BASE_ADDRESS_MASK	PPC_BITMASK(14,48)
+#define   PHB_IVT_BASE_ADDRESS_LSH	PPC_BITLSHIFT(48)
+#define   PHB_IVT_LENGTH_MASK		PPC_BITMASK(52,63)
+#define   PHB_IVT_LENGTH_ADDRESS_LSH	PPC_BITLSHIFT(63)
+#define PHB_RBA_BAR			0x158
+#define   PHB_RBA_BAR_ENABLE		PPC_BIT(0)
+#define   PHB_RBA_BASE_ADDRESS_MASK	PPC_BITMASK(14,55)
+#define   PHB_RBA_BASE_ADDRESS_LSH	PPC_BITLSHIFT(55)
+#define PHB_PHB3_CONFIG			0x160
+#define   PHB_PHB3C_64B_TCE_EN		PPC_BIT(2)
+#define   PHB_PHB3C_32BIT_MSI_EN	PPC_BIT(8)
+#define   PHB_PHB3C_64BIT_MSI_EN	PPC_BIT(14)
+#define   PHB_PHB3C_M32_EN		PPC_BIT(16)
+#define PHB_RTT_BAR			0x168
+#define   PHB_RTT_BAR_ENABLE		PPC_BIT(0)
+#define   PHB_RTT_BASE_ADDRESS_MASK	PPC_BITMASK(14,46)
+#define   PHB_RTT_BASE_ADDRESS_LSH	PPC_BITLSHIFT(46)
+#define PHB_PELTV_BAR			0x188
+#define   PHB_PELTV_BAR_ENABLE		PPC_BIT(0)
+#define   PHB_PELTV_BASE_ADDRESS_MASK	PPC_BITMASK(14,50)
+#define   PHB_PELTV_BASE_ADDRESS_LSH	PPC_BITLSHIFT(50)
+#define PHB_M32_BASE_ADDR		0x190
+#define PHB_M32_BASE_MASK		0x198
+#define PHB_M32_START_ADDR		0x1a0
+#define PHB_PEST_BAR			0x1a8
+#define   PHB_PEST_BAR_ENABLE		PPC_BIT(0)
+#define   PHB_PEST_BASE_ADDRESS_MASK	PPC_BITMASK(14,51)
+#define   PHB_PEST_BASE_ADDRESS_LSH	PPC_BITLSHIFT(51)
+#define PHB_M64_UPPER_BITS		0x1f0
+#define PHB_INTREP_TIMER		0x1f8
+#define PHB_DMARD_SYNC			0x200
+#define PHB_RTC_INVALIDATE		0x208
+#define   PHB_RTC_INVALIDATE_ALL	PPC_BIT(0)
+#define   PHB_RTC_INVALIDATE_RID_MASK	PPC_BITMASK(16,31)
+#define   PHB_RTC_INVALIDATE_RID_LSH	PPC_BITLSHIFT(31)
+#define PHB_TCE_KILL			0x210
+#define   PHB_TCE_KILL_ALL		PPC_BIT(0)
+#define PHB_TCE_SPEC_CTL		0x218
+#define PHB_IODA_ADDR			0x220
+#define   PHB_IODA_AD_AUTOINC		PPC_BIT(0)
+#define	  PHB_IODA_AD_TSEL_MASK		PPC_BITMASK(11,15)
+#define	  PHB_IODA_AD_TSEL_LSH		PPC_BITLSHIFT(15)
+#define	  PHB_IODA_AD_TADR_MASK		PPC_BITMASK(55,63)
+#define	  PHB_IODA_AD_TADR_LSH		PPC_BITLSHIFT(63)
+#define PHB_IODA_DATA0			0x228
+#define PHB_FFI_REQUEST			0x238
+#define   PHB_FFI_LOCK_CLEAR		PPC_BIT(3)
+#define   PHB_FFI_REQUEST_ISN_MASK	PPC_BITMASK(49,59)
+#define   PHB_FFI_REQUEST_ISN_LSH	PPC_BITLSHIFT(59)
+#define PHB_FFI_LOCK			0x240
+#define   PHB_FFI_LOCK_STATE		PPC_BIT(0)
+#define PHB_XIVE_UPDATE			0x248 /* Broken in DD1 */
+#define PHB_PHB3_GEN_CAP		0x250
+#define PHB_PHB3_TCE_CAP		0x258
+#define PHB_PHB3_IRQ_CAP		0x260
+#define PHB_PHB3_EEH_CAP		0x268
+#define PHB_IVC_INVALIDATE		0x2a0
+#define   PHB_IVC_INVALIDATE_ALL	PPC_BIT(0)
+#define   PHB_IVC_INVALIDATE_SID_MASK	PPC_BITMASK(16,31)
+#define   PHB_IVC_INVALIDATE_SID_LSH	PPC_BITLSHIFT(31)
+#define PHB_IVC_UPDATE			0x2a8
+#define   PHB_IVC_UPDATE_ENABLE_P	PPC_BIT(0)
+#define   PHB_IVC_UPDATE_ENABLE_Q	PPC_BIT(1)
+#define   PHB_IVC_UPDATE_ENABLE_SERVER	PPC_BIT(2)
+#define   PHB_IVC_UPDATE_ENABLE_PRI	PPC_BIT(3)
+#define   PHB_IVC_UPDATE_ENABLE_GEN	PPC_BIT(4)
+#define   PHB_IVC_UPDATE_ENABLE_CON	PPC_BIT(5)
+#define   PHB_IVC_UPDATE_GEN_MATCH_MASK	PPC_BITMASK(6, 7)
+#define   PHB_IVC_UPDATE_GEN_MATCH_LSH	PPC_BITLSHIFT(7)
+#define   PHB_IVC_UPDATE_SERVER_MASK	PPC_BITMASK(8, 23)
+#define   PHB_IVC_UPDATE_SERVER_LSH	PPC_BITLSHIFT(23)
+#define   PHB_IVC_UPDATE_PRI_MASK	PPC_BITMASK(24, 31)
+#define   PHB_IVC_UPDATE_PRI_LSH	PPC_BITLSHIFT(31)
+#define   PHB_IVC_UPDATE_GEN_MASK	PPC_BITMASK(32,33)
+#define   PHB_IVC_UPDATE_GEN_LSH	PPC_BITLSHIFT(33)
+#define   PHB_IVC_UPDATE_P_MASK		PPC_BITMASK(34,34)
+#define   PHB_IVC_UPDATE_P_LSH		PPC_BITLSHIFT(34)
+#define   PHB_IVC_UPDATE_Q_MASK		PPC_BITMASK(35,35)
+#define   PHB_IVC_UPDATE_Q_LSH		PPC_BITLSHIFT(35)
+#define   PHB_IVC_UPDATE_SID_MASK	PPC_BITMASK(48,63)
+#define   PHB_IVC_UPDATE_SID_LSH	PPC_BITLSHIFT(63)
+#define PHB_PAPR_ERR_INJ_CTL		0x2b0
+#define   PHB_PAPR_ERR_INJ_CTL_INB	PPC_BIT(0)
+#define   PHB_PAPR_ERR_INJ_CTL_OUTB	PPC_BIT(1)
+#define   PHB_PAPR_ERR_INJ_CTL_STICKY	PPC_BIT(2)
+#define   PHB_PAPR_ERR_INJ_CTL_CFG	PPC_BIT(3)
+#define   PHB_PAPR_ERR_INJ_CTL_RD	PPC_BIT(4)
+#define   PHB_PAPR_ERR_INJ_CTL_WR	PPC_BIT(5)
+#define   PHB_PAPR_ERR_INJ_CTL_FREEZE	PPC_BIT(6)
+#define PHB_PAPR_ERR_INJ_ADDR		0x2b8
+#define   PHB_PAPR_ERR_INJ_ADDR_MMIO_MASK	PPC_BITMASK(16,63)
+#define   PHB_PAPR_ERR_INJ_ADDR_MMIO_LSH	PPC_BITLSHIFT(63)
+#define PHB_PAPR_ERR_INJ_MASK		0x2c0
+#define   PHB_PAPR_ERR_INJ_MASK_CFG_MASK	PPC_BITMASK(4,11)
+#define   PHB_PAPR_ERR_INJ_MASK_CFG_LSH		PPC_BITLSHIFT(11)
+#define   PHB_PAPR_ERR_INJ_MASK_MMIO_MASK	PPC_BITMASK(16,63)
+#define   PHB_PAPR_ERR_INJ_MASK_MMIO_LSH 	PPC_BITLSHIFT(63)
+#define PHB_ETU_ERR_SUMMARY		0x2c8
+
+/*  UTL registers */
+#define UTL_SYS_BUS_CONTROL		0x400
+#define UTL_STATUS			0x408
+#define UTL_SYS_BUS_AGENT_STATUS	0x410
+#define UTL_SYS_BUS_AGENT_ERR_SEVERITY	0x418
+#define UTL_SYS_BUS_AGENT_IRQ_EN	0x420
+#define UTL_SYS_BUS_BURST_SZ_CONF	0x440
+#define UTL_REVISION_ID			0x448
+#define UTL_BCLK_DOMAIN_DBG1		0x460
+#define UTL_BCLK_DOMAIN_DBG2		0x468
+#define UTL_BCLK_DOMAIN_DBG3		0x470
+#define UTL_BCLK_DOMAIN_DBG4		0x478
+#define UTL_BCLK_DOMAIN_DBG5		0x480
+#define UTL_BCLK_DOMAIN_DBG6		0x488
+#define UTL_OUT_POST_HDR_BUF_ALLOC	0x4c0
+#define UTL_OUT_POST_DAT_BUF_ALLOC	0x4d0
+#define UTL_IN_POST_HDR_BUF_ALLOC	0x4e0
+#define UTL_IN_POST_DAT_BUF_ALLOC	0x4f0
+#define UTL_OUT_NP_BUF_ALLOC		0x500
+#define UTL_IN_NP_BUF_ALLOC		0x510
+#define UTL_PCIE_TAGS_ALLOC		0x520
+#define UTL_GBIF_READ_TAGS_ALLOC	0x530
+#define UTL_PCIE_PORT_CONTROL		0x540
+#define UTL_PCIE_PORT_STATUS		0x548
+#define UTL_PCIE_PORT_ERROR_SEV		0x550
+#define UTL_PCIE_PORT_IRQ_EN		0x558
+#define UTL_RC_STATUS			0x560
+#define UTL_RC_ERR_SEVERITY		0x568
+#define UTL_RC_IRQ_EN			0x570
+#define UTL_EP_STATUS			0x578
+#define UTL_EP_ERR_SEVERITY		0x580
+#define UTL_EP_ERR_IRQ_EN		0x588
+#define UTL_PCI_PM_CTRL1		0x590
+#define UTL_PCI_PM_CTRL2		0x598
+#define UTL_GP_CTL1			0x5a0
+#define UTL_GP_CTL2			0x5a8
+#define UTL_PCLK_DOMAIN_DBG1		0x5b0
+#define UTL_PCLK_DOMAIN_DBG2		0x5b8
+#define UTL_PCLK_DOMAIN_DBG3		0x5c0
+#define UTL_PCLK_DOMAIN_DBG4		0x5c8
+
+/* PCI-E Stack registers */
+#define PHB_PCIE_SYSTEM_CONFIG		0x600
+#define PHB_PCIE_BUS_NUMBER		0x608
+#define PHB_PCIE_SYSTEM_TEST		0x618
+#define PHB_PCIE_LINK_MANAGEMENT	0x630
+#define   PHB_PCIE_LM_LINK_ACTIVE	PPC_BIT(8)
+#define PHB_PCIE_DLP_TRAIN_CTL		0x640
+#define   PHB_PCIE_DLP_TCTX_DISABLE	PPC_BIT(1)
+#define   PHB_PCIE_DLP_TCRX_DISABLED	PPC_BIT(16)
+#define   PHB_PCIE_DLP_INBAND_PRESENCE	PPC_BIT(19)
+#define   PHB_PCIE_DLP_TC_DL_LINKUP	PPC_BIT(21)
+#define   PHB_PCIE_DLP_TC_DL_PGRESET	PPC_BIT(22)
+#define   PHB_PCIE_DLP_TC_DL_LINKACT	PPC_BIT(23)
+#define PHB_PCIE_SLOP_LOOPBACK_STATUS	0x648
+#define PHB_PCIE_SYS_LINK_INIT		0x668
+#define PHB_PCIE_UTL_CONFIG		0x670
+#define PHB_PCIE_DLP_CONTROL		0x678
+#define PHB_PCIE_UTL_ERRLOG1		0x680
+#define PHB_PCIE_UTL_ERRLOG2		0x688
+#define PHB_PCIE_UTL_ERRLOG3		0x690
+#define PHB_PCIE_UTL_ERRLOG4		0x698
+#define PHB_PCIE_DLP_ERRLOG1		0x6a0
+#define PHB_PCIE_DLP_ERRLOG2		0x6a8
+#define PHB_PCIE_DLP_ERR_STATUS		0x6b0
+#define PHB_PCIE_DLP_ERR_COUNTERS	0x6b8
+#define PHB_PCIE_UTL_ERR_INJECT		0x6c0
+#define PHB_PCIE_TLDLP_ERR_INJECT	0x6c8
+#define PHB_PCIE_LANE_EQ_CNTL0		0x6d0
+#define PHB_PCIE_LANE_EQ_CNTL1		0x6d8
+#define PHB_PCIE_LANE_EQ_CNTL2		0x6e0
+#define PHB_PCIE_LANE_EQ_CNTL3		0x6e8
+#define PHB_PCIE_STRAPPING		0x700
+
+/* Fundamental register set B */
+#define PHB_VERSION			0x800
+#define PHB_RESET			0x808
+#define PHB_CONTROL			0x810
+#define   PHB_CTRL_IVE_128_BYTES        PPC_BIT(24)
+#define PHB_AIB_RX_CRED_INIT_TIMER	0x818
+#define PHB_AIB_RX_CMD_CRED		0x820
+#define PHB_AIB_RX_DATA_CRED		0x828
+#define PHB_AIB_TX_CMD_CRED		0x830
+#define PHB_AIB_TX_DATA_CRED		0x838
+#define PHB_AIB_TX_CHAN_MAPPING		0x840
+#define PHB_AIB_TAG_ENABLE		0x858
+#define PHB_AIB_FENCE_CTRL		0x860
+#define PHB_TCE_TAG_ENABLE		0x868
+#define PHB_TCE_WATERMARK		0x870
+#define PHB_TIMEOUT_CTRL1		0x878
+#define PHB_TIMEOUT_CTRL2		0x880
+#define PHB_QUIESCE_DMA_G		0x888
+#define PHB_AIB_TAG_STATUS		0x900
+#define PHB_TCE_TAG_STATUS		0x908
+
+/* FIR & Error registers */
+#define PHB_LEM_FIR_ACCUM		0xc00
+#define PHB_LEM_FIR_AND_MASK		0xc08
+#define PHB_LEM_FIR_OR_MASK		0xc10
+#define PHB_LEM_ERROR_MASK		0xc18
+#define PHB_LEM_ERROR_AND_MASK		0xc20
+#define PHB_LEM_ERROR_OR_MASK		0xc28
+#define PHB_LEM_ACTION0			0xc30
+#define PHB_LEM_ACTION1			0xc38
+#define PHB_LEM_WOF			0xc40
+#define PHB_ERR_STATUS			0xc80
+#define PHB_ERR1_STATUS			0xc88
+#define PHB_ERR_INJECT			0xc90
+#define PHB_ERR_LEM_ENABLE		0xc98
+#define PHB_ERR_IRQ_ENABLE		0xca0
+#define PHB_ERR_FREEZE_ENABLE		0xca8
+#define PHB_ERR_AIB_FENCE_ENABLE	0xcb0
+#define PHB_ERR_LOG_0			0xcc0
+#define PHB_ERR_LOG_1			0xcc8
+#define PHB_ERR_STATUS_MASK		0xcd0
+#define PHB_ERR1_STATUS_MASK		0xcd8
+
+#define PHB_OUT_ERR_STATUS		0xd00
+#define PHB_OUT_ERR1_STATUS		0xd08
+#define PHB_OUT_ERR_INJECT		0xd10
+#define PHB_OUT_ERR_LEM_ENABLE		0xd18
+#define PHB_OUT_ERR_IRQ_ENABLE		0xd20
+#define PHB_OUT_ERR_FREEZE_ENABLE	0xd28
+#define PHB_OUT_ERR_AIB_FENCE_ENABLE	0xd30
+#define PHB_OUT_ERR_LOG_0		0xd40
+#define PHB_OUT_ERR_LOG_1		0xd48
+#define PHB_OUT_ERR_STATUS_MASK		0xd50
+#define PHB_OUT_ERR1_STATUS_MASK	0xd58
+
+#define PHB_INA_ERR_STATUS		0xd80
+#define PHB_INA_ERR1_STATUS		0xd88
+#define PHB_INA_ERR_INJECT		0xd90
+#define PHB_INA_ERR_LEM_ENABLE		0xd98
+#define PHB_INA_ERR_IRQ_ENABLE		0xda0
+#define PHB_INA_ERR_FREEZE_ENABLE	0xda8
+#define PHB_INA_ERR_AIB_FENCE_ENABLE	0xdb0
+#define PHB_INA_ERR_LOG_0		0xdc0
+#define PHB_INA_ERR_LOG_1		0xdc8
+#define PHB_INA_ERR_STATUS_MASK		0xdd0
+#define PHB_INA_ERR1_STATUS_MASK	0xdd8
+
+#define PHB_INB_ERR_STATUS		0xe00
+#define PHB_INB_ERR1_STATUS		0xe08
+#define PHB_INB_ERR_INJECT		0xe10
+#define PHB_INB_ERR_LEM_ENABLE		0xe18
+#define PHB_INB_ERR_IRQ_ENABLE		0xe20
+#define PHB_INB_ERR_FREEZE_ENABLE	0xe28
+#define PHB_INB_ERR_AIB_FENCE_ENABLE	0xe30
+#define PHB_INB_ERR_LOG_0		0xe40
+#define PHB_INB_ERR_LOG_1		0xe48
+#define PHB_INB_ERR_STATUS_MASK		0xe50
+#define PHB_INB_ERR1_STATUS_MASK	0xe58
+
+/* Performance monitor & Debug registers */
+#define PHB_TRACE_CONTROL		0xf80
+#define PHB_PERFMON_CONFIG		0xf88
+#define PHB_PERFMON_CTR0		0xf90
+#define PHB_PERFMON_CTR1		0xf98
+#define PHB_PERFMON_CTR2		0xfa0
+#define PHB_PERFMON_CTR3		0xfa8
+#define PHB_HOTPLUG_OVERRIDE		0xfb0
+#define   PHB_HPOVR_FORCE_RESAMPLE	PPC_BIT(9)
+#define   PHB_HPOVR_PRESENCE_A		PPC_BIT(10)
+#define   PHB_HPOVR_PRESENCE_B		PPC_BIT(11)
+#define   PHB_HPOVR_LINK_ACTIVE		PPC_BIT(12)
+#define   PHB_HPOVR_LINK_BIFURCATED	PPC_BIT(13)
+#define   PHB_HPOVR_LINK_LANE_SWAPPED	PPC_BIT(14)
+
+/*
+ * IODA2 on-chip tables
+ */
+
+#define IODA2_TBL_LIST		1
+#define IODA2_TBL_LXIVT		2
+#define IODA2_TBL_IVC_CAM	3
+#define IODA2_TBL_RBA		4
+#define IODA2_TBL_RCAM		5
+#define IODA2_TBL_MRT		6
+#define IODA2_TBL_PESTA		7
+#define IODA2_TBL_PESTB		8
+#define IODA2_TBL_TVT		9
+#define IODA2_TBL_TCAM		10
+#define IODA2_TBL_TDR		11
+#define IODA2_TBL_M64BT		16
+#define IODA2_TBL_M32DT		17
+#define IODA2_TBL_PEEV		20
+
+/* LXIVT */
+#define IODA2_LXIVT_SERVER_MASK		PPC_BITMASK(8,23)
+#define IODA2_LXIVT_SERVER_LSH		PPC_BITLSHIFT(23)
+#define IODA2_LXIVT_PRIORITY_MASK	PPC_BITMASK(24,31)
+#define IODA2_LXIVT_PRIORITY_LSH	PPC_BITLSHIFT(31)
+#define IODA2_LXIVT_NODE_ID_MASK	PPC_BITMASK(56,63)
+#define IODA2_LXIVT_NODE_ID_LSH		PPC_BITLSHIFT(63)
+
+/* IVT */
+#define IODA2_IVT_SERVER_MASK		PPC_BITMASK(0,23)
+#define IODA2_IVT_SERVER_LSH		PPC_BITLSHIFT(23)
+#define IODA2_IVT_PRIORITY_MASK		PPC_BITMASK(24,31)
+#define IODA2_IVT_PRIORITY_LSH		PPC_BITLSHIFT(31)
+#define IODA2_IVT_GEN_MASK		PPC_BITMASK(37,38)
+#define IODA2_IVT_GEN_LSH		PPC_BITLSHIFT(38)
+#define IODA2_IVT_P_MASK		PPC_BITMASK(39,39)
+#define IODA2_IVT_P_LSH			PPC_BITLSHIFT(39)
+#define IODA2_IVT_Q_MASK		PPC_BITMASK(47,47)
+#define IODA2_IVT_Q_LSH			PPC_BITLSHIFT(47)
+#define IODA2_IVT_PE_MASK		PPC_BITMASK(48,63)
+#define IODA2_IVT_PE_LSH		PPC_BITLSHIFT(63)
+
+/* TVT */
+#define IODA2_TVT_TABLE_ADDR_MASK	PPC_BITMASK(0,47)
+#define IODA2_TVT_TABLE_ADDR_LSH	PPC_BITLSHIFT(47)
+#define IODA2_TVT_NUM_LEVELS_MASK	PPC_BITMASK(48,50)
+#define IODA2_TVT_NUM_LEVELS_LSH	PPC_BITLSHIFT(50)
+#define   IODA2_TVE_1_LEVEL	0
+#define   IODA2_TVE_2_LEVELS	1
+#define   IODA2_TVE_3_LEVELS	2
+#define   IODA2_TVE_4_LEVELS	3
+#define   IODA2_TVE_5_LEVELS	4
+#define IODA2_TVT_TCE_TABLE_SIZE_MASK	PPC_BITMASK(51,55)
+#define IODA2_TVT_TCE_TABLE_SIZE_LSH	PPC_BITLSHIFT(55)
+#define IODA2_TVT_IO_PSIZE_MASK		PPC_BITMASK(59,63)
+#define IODA2_TVT_IO_PSIZE_LSH		PPC_BITLSHIFT(63)
+
+/* PESTA */
+#define IODA2_PESTA_MMIO_FROZEN		PPC_BIT(0)
+
+/* PESTB */
+#define IODA2_PESTB_DMA_STOPPED		PPC_BIT(0)
+
+/* M32DT */
+#define IODA2_M32DT_PE_MASK		PPC_BITMASK(8,15)
+#define IODA2_M32DT_PE_LSH		PPC_BITLSHIFT(15)
+
+/* M64BT */
+#define IODA2_M64BT_ENABLE		PPC_BIT(0)
+#define IODA2_M64BT_SINGLE_PE		PPC_BIT(1)
+#define IODA2_M64BT_BASE_MASK		PPC_BITMASK(2,31)
+#define IODA2_M64BT_BASE_LSH		PPC_BITLSHIFT(31)
+#define IODA2_M64BT_MASK_MASK		PPC_BITMASK(34,63)
+#define IODA2_M64BT_MASK_LSH		PPC_BITLSHIFT(63)
+#define IODA2_M64BT_SINGLE_BASE_MASK	PPC_BITMASK(2,26)
+#define IODA2_M64BT_SINGLE_BASE_LSH	PPC_BITLSHIFT(26)
+#define IODA2_M64BT_PE_HI_MASK		PPC_BITMASK(27,31)
+#define IODA2_M64BT_PE_HI_LSH		PPC_BITLSHIFT(31)
+#define IODA2_M64BT_SINGLE_MASK_MASK	PPC_BITMASK(34,58)
+#define IODA2_M64BT_SINGLE_MASK_LSH	PPC_BITLSHIFT(58)
+#define IODA2_M64BT_PE_LOW_MASK		PPC_BITMASK(59,63)
+#define IODA2_M64BT_PE_LOW_LSH		PPC_BITLSHIFT(63)
+
+/*
+ * IODA2 in-memory tables
+ */
+
+/* PEST
+ *
+ * 2x8 bytes entries, PEST0 and PEST1
+ */
+
+#define IODA2_PEST0_MMIO_CAUSE		PPC_BIT(2)
+#define IODA2_PEST0_CFG_READ		PPC_BIT(3)
+#define IODA2_PEST0_CFG_WRITE		PPC_BIT(4)
+#define IODA2_PEST0_TTYPE_MASK		PPC_BITMASK(5,7)
+#define IODA2_PEST0_TTYPE_LSH		PPC_BITLSHIFT(7)
+#define   PEST_TTYPE_DMA_WRITE		0
+#define   PEST_TTYPE_MSI		1
+#define   PEST_TTYPE_DMA_READ		2
+#define   PEST_TTYPE_DMA_READ_RESP	3
+#define   PEST_TTYPE_MMIO_LOAD		4
+#define   PEST_TTYPE_MMIO_STORE		5
+#define   PEST_TTYPE_OTHER		7
+#define IODA2_PEST0_CA_RETURN		PPC_BIT(8)
+#define IODA2_PEST0_UTL_RTOS_TIMEOUT	PPC_BIT(8) /* Same bit as CA return */
+#define IODA2_PEST0_UR_RETURN		PPC_BIT(9)
+#define IODA2_PEST0_UTL_NONFATAL	PPC_BIT(10)
+#define IODA2_PEST0_UTL_FATAL		PPC_BIT(11)
+#define IODA2_PEST0_PARITY_UE		PPC_BIT(13)
+#define IODA2_PEST0_UTL_CORRECTABLE	PPC_BIT(14)
+#define IODA2_PEST0_UTL_INTERRUPT	PPC_BIT(15)
+#define IODA2_PEST0_MMIO_XLATE		PPC_BIT(16)
+#define IODA2_PEST0_IODA2_ERROR		PPC_BIT(16) /* Same bit as MMIO xlate */
+#define IODA2_PEST0_TCE_PAGE_FAULT	PPC_BIT(18)
+#define IODA2_PEST0_TCE_ACCESS_FAULT	PPC_BIT(19)
+#define IODA2_PEST0_DMA_RESP_TIMEOUT	PPC_BIT(20)
+#define IODA2_PEST0_AIB_SIZE_INVALID	PPC_BIT(21)
+#define IODA2_PEST0_LEM_BIT_MASK	PPC_BITMASK(26,31)
+#define IODA2_PEST0_LEM_BIT_LSH		PPC_BITLSHIFT(31)
+#define IODA2_PEST0_RID_MASK		PPC_BITMASK(32,47)
+#define IODA2_PEST0_RID_LSH		PPC_BITLSHIFT(47)
+#define IODA2_PEST0_MSI_DATA_MASK	PPC_BITMASK(48,63)
+#define IODA2_PEST0_MSI_DATA_LSH	PPC_BITLSHIFT(63)
+
+#define IODA2_PEST1_FAIL_ADDR_MASK	PPC_BITMASK(3,63)
+#define IODA2_PEST1_FAIL_ADDR_LSH	PPC_BITLSHIFT(63)
+
+
+#endif /* __PHB3_REGS_H */
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index c488f12..98d75b0 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -26,6 +26,7 @@ typedef struct PnvLpcController PnvLpcController;
 typedef struct PnvPsiController PnvPsiController;
 typedef struct XICSState XICSState;
 typedef struct PnvOCCState PnvOCCState;
+typedef struct PCIBus PCIBus;
 
 /* Should we turn that into a QOjb of some sort ? */
 typedef struct PnvChip {
@@ -35,6 +36,8 @@ typedef struct PnvChip {
     ISABus           *lpc_bus;
     PnvPsiController *psi;
     PnvOCCState      *occ;
+#define PNV_MAX_CHIP_PHB	4
+    PCIBus           *phb[PNV_MAX_CHIP_PHB];
 } PnvChip;
 
 typedef struct PnvSystem {
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 85d2fb9..31dab8b 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -205,6 +205,7 @@ void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr);
 uint32_t icp_accept(ICPState *ss);
 uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr);
 void icp_eoi(XICSState *icp, int server, uint32_t xirr);
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority);
 
 void ics_simple_write_xive(ICSState *ics, int nr, int server,
                            uint8_t priority, uint8_t saved_priority);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 49/77] ppc/pnv: Create a default PCI layout
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (47 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 50/77] ppc: Update LPCR definitions Benjamin Herrenschmidt
                   ` (30 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This creates a legacy PCIe->PCI bridge under the PHB by default to which
a bunch of standard devices are attached. Currently:

  - VGA (as specified by -vga)
  - USB (with keyboard and mouse if graphics is enabled)
  - AHCI
  - e1000

This gives us something close to a standard OpenPower platform.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/pnv.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index d808802..179f93b 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -52,6 +52,9 @@
 #include "hw/char/serial.h"
 #include "hw/timer/mc146818rtc.h"
 #include "hw/pci-host/pnv_phb3.h"
+#include "hw/usb.h"
+#include "hw/ide/pci.h"
+#include "hw/ide/ahci.h"
 
 #include "exec/address-spaces.h"
 #include "qemu/config-file.h"
@@ -85,6 +88,9 @@ typedef struct sPowerNVMachineState sPowerNVMachineState;
 /**
  * sPowerNVMachineState:
  */
+
+#define MAX_SATA_PORTS     6
+
 struct sPowerNVMachineState {
     /*< private >*/
     MachineState parent_obj;
@@ -492,6 +498,71 @@ static const VMStateDescription vmstate_powernv = {
     .minimum_version_id = 1,
 };
 
+/* Returns whether VGA is in use; exits with an error on unsupported models */
+static int pnv_vga_init(PCIBus *pci_bus)
+{
+    switch (vga_interface_type) {
+    case VGA_NONE:
+        return false;
+    case VGA_DEVICE:
+        return true;
+    case VGA_STD:
+    case VGA_VIRTIO:
+        return pci_vga_init(pci_bus) != NULL;
+    default:
+        fprintf(stderr, "This vga model is not supported, "
+                "currently it only supports -vga std\n");
+        exit(1);
+    }
+}
+
+static void pnv_nic_init(PCIBus *pci_bus)
+{
+    int i;
+
+    for (i = 0; i < nb_nics; i++) {
+        NICInfo *nd = &nd_table[i];
+        DeviceState *dev;
+        PCIDevice *pdev;
+        Error *err = NULL;
+
+        pdev = pci_create(pci_bus, -1, "e1000");
+        dev = &pdev->qdev;
+        qdev_set_nic_properties(dev, nd);
+        object_property_set_bool(OBJECT(dev), true, "realized", &err);
+        if (err) {
+            error_report_err(err);
+            object_unparent(OBJECT(dev));
+            exit(1);
+        }
+    }
+}
+
+static void pnv_storage_init(PCIBus *pci_bus)
+{
+    DriveInfo *hd[MAX_SATA_PORTS];
+    PCIDevice *ahci;
+
+    /* Add an AHCI device. We use an ICH9 since that's all we have
+     * at hand for PCI AHCI but it shouldn't really matter
+     */
+    ahci = pci_create_simple(pci_bus, -1, "ich9-ahci");
+    g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports);
+    ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
+    ahci_ide_create_devs(ahci, hd);
+}
+
+static PCIBus *pnv_create_pci_legacy_bridge(PCIBus *parent, uint8_t chassis_nr)
+{
+    PCIDevice *dev;
+
+    dev = pci_create_multifunction(parent, 0, false, "pci-bridge");
+    qdev_prop_set_uint8(&dev->qdev, "chassis_nr", chassis_nr);
+    dev->qdev.id = "pci";
+    qdev_init_nofail(&dev->qdev);
+    return pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+}
+
 static void pnv_lpc_irq_handler_cpld(void *opaque, int n, int level)
 {
 #define MAX_ISA_IRQ 16
@@ -575,7 +646,9 @@ static void ppc_powernv_init(MachineState *machine)
     sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
     PnvSystem *sys = &pnv_machine->sys;
     XICSState *xics;
+    PCIBus *pbus;
     ISABus *isa_bus;
+    bool has_gfx = false;
     long fw_size;
     char *filename;
     void *fdt;
@@ -636,6 +709,30 @@ static void ppc_powernv_init(MachineState *machine)
     /* Create an RTC ISA device too */
     rtc_init(isa_bus, 2000, NULL);
 
+    /* Add a PCI switch */
+    pbus = pnv_create_pci_legacy_bridge(sys->chips[0].phb[0], 128);
+
+    /* Graphics */
+    if (pnv_vga_init(pbus)) {
+        has_gfx = true;
+        machine->usb |= defaults_enabled() && !machine->usb_disabled;
+    }
+    if (machine->usb) {
+        pci_create_simple(pbus, -1, "nec-usb-xhci");
+        if (has_gfx) {
+            USBBus *usb_bus = usb_bus_find(-1);
+
+            usb_create_simple(usb_bus, "usb-kbd");
+            usb_create_simple(usb_bus, "usb-mouse");
+        }
+    }
+
+    /* Add NIC */
+    pnv_nic_init(pbus);
+
+    /* Add storage */
+    pnv_storage_init(pbus);
+
     if (bios_name == NULL) {
         bios_name = FW_FILE_NAME;
     }
@@ -709,7 +806,7 @@ static void powernv_machine_class_init(ObjectClass *oc, void *data)
     NMIClass *nc = NMI_CLASS(oc);
 
     mc->init = ppc_powernv_init;
-    mc->block_default_type = IF_SCSI;
+    mc->block_default_type = IF_IDE;
     mc->max_cpus = MAX_CPUS;
     mc->no_parallel = 1;
     mc->default_boot_order = NULL;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 50/77] ppc: Update LPCR definitions
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (48 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 49/77] ppc/pnv: Create a default PCI layout Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 51/77] ppc: Use a helper to filter writes to LPCR Benjamin Herrenschmidt
                   ` (29 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Includes all the bits up to ISA 2.07

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index a7236cf..ca6c961 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -493,12 +493,16 @@ struct ppc_slb_t {
 #define LPCR_VPM1         (1ull << (63-1))
 #define LPCR_ISL          (1ull << (63-2))
 #define LPCR_KBV          (1ull << (63-3))
+#define LPCR_DPFD_SHIFT   (63-11)
+#define LPCR_DPFD         (0x3ull << LPCR_DPFD_SHIFT)
+#define LPCR_VRMASD_SHIFT (63-16)
+#define LPCR_VRMASD       (0x1full << LPCR_VRMASD_SHIFT)
+#define LPCR_RMLS_SHIFT   (63-37)
+#define LPCR_RMLS         (0xfull << LPCR_RMLS_SHIFT)
 #define LPCR_ILE          (1ull << (63-38))
-#define LPCR_MER          (1ull << (63-52))
-#define LPCR_LPES0        (1ull << (63-60))
-#define LPCR_LPES1        (1ull << (63-61))
 #define LPCR_AIL_SHIFT    (63-40)      /* Alternate interrupt location */
 #define LPCR_AIL          (3ull << LPCR_AIL_SHIFT)
+#define LPCR_ONL          (1ull << (63-45))
 #define LPCR_P7_PECE0     (1ull << (63-49))
 #define LPCR_P7_PECE1     (1ull << (63-50))
 #define LPCR_P7_PECE2     (1ull << (63-51))
@@ -507,6 +511,12 @@ struct ppc_slb_t {
 #define LPCR_P8_PECE2     (1ull << (63-49))
 #define LPCR_P8_PECE3     (1ull << (63-50))
 #define LPCR_P8_PECE4     (1ull << (63-51))
+#define LPCR_MER          (1ull << (63-52))
+#define LPCR_TC           (1ull << (63-54))
+#define LPCR_LPES0        (1ull << (63-60))
+#define LPCR_LPES1        (1ull << (63-61))
+#define LPCR_RMI          (1ull << (63-62))
+#define LPCR_HDICE        (1ull << (63-63))
 
 #define msr_sf   ((env->msr >> MSR_SF)   & 1)
 #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 51/77] ppc: Use a helper to filter writes to LPCR
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (49 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 50/77] ppc: Update LPCR definitions Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 52/77] ppc: Cosmetic, align some comments Benjamin Herrenschmidt
                   ` (28 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This handles filtering bits based on what is implemented by a
given architecture version. We also use it to copy to LPCR
some of the relevant 970 HID4 bits.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/helper.h         |  1 +
 target-ppc/mmu-hash64.c     | 58 +++++++++++++++++++++++++++++++++++++++++++++
 target-ppc/translate_init.c | 56 ++++++++++++++++++++++++++++---------------
 3 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 8292dd8..23889fe 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
 DEF_HELPER_2(pminsn, void, env, i32)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
+DEF_HELPER_2(store_lpcr, void, env, tl)
 #endif
 DEF_HELPER_1(check_tlb_flush, void, env)
 #endif
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index e489fa4..835245a 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -667,3 +667,61 @@ void ppc_hash64_store_hpte(CPUPPCState *env,
         stq_phys(cs->as, env->htab_base + pte_index + HASH_PTE_SIZE_64/2, pte1);
     }
 }
+
+void helper_store_lpcr(CPUPPCState *env, target_ulong val)
+{
+    uint64_t lpcr = 0;
+
+    /* Keep only the LPCR bits implemented by env->mmu_model */
+    switch (env->mmu_model) {
+    case POWERPC_MMU_64B: /* 970 */
+        if (val & 0x40) {
+            lpcr |= LPCR_LPES0;
+        }
+        if (val & 0x8000000000000000ull) {
+            lpcr |= LPCR_LPES1;
+        }
+        if (val & 0x20) {
+            lpcr |= (0x4ull << LPCR_RMLS_SHIFT);
+        }
+        if (val & 0x4000000000000000ull) {
+            lpcr |= (0x2ull << LPCR_RMLS_SHIFT);
+        }
+        if (val & 0x2000000000000000ull) {
+            lpcr |= (0x1ull << LPCR_RMLS_SHIFT);
+        }
+        env->spr[SPR_RMOR] = ((val >> 41) & 0xffffull) << 26; /* from HID4 */
+
+        /* XXX We could also write LPID from HID4 here
+         * but since we don't tag any translation on it
+         * it doesn't actually matter
+         */
+        /* XXX For proper emulation of 970 we also need
+         * to dig HRMOR out of HID5
+         */
+        break;
+    case POWERPC_MMU_2_03: /* P5p */
+        lpcr = val & (LPCR_RMLS | LPCR_ILE |
+                      LPCR_LPES0 | LPCR_LPES1 |
+                      LPCR_RMI | LPCR_HDICE);
+        break;
+    case POWERPC_MMU_2_06: /* P7 */
+        lpcr = val & (LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_DPFD |
+                      LPCR_VRMASD | LPCR_RMLS | LPCR_ILE |
+                      LPCR_P7_PECE0 | LPCR_P7_PECE1 | LPCR_P7_PECE2 |
+                      LPCR_MER | LPCR_TC |
+                      LPCR_LPES0 | LPCR_LPES1 | LPCR_HDICE);
+        break;
+    case POWERPC_MMU_2_07: /* P8 */
+        lpcr = val & (LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV |
+                      LPCR_DPFD | LPCR_VRMASD | LPCR_RMLS | LPCR_ILE |
+                      LPCR_AIL | LPCR_ONL | LPCR_P8_PECE0 | LPCR_P8_PECE1 |
+                      LPCR_P8_PECE2 | LPCR_P8_PECE3 | LPCR_P8_PECE4 |
+                      LPCR_MER | LPCR_TC | LPCR_LPES0 | LPCR_HDICE);
+        break;
+    default:
+        ; /* unknown MMU model: no bits are kept, LPCR is written as 0 */
+    }
+    env->spr[SPR_LPCR] = lpcr;
+}
+
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 8a1ce85..853a084 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7450,16 +7450,6 @@ static void gen_spr_970_hior(CPUPPCState *env)
                  0x00000000);
 }
 
-static void gen_spr_970_lpar(CPUPPCState *env)
-{
-    /* Logical partitionning */
-    /* PPC970: HID4 is effectively the LPCR */
-    spr_register(env, SPR_970_HID4, "HID4",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0x00000000);
-}
-
 static void gen_spr_book3s_common(CPUPPCState *env)
 {
     spr_register(env, SPR_CTRL, "SPR_CTRL",
@@ -7679,15 +7669,6 @@ static void gen_spr_power5p_ear(CPUPPCState *env)
                  0x00000000);
 }
 
-static void gen_spr_power5p_lpar(CPUPPCState *env)
-{
-    /* Logical partitionning */
-    spr_register_kvm(env, SPR_LPCR, "LPCR",
-                     SPR_NOACCESS, SPR_NOACCESS,
-                     &spr_read_generic, &spr_write_generic,
-                     KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
-}
-
 #if !defined(CONFIG_USER_ONLY)
 static void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
 {
@@ -7699,7 +7680,44 @@ static void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
     spr_store_dump_spr(sprn);
     tcg_temp_free(hmer);
 }
+
+static void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_lpcr(cpu_env, cpu_gpr[gprn]);
+}
+
+static void spr_write_970_hid4(DisasContext *ctx, int sprn, int gprn)
+{
+#if defined (TARGET_PPC64)
+    spr_write_generic(ctx, sprn, gprn);
+    gen_helper_store_lpcr(cpu_env, cpu_gpr[gprn]);
+#endif
+}
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+static void gen_spr_970_lpar(CPUPPCState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    /* Logical partitioning */
+    /* PPC970: HID4 is effectively the LPCR */
+    spr_register(env, SPR_970_HID4, "HID4",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_970_hid4,
+                 0x00000000);
+#endif
+}
+
+static void gen_spr_power5p_lpar(CPUPPCState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    /* Logical partitioning */
+    spr_register_kvm(env, SPR_LPCR, "LPCR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_lpcr,
+                     KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
 #endif
+}
 
 static void gen_spr_book3s_ids(CPUPPCState *env)
 {
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 52/77] ppc: Cosmetic, align some comments
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (50 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 51/77] ppc: Use a helper to filter writes to LPCR Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 53/77] ppc: Add proper real mode translation support Benjamin Herrenschmidt
                   ` (27 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/mmu-hash32.c | 4 ++--
 target-ppc/mmu_helper.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/target-ppc/mmu-hash32.c b/target-ppc/mmu-hash32.c
index dfee358..a2f4ce2 100644
--- a/target-ppc/mmu-hash32.c
+++ b/target-ppc/mmu-hash32.c
@@ -41,8 +41,8 @@
 
 struct mmu_ctx_hash32 {
     hwaddr raddr;      /* Real address              */
-    int prot;                      /* Protection bits           */
-    int key;                       /* Access key                */
+    int prot;          /* Protection bits           */
+    int key;           /* Access key                */
 };
 
 static int ppc_hash32_pp_prot(int key, int pp, int nx)
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 54bc5d1..18d34cc 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -58,11 +58,11 @@ typedef struct mmu_ctx_t mmu_ctx_t;
 struct mmu_ctx_t {
     hwaddr raddr;      /* Real address              */
     hwaddr eaddr;      /* Effective address         */
-    int prot;                      /* Protection bits           */
+    int prot;          /* Protection bits           */
     hwaddr hash[2];    /* Pagetable hash values     */
-    target_ulong ptem;             /* Virtual segment ID | API  */
-    int key;                       /* Access key                */
-    int nx;                        /* Non-execute area          */
+    target_ulong ptem; /* Virtual segment ID | API  */
+    int key;           /* Access key                */
+    int nx;            /* Non-execute area          */
 };
 
 /* Common routines used by software and hardware TLBs emulation */
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 53/77] ppc: Add proper real mode translation support
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (51 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 52/77] ppc: Cosmetic, align some comments Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 54/77] ppc: Fix 64K pages support in full emulation Benjamin Herrenschmidt
                   ` (26 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This adds proper support for translating real mode addresses based
on the combination of HV and LPCR bits. This handles HRMOR offset
for hypervisor real mode, and both RMA and VRMA modes for guest
real mode. PAPR mode adjusts the offsets appropriately to match the
RMA used in TCG, but we need to limit to the max supported by the
implementation (16G).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/spapr.c              |   7 +++
 target-ppc/mmu-hash64.c     | 146 ++++++++++++++++++++++++++++++++++++++------
 target-ppc/mmu-hash64.h     |   1 +
 target-ppc/translate_init.c |  10 ++-
 4 files changed, 144 insertions(+), 20 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d8a84ca..13fe2d5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1741,6 +1741,13 @@ static void ppc_spapr_init(MachineState *machine)
             spapr->vrma_adjust = 1;
             spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
         }
+
+        /* Actually we don't support unbounded RMA anymore since we
+         * added proper emulation of HV mode. The max we can get is
+         * 16G which also happens to be what we configure for PAPR
+         * mode so make sure we don't do anything bigger than that
+         */
+        spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull);
     }
 
     if (spapr->rma_size > node0_size) {
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 835245a..328998f 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -504,12 +504,41 @@ static void ppc_hash64_set_dsi(CPUState *cs, CPUPPCState *env, uint64_t dar, uin
     env->error_code = 0;
 }
 
+static int64_t ppc_hash64_get_rmls(CPUPPCState *env)
+{
+    uint64_t lpcr = env->spr[SPR_LPCR];
+
+    /*
+     * This is the full 4 bits encoding of POWER8. Previous
+     * CPUs only support a subset of these but the filtering
+     * is done when writing LPCR
+     */
+    switch((lpcr & LPCR_RMLS) >> LPCR_RMLS_SHIFT) {
+    case 0x8: /* 32MB */
+        return 0x2000000ull;
+    case 0x3: /* 64MB */
+        return 0x4000000ull;
+    case 0x7: /* 128MB */
+        return 0x8000000ull;
+    case 0x4: /* 256MB */
+        return 0x10000000ull;
+    case 0x2: /* 1GB */
+        return 0x40000000ull;
+    case 0x1: /* 16GB */
+        return 0x400000000ull;
+    default:
+        /* What to do here ??? */
+        return 0;
+    }
+}
+
 int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
                                 int rwx, int mmu_idx)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    ppc_slb_t *slb;
+    ppc_slb_t *slb_ptr;
+    ppc_slb_t slb;
     hwaddr pte_offset;
     ppc_hash_pte64_t pte;
     int pp_prot, amr_prot, prot;
@@ -519,11 +548,52 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
 
     assert((rwx == 0) || (rwx == 1) || (rwx == 2));
 
+    /* Note on LPCR usage: 970 uses HID4, but our special variant
+     * of store_spr copies relevant fields into env->spr[SPR_LPCR].
+     * Similarly we filter unimplemented bits when storing into
+     * LPCR depending on the MMU version. This code can thus just
+     * use the LPCR "as-is".
+     */
+
     /* 1. Handle real mode accesses */
     if (((rwx == 2) && (msr_ir == 0)) || ((rwx != 2) && (msr_dr == 0))) {
-        /* Translation is off */
-        /* In real mode the top 4 effective address bits are ignored */
+        /* Translation is supposedly "off"  */
+        /* In real mode the top 4 effective address bits are (mostly) ignored */
         raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
+
+        /* In HV mode, add HRMOR if top EA bit is clear */
+        if (msr_hv) {
+            if (!(eaddr >> 63)) {
+                raddr |= env->spr[SPR_HRMOR];
+            }
+        } else {
+            /* Otherwise, check VPM for RMA vs VRMA */
+            if (env->spr[SPR_LPCR] & LPCR_VPM0) {
+                uint32_t vrmasd;
+                /* VRMA, we make up an SLB entry */
+                slb.vsid = SLB_VSID_VRMA;
+                vrmasd = (env->spr[SPR_LPCR] & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
+                slb.vsid |= (vrmasd << 4) & (SLB_VSID_L | SLB_VSID_LP);
+                slb.esid = SLB_ESID_V;
+                goto skip_slb;
+            }
+            /* RMA. Check bounds in RMLS */
+            if (raddr < ppc_hash64_get_rmls(env)) {
+              raddr |= env->spr[SPR_RMOR];
+            } else {
+                /* The access failed, generate the appropriate interrupt */
+                if (rwx == 2) {
+                    ppc_hash64_set_isi(cs, env, 0x08000000);
+                } else {
+                    dsisr = 0x08000000;
+                    if (rwx == 1) {
+                        dsisr |= 0x02000000;
+                    }
+                    ppc_hash64_set_dsi(cs, env, eaddr, dsisr);
+                }
+                return 1;
+            }
+        }
         tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
                      PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
                      TARGET_PAGE_SIZE);
@@ -531,9 +601,8 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
     }
 
     /* 2. Translation is on, so look up the SLB */
-    slb = slb_lookup(env, eaddr);
-
-    if (!slb) {
+    slb_ptr = slb_lookup(env, eaddr);
+    if (!slb_ptr) {
         if (rwx == 2) {
             cs->exception_index = POWERPC_EXCP_ISEG;
             env->error_code = 0;
@@ -545,14 +614,29 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
         return 1;
     }
 
+    /* We grab a local copy because we can modify it (or get a
+     * pre-cooked one from the VRMA code)
+     */
+    slb = *slb_ptr;
+
+    /* 2.5 Clamp L||LP in ISL mode */
+    if (env->spr[SPR_LPCR] & LPCR_ISL) {
+         slb.vsid &= ~SLB_VSID_LLP_MASK;
+    }
+
     /* 3. Check for segment level no-execute violation */
-    if ((rwx == 2) && (slb->vsid & SLB_VSID_N)) {
+    if ((rwx == 2) && (slb.vsid & SLB_VSID_N)) {
         ppc_hash64_set_isi(cs, env, 0x10000000);
         return 1;
     }
 
+    /* We go straight here for VRMA translations as none of the
+     * above applies in that case
+     */
+ skip_slb:
+
     /* 4. Locate the PTE in the hash table */
-    pte_offset = ppc_hash64_htab_lookup(env, slb, eaddr, &pte);
+    pte_offset = ppc_hash64_htab_lookup(env, &slb, eaddr, &pte);
     if (pte_offset == -1) {
         dsisr = 0x40000000;
         if (rwx == 2) {
@@ -570,7 +654,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
 
     /* 5. Check access permissions */
 
-    pp_prot = ppc_hash64_pte_prot(env, slb, pte);
+    pp_prot = ppc_hash64_pte_prot(env, &slb, pte);
     amr_prot = ppc_hash64_amr_prot(env, pte);
     prot = pp_prot & amr_prot;
 
@@ -615,7 +699,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
 
     /* 7. Determine the real address from the PTE */
 
-    raddr = ppc_hash64_pte_raddr(slb, pte, eaddr);
+    raddr = ppc_hash64_pte_raddr(&slb, pte, eaddr);
 
     tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
                  prot, mmu_idx, TARGET_PAGE_SIZE);
@@ -625,26 +709,50 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
 
 hwaddr ppc_hash64_get_phys_page_debug(CPUPPCState *env, target_ulong addr)
 {
-    ppc_slb_t *slb;
-    hwaddr pte_offset;
+    ppc_slb_t slb;
+    ppc_slb_t *slb_ptr;
+    hwaddr pte_offset, raddr;
     ppc_hash_pte64_t pte;
 
+    /* Handle real mode */
     if (msr_dr == 0) {
-        /* In real mode the top 4 effective address bits are ignored */
-        return addr & 0x0FFFFFFFFFFFFFFFULL;
-    }
+        raddr = addr & 0x0FFFFFFFFFFFFFFFULL;
 
-    slb = slb_lookup(env, addr);
-    if (!slb) {
+        /* In HV mode, add HRMOR if top EA bit is clear */
+        if (msr_hv & !(addr >> 63)) {
+            return raddr | env->spr[SPR_HRMOR];
+        }
+
+        /* Otherwise, check VPM for RMA vs VRMA */
+        if (env->spr[SPR_LPCR] & LPCR_VPM0) {
+            uint32_t vrmasd;
+
+            /* VRMA, we make up an SLB entry */
+            slb.vsid = SLB_VSID_VRMA;
+            vrmasd = (env->spr[SPR_LPCR] & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
+            slb.vsid |= (vrmasd << 4) & (SLB_VSID_L | SLB_VSID_LP);
+            slb.esid = SLB_ESID_V;
+            goto skip_slb;
+        }
+        /* RMA. Check bounds in RMLS */
+        if (raddr < ppc_hash64_get_rmls(env)) {
+            return raddr | env->spr[SPR_RMOR];
+        }
         return -1;
     }
 
-    pte_offset = ppc_hash64_htab_lookup(env, slb, addr, &pte);
+    slb_ptr = slb_lookup(env, addr);
+    if (!slb_ptr) {
+        return -1;
+    }
+    slb = *slb_ptr;
+ skip_slb:
+    pte_offset = ppc_hash64_htab_lookup(env, &slb, addr, &pte);
     if (pte_offset == -1) {
         return -1;
     }
 
-    return ppc_hash64_pte_raddr(slb, pte, addr) & TARGET_PAGE_MASK;
+    return ppc_hash64_pte_raddr(&slb, pte, addr) & TARGET_PAGE_MASK;
 }
 
 void ppc_hash64_store_hpte(CPUPPCState *env,
diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h
index 291750f..729f718 100644
--- a/target-ppc/mmu-hash64.h
+++ b/target-ppc/mmu-hash64.h
@@ -29,6 +29,7 @@ void ppc_hash64_store_hpte(CPUPPCState *env, target_ulong index,
 #define SLB_VSID_B_256M         0x0000000000000000ULL
 #define SLB_VSID_B_1T           0x4000000000000000ULL
 #define SLB_VSID_VSID           0x3FFFFFFFFFFFF000ULL
+#define SLB_VSID_VRMA           (0x0001FFFFFF000000ULL | SLB_VSID_B_1T)
 #define SLB_VSID_PTEM           (SLB_VSID_B | SLB_VSID_VSID)
 #define SLB_VSID_KS             0x0000000000000800ULL
 #define SLB_VSID_KP             0x0000000000000400ULL
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 853a084..504564d 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8558,11 +8558,19 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
     /* Set emulated LPCR to not send interrupts to hypervisor. Note that
      * under KVM, the actual HW LPCR will be set differently by KVM itself,
      * the settings below ensure proper operations with TCG in absence of
-     * a real hypervisor
+     * a real hypervisor.
+     *
+     * Clearing VPM0 will also cause us to use RMOR in mmu-hash64.c for
+     * real mode accesses, which thankfully defaults to 0 and isn't
+     * accessible in guest mode.
      */
     lpcr->default_value &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV);
     lpcr->default_value |= LPCR_LPES0 | LPCR_LPES1;
 
+    /* Set RMLS to the max (ie, 16G) */
+    lpcr->default_value &= ~LPCR_RMLS;
+    lpcr->default_value |= 1ull << LPCR_RMLS_SHIFT;
+
     /* P7 and P8 has slightly different PECE bits, mostly because P8 adds
      * bit 47 and 48 which are reserved on P7. Here we set them all, which
      * will work as expected for both implementations
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 54/77] ppc: Fix 64K pages support in full emulation
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (52 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 53/77] ppc: Add proper real mode translation support Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 55/77] ppc/pnv+spapr: Add "ibm, pa-features" property to the device-tree Benjamin Herrenschmidt
                   ` (25 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We were always advertising only 4K & 16M. Additionally the code wasn't
properly matching the page size with the PTE content, which meant we
could potentially hit an incorrect PTE if the guest used multiple sizes.

Finally, honor the CPU capabilities when decoding the size from the SLB
so we don't try to use 64K pages on 970.

This still doesn't add support for MPSS (Multiple Page Sizes per Segment).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/spapr_hcall.c        | 10 +++------
 target-ppc/cpu.h            |  7 ++++--
 target-ppc/mmu-hash64.c     | 54 +++++++++++++++++++++++++++++++++++----------
 target-ppc/translate_init.c | 22 +++++++++++++++---
 4 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 7e2cb4b..ba1bcaf 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -56,7 +56,6 @@ static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
     va_low &= 0x7ff;
     if (v & HPTE64_V_LARGE) {
         rb |= 1;                         /* L field */
-#if 0 /* Disable that P7 specific bit for now */
         if (r & 0xff000) {
             /* non-16MB large page, must be 64k */
             /* (masks depend on page size) */
@@ -64,7 +63,6 @@ static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
             rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
             rb |= (va_low & 0xfe);       /* AVAL field */
         }
-#endif
     } else {
         /* 4kB page */
         rb |= (va_low & 0x7ff) << 12;   /* remaining 11b of AVA */
@@ -98,14 +96,12 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     target_ulong index;
     uint64_t token;
 
-    /* only handle 4k and 16M pages for now */
+    /* Handle non-4K pages */
     if (pteh & HPTE64_V_LARGE) {
-#if 0 /* We don't support 64k pages yet */
         if ((ptel & 0xf000) == 0x1000) {
             /* 64k page */
-        } else
-#endif
-        if ((ptel & 0xff000) == 0) {
+            page_shift = 16;
+        } else if ((ptel & 0xff000) == 0) {
             /* 16M page */
             page_shift = 24;
             /* lowest AVA bit must be 0 for 16M pages */
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index ca6c961..3fc3e1d 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -115,16 +115,19 @@ enum powerpc_mmu_t {
 #define POWERPC_MMU_64       0x00010000
 #define POWERPC_MMU_1TSEG    0x00020000
 #define POWERPC_MMU_AMR      0x00040000
+#define POWERPC_MMU_64K      0x00080000
     /* 64 bits PowerPC MMU                                     */
     POWERPC_MMU_64B        = POWERPC_MMU_64 | 0x00000001,
     /* Architecture 2.03 and later (has LPCR) */
     POWERPC_MMU_2_03       = POWERPC_MMU_64 | 0x00000002,
     /* Architecture 2.06 variant                               */
     POWERPC_MMU_2_06       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
-                             | POWERPC_MMU_AMR | 0x00000003,
+                             | POWERPC_MMU_AMR | POWERPC_MMU_64K
+                             | 0x00000003,
     /* Architecture 2.07 variant                               */
     POWERPC_MMU_2_07       = POWERPC_MMU_64 | POWERPC_MMU_1TSEG
-                             | POWERPC_MMU_AMR | 0x00000004,
+                             | POWERPC_MMU_AMR | POWERPC_MMU_64K
+                             | 0x00000004,
 #endif /* defined(TARGET_PPC64) */
 };
 
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 328998f..9071fe9 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -348,9 +348,31 @@ void ppc_hash64_stop_access(uint64_t token)
     }
 }
 
+/* Returns the effective page shift or 0. MPSS isn't supported yet so
+ * this will always be the slb_pshift or 0
+ */
+static uint32_t ppc_hash64_pte_size_decode(uint64_t pte1, uint32_t slb_pshift)
+{
+    switch(slb_pshift) {
+    case 12:
+        return 12;
+    case 16:
+        if ((pte1 & 0xf000) == 0x1000) {
+            return 16;
+        }
+        return 0;
+    case 24:
+        if ((pte1 & 0xff000) == 0) {
+            return 24;
+        }
+        return 0;
+    }
+    return 0;
+}
+
 static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
-                                     bool secondary, target_ulong ptem,
-                                     ppc_hash_pte64_t *pte)
+                                     uint32_t slb_pshift, bool secondary,
+                                     target_ulong ptem, ppc_hash_pte64_t *pte)
 {
     int i;
     uint64_t token;
@@ -369,6 +391,13 @@ static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
         if ((pte0 & HPTE64_V_VALID)
             && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
             && HPTE64_V_COMPARE(pte0, ptem)) {
+            uint32_t pshift = ppc_hash64_pte_size_decode(pte1, slb_pshift);
+            if (pshift == 0) {
+                continue;
+            }
+            /* We don't do anything with pshift yet as qemu TLB only deals
+             * with 4K pages anyway
+             */
             pte->pte0 = pte0;
             pte->pte1 = pte1;
             ppc_hash64_stop_access(token);
@@ -382,7 +411,7 @@ static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
     return -1;
 }
 
-static uint64_t ppc_hash64_page_shift(ppc_slb_t *slb)
+static uint64_t ppc_hash64_page_shift(CPUPPCState *env, ppc_slb_t *slb)
 {
     uint64_t epnshift;
 
@@ -392,7 +421,8 @@ static uint64_t ppc_hash64_page_shift(ppc_slb_t *slb)
      * encoded in the PTE */
     if ((slb->vsid & SLB_VSID_LLP_MASK) == SLB_VSID_4K) {
         epnshift = TARGET_PAGE_BITS;
-    } else if ((slb->vsid & SLB_VSID_LLP_MASK) == SLB_VSID_64K) {
+    } else if ((slb->vsid & SLB_VSID_LLP_MASK) == SLB_VSID_64K &&
+               (env->mmu_model & POWERPC_MMU_64K)) {
         epnshift = TARGET_PAGE_BITS_64K;
     } else {
         epnshift = TARGET_PAGE_BITS_16M;
@@ -408,7 +438,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
     hwaddr hash;
     uint64_t vsid, epnshift, epnmask, epn, ptem;
 
-    epnshift = ppc_hash64_page_shift(slb);
+    epnshift = ppc_hash64_page_shift(env, slb);
     epnmask = ~((1ULL << epnshift) - 1);
 
     if (slb->vsid & SLB_VSID_B) {
@@ -436,7 +466,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
             " hash=" TARGET_FMT_plx "\n",
             env->htab_base, env->htab_mask, vsid, ptem,  hash);
-    pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte);
+    pte_offset = ppc_hash64_pteg_search(env, hash, epnshift, 0, ptem, pte);
 
     if (pte_offset == -1) {
         /* Secondary PTEG lookup */
@@ -446,14 +476,14 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
                 env->htab_mask, vsid, ptem, ~hash);
 
-        pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte);
+        pte_offset = ppc_hash64_pteg_search(env, ~hash, epnshift, 1, ptem, pte);
     }
 
     return pte_offset;
 }
 
-static hwaddr ppc_hash64_pte_raddr(ppc_slb_t *slb, ppc_hash_pte64_t pte,
-                                   target_ulong eaddr)
+static hwaddr ppc_hash64_pte_raddr(CPUPPCState *env, ppc_slb_t *slb,
+                                   ppc_hash_pte64_t pte, target_ulong eaddr)
 {
     hwaddr mask;
     int target_page_bits;
@@ -461,7 +491,7 @@ static hwaddr ppc_hash64_pte_raddr(ppc_slb_t *slb, ppc_hash_pte64_t pte,
     /*
      * We support 4K, 64K and 16M now
      */
-    target_page_bits = ppc_hash64_page_shift(slb);
+    target_page_bits = ppc_hash64_page_shift(env, slb);
     mask = (1ULL << target_page_bits) - 1;
     return (rpn & ~mask) | (eaddr & mask);
 }
@@ -699,7 +729,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
 
     /* 7. Determine the real address from the PTE */
 
-    raddr = ppc_hash64_pte_raddr(&slb, pte, eaddr);
+    raddr = ppc_hash64_pte_raddr(env, &slb, pte, eaddr);
 
     tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
                  prot, mmu_idx, TARGET_PAGE_SIZE);
@@ -752,7 +782,7 @@ hwaddr ppc_hash64_get_phys_page_debug(CPUPPCState *env, target_ulong addr)
         return -1;
     }
 
-    return ppc_hash64_pte_raddr(&slb, pte, addr) & TARGET_PAGE_MASK;
+    return ppc_hash64_pte_raddr(env, &slb, pte, addr) & TARGET_PAGE_MASK;
 }
 
 void ppc_hash64_store_hpte(CPUPPCState *env,
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 504564d..b623206 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9998,8 +9998,8 @@ static void ppc_cpu_initfn(Object *obj)
     if (pcc->sps) {
         env->sps = *pcc->sps;
     } else if (env->mmu_model & POWERPC_MMU_64) {
-        /* Use default sets of page sizes */
-        static const struct ppc_segment_page_sizes defsps = {
+	/* Use default sets of page sizes. We don't support MPSS */
+        static const struct ppc_segment_page_sizes defsps_4k = {
             .sps = {
                 { .page_shift = 12, /* 4K */
                   .slb_enc = 0,
@@ -10011,7 +10011,23 @@ static void ppc_cpu_initfn(Object *obj)
                 },
             },
         };
-        env->sps = defsps;
+        static const struct ppc_segment_page_sizes defsps_64k = {
+            .sps = {
+                { .page_shift = 12, /* 4K */
+                  .slb_enc = 0,
+                  .enc = { { .page_shift = 12, .pte_enc = 0 } }
+                },
+                { .page_shift = 16, /* 64K */
+                  .slb_enc = 0x110,
+                  .enc = { { .page_shift = 16, .pte_enc = 1 } }
+                },
+                { .page_shift = 24, /* 16M */
+                  .slb_enc = 0x100,
+                  .enc = { { .page_shift = 24, .pte_enc = 0 } }
+                },
+            },
+        };
+        env->sps = (env->mmu_model & POWERPC_MMU_64K) ? defsps_64k : defsps_4k;
     }
 #endif /* defined(TARGET_PPC64) */
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 55/77] ppc/pnv+spapr: Add "ibm, pa-features" property to the device-tree
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (53 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 54/77] ppc: Fix 64K pages support in full emulation Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 56/77] ppc: Fix conditions for delivering external interrupts to a guest Benjamin Herrenschmidt
                   ` (24 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

This is currently missing on both PowerNV and PAPR

FIXME: Split patch & fix tabs

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/pnv.c                | 8 ++++++++
 target-ppc/translate_init.c | 1 +
 2 files changed, 9 insertions(+)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 179f93b..1787dd1 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -275,6 +275,11 @@ static void powernv_create_cpu_node(void *fdt, CPUState *cs, int smt_threads)
     uint32_t page_sizes_prop[64];
     size_t page_sizes_prop_size;
     char *nodename;
+    const uint8_t pa_features[] = { 24, 0,
+                                    0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
+                                    0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+                                    0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+                                    0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
 
     if ((index % smt_threads) != 0) {
         return;
@@ -349,6 +354,9 @@ static void powernv_create_cpu_node(void *fdt, CPUState *cs, int smt_threads)
                            page_sizes_prop, page_sizes_prop_size)));
     }
 
+    _FDT((fdt_property(fdt, "ibm,pa-features",
+                       pa_features, sizeof(pa_features))));
+
     /* XXX Just a hack for now */
     _FDT((fdt_property_cell(fdt, "ibm,chip-id", 0)));
 
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index b623206..25f5b18 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -10028,6 +10028,7 @@ static void ppc_cpu_initfn(Object *obj)
             },
         };
         env->sps = (env->mmu_model & POWERPC_MMU_64K) ? defsps_64k : defsps_4k;
+	env->ci_large_pages = env->mmu_model >= POWERPC_MMU_2_06;
     }
 #endif /* defined(TARGET_PPC64) */
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 56/77] ppc: Fix conditions for delivering external interrupts to a guest
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (54 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 55/77] ppc/pnv+spapr: Add "ibm, pa-features" property to the device-tree Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 57/77] ppc: Enforce setting MSR:EE, IR and DR when MSR:PR is set Benjamin Herrenschmidt
                   ` (23 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

External interrupts can bypass the MSR_EE test if they occur in guest
mode and LPES0 is clear. In that case they are directed to the hypervisor.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/excp_helper.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 3f77df7..eb65eee 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -795,6 +795,14 @@ static void ppc_hw_interrupt(CPUPPCState *env)
             return;
         }
     }
+    /* External interrupts can ignore MSR:EE under some circumstances */
+    if (env->pending_interrupts & (1 << PPC_INTERRUPT_EXT)) {
+        bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0);
+        if (msr_ee != 0 || (env->has_hv_mode && msr_hv == 0 && !lpes0)) {
+            powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EXTERNAL);
+            return;
+        }
+    }
     if (msr_ce != 0) {
         /* External critical interrupt */
         if (env->pending_interrupts & (1 << PPC_INTERRUPT_CEXT)) {
@@ -840,17 +848,6 @@ static void ppc_hw_interrupt(CPUPPCState *env)
             powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DECR);
             return;
         }
-        /* External interrupt */
-        if (env->pending_interrupts & (1 << PPC_INTERRUPT_EXT)) {
-            /* Taking an external interrupt does not clear the external
-             * interrupt status
-             */
-#if 0
-            env->pending_interrupts &= ~(1 << PPC_INTERRUPT_EXT);
-#endif
-            powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EXTERNAL);
-            return;
-        }
         if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) {
             env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL);
             powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 57/77] ppc: Enforce setting MSR:EE, IR and DR when MSR:PR is set
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (55 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 56/77] ppc: Fix conditions for delivering external interrupts to a guest Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 58/77] ppc: Initial HDEC support Benjamin Herrenschmidt
                   ` (22 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The architecture specifies that any instruction that sets MSR:PR will also
set MSR:EE, IR and DR.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/helper_regs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 12af61c..09bc450 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -136,6 +136,10 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
         /* Change the exception prefix on PowerPC 601 */
         env->excp_prefix = ((value >> MSR_EP) & 1) * 0xFFF00000;
     }
+    /* If PR=1 then EE, IR and DR must be 1 */
+    if ((value >> MSR_PR) & 1) {
+        value |= (1 << MSR_EE) | (1 << MSR_DR) | (1 << MSR_IR);
+    }
 #endif
     env->msr = value;
     hreg_compute_hflags(env);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 58/77] ppc: Initial HDEC support
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (56 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 57/77] ppc: Enforce setting MSR:EE, IR and DR when MSR:PR is set Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 59/77] ppc: Add placeholder SPRs for DPDES and DHDES on P8 Benjamin Herrenschmidt
                   ` (21 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The current behaviour isn't completely right: as with the DEC, we
don't properly re-arm when wrapping around. I will fix this
in a separate patch.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 hw/ppc/ppc.c                 | 17 ++++++++++++-----
 target-ppc/excp_helper.c     | 22 ++++++++++++----------
 target-ppc/helper.h          |  2 ++
 target-ppc/timebase_helper.c | 10 ++++++++++
 target-ppc/translate_init.c  | 30 ++++++++++++++++++++++++++++++
 5 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 3b14f09..ac3f65c 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -698,9 +698,18 @@ static inline void cpu_ppc_decr_lower(PowerPCCPU *cpu)
 
 static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu)
 {
+    CPUPPCState *env = &cpu->env;
+
     /* Raise it */
-    LOG_TB("raise decrementer exception\n");
-    ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1);
+    LOG_TB("raise hv decrementer exception\n");
+
+    /* The architecture specifies that we don't deliver HDEC
+     * interrupts in a PM state. Not only do they not cause a
+     * wakeup, they are also effectively discarded.
+     */
+    if (!env->in_pm_state) {
+        ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1);
+    }
 }
 
 static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu)
@@ -927,9 +936,7 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
     }
     /* Create new timer */
     tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu);
-    if (0) {
-        /* XXX: find a suitable condition to enable the hypervisor decrementer
-         */
+    if (env->has_hv_mode) {
         tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb,
                                                 cpu);
     } else {
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index eb65eee..28a529a 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -753,7 +753,6 @@ void ppc_cpu_do_interrupt(CPUState *cs)
 static void ppc_hw_interrupt(CPUPPCState *env)
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
-    int hdice;
 #if 0
     CPUState *cs = CPU(cpu);
 
@@ -782,15 +781,13 @@ static void ppc_hw_interrupt(CPUPPCState *env)
         return;
     }
 #endif
-    if (0) {
-        /* XXX: find a suitable condition to enable the hypervisor mode */
-        hdice = env->spr[SPR_LPCR] & 1;
-    } else {
-        hdice = 0;
-    }
-    if ((msr_ee != 0 || msr_hv == 0 || msr_pr != 0) && hdice != 0) {
-        /* Hypervisor decrementer exception */
-        if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDECR)) {
+    /* Hypervisor decrementer exception */
+    if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDECR)) {
+        /* LPCR will be clear when not supported so this will work */
+        bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE);
+        if ((msr_ee != 0 || msr_hv == 0) && hdice) {
+            /* HDEC clears on delivery */
+            env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR);
             powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_HDECR);
             return;
         }
@@ -942,6 +939,11 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn)
     cs->halted = 1;
     env->in_pm_state = true;
 
+    /* The architecture specifies that HDEC interrupts are
+     * discarded in PM states
+     */
+    env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR);
+
     /* Technically, nap doesn't set EE, but if we don't set it
      * then ppc_hw_interrupt() won't deliver. We could add some
      * other tests there based on LPCR but it's simpler to just
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 23889fe..9890920 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -598,6 +598,8 @@ DEF_HELPER_2(store_601_rtcl, void, env, tl)
 DEF_HELPER_2(store_601_rtcu, void, env, tl)
 DEF_HELPER_1(load_decr, tl, env)
 DEF_HELPER_2(store_decr, void, env, tl)
+DEF_HELPER_1(load_hdecr, tl, env)
+DEF_HELPER_2(store_hdecr, void, env, tl)
 DEF_HELPER_2(store_hid0_601, void, env, tl)
 DEF_HELPER_3(store_403_pbr, void, env, i32, tl)
 DEF_HELPER_1(load_40x_pit, tl, env)
diff --git a/target-ppc/timebase_helper.c b/target-ppc/timebase_helper.c
index 865dcbe..798b803 100644
--- a/target-ppc/timebase_helper.c
+++ b/target-ppc/timebase_helper.c
@@ -100,6 +100,16 @@ void helper_store_decr(CPUPPCState *env, target_ulong val)
     cpu_ppc_store_decr(env, val);
 }
 
+target_ulong helper_load_hdecr(CPUPPCState *env)
+{
+    return cpu_ppc_load_hdecr(env);
+}
+
+void helper_store_hdecr(CPUPPCState *env, target_ulong val)
+{
+    cpu_ppc_store_hdecr(env, val);
+}
+
 target_ulong helper_load_40x_pit(CPUPPCState *env)
 {
     return load_40x_pit(env);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 25f5b18..06061e2 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -292,6 +292,32 @@ static void spr_read_purr (DisasContext *ctx, int gprn, int sprn)
 {
     gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
 }
+
+/* HDECR */
+static void spr_read_hdecr (DisasContext *ctx, int gprn, int sprn)
+{
+    if (ctx->tb->cflags & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
+    gen_helper_load_hdecr(cpu_gpr[gprn], cpu_env);
+    if (ctx->tb->cflags & CF_USE_ICOUNT) {
+        gen_io_end();
+        gen_stop_exception(ctx);
+    }
+}
+
+static void spr_write_hdecr (DisasContext *ctx, int sprn, int gprn)
+{
+    if (ctx->tb->cflags & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
+    gen_helper_store_hdecr(cpu_env, cpu_gpr[gprn]);
+    if (ctx->tb->cflags & CF_USE_ICOUNT) {
+        gen_io_end();
+        gen_stop_exception(ctx);
+    }
+}
+
 #endif
 #endif
 
@@ -7716,6 +7742,10 @@ static void gen_spr_power5p_lpar(CPUPPCState *env)
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_lpcr,
                      KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
+    spr_register_hv(env, SPR_HDEC, "HDEC",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_hdecr, &spr_write_hdecr, 0);
 #endif
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 59/77] ppc: Add placeholder SPRs for DPDES and DHDES on P8
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (57 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 58/77] ppc: Initial HDEC support Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 60/77] ppc: LPCR is a HV resource Benjamin Herrenschmidt
                   ` (20 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We still need to eventually implement doorbells but at least this
makes us not crash when the SPRs are accessed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  2 ++
 target-ppc/translate_init.c | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 3fc3e1d..099b8da 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1395,6 +1395,8 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_UAMOR             (0x09D)
 #define SPR_MPC_ICTRL         (0x09E)
 #define SPR_MPC_BAR           (0x09F)
+#define SPR_DHDES             (0x0B1)
+#define SPR_DPDES             (0x0B0)
 #define SPR_DAWR              (0x0B4)
 #define SPR_RPR               (0x0BA)
 #define SPR_DAWRX             (0x0BC)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 06061e2..bfc7a6d 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8059,6 +8059,22 @@ static void gen_spr_power8_rpr(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power8_dbell(CPUPPCState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    spr_register_hv(env, SPR_DPDES, "DPDES",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0);
+    spr_register_hv(env, SPR_DHDES, "DHDES",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
     gen_spr_ne_601(env);
@@ -8111,6 +8127,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         gen_spr_power8_tm(env);
         gen_spr_vtb(env);
         gen_spr_power8_rpr(env);
+        gen_spr_power8_dbell(env);
     }
     if (version < BOOK3S_CPU_POWER8) {
         gen_spr_book3s_dbg(env);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 60/77] ppc: LPCR is a HV resource
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (58 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 59/77] ppc: Add placeholder SPRs for DPDES and DHDES on P8 Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 61/77] ppc: SPURR & PURR are HV writeable and privileged Benjamin Herrenschmidt
                   ` (19 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Don't allow access in guest mode

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index bfc7a6d..e3887e7 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7738,10 +7738,11 @@ static void gen_spr_power5p_lpar(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /* Logical partitionning */
-    spr_register_kvm(env, SPR_LPCR, "LPCR",
-                     SPR_NOACCESS, SPR_NOACCESS,
-                     &spr_read_generic, &spr_write_lpcr,
-                     KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
+    spr_register_kvm_hv(env, SPR_LPCR, "LPCR",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_lpcr,
+                        KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
     spr_register_hv(env, SPR_HDEC, "HDEC",
                     SPR_NOACCESS, SPR_NOACCESS,
                     SPR_NOACCESS, SPR_NOACCESS,
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 61/77] ppc: SPURR & PURR are HV writeable and privileged
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (59 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 60/77] ppc: LPCR is a HV resource Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 62/77] ppc: Add dummy SPR_IC for POWER8 Benjamin Herrenschmidt
                   ` (18 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Those are HV writeable, so we provide a dummy write. We eventually need
to provide a better emulation but for now this will get us going.

We also make them non-user readable as per the architecture.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index e3887e7..e2efdf3 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -293,6 +293,12 @@ static void spr_read_purr (DisasContext *ctx, int gprn, int sprn)
     gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
 }
 
+__attribute__ (( unused ))
+static void spr_write_purr (DisasContext *ctx, int gprn, int sprn)
+{
+    // Temporary placeholder
+}
+
 /* HDECR */
 static void spr_read_hdecr (DisasContext *ctx, int gprn, int sprn)
 {
@@ -7860,14 +7866,16 @@ static void gen_spr_book3s_purr(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /* PURR & SPURR: Hack - treat these as aliases for the TB for now */
-    spr_register_kvm(env, SPR_PURR,   "PURR",
-                     &spr_read_purr, SPR_NOACCESS,
-                     &spr_read_purr, SPR_NOACCESS,
-                     KVM_REG_PPC_PURR, 0x00000000);
-    spr_register_kvm(env, SPR_SPURR,   "SPURR",
-                     &spr_read_purr, SPR_NOACCESS,
-                     &spr_read_purr, SPR_NOACCESS,
-                     KVM_REG_PPC_SPURR, 0x00000000);
+    spr_register_kvm_hv(env, SPR_PURR,   "PURR",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_purr, SPR_NOACCESS,
+                        &spr_read_purr, &spr_write_purr,
+                        KVM_REG_PPC_PURR, 0x00000000);
+    spr_register_kvm_hv(env, SPR_SPURR,   "SPURR",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_purr, SPR_NOACCESS,
+                        &spr_read_purr, &spr_write_purr,
+                        KVM_REG_PPC_SPURR, 0x00000000);
 #endif
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 62/77] ppc: Add dummy SPR_IC for POWER8
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (60 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 61/77] ppc: SPURR & PURR are HV writeable and privileged Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 63/77] ppc: Initialize AMOR in PAPR mode Benjamin Herrenschmidt
                   ` (17 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

It's supposed to be an instruction counter. For now make us not
crash when accessing it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  1 +
 target-ppc/translate_init.c | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 099b8da..eb94244 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1690,6 +1690,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_MPC_MD_DBRAM1     (0x32A)
 #define SPR_RCPU_L2U_RA3      (0x32B)
 #define SPR_TAR               (0x32F)
+#define SPR_IC                (0x350)
 #define SPR_VTB               (0x351)
 #define SPR_MMCRC             (0x353)
 #define SPR_440_INV0          (0x370)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index e2efdf3..f3f6cf5 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8084,6 +8084,17 @@ static void gen_spr_power8_dbell(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power8_ic(CPUPPCState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    spr_register_hv(env, SPR_IC, "IC",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
     gen_spr_ne_601(env);
@@ -8137,6 +8148,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         gen_spr_vtb(env);
         gen_spr_power8_rpr(env);
         gen_spr_power8_dbell(env);
+        gen_spr_power8_ic(env);
     }
     if (version < BOOK3S_CPU_POWER8) {
         gen_spr_book3s_dbg(env);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 63/77] ppc: Initialize AMOR in PAPR mode
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (61 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 62/77] ppc: Add dummy SPR_IC for POWER8 Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 64/77] ppc: Fix writing to AMR/UAMOR Benjamin Herrenschmidt
                   ` (16 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Make sure we give the guest full authorization

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index f3f6cf5..df24b97 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8615,6 +8615,7 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
 {
     CPUPPCState *env = &cpu->env;
     ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
+    ppc_spr_t *amor = &env->spr_cb[SPR_AMOR];
 
     /* PAPR always has exception vectors in RAM not ROM. To ensure this,
      * MSR[IP] should never be set.
@@ -8651,6 +8652,9 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
      */
     env->spr[SPR_LPCR] = lpcr->default_value;
 
+    /* Set a full AMOR so guest can use the AMR as it sees fit */
+    env->spr[SPR_AMOR] = amor->default_value = 0xffffffffffffffffull;
+
     /* Tell KVM that we're in PAPR mode */
     if (kvm_enabled()) {
         kvmppc_set_papr(cpu);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 64/77] ppc: Fix writing to AMR/UAMOR
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (62 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 63/77] ppc: Initialize AMOR in PAPR mode Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 65/77] ppc: Add POWER8 IAMR register Benjamin Herrenschmidt
                   ` (15 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The masks were neither chosen nor applied properly. The architecture specifies
that writes to AMR are masked by UAMOR for PR=1, otherwise by AMOR for HV=0.

Writes to UAMOR are masked by AMOR for HV=0.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 76 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 60 insertions(+), 16 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index df24b97..aa9eecf 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -1094,30 +1094,72 @@ static void gen_spr_7xx (CPUPPCState *env)
 
 #ifdef TARGET_PPC64
 #ifndef CONFIG_USER_ONLY
-static void spr_read_uamr (DisasContext *ctx, int gprn, int sprn)
+static void spr_write_amr (DisasContext *ctx, int sprn, int gprn)
 {
-    gen_load_spr(cpu_gpr[gprn], SPR_AMR);
-    spr_load_dump_spr(SPR_AMR);
-}
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
 
-static void spr_write_uamr (DisasContext *ctx, int sprn, int gprn)
-{
-    gen_store_spr(SPR_AMR, cpu_gpr[gprn]);
+    /* Note, the HV=1 PR=0 case is handled earlier by simply using
+     * spr_write_generic for HV mode in the SPR table
+     */
+
+    /* Build insertion mask into t1 based on context */
+    if (ctx->pr) {
+	gen_load_spr(t1, SPR_UAMOR);
+    } else {
+        gen_load_spr(t1, SPR_AMOR);
+    }
+
+    /* Mask new bits into t2 */
+    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
+
+    /* Load AMR and clear new bits in t0 */
+    gen_load_spr(t0, SPR_AMR);
+    tcg_gen_andc_tl(t0, t0, t1);
+
+    /* Or'in new bits and write it out */
+    tcg_gen_or_tl(t0, t0, t2);
+    gen_store_spr(SPR_AMR, t0);
     spr_store_dump_spr(SPR_AMR);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
 }
 
-static void spr_write_uamr_pr (DisasContext *ctx, int sprn, int gprn)
+static void spr_write_uamor (DisasContext *ctx, int sprn, int gprn)
 {
     TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    /* Note, the HV=1 case is handled earlier by simply using
+     * spr_write_generic for HV mode in the SPR table
+     */
 
+    /* Build insertion mask into t1 based on context */
+    gen_load_spr(t1, SPR_AMOR);
+
+    /* Mask new bits into t2 */
+    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
+
+    /* Load UAMOR and clear new bits in t0 */
     gen_load_spr(t0, SPR_UAMOR);
-    tcg_gen_and_tl(t0, t0, cpu_gpr[gprn]);
-    gen_store_spr(SPR_AMR, t0);
-    spr_store_dump_spr(SPR_AMR);
+    tcg_gen_andc_tl(t0, t0, t1);
+
+    /* Or'in new bits and write it out */
+    tcg_gen_or_tl(t0, t0, t2);
+    gen_store_spr(SPR_UAMOR, t0);
+    spr_store_dump_spr(SPR_UAMOR);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
 }
 #endif /* CONFIG_USER_ONLY */
 
-static void gen_spr_amr (CPUPPCState *env)
+static void gen_spr_amr (CPUPPCState *env, bool has_iamr)
 {
 #ifndef CONFIG_USER_ONLY
     /* Virtual Page Class Key protection */
@@ -1125,15 +1167,17 @@ static void gen_spr_amr (CPUPPCState *env)
      * userspace accessible, 29 is privileged.  So we only need to set
      * the kvm ONE_REG id on one of them, we use 29 */
     spr_register(env, SPR_UAMR, "UAMR",
-                 &spr_read_uamr, &spr_write_uamr_pr,
-                 &spr_read_uamr, &spr_write_uamr,
+                 &spr_read_generic, &spr_write_amr,
+                 &spr_read_generic, &spr_write_amr,
                  0);
-    spr_register_kvm(env, SPR_AMR, "AMR",
+    spr_register_kvm_hv(env, SPR_AMR, "AMR",
                      SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_amr,
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_AMR, 0);
-    spr_register_kvm(env, SPR_UAMOR, "UAMOR",
+    spr_register_kvm_hv(env, SPR_UAMOR, "UAMOR",
                      SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_uamor,
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_UAMOR, 0);
     spr_register_hv(env, SPR_AMOR, "AMOR",
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 65/77] ppc: Add POWER8 IAMR register
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (63 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 64/77] ppc: Fix writing to AMR/UAMOR Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 66/77] ppc: Add a few more P8 PMU SPRs Benjamin Herrenschmidt
                   ` (14 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

With appropriate AMR-like masks. Not actually used by the translation
logic at this point.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  1 +
 target-ppc/translate_init.c | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index eb94244..756a66f 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1366,6 +1366,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_BOOKE_CSRR0       (0x03A)
 #define SPR_BOOKE_CSRR1       (0x03B)
 #define SPR_BOOKE_DEAR        (0x03D)
+#define SPR_IAMR              (0x03D)
 #define SPR_BOOKE_ESR         (0x03E)
 #define SPR_BOOKE_IVPR        (0x03F)
 #define SPR_MPC_EIE           (0x050)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index aa9eecf..eaa2ac5 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -1157,6 +1157,36 @@ static void spr_write_uamor (DisasContext *ctx, int sprn, int gprn)
     tcg_temp_free(t1);
     tcg_temp_free(t2);
 }
+
+static void spr_write_iamr (DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    /* Note, the HV=1 case is handled earlier by simply using
+     * spr_write_generic for HV mode in the SPR table
+     */
+
+    /* Build insertion mask into t1 based on context */
+    gen_load_spr(t1, SPR_AMOR);
+
+    /* Mask new bits into t2 */
+    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
+
+    /* Load IAMR and clear new bits in t0 */
+    gen_load_spr(t0, SPR_IAMR);
+    tcg_gen_andc_tl(t0, t0, t1);
+
+    /* Or'in new bits and write it out */
+    tcg_gen_or_tl(t0, t0, t2);
+    gen_store_spr(SPR_IAMR, t0);
+    spr_store_dump_spr(SPR_IAMR);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
 #endif /* CONFIG_USER_ONLY */
 
 static void gen_spr_amr (CPUPPCState *env, bool has_iamr)
@@ -1185,6 +1215,14 @@ static void gen_spr_amr (CPUPPCState *env, bool has_iamr)
                     SPR_NOACCESS, SPR_NOACCESS,
                     &spr_read_generic, &spr_write_generic,
                     0);
+    if (!has_iamr) {
+        return;
+    }
+    spr_register_hv(env, SPR_IAMR, "IAMR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_iamr,
+                    &spr_read_generic, &spr_write_generic,
+                    0);
 #endif /* !CONFIG_USER_ONLY */
 }
 #endif /* TARGET_PPC64 */
@@ -8160,7 +8198,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
     case BOOK3S_CPU_POWER7:
     case BOOK3S_CPU_POWER8:
         gen_spr_book3s_ids(env);
-        gen_spr_amr(env);
+        gen_spr_amr(env, version >= BOOK3S_CPU_POWER8);
         gen_spr_book3s_purr(env);
         env->ci_large_pages = true;
         break;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 66/77] ppc: Add a few more P8 PMU SPRs
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (64 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 65/77] ppc: Add POWER8 IAMR register Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 67/77] ppc: Add dummy write to VTB Benjamin Herrenschmidt
                   ` (13 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  7 +++++++
 target-ppc/translate_init.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 756a66f..f7e653b 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1591,6 +1591,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_PERF0             (0x300)
 #define SPR_RCPU_MI_RBA0      (0x300)
 #define SPR_MPC_MI_CTR        (0x300)
+#define SPR_POWER_USIER       (0x300)
 #define SPR_PERF1             (0x301)
 #define SPR_RCPU_MI_RBA1      (0x301)
 #define SPR_POWER_UMMCR2      (0x301)
@@ -1640,6 +1641,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_PERFF             (0x30F)
 #define SPR_MPC_MD_TW         (0x30F)
 #define SPR_UPERF0            (0x310)
+#define SPR_POWER_SIER        (0x310)
 #define SPR_UPERF1            (0x311)
 #define SPR_POWER_MMCR2       (0x311)
 #define SPR_UPERF2            (0x312)
@@ -1703,7 +1705,12 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_440_ITV2          (0x376)
 #define SPR_440_ITV3          (0x377)
 #define SPR_440_CCR1          (0x378)
+#define SPR_TACR              (0x378)
+#define SPR_TCSCR             (0x379)
+#define SPR_CSIGR             (0x37a)
 #define SPR_DCRIPR            (0x37B)
+#define SPR_POWER_SPMC1       (0x37C)
+#define SPR_POWER_SPMC2       (0x37D)
 #define SPR_POWER_MMCRS       (0x37E)
 #define SPR_PPR               (0x380)
 #define SPR_750_GQR0          (0x390)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index eaa2ac5..08730b6 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7764,6 +7764,30 @@ static void gen_spr_power8_pmu_sup(CPUPPCState *env)
                      SPR_NOACCESS, SPR_NOACCESS,
                      &spr_read_generic, &spr_write_generic,
                      KVM_REG_PPC_MMCRS, 0x00000000);
+    spr_register_kvm(env, SPR_POWER_SIER, "SIER",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_SIER, 0x00000000);
+    spr_register_kvm(env, SPR_POWER_SPMC1, "SPMC1",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_SPMC1, 0x00000000);
+    spr_register_kvm(env, SPR_POWER_SPMC2, "SPMC2",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_SPMC2, 0x00000000);
+    spr_register_kvm(env, SPR_TACR, "TACR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_TACR, 0x00000000);
+    spr_register_kvm(env, SPR_TCSCR, "TCSCR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_TCSCR, 0x00000000);
+    spr_register_kvm(env, SPR_CSIGR, "CSIGR",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_CSIGR, 0x00000000);
 }
 
 static void gen_spr_power8_pmu_user(CPUPPCState *env)
@@ -7772,6 +7796,10 @@ static void gen_spr_power8_pmu_user(CPUPPCState *env)
                  &spr_read_ureg, SPR_NOACCESS,
                  &spr_read_ureg, &spr_write_ureg,
                  0x00000000);
+    spr_register(env, SPR_POWER_USIER, "USIER",
+                 &spr_read_generic, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+		 0x00000000);
 }
 
 static void gen_spr_power5p_ear(CPUPPCState *env)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 67/77] ppc: Add dummy write to VTB
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (65 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 66/77] ppc: Add a few more P8 PMU SPRs Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 68/77] ppc: Add dummy POWER8 MPPR register Benjamin Herrenschmidt
                   ` (12 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

The Hypervisor can write it. We don't handle that properly yet but
at least let's not blow up when it is written.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 08730b6..4ad2c94 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -299,6 +299,12 @@ static void spr_write_purr (DisasContext *ctx, int gprn, int sprn)
     // Temporary placeholder
 }
 
+__attribute__ (( unused ))
+static void spr_write_vtb (DisasContext *ctx, int gprn, int sprn)
+{
+    // Temporary placeholder
+}
+
 /* HDECR */
 static void spr_read_hdecr (DisasContext *ctx, int gprn, int sprn)
 {
@@ -8148,10 +8154,11 @@ static void gen_spr_power8_ebb(CPUPPCState *env)
 /* Virtual Time Base */
 static void gen_spr_vtb(CPUPPCState *env)
 {
-    spr_register(env, SPR_VTB, "VTB",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_tbl, SPR_NOACCESS,
-                 0x00000000);
+    spr_register_hv(env, SPR_VTB, "VTB",
+                   SPR_NOACCESS, SPR_NOACCESS,
+                   &spr_read_tbl, SPR_NOACCESS,
+                   &spr_read_tbl, spr_write_vtb,
+                   0x00000000);
 }
 
 static void gen_spr_power8_fscr(CPUPPCState *env)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 68/77] ppc: Add dummy POWER8 MPPR register
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (66 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 67/77] ppc: Add dummy write to VTB Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 69/77] ppc: Add dummy POWER8 PSPB SPR Benjamin Herrenschmidt
                   ` (11 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Controls the micropartition prefetch; this is pretty much meaningless
in full emulation (used for priming the caches on real HW).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  1 +
 target-ppc/translate_init.c | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index f7e653b..253d04b 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1399,6 +1399,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_DHDES             (0x0B1)
 #define SPR_DPDES             (0x0B0)
 #define SPR_DAWR              (0x0B4)
+#define SPR_MPPR              (0x0B8)
 #define SPR_RPR               (0x0BA)
 #define SPR_DAWRX             (0x0BC)
 #define SPR_HFSCR             (0x0BE)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 4ad2c94..a178696 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8212,6 +8212,18 @@ static void gen_spr_power8_ic(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power8_book4(CPUPPCState *env)
+{
+    /* Add a number of P8 book4 registers */
+#if !defined(CONFIG_USER_ONLY)
+    spr_register_hv(env, SPR_MPPR, "MPPR",
+                    SPR_NOACCESS, SPR_NOACCESS,
+                    &spr_read_generic, SPR_NOACCESS,
+                    &spr_read_generic, &spr_write_generic,
+                    0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
     gen_spr_ne_601(env);
@@ -8266,6 +8278,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         gen_spr_power8_rpr(env);
         gen_spr_power8_dbell(env);
         gen_spr_power8_ic(env);
+        gen_spr_power8_book4(env);
     }
     if (version < BOOK3S_CPU_POWER8) {
         gen_spr_book3s_dbg(env);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 69/77] ppc: Add dummy POWER8 PSPB SPR
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (67 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 68/77] ppc: Add dummy POWER8 MPPR register Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 70/77] ppc: Add dummy CIABR SPR Benjamin Herrenschmidt
                   ` (10 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

On real hardware it allows temporary thread priority boosts, we don't
do threads and implementing it would be fairly tricky, so we just dummy
it for now.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  1 +
 target-ppc/translate_init.c | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 253d04b..334fcfe 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1396,6 +1396,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_UAMOR             (0x09D)
 #define SPR_MPC_ICTRL         (0x09E)
 #define SPR_MPC_BAR           (0x09F)
+#define SPR_PSPB              (0x09F)
 #define SPR_DHDES             (0x0B1)
 #define SPR_DPDES             (0x0B0)
 #define SPR_DAWR              (0x0B4)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index a178696..b1eba73 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8224,6 +8224,16 @@ static void gen_spr_power8_book4(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power8_pspb(CPUPPCState *env)
+{
+#if !defined(CONFIG_USER_ONLY)
+    spr_register(env, SPR_PSPB, "PSPB",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
     gen_spr_ne_601(env);
@@ -8278,6 +8288,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
         gen_spr_power8_rpr(env);
         gen_spr_power8_dbell(env);
         gen_spr_power8_ic(env);
+        gen_spr_power8_pspb(env);
         gen_spr_power8_book4(env);
     }
     if (version < BOOK3S_CPU_POWER8) {
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 70/77] ppc: Add dummy CIABR SPR
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (68 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 69/77] ppc: Add dummy POWER8 PSPB SPR Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 71/77] ppc: Add dummy ACOP SPR Benjamin Herrenschmidt
                   ` (9 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We should implement HW breakpoint/watchpoint, qemu supports them...

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            | 1 +
 target-ppc/translate_init.c | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 334fcfe..bf8892a 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1402,6 +1402,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_DAWR              (0x0B4)
 #define SPR_MPPR              (0x0B8)
 #define SPR_RPR               (0x0BA)
+#define SPR_CIABR             (0x0BB)
 #define SPR_DAWRX             (0x0BC)
 #define SPR_HFSCR             (0x0BE)
 #define SPR_VRSAVE            (0x100)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index b1eba73..b5fd076 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7629,6 +7629,11 @@ static void gen_spr_book3s_207_dbg(CPUPPCState *env)
                     SPR_NOACCESS, SPR_NOACCESS,
                     &spr_read_generic, &spr_write_generic,
                     0x00000000);
+    spr_register_kvm_hv(env, SPR_CIABR, "CIABR",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_generic,
+		        KVM_REG_PPC_CIABR, 0x00000000);
 }
 
 static void gen_spr_970_dbg(CPUPPCState *env)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 71/77] ppc: Add dummy ACOP SPR
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (69 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 70/77] ppc: Add dummy CIABR SPR Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2016-03-02 20:22   ` Thomas Huth
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs Benjamin Herrenschmidt
                   ` (8 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            | 1 +
 target-ppc/translate_init.c | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index bf8892a..aa328a7 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1361,6 +1361,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_SRR1              (0x01B)
 #define SPR_CFAR              (0x01C)
 #define SPR_AMR               (0x01D)
+#define SPR_ACOP              (0x01F)
 #define SPR_BOOKE_PID         (0x030)
 #define SPR_BOOKE_DECAR       (0x036)
 #define SPR_BOOKE_CSRR0       (0x03A)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index b5fd076..4ec532c 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8226,6 +8226,10 @@ static void gen_spr_power8_book4(CPUPPCState *env)
                     &spr_read_generic, SPR_NOACCESS,
                     &spr_read_generic, &spr_write_generic,
                     0);
+    spr_register(env, SPR_ACOP, "ACOP",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0);
 #endif
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (70 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 71/77] ppc: Add dummy ACOP SPR Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2016-03-02 20:30   ` Thomas Huth
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 73/77] ppc: Add KVM numbers to some P8 SPRs Benjamin Herrenschmidt
                   ` (7 subsequent siblings)
  79 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

WORT and PID this time

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/cpu.h            |  2 ++
 target-ppc/translate_init.c | 16 ++++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index aa328a7..6179fbc 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1363,6 +1363,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_AMR               (0x01D)
 #define SPR_ACOP              (0x01F)
 #define SPR_BOOKE_PID         (0x030)
+#define SPR_BOOKS_PID         (0x030)
 #define SPR_BOOKE_DECAR       (0x036)
 #define SPR_BOOKE_CSRR0       (0x03A)
 #define SPR_BOOKE_CSRR1       (0x03B)
@@ -1716,6 +1717,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
 #define SPR_POWER_SPMC1       (0x37C)
 #define SPR_POWER_SPMC2       (0x37D)
 #define SPR_POWER_MMCRS       (0x37E)
+#define SPR_WORT              (0x37F)
 #define SPR_PPR               (0x380)
 #define SPR_750_GQR0          (0x390)
 #define SPR_440_DNV0          (0x390)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 4ec532c..bfdf028 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8226,10 +8226,18 @@ static void gen_spr_power8_book4(CPUPPCState *env)
                     &spr_read_generic, SPR_NOACCESS,
                     &spr_read_generic, &spr_write_generic,
                     0);
-    spr_register(env, SPR_ACOP, "ACOP",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
-                 0);
+    spr_register_kvm(env, SPR_ACOP, "ACOP",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_ACOP, 0);
+    spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_PID, 0);
+    spr_register_kvm(env, SPR_WORT, "WORT",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_generic,
+                     KVM_REG_PPC_WORT, 0);
 #endif
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 73/77] ppc: Add KVM numbers to some P8 SPRs
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (71 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 74/77] ppc: Print HSRR0/HSRR1 in "info registers" Benjamin Herrenschmidt
                   ` (6 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate_init.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index bfdf028..fd084ca 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7619,21 +7619,21 @@ static void gen_spr_book3s_dbg(CPUPPCState *env)
 
 static void gen_spr_book3s_207_dbg(CPUPPCState *env)
 {
-    spr_register_hv(env, SPR_DAWR, "DAWR",
-                    SPR_NOACCESS, SPR_NOACCESS,
-                    SPR_NOACCESS, SPR_NOACCESS,
-                    &spr_read_generic, &spr_write_generic,
-                    0x00000000);
-    spr_register_hv(env, SPR_DAWRX, "DAWRX",
-                    SPR_NOACCESS, SPR_NOACCESS,
-                    SPR_NOACCESS, SPR_NOACCESS,
-                    &spr_read_generic, &spr_write_generic,
-                    0x00000000);
+    spr_register_kvm_hv(env, SPR_DAWR, "DAWR",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_generic,
+                        KVM_REG_PPC_DAWR, 0x00000000);
+    spr_register_kvm_hv(env, SPR_DAWRX, "DAWRX",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_generic,
+                        KVM_REG_PPC_DAWRX, 0x00000000);
     spr_register_kvm_hv(env, SPR_CIABR, "CIABR",
                         SPR_NOACCESS, SPR_NOACCESS,
                         SPR_NOACCESS, SPR_NOACCESS,
                         &spr_read_generic, &spr_write_generic,
-		        KVM_REG_PPC_CIABR, 0x00000000);
+                        KVM_REG_PPC_CIABR, 0x00000000);
 }
 
 static void gen_spr_970_dbg(CPUPPCState *env)
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 74/77] ppc: Print HSRR0/HSRR1 in "info registers"
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (72 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 73/77] ppc: Add KVM numbers to some P8 SPRs Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 75/77] ppc: Add dummy logmpp instruction Benjamin Herrenschmidt
                   ` (5 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

They are generally useful when debugging HV mode stuff

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index f76a0c3..b34d2c6 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11323,6 +11323,13 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                 env->spr[SPR_SPRG4], env->spr[SPR_SPRG5],
                 env->spr[SPR_SPRG6], env->spr[SPR_SPRG7]);
 
+#if defined(TARGET_PPC64)
+    if (env->excp_model == POWERPC_EXCP_POWER7 ||
+	env->excp_model == POWERPC_EXCP_POWER8) {
+        cpu_fprintf(f, "HSRR0 " TARGET_FMT_lx " HSRR1 " TARGET_FMT_lx "\n",
+                    env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]);
+    }
+#endif
     if (env->excp_model == POWERPC_EXCP_BOOKE) {
         cpu_fprintf(f, "CSRR0 " TARGET_FMT_lx " CSRR1 " TARGET_FMT_lx
                        " MCSRR0 " TARGET_FMT_lx " MCSRR1 " TARGET_FMT_lx "\n",
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 75/77] ppc: Add dummy logmpp instruction
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (73 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 74/77] ppc: Print HSRR0/HSRR1 in "info registers" Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 76/77] ppc: Add slbfee. instruction Benjamin Herrenschmidt
                   ` (4 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

It's used by KVM for micropartition prefetch

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b34d2c6..1e3996d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3627,6 +3627,16 @@ static void gen_rvwinkle(DisasContext *ctx)
     gen_stop_exception(ctx);
 #endif /* defined(CONFIG_USER_ONLY) */
 }
+
+static void gen_logmpp(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
+#else
+    CHK_HV;
+    /* This doesn't do anything in emulation */
+#endif /* defined(CONFIG_USER_ONLY) */
+}
 #endif /* #if defined(TARGET_PPC64) */
 
 /***                         Floating-point load                           ***/
@@ -9895,6 +9905,14 @@ GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
 GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
 GEN_HANDLER_E(rvwinkle, 0x13, 0x12, 0x0f, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
 GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H),
+
+/* This should be P8 Book4, not ISA207S, but I don't want to add a bit for that
+ * one dummy instruction. Note also that there's a discrepancy between the
+ * P8 Book4 which documents it as using RA while KVM implementation uses RB,
+ * so for now mark both fields as valid
+ */
+//GEN_HANDLER_E(logmpp, 0x1f, 0x12, 0x1f, 0x03E0F800, PPC_NONE, PPC2_ISA207S),
+GEN_HANDLER_E(logmpp, 0x1f, 0x12, 0x1f, 0x03E00000, PPC_NONE, PPC2_ISA207S),
 #endif
 GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW),
 GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x00000001, PPC_FLOW),
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 76/77] ppc: Add slbfee. instruction
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (74 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 75/77] ppc: Add dummy logmpp instruction Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 77/77] ppc: Fix CFAR updates Benjamin Herrenschmidt
                   ` (3 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

Used to look up SLB entries by address; for some reason it was missing.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/helper.h     |  1 +
 target-ppc/mmu-hash64.c | 24 ++++++++++++++++++++++++
 target-ppc/translate.c  | 25 +++++++++++++++++++++++++
 3 files changed, 50 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 9890920..88ada3b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -551,6 +551,7 @@ DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_2(load_slb_esid, tl, env, tl)
 DEF_HELPER_2(load_slb_vsid, tl, env, tl)
+DEF_HELPER_2(find_slb_esid, tl, env, tl)
 DEF_HELPER_FLAGS_1(slbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(slbie, TCG_CALL_NO_RWG, void, env, tl)
 #endif
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 9071fe9..f1e9666 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -188,6 +188,19 @@ static int ppc_load_slb_vsid(CPUPPCState *env, target_ulong rb,
     return 0;
 }
 
+static int ppc_find_slb_esid(CPUPPCState *env, target_ulong rb,
+                             target_ulong *rt)
+{
+    ppc_slb_t *slb = slb_lookup(env, rb);
+
+    if (!slb) {
+        return -1;
+    }
+
+    *rt = slb->vsid;
+    return 0;
+}
+
 void helper_store_slb(CPUPPCState *env, target_ulong rb, target_ulong rs)
 {
     if (ppc_store_slb(env, rb, rs) < 0) {
@@ -218,6 +231,17 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb)
     return rt;
 }
 
+target_ulong helper_find_slb_esid(CPUPPCState *env, target_ulong rb)
+{
+    target_ulong rt = 0;
+
+    if (ppc_find_slb_esid(env, rb, &rt) < 0) {
+        helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM,
+                                   POWERPC_EXCP_INVAL);
+    }
+    return rt;
+}
+
 /*
  * 64-bit hash table MMU handling
  */
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1e3996d..b46ad72 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4949,6 +4949,30 @@ static void gen_slbmfev(DisasContext *ctx)
                              cpu_gpr[rB(ctx->opcode)]);
 #endif /* defined(CONFIG_USER_ONLY) */
 }
+
+static void gen_slbfee_(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+    GEN_PRIV;
+#else
+    TCGLabel *l1;
+    TCGLabel *l2;
+
+    CHK_SV;
+
+    gen_helper_find_slb_esid(cpu_gpr[rS(ctx->opcode)], cpu_env,
+                             cpu_gpr[rB(ctx->opcode)]);
+    l1 = gen_new_label();
+    l2 = gen_new_label();
+    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rS(ctx->opcode)], -1, l1);
+    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+    tcg_gen_br(l2);
+    gen_set_label(l1);
+    tcg_gen_movi_tl(cpu_gpr[rS(ctx->opcode)], 0);
+    gen_set_label(l2);
+#endif /* defined(CONFIG_USER_ONLY) */
+}
 #endif /* defined(TARGET_PPC64) */
 
 /***                      Lookaside buffer management                      ***/
@@ -9958,6 +9982,7 @@ GEN_HANDLER2(mtsrin_64b, "mtsrin", 0x1F, 0x12, 0x07, 0x001F0001,
 GEN_HANDLER2(slbmte, "slbmte", 0x1F, 0x12, 0x0C, 0x001F0001, PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfee, "slbmfee", 0x1F, 0x13, 0x1C, 0x001F0001, PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfev, "slbmfev", 0x1F, 0x13, 0x1A, 0x001F0001, PPC_SEGMENT_64B),
+GEN_HANDLER2(slbfee_, "slbfee.", 0x1F, 0x13, 0x1E, 0x001F0000, PPC_SEGMENT_64B),
 #endif
 GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA),
 /* XXX Those instructions will need to be handled differently for
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* [Qemu-devel] [PATCH 77/77] ppc: Fix CFAR updates
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (75 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 76/77] ppc: Add slbfee. instruction Benjamin Herrenschmidt
@ 2015-11-11  0:28 ` Benjamin Herrenschmidt
  2015-11-11  0:42 ` [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (2 subsequent siblings)
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:28 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

We need to subtract 4 from the NIP since it has already been advanced
by 4 by the dispatcher before the helper is called.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 target-ppc/translate.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b46ad72..d3e7d5b 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4035,7 +4035,7 @@ static void gen_b(DisasContext *ctx)
     if (LK(ctx->opcode)) {
         gen_setlr(ctx, ctx->nip);
     }
-    gen_update_cfar(ctx, ctx->nip);
+    gen_update_cfar(ctx, ctx->nip - 4);
     gen_goto_tb(ctx, 0, target);
 }
 
@@ -4100,7 +4100,7 @@ static inline void gen_bcond(DisasContext *ctx, int type)
         }
         tcg_temp_free_i32(temp);
     }
-    gen_update_cfar(ctx, ctx->nip);
+    gen_update_cfar(ctx, ctx->nip - 4);
     if (type == BCOND_IM) {
         target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
         if (likely(AA(ctx->opcode) == 0)) {
@@ -4211,7 +4211,7 @@ static void gen_rfi(DisasContext *ctx)
 #else
     /* Restore CPU state */
     CHK_SV;
-    gen_update_cfar(ctx, ctx->nip);
+    gen_update_cfar(ctx, ctx->nip - 4);
     gen_helper_rfi(cpu_env);
     gen_sync_exception(ctx);
 #endif
@@ -4225,7 +4225,7 @@ static void gen_rfid(DisasContext *ctx)
 #else
     /* Restore CPU state */
     CHK_SV;
-    gen_update_cfar(ctx, ctx->nip);
+    gen_update_cfar(ctx, ctx->nip - 4);
     gen_helper_rfid(cpu_env);
     gen_sync_exception(ctx);
 #endif
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (76 preceding siblings ...)
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 77/77] ppc: Fix CFAR updates Benjamin Herrenschmidt
@ 2015-11-11  0:42 ` Benjamin Herrenschmidt
  2015-11-11  0:50 ` [Qemu-devel] " Eric Blake
  2015-11-11  0:57 ` Stewart Smith
  79 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:42 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel

On Wed, 2015-11-11 at 11:27 +1100, Benjamin Herrenschmidt wrote:
> This requires an OPAL firmware file which isn't included yet,
> will emulate enough to boot existing distros and run KVM
> inside TCG

Note that this neither migrates nor supports running under PR KVM;
both are possible but will take significantly more work (the latter
will need updates to PR KVM to emulate various aspects of the
MMU that it ignores for now).

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (77 preceding siblings ...)
  2015-11-11  0:42 ` [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
@ 2015-11-11  0:50 ` Eric Blake
  2015-11-11  0:56   ` Benjamin Herrenschmidt
  2015-11-11  0:57 ` Stewart Smith
  79 siblings, 1 reply; 198+ messages in thread
From: Eric Blake @ 2015-11-11  0:50 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 839 bytes --]

On 11/10/2015 05:27 PM, Benjamin Herrenschmidt wrote:
> This requires an OPAL firmware file which isn't included yet,
> will emulate enough to boot existing distros and run KVM
> inside TCG

[meta-comment]

It would be nice to include a diffstat (git defaults to doing this if
you use 'git format-patch --cover-letter' for generating the 0/77
letter), so that we can see at a glance how big this series is and what
files it touches.

In particular, I can't tell, without poking through 77 patches, if you
update the MAINTAINERS file to claim any new files added here.

Also, for a series this big, it's nice to point to a scratch git repo
that we can browse/clone from, without having to 'git am' all 77 mails.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  0:50 ` [Qemu-devel] " Eric Blake
@ 2015-11-11  0:56   ` Benjamin Herrenschmidt
  2015-11-11  3:27     ` [Qemu-devel] [Qemu-ppc] " Alexey Kardashevskiy
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  0:56 UTC (permalink / raw)
  To: Eric Blake, qemu-ppc; +Cc: qemu-devel

On Tue, 2015-11-10 at 17:50 -0700, Eric Blake wrote:
> 
> It would be nice to include a diffstat (git defaults to doing this if
> you use 'git format-patch --cover-letter' for generating the 0/77
> letter), so that we can see at a glance how big this series is and
> what files it touches.

Right. I'll do that next time. In the meantime, I've appended the
diffstat below.

> In particular, I can't tell, without poking through 77 patches, if
> you update the MAINTAINERS file to claim any new files added here.

I don't ... yet. I'll do it in the next spin. Thanks for pointing that
out.

> Also, for a series this big, it's nice to point to a scratch git repo
> that we can browse/clone from, without having to 'git am' all 77
> mails.

https://github.com/ozbenh/qemu

Cheers,
Ben.

 default-configs/ppc64-softmmu.mak   |    5 +-
 hmp-commands-info.hx                |    2 +
 hw/intc/Makefile.objs               |    2 +
 hw/intc/xics.c                      |  722 +++++++----------------
 hw/intc/xics_kvm.c                  |   91 +--
 hw/intc/xics_native.c               |  294 ++++++++++
 hw/intc/xics_spapr.c                |  423 ++++++++++++++
 hw/pci-bridge/pci_bridge_dev.c      |    3 +
 hw/pci-host/Makefile.objs           |    2 +
 hw/pci-host/pnv_phb3.c              | 1083 +++++++++++++++++++++++++++++++++++
 hw/pci-host/pnv_phb3_msi.c          |  338 +++++++++++
 hw/pci-host/pnv_phb3_pbcq.c         |  314 ++++++++++
 hw/pci-host/pnv_phb3_rc.c           |  132 +++++
 hw/pci/pci.c                        |   26 +-
 hw/ppc/Makefile.objs                |    2 +
 hw/ppc/pnv.c                        |  859 +++++++++++++++++++++++++++
 hw/ppc/pnv_lpc.c                    |  527 +++++++++++++++++
 hw/ppc/pnv_occ.c                    |  125 ++++
 hw/ppc/pnv_psi.c                    |  594 +++++++++++++++++++
 hw/ppc/pnv_xscom.c                  |  415 ++++++++++++++
 hw/ppc/ppc.c                        |   31 +-
 hw/ppc/spapr.c                      |   41 +-
 hw/ppc/spapr_events.c               |    8 +-
 hw/ppc/spapr_hcall.c                |   22 +-
 hw/ppc/spapr_pci.c                  |   10 +-
 hw/ppc/spapr_vio.c                  |    2 +-
 include/hw/pci-host/pnv_phb3.h      |  145 +++++
 include/hw/pci-host/pnv_phb3_regs.h |  505 ++++++++++++++++
 include/hw/pci-host/spapr.h         |    2 +-
 include/hw/pci/pci_bus.h            |    1 +
 include/hw/ppc/pnv.h                |   67 +++
 include/hw/ppc/pnv_xscom.h          |   73 +++
 include/hw/ppc/ppc.h                |    2 +
 include/hw/ppc/spapr.h              |    2 +-
 include/hw/ppc/spapr_vio.h          |    2 +-
 include/hw/ppc/xics.h               |   81 ++-
 include/hw/qdev-core.h              |    1 +
 linux-user/main.c                   |    1 +
 monitor.c                           |    3 +
 qdev-monitor.c                      |   13 +-
 target-ppc/cpu-models.c             |   12 +-
 target-ppc/cpu-models.h             |    4 +-
 target-ppc/cpu-qom.h                |    1 +
 target-ppc/cpu.h                    |  152 ++++-
 target-ppc/excp_helper.c            |  324 +++++++----
 target-ppc/helper.h                 |    6 +
 target-ppc/helper_regs.h            |   75 ++-
 target-ppc/machine.c                |    4 +-
 target-ppc/mmu-hash32.c             |    4 +-
 target-ppc/mmu-hash64.c             |  356 ++++++++++--
 target-ppc/mmu-hash64.h             |    1 +
 target-ppc/mmu_helper.c             |   17 +-
 target-ppc/timebase_helper.c        |   10 +
 target-ppc/translate.c              | 1009 +++++++++++++++++---------------
 target-ppc/translate_init.c         |  735 +++++++++++++++++++++---
 55 files changed, 8318 insertions(+), 1363 deletions(-)
 create mode 100644 hw/intc/xics_native.c
 create mode 100644 hw/intc/xics_spapr.c
 create mode 100644 hw/pci-host/pnv_phb3.c
 create mode 100644 hw/pci-host/pnv_phb3_msi.c
 create mode 100644 hw/pci-host/pnv_phb3_pbcq.c
 create mode 100644 hw/pci-host/pnv_phb3_rc.c
 create mode 100644 hw/ppc/pnv.c
 create mode 100644 hw/ppc/pnv_lpc.c
 create mode 100644 hw/ppc/pnv_occ.c
 create mode 100644 hw/ppc/pnv_psi.c
 create mode 100644 hw/ppc/pnv_xscom.c
 create mode 100644 include/hw/pci-host/pnv_phb3.h
 create mode 100644 include/hw/pci-host/pnv_phb3_regs.h
 create mode 100644 include/hw/ppc/pnv.h
 create mode 100644 include/hw/ppc/pnv_xscom.h


^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
                   ` (78 preceding siblings ...)
  2015-11-11  0:50 ` [Qemu-devel] " Eric Blake
@ 2015-11-11  0:57 ` Stewart Smith
  79 siblings, 0 replies; 198+ messages in thread
From: Stewart Smith @ 2015-11-11  0:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> This requires an OPAL firmware file which isn't included yet,
> will emulate enough to boot existing distros and run KVM
> inside TCG

For anyone wanting to try out this patchset, to build skiboot.lid,
grab https://github.com/open-power/skiboot.git and build.

You'll need ppc64 (big endian) cross compilers, available here:
https://www.kernel.org/pub/tools/crosstool/
and with that in your path everything should "just work".

The travis-ci test script for skiboot itself does build a qemu with
these patches and boots skiboot, so see:
https://github.com/open-power/skiboot/blob/master/.travis.yml
for how that works.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions Benjamin Herrenschmidt
@ 2015-11-11  0:59   ` Stewart Smith
  2015-11-16  5:01   ` David Gibson
  1 sibling, 0 replies; 198+ messages in thread
From: Stewart Smith @ 2015-11-11  0:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> We never released anything older than POWER8 DD2.0 and POWER8E DD2.1,
> so let's use these versions, without that some firmware or Linux code
> might fail to use some HW features that were non functional in earlier
> internal only spins of the chip.

We were getting it wrong with mambo a while ago and it caused some
oddness. There's zero reason for anyone to use pre-production processors
these days, so:

Acked-by: Stewart Smith <stewart@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  0:56   ` Benjamin Herrenschmidt
@ 2015-11-11  3:27     ` Alexey Kardashevskiy
  2015-11-11  3:38       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-11-11  3:27 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Eric Blake, qemu-ppc; +Cc: qemu-devel

On 11/11/2015 11:56 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2015-11-10 at 17:50 -0700, Eric Blake wrote:
>>
>> It would be nice to include a diffstat (git defaults to doing this if
>> you use 'git format-patch --cover-letter' for generating the 0/77
>> letter), so that we can see at a glance how big this series is and
>> what files it touches.
>
> Right. I'll do that next time. In the meantime, I've appended the
> diffstat below.


A working example of a qemu command line would not hurt. Known limitations
as well. Like:

- it runs under neither kvm-hv nor kvm-pr;
- cannot boot LE kernel;
...


>
>> In particular, I can't tell, without poking through 77 patches, if
>> you update the MAINTAINERS file to claim any new files added here.
>
> I don't ... yet. I'll do it in the next spin. Thanks for pointing that
> out.
>
>> Also, for a series this big, it's nice to point to a scratch git repo
>> that we can browse/clone from, without having to 'git am' all 77
>> mails.
>
> https://github.com/ozbenh/qemu


-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  3:27     ` [Qemu-devel] [Qemu-ppc] " Alexey Kardashevskiy
@ 2015-11-11  3:38       ` Benjamin Herrenschmidt
  2015-11-11  4:07         ` Alexey Kardashevskiy
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  3:38 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Eric Blake, qemu-ppc; +Cc: qemu-devel

On Wed, 2015-11-11 at 14:27 +1100, Alexey Kardashevskiy wrote:
> On 11/11/2015 11:56 AM, Benjamin Herrenschmidt wrote:
> > On Tue, 2015-11-10 at 17:50 -0700, Eric Blake wrote:
> > > 
> > > It would be nice to include a diffstat (git defaults to doing
> > > this if
> > > you use 'git format-patch --cover-letter' for generating the 0/77
> > > letter), so that we can see at a glance how big this series is
> > > and
> > > what files it touches.
> > 
> > Right. I'll do that next time. In the meantime, I've appended the
> > diffstat below.
> 
> 
> Some working example of qemu command line would not hurt. Known
> limitations 
> as well. Like:
> 
> - it does not run under neither kvm-hv nor kvm-pr;

It will never run under HV KVM,  indeed. I did mention somewhere else
that support for PR KVM was possible, just quite a bit of work. I will
add it to next version's cover letter.

> - cannot boot LE kernel;
> ...

It boots an LE kernel just fine :-)

Also next spin will have skiboot.lid

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  3:38       ` Benjamin Herrenschmidt
@ 2015-11-11  4:07         ` Alexey Kardashevskiy
  2015-11-11  4:16           ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-11-11  4:07 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Eric Blake, qemu-ppc; +Cc: qemu-devel

On 11/11/2015 02:38 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2015-11-11 at 14:27 +1100, Alexey Kardashevskiy wrote:
>> On 11/11/2015 11:56 AM, Benjamin Herrenschmidt wrote:
>>> On Tue, 2015-11-10 at 17:50 -0700, Eric Blake wrote:
>>>>
>>>> It would be nice to include a diffstat (git defaults to doing
>>>> this if
>>>> you use 'git format-patch --cover-letter' for generating the 0/77
>>>> letter), so that we can see at a glance how big this series is
>>>> and
>>>> what files it touches.
>>>
>>> Right. I'll do that next time. In the meantime, I've appended the
>>> diffstat below.
>>
>>
>> Some working example of qemu command line would not hurt. Known
>> limitations
>> as well. Like:
>>
>> - it does not run under neither kvm-hv nor kvm-pr;
>
> It will never run under HV KVM,  indeed. I did mention somewhere else
> that support for PR KVM was possible, just quite a bit of work. I will
> add it to next version's cover letter.
>
>> - cannot boot LE kernel;
>> ...
>
> It boots an LE kernel just fine :-)


aaaaand there is still no working example ;)


This:

p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine powernv \
-nographic -vga none -initrd t/le.cpio -kernel t/vml420le -bios \
skiboot.lid -smp 1,threads=1

just hangs at:

[1491287872,5] INIT: Waiting for kernel...
[1493257423,5] Assuming kernel at 0x20000000
[1494710040,5] INIT: Kernel loaded, size: 0 bytes (0 = unknown preload)
[1497506414,5] INIT: 64-bit LE kernel discovered
[1500827972,5] INIT: 64-bit kernel entry at 0x20010000
[1505594383,3] OCC: No HOMER detected, assuming no pstates
[1507983930,3] ELOG: Error getting buffer to log error
[1556792870,5] Free space in HEAP memory regions:
[1559724738,5] Region ibm,firmware-heap free: 12778984
[1561377946,5] Region ibm,firmware-allocs-memory@0000000000000000 free: 376992
[1563789914,5] Total free: 13155976
[1565066925,5] INIT: Starting kernel at 0x20010000, fdt at 0x30350610 (size 
0x2ce4)


With BE kernel and initrd, it goes further and boots fine:

[1589979488,5] Total free: 13155976
[1591411784,5] INIT: Starting kernel at 0x20010000, fdt at 0x30350610 (size 
0x2ce4)
[1687534980,3] OPAL: Trying a CPU re-init with flags: 0x1
CPU maps initialized for 1 thread per core
....


If I try LE disk image (ubuntu 14), it just crashes:

p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine powernv \
-nographic -vga none img/u14_32GB_cuda7.qcow2 -bios skiboot.lid \
-smp 1,threads=1
qemu: hardware error: qemu: could not load kernel'(null)'


CPU #0:
NIP 0000000000000000   LR 0000000000000000 CTR 0000000000000000 XER 
0000000000000000 CPU#0
MSR 0000000000000000 HID0 0000000000000000  HF 0000000000000000 iidx 0
didx 0
TB 00000000 00000000 DECR 00000000
...


>
> Also next spin will have skiboot.lid

Right. With the note that it goes with -bios :)




-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  4:07         ` Alexey Kardashevskiy
@ 2015-11-11  4:16           ` Benjamin Herrenschmidt
  2015-11-11  4:41             ` Alexey Kardashevskiy
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  4:16 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Eric Blake, qemu-ppc; +Cc: qemu-devel

On Wed, 2015-11-11 at 15:07 +1100, Alexey Kardashevskiy wrote:
> 
> p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine
> powernv \
> -nographic -vga none -initrd t/le.cpio -kernel t/vml420le -bios \
> skiboot.lid -smp 1,threads=1
> 
> just hangs at:
> 
> [1491287872,5] INIT: Waiting for kernel...
> [1493257423,5] Assuming kernel at 0x20000000
> [1494710040,5] INIT: Kernel loaded, size: 0 bytes (0 = unknown
> preload)
> [1497506414,5] INIT: 64-bit LE kernel discovered
> [1500827972,5] INIT: 64-bit kernel entry at 0x20010000
> [1505594383,3] OCC: No HOMER detected, assuming no pstates
> [1507983930,3] ELOG: Error getting buffer to log error
> [1556792870,5] Free space in HEAP memory regions:
> [1559724738,5] Region ibm,firmware-heap free: 12778984
> [1561377946,5] Region ibm,firmware-allocs-memory@0000000000000000
>  free: 376992
> [1563789914,5] Total free: 13155976
> [1565066925,5] INIT: Starting kernel at 0x20010000, fdt at 0x30350610
> (size 
> 0x2ce4)

Hrm, works for me, I've been testing various LE kernels including a
full ubuntu distro in there, we need to debug that further. Does that
same kernel actually work on real HW ?

> If I try LE disk image (ubuntu 14), it just crashes:
> 
> p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine
> powernv \
> -nographic -vga none img/u14_32GB_cuda7.qcow2 -bios skiboot.lid \
> -smp 1,threads=1
> qemu: hardware error: qemu: could not load kernel'(null)'

Right, we don't load kernels from disk, you need to pass a -kernel that
typically is the openpower bootloader (Linux + petitboot). My plan is
to make the pnv platform automatically extract these things from a ROM
image of an openpower eval board (aka palmetto) which you can build
from github. It's a bit too big to include as a binary in qemu however
(about 16M).

> Right. With the note that it goes with -bios :)

Not necessarily, it will be in the right place and loaded without an
explicit -bios.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  4:16           ` Benjamin Herrenschmidt
@ 2015-11-11  4:41             ` Alexey Kardashevskiy
  2015-11-11  4:47               ` Benjamin Herrenschmidt
  2015-11-27 10:21               ` Alexander Graf
  0 siblings, 2 replies; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-11-11  4:41 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Eric Blake, qemu-ppc; +Cc: qemu-devel

On 11/11/2015 03:16 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2015-11-11 at 15:07 +1100, Alexey Kardashevskiy wrote:
>>
>> p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine
>> powernv \
>> -nographic -vga none -initrd t/le.cpio -kernel t/vml420le -bios \
>> skiboot.lid -smp 1,threads=1
>>
>> just hangs at:
>>
>> [1491287872,5] INIT: Waiting for kernel...
>> [1493257423,5] Assuming kernel at 0x20000000
>> [1494710040,5] INIT: Kernel loaded, size: 0 bytes (0 = unknown
>> preload)
>> [1497506414,5] INIT: 64-bit LE kernel discovered
>> [1500827972,5] INIT: 64-bit kernel entry at 0x20010000
>> [1505594383,3] OCC: No HOMER detected, assuming no pstates
>> [1507983930,3] ELOG: Error getting buffer to log error
>> [1556792870,5] Free space in HEAP memory regions:
>> [1559724738,5] Region ibm,firmware-heap free: 12778984
>> [1561377946,5] Region ibm,firmware-allocs-memory@0000000000000000
>>   free: 376992
>> [1563789914,5] Total free: 13155976
>> [1565066925,5] INIT: Starting kernel at 0x20010000, fdt at 0x30350610
>> (size
>> 0x2ce4)
>
> Hrm, works for me, I've been testing various LE kernels including a
> full ubuntu distro in there, we need to debug that further. Does that
> same kernel actually work on real HW ?


Ok, as we figured out, CONFIG_PPC_EARLY_DEBUG is responsible for this as it 
does hypercalls in the very beginning.


>> If I try LE disk image (ubuntu 14), it just crashes:
>>
>> p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine
>> powernv \
>> -nographic -vga none img/u14_32GB_cuda7.qcow2 -bios skiboot.lid \
>> -smp 1,threads=1
>> qemu: hardware error: qemu: could not load kernel'(null)'
>
> Right, we don't load kernels from disk, you need to pass a -kernel that

Worth mentioning as well ;)

> typically is the openpower bootloader (Linux + petitboot). My plan is
> to make the pnv platform automatically extract these things from a ROM
> image of an openpower eval board (aka palmetto) which you can build
> from github. It's a bit too big to include as a binary in qemu however
> (about 16M).

git submodule?

Another note. I tried to find out what the default devices are and got another
crash:

(qemu) info qtree
bus: main-system-bus
   type System
   dev: phb3-msi, id ""
/home/aik/p/qemu-powernv/hw/core/sysbus.c:276:sysbus_dev_print: Object 
0x100279e9500 is not an instance of type sys-bus-device



-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  4:41             ` Alexey Kardashevskiy
@ 2015-11-11  4:47               ` Benjamin Herrenschmidt
  2015-11-27 10:21               ` Alexander Graf
  1 sibling, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-11  4:47 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Eric Blake, qemu-ppc; +Cc: qemu-devel

On Wed, 2015-11-11 at 15:41 +1100, Alexey Kardashevskiy wrote:
> Another note. I tried to know what the default devices are and got
> another 
> crash:
> 
> (qemu) info qtree
> bus: main-system-bus
>    type System
>    dev: phb3-msi, id ""
> /home/aik/p/qemu-powernv/hw/core/sysbus.c:276:sysbus_dev_print:
> Object 
> 0x100279e9500 is not an instance of type sys-bus-device

Hrm, that would be a new bug I introduced when adding MSIs yesterday,
I'll look into it.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts Benjamin Herrenschmidt
@ 2015-11-16  4:49   ` David Gibson
  2015-11-16 10:10     ` Benjamin Herrenschmidt
  2015-11-27 10:29   ` Alexander Graf
  1 sibling, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-16  4:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 9819 bytes --]

On Wed, Nov 11, 2015 at 11:27:15AM +1100, Benjamin Herrenschmidt wrote:
> We rework the way the MMU indices are calculated, providing separate
> indices for I and D side based on MSR:IR and MSR:DR respectively,
> and thus no longer need to flush the TLB on context changes. This also
> adds correct support for HV as a separate address space.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/cpu.h         | 11 +++++++---
>  target-ppc/excp_helper.c | 11 ----------
>  target-ppc/helper_regs.h | 54 +++++++++++++++++++++++++++++++++++++++++-------
>  target-ppc/machine.c     |  4 +++-
>  target-ppc/translate.c   |  7 ++++---
>  5 files changed, 62 insertions(+), 25 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 9ef0859..aaa7117 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -462,6 +462,8 @@ struct ppc_slb_t {
>  #define MSR_EP   6  /* Exception prefix on 601                               */
>  #define MSR_IR   5  /* Instruction relocate                                  */
>  #define MSR_DR   4  /* Data relocate                                         */
> +#define MSR_IS   5  /* Instruction address space (BookE)                     */
> +#define MSR_DS   4  /* Data address space (BookE)                            */
>  #define MSR_PE   3  /* Protection enable on 403                              */
>  #define MSR_PX   2  /* Protection exclusive on 403                  x        */
>  #define MSR_PMM  2  /* Performance monitor mark on POWER            x        */
> @@ -505,6 +507,8 @@ struct ppc_slb_t {
>  #define msr_ep   ((env->msr >> MSR_EP)   & 1)
>  #define msr_ir   ((env->msr >> MSR_IR)   & 1)
>  #define msr_dr   ((env->msr >> MSR_DR)   & 1)
> +#define msr_is   ((env->msr >> MSR_IS)   & 1)
> +#define msr_ds   ((env->msr >> MSR_DS)   & 1)
>  #define msr_pe   ((env->msr >> MSR_PE)   & 1)
>  #define msr_px   ((env->msr >> MSR_PX)   & 1)
>  #define msr_pmm  ((env->msr >> MSR_PMM)  & 1)
> @@ -944,7 +948,7 @@ struct ppc_segment_page_sizes {
>  
>  /*****************************************************************************/
>  /* The whole PowerPC CPU context */
> -#define NB_MMU_MODES 3
> +#define NB_MMU_MODES    8
>  
>  #define PPC_CPU_OPCODES_LEN          0x40
>  #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
> @@ -1108,7 +1112,8 @@ struct CPUPPCState {
>      /* Those resources are used only in QEMU core */
>      target_ulong hflags;      /* hflags is a MSR & HFLAGS_MASK         */
>      target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */
> -    int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
> +    int immu_idx;         /* precomputed MMU index to speed up insn access */
> +    int dmmu_idx;         /* precomputed MMU index to speed up data accesses */
>  
>      /* Power management */
>      int (*check_pow)(CPUPPCState *env);
> @@ -1249,7 +1254,7 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
>  #define MMU_USER_IDX 0
>  static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  {
> -    return env->mmu_idx;
> +    return ifetch ? env->immu_idx : env->dmmu_idx;
>  }
>  
>  #include "exec/cpu-all.h"
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 4250106..3e39098 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>  
>      if (env->spr[SPR_LPCR] & LPCR_AIL) {
>          new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> -    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
> -        /* If we disactivated any translation, flush TLBs */
> -        tlb_flush(cs, 1);
>      }
>  
>  #ifdef TARGET_PPC64
> @@ -674,14 +671,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      /* Reset exception state */
>      cs->exception_index = POWERPC_EXCP_NONE;
>      env->error_code = 0;
> -
> -    if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
> -        (env->mmu_model == POWERPC_MMU_BOOKE206)) {
> -        /* XXX: The BookE changes address space when switching modes,
> -                we should probably implement that as different MMU indexes,
> -                but for the moment we do it the slow way and flush all.  */
> -        tlb_flush(cs, 1);
> -    }
>  }
>  
>  void ppc_cpu_do_interrupt(CPUState *cs)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 271fddf..f7edd5b 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -41,11 +41,50 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
>  
>  static inline void hreg_compute_mem_idx(CPUPPCState *env)
>  {
> -    /* Precompute MMU index */
> -    if (msr_pr == 0 && msr_hv != 0) {
> -        env->mmu_idx = 2;
> +    /* This is our encoding for server processors
> +     *
> +     *   0 = Guest User space virtual mode
> +     *   1 = Guest Kernel space virtual mode
> +     *   2 = Guest Kernel space real mode
> +     *   3 = HV User space virtual mode
> +     *   4 = HV Kernel space virtual mode
> +     *   5 = HV Kernel space real mode
> +     *
> +     * The combination PR=1 IR&DR=0 is invalid, we will treat
> +     * it as IR=DR=1

Hmm.. so being in problem state with translation off would certainly
be a bad idea, but would it actually behave this way on CPU hardware?

> +     *
> +     * For BookE, we need 8 MMU modes as follow:
> +     *
> +     *  0 = AS 0 HV User space
> +     *  1 = AS 0 HV Kernel space
> +     *  2 = AS 1 HV User space
> +     *  3 = AS 1 HV Kernel space
> +     *  4 = AS 0 Guest User space
> +     *  5 = AS 0 Guest Kernel space
> +     *  6 = AS 1 Guest User space
> +     *  7 = AS 1 Guest Kernel space
> +     */

I'm wondering if it might be simpler to unify these and allow all 8
theoretical possibilities (hv/guest * user/kernel * translationmode)
for both server and BookE.

> +    if (env->mmu_model & POWERPC_MMU_BOOKE) {
> +        env->immu_idx = env->dmmu_idx = msr_pr ? 0 : 1;
> +        env->immu_idx += msr_is ? 2 : 0;
> +        env->dmmu_idx += msr_ds ? 2 : 0;
> +        env->immu_idx += msr_gs ? 4 : 0;
> +        env->dmmu_idx += msr_gs ? 4 : 0;
>      } else {
> -        env->mmu_idx = 1 - msr_pr;
> +        /* First calucalte a base value independent of HV */
> +        if (msr_pr != 0) {
> +            /* User space, ignore IR and DR */
> +            env->immu_idx = env->dmmu_idx = 0;
> +        } else {
> +            /* Kernel, setup a base I/D value */
> +            env->immu_idx = msr_ir ? 1 : 2;
> +            env->dmmu_idx = msr_dr ? 1 : 2;
> +        }
> +        /* Then offset it for HV */
> +        if (msr_hv) {
> +            env->immu_idx += 3;
> +            env->dmmu_idx += 3;
> +        }
>      }
>  }
>  
> @@ -82,9 +121,10 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      }
>      if (((value >> MSR_IR) & 1) != msr_ir ||
>          ((value >> MSR_DR) & 1) != msr_dr) {
> -        /* Flush all tlb when changing translation mode */
> -        tlb_flush(cs, 1);
> -        excp = POWERPC_EXCP_NONE;
> +        cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +    }
> +    if ((env->mmu_model & POWERPC_MMU_BOOKE) &&
> +        ((value >> MSR_GS) & 1) != msr_gs) {
>          cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>      }
>      if (unlikely((env->flags & POWERPC_FLAG_TGPR) &&
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index f4ac761..b969492 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -90,9 +90,11 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
>      qemu_get_betls(f, &env->nip);
>      qemu_get_betls(f, &env->hflags);
>      qemu_get_betls(f, &env->hflags_nmsr);
> -    qemu_get_sbe32s(f, &env->mmu_idx);

Have I missed something, or do you still need a read here to consume the
saved mmu_idx, even though you'll ignore it? Otherwise you'll get out of
sync and break migration from an old stream.

>      qemu_get_sbe32(f); /* Discard unused power_mode */
>  
> +    /* Ignore saved mmu_idx, recompute */
> +    hreg_compute_mem_idx(env);
> +
>      return 0;
>  }
>  
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 308ad68..6d9f252 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -11220,8 +11220,9 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>                  env->nip, env->lr, env->ctr, cpu_read_xer(env),
>                  cs->cpu_index);
>      cpu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
> -                TARGET_FMT_lx " idx %d\n", env->msr, env->spr[SPR_HID0],
> -                env->hflags, env->mmu_idx);
> +                TARGET_FMT_lx " iidx %d didx %d\n",
> +                env->msr, env->spr[SPR_HID0],
> +                env->hflags, env->immu_idx, env->dmmu_idx);
>  #if !defined(NO_TIMER_DUMP)
>      cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
>  #if !defined(CONFIG_USER_ONLY)
> @@ -11426,7 +11427,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>      ctx.spr_cb = env->spr_cb;
>      ctx.pr = msr_pr;
>      ctx.hv = !msr_pr && msr_hv;
> -    ctx.mem_idx = env->mmu_idx;
> +    ctx.mem_idx = env->dmmu_idx;
>      ctx.insns_flags = env->insns_flags;
>      ctx.insns_flags2 = env->insns_flags2;
>      ctx.access_type = -1;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes Benjamin Herrenschmidt
@ 2015-11-16  5:00   ` David Gibson
  2015-11-16 10:16     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:00 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 11282 bytes --]

On Wed, Nov 11, 2015 at 11:27:16AM +1100, Benjamin Herrenschmidt wrote:
> On ppc64 especially, we flush the tlb on any slbie or tlbie instruction.
> 
> However, those instructions often come in bursts of 3 or more (context
> switch will favor a series of slbie's for example to an slbia if the
> SLB has less than a certain number of entries in it, and tlbie's can
> happen in a series, with PAPR, H_BULK_REMOVE can remove up to 4 entries
> at a time).
> 
> Doing a tlb_flush() each time is a waste of time. We end up doing a memset
> of the whole TLB, reloading it for the next instruction, memset'ing again,
> etc...
> 
> Those instructions don't have to take effect immediately. For slbie, they
> can wait for the next context synchronizing event. For tlbie, the next
> tlbsync.
> 
> This implements batching by keeping a flag that indicates that we have a
> TLB in need of flushing. We check it on interrupts, rfi's, isync's and
> tlbsync and flush the TLB if needed.
> 
> This reduces the number of tlb_flush() calls when booting to an Ubuntu
> installer's first dialog screen from roughly 360K down to 36K.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  hw/ppc/spapr_hcall.c     | 12 +++++++++---
>  target-ppc/cpu.h         |  2 ++
>  target-ppc/excp_helper.c |  9 +++++++++
>  target-ppc/helper.h      |  1 +
>  target-ppc/helper_regs.h | 13 +++++++++++++
>  target-ppc/mmu-hash64.c  | 12 +++---------
>  target-ppc/mmu_helper.c  |  9 ++++++++-
>  target-ppc/translate.c   | 39 ++++++++++++++++++++++++++++++++++++---
>  8 files changed, 81 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index cebceea..7e2cb4b 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>  
>      switch (ret) {
>      case REMOVE_SUCCESS:
> +        check_tlb_flush(env);
>          return H_SUCCESS;
>  
>      case REMOVE_NOT_FOUND:
> @@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                                    target_ulong opcode, target_ulong *args)
>  {
>      CPUPPCState *env = &cpu->env;
> +    target_ulong rc = H_SUCCESS;
>      int i;
>  
>      for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
> @@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>              break;
>  
>          case REMOVE_PARM:
> -            return H_PARAMETER;
> +            rc = H_PARAMETER;
> +            goto exit;
>  
>          case REMOVE_HW:
> -            return H_HARDWARE;
> +            rc = H_HARDWARE;
> +            goto exit;
>          }
>      }
> + exit:
> +    check_tlb_flush(env);
>  
> -    return H_SUCCESS;
> +    return rc;
>  }
>  
>  static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index aaa7117..e6c43f9 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1013,6 +1013,8 @@ struct CPUPPCState {
>      /* PowerPC 64 SLB area */
>      ppc_slb_t slb[MAX_SLB_ENTRIES];
>      int32_t slb_nr;
> +    /* tcg TLB needs flush (deferred slb inval instruction typically) */
> +    uint32_t tlb_need_flush;
>  #endif
>      /* segment registers */
>      hwaddr htab_base;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 3e39098..c1d6605 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      /* Reset exception state */
>      cs->exception_index = POWERPC_EXCP_NONE;
>      env->error_code = 0;
> +
> +    /* Any interrupt is context synchronizing, check if TCG TLB
> +     * needs a delayed flush on ppc64
> +     */
> +    check_tlb_flush(env);
>  }
>  
>  void ppc_cpu_do_interrupt(CPUState *cs)
> @@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>                    __func__, env, env->pending_interrupts,
>                    cs->interrupt_request, (int)msr_me, (int)msr_ee);
>  #endif
> +
>      /* External reset */
>      if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
>          env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
> @@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
>       * as rfi is always the last insn of a TB
>       */
>      cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +
> +    /* Context synchronizing: check if TCG TLB needs flush */
> +    check_tlb_flush(env);
>  }
>  
>  void helper_rfi(CPUPPCState *env)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 869be15..ff2d50b 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
>  DEF_HELPER_1(rfid, void, env)
>  DEF_HELPER_1(hrfid, void, env)
>  #endif
> +DEF_HELPER_1(check_tlb_flush, void, env)
>  #endif
>  
>  DEF_HELPER_3(lmw, void, env, tl, i32)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index f7edd5b..57da931 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      return excp;
>  }
>  
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void check_tlb_flush(CPUPPCState *env)
> +{
> +    CPUState *cs = CPU(ppc_env_get_cpu(env));
> +    if (env->tlb_need_flush) {
> +        env->tlb_need_flush = 0;
> +        tlb_flush(cs, 1);
> +    }
> +}
> +#else
> +static inline void check_tlb_flush(CPUPPCState *env) { }
> +#endif
> +
>  #endif /* !defined(__HELPER_REGS_H__) */
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 7df6ede..71e1d14 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env)
>  
>  void helper_slbia(CPUPPCState *env)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> -    int n, do_invalidate;
> +    int n;
>  
> -    do_invalidate = 0;
>      /* XXX: Warning: slbia never invalidates the first segment */
>      for (n = 1; n < env->slb_nr; n++) {
>          ppc_slb_t *slb = &env->slb[n];
> @@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env)
>               *      and we still don't have a tlb_flush_mask(env, n, mask)
>               *      in QEMU, we just invalidate all TLBs
>               */
> -            do_invalidate = 1;
> +            env->tlb_need_flush = true;
>          }
>      }
> -    if (do_invalidate) {
> -        tlb_flush(CPU(cpu), 1);
> -    }
>  }
>  
>  void helper_slbie(CPUPPCState *env, target_ulong addr)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
>      ppc_slb_t *slb;
>  
>      slb = slb_lookup(env, addr);
> @@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n, mask)
>           *      in QEMU, we just invalidate all TLBs
>           */
> -        tlb_flush(CPU(cpu), 1);
> +        env->tlb_need_flush = true;
>      }
>  }
>  
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index e52d0e5..54bc5d1 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -23,6 +23,7 @@
>  #include "mmu-hash64.h"
>  #include "mmu-hash32.h"
>  #include "exec/cpu_ldst.h"
> +#include "helper_regs.h"
>  
>  //#define DEBUG_MMU
>  //#define DEBUG_BATS
> @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>      case POWERPC_MMU_2_03:
>      case POWERPC_MMU_2_06:
>      case POWERPC_MMU_2_07:
> +        env->tlb_need_flush = 0;
>  #endif /* defined(TARGET_PPC64) */
>          tlb_flush(CPU(cpu), 1);
>          break;

Any particular reason you're leaving this one as an immediate rather
than deferred flush?

> @@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n, mask) in QEMU,
>           *      we just invalidate all TLBs
>           */
> -        tlb_flush(CPU(cpu), 1);
> +        env->tlb_need_flush = 1;
>          break;
>  #endif /* defined(TARGET_PPC64) */
>      default:
> @@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
>  }
>  
>  
> +void helper_check_tlb_flush(CPUPPCState *env)
> +{
> +    check_tlb_flush(env);
> +}
> +
>  /*****************************************************************************/
>  
>  /* try to fill the TLB and return an exception if error. If retaddr is
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 6d9f252..e18d204 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx)
>  {
>  }
>  
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void gen_check_tlb_flush(DisasContext *ctx)
> +{
> +    TCGv_i32 t = tcg_temp_new_i32();
> +    TCGLabel *l = gen_new_label();
> +
> +    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> +    gen_helper_check_tlb_flush(cpu_env);
> +    gen_set_label(l);
> +    tcg_temp_free_i32(t);
> +}
> +#else
> +static inline void gen_check_tlb_flush(DisasContext *ctx) { }
> +#endif
> +
>  /* isync */
>  static void gen_isync(DisasContext *ctx)
>  {
> +    /*
> +     * We need to check for a pending TLB flush. This can only happen in
> +     * kernel mode however so check MSR_PR
> +     */
> +    if (!ctx->pr) {
> +        gen_check_tlb_flush(ctx);
> +    }
>      gen_stop_exception(ctx);
>  }
>  
> @@ -3458,6 +3481,15 @@ STCX(stqcx_, 16);
>  /* sync */
>  static void gen_sync(DisasContext *ctx)
>  {
> +    uint32_t l = (ctx->opcode >> 21) & 3;
> +
> +    /*
> +     * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
> +     * This can only happen in kernel mode however so check MSR_PR as well.
> +     */
> +    if (l == 2 && !ctx->pr) {
> +        gen_check_tlb_flush(ctx);
> +    }
>  }
>  
>  /* wait */
> @@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx)
>          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>          return;
>      }
> -    /* This has no effect: it should ensure that all previous
> -     * tlbie have completed
> +    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
> +     * embedded however needs to deal with tlbsync. We don't try to be
> +     * fancy and swallow the overhead of checking for both.
>       */
> -    gen_stop_exception(ctx);
> +    gen_check_tlb_flush(ctx);
>  #endif
>  }
>  

Should you be clearing the pending flush flag in cpu_reset()?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions Benjamin Herrenschmidt
  2015-11-11  0:59   ` [Qemu-devel] [Qemu-ppc] " Stewart Smith
@ 2015-11-16  5:01   ` David Gibson
  2015-11-16 10:17     ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:01 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3078 bytes --]

On Wed, Nov 11, 2015 at 11:27:17AM +1100, Benjamin Herrenschmidt wrote:
> We never released anything older than POWER8 DD2.0 and POWER8E DD2.1,
> so let's use these versions; without that, some firmware or Linux code
> might fail to use some HW features that were non functional in earlier
> internal only spins of the chip.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Hmm, I'm just wondering if replacing the old CPU models could have
implications for migration from old images.

> ---
>  target-ppc/cpu-models.c | 12 ++++++------
>  target-ppc/cpu-models.h |  4 ++--
>  2 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
> index 4d5ab4b..349783e 100644
> --- a/target-ppc/cpu-models.c
> +++ b/target-ppc/cpu-models.c
> @@ -1138,10 +1138,10 @@
>                  "POWER7 v2.3")
>      POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,            POWER7,
>                  "POWER7+ v2.1")
> -    POWERPC_DEF("POWER8E_v1.0",  CPU_POWERPC_POWER8E_v10,            POWER8,
> -                "POWER8E v1.0")
> -    POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10,             POWER8,
> -                "POWER8 v1.0")
> +    POWERPC_DEF("POWER8E_v2.1",  CPU_POWERPC_POWER8E_v21,            POWER8,
> +                "POWER8E v2.1")
> +    POWERPC_DEF("POWER8_v2.0",   CPU_POWERPC_POWER8_v20,             POWER8,
> +                "POWER8 v2.0")
>      POWERPC_DEF("970_v2.2",      CPU_POWERPC_970_v22,                970,
>                  "PowerPC 970 v2.2")
>      POWERPC_DEF("970fx_v1.0",    CPU_POWERPC_970FX_v10,              970,
> @@ -1389,8 +1389,8 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
>      { "POWER5gs", "POWER5+_v2.1" },
>      { "POWER7", "POWER7_v2.3" },
>      { "POWER7+", "POWER7+_v2.1" },
> -    { "POWER8E", "POWER8E_v1.0" },
> -    { "POWER8", "POWER8_v1.0" },
> +    { "POWER8E", "POWER8E_v2.1" },
> +    { "POWER8", "POWER8_v2.0" },
>      { "970", "970_v2.2" },
>      { "970fx", "970fx_v3.1" },
>      { "970mp", "970mp_v1.1" },
> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
> index 9d80e72..2992427 100644
> --- a/target-ppc/cpu-models.h
> +++ b/target-ppc/cpu-models.h
> @@ -557,9 +557,9 @@ enum {
>      CPU_POWERPC_POWER7P_BASE       = 0x004A0000,
>      CPU_POWERPC_POWER7P_v21        = 0x004A0201,
>      CPU_POWERPC_POWER8E_BASE       = 0x004B0000,
> -    CPU_POWERPC_POWER8E_v10        = 0x004B0100,
> +    CPU_POWERPC_POWER8E_v21        = 0x004B0201,
>      CPU_POWERPC_POWER8_BASE        = 0x004D0000,
> -    CPU_POWERPC_POWER8_v10         = 0x004D0100,
> +    CPU_POWERPC_POWER8_v20         = 0x004D0200,
>      CPU_POWERPC_970_v22            = 0x00390202,
>      CPU_POWERPC_970FX_v10          = 0x00391100,
>      CPU_POWERPC_970FX_v20          = 0x003C0200,

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 05/77] ppc: Update SPR definitions
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 05/77] ppc: Update SPR definitions Benjamin Herrenschmidt
@ 2015-11-16  5:06   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 6312 bytes --]

On Wed, Nov 11, 2015 at 11:27:18AM +1100, Benjamin Herrenschmidt wrote:
> Add definitions for additional SPR numbers and SPR bit definitions
> that will be relevant for subsequent improvements to POWER8 emulation
> 
> Also fix the definition of LPIDR which was incorrect (and is different
> for server and embedded).
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/cpu.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 47 insertions(+), 7 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index e6c43f9..611367f 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -470,9 +470,17 @@ struct ppc_slb_t {
>  #define MSR_RI   1  /* Recoverable interrupt                        1        */
>  #define MSR_LE   0  /* Little-endian mode                           1 hflags */
>  
> -#define LPCR_ILE (1 << (63-38))
> -#define LPCR_AIL_SHIFT (63-40)      /* Alternate interrupt location */
> -#define LPCR_AIL (3 << LPCR_AIL_SHIFT)
> +/* LPCR bits */
> +#define LPCR_VPM0         (1ull << (63-0))
> +#define LPCR_VPM1         (1ull << (63-1))
> +#define LPCR_ISL          (1ull << (63-2))
> +#define LPCR_KBV          (1ull << (63-3))
> +#define LPCR_ILE          (1ull << (63-38))
> +#define LPCR_MER          (1ull << (63-52))
> +#define LPCR_LPES0        (1ull << (63-60))
> +#define LPCR_LPES1        (1ull << (63-61))
> +#define LPCR_AIL_SHIFT    (63-40)      /* Alternate interrupt location */
> +#define LPCR_AIL          (3ull << LPCR_AIL_SHIFT)
>  
>  #define msr_sf   ((env->msr >> MSR_SF)   & 1)
>  #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
> @@ -1338,6 +1346,10 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_UAMOR             (0x09D)
>  #define SPR_MPC_ICTRL         (0x09E)
>  #define SPR_MPC_BAR           (0x09F)
> +#define SPR_DAWR              (0x0B4)
> +#define SPR_RPR               (0x0BA)
> +#define SPR_DAWRX             (0x0BC)
> +#define SPR_HFSCR             (0x0BE)
>  #define SPR_VRSAVE            (0x100)
>  #define SPR_USPRG0            (0x100)
>  #define SPR_USPRG1            (0x101)
> @@ -1392,19 +1404,25 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_HSRR1             (0x13B)
>  #define SPR_BOOKE_IAC4        (0x13B)
>  #define SPR_BOOKE_DAC1        (0x13C)
> -#define SPR_LPIDR             (0x13D)
> +#define SPR_MMCRH             (0x13C)
>  #define SPR_DABR2             (0x13D)
>  #define SPR_BOOKE_DAC2        (0x13D)
> +#define SPR_TFMR              (0x13D)
>  #define SPR_BOOKE_DVC1        (0x13E)
>  #define SPR_LPCR              (0x13E)
>  #define SPR_BOOKE_DVC2        (0x13F)
> +#define SPR_LPIDR             (0x13F)
>  #define SPR_BOOKE_TSR         (0x150)
> +#define SPR_HMER              (0x150)
> +#define SPR_HMEER             (0x151)
>  #define SPR_PCR               (0x152)
> +#define SPR_BOOKE_LPIDR       (0x152)
>  #define SPR_BOOKE_TCR         (0x154)
>  #define SPR_BOOKE_TLB0PS      (0x158)
>  #define SPR_BOOKE_TLB1PS      (0x159)
>  #define SPR_BOOKE_TLB2PS      (0x15A)
>  #define SPR_BOOKE_TLB3PS      (0x15B)
> +#define SPR_AMOR	      (0x15D)
>  #define SPR_BOOKE_MAS7_MAS3   (0x174)
>  #define SPR_BOOKE_IVOR0       (0x190)
>  #define SPR_BOOKE_IVOR1       (0x191)
> @@ -1622,6 +1640,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_RCPU_L2U_RA3      (0x32B)
>  #define SPR_TAR               (0x32F)
>  #define SPR_VTB               (0x351)
> +#define SPR_MMCRC             (0x353)
>  #define SPR_440_INV0          (0x370)
>  #define SPR_440_INV1          (0x371)
>  #define SPR_440_INV2          (0x372)
> @@ -1655,6 +1674,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_440_DVLIM         (0x398)
>  #define SPR_750_WPAR          (0x399)
>  #define SPR_440_IVLIM         (0x399)
> +#define SPR_TSCR	      (0x399)
>  #define SPR_750_DMAU          (0x39A)
>  #define SPR_750_DMAL          (0x39B)
>  #define SPR_440_RSTCFG        (0x39B)
> @@ -1829,9 +1849,10 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define   L1CSR1_ICE		0x00000001	/* Instruction Cache Enable */
>  
>  /* HID0 bits */
> -#define HID0_DEEPNAP        (1 << 24)
> -#define HID0_DOZE           (1 << 23)
> -#define HID0_NAP            (1 << 22)
> +#define HID0_DEEPNAP        (1 << 24)	        /* pre-2.06 */
> +#define HID0_DOZE           (1 << 23)	        /* pre-2.06 */
> +#define HID0_NAP            (1 << 22)	        /* pre-2.06 */
> +#define HID0_HILE           (1ull << (63-19))   /* POWER8 */
>  
>  /*****************************************************************************/
>  /* PowerPC Instructions types definitions                                    */
> @@ -2180,6 +2201,25 @@ enum {
>      PCR_TM_DIS          = 1ull << (63-2), /* Trans. memory disable (POWER8) */
>  };
>  
> +/* HMER/HMEER */
> +enum {

Any particular reason for using an enum rather than defines like the rest?

> +    HMER_MALFUNCTION_ALERT      = 1ull << (63-0),
> +    HMER_PROC_RECV_DONE         = 1ull << (63-2),
> +    HMER_PROC_RECV_ERROR_MASKED = 1ull << (63-3),
> +    HMER_TFAC_ERROR             = 1ull << (63-4),
> +    HMER_TFMR_PARITY_ERROR      = 1ull << (63-5),
> +    HMER_XSCOM_FAIL             = 1ull << (63-8),
> +    HMER_XSCOM_DONE             = 1ull << (63-9),
> +    HMER_PROC_RECV_AGAIN        = 1ull << (63-11),
> +    HMER_WARN_RISE              = 1ull << (63-14),
> +    HMER_WARN_FALL              = 1ull << (63-15),
> +    HMER_SCOM_FIR_HMI           = 1ull << (63-16),
> +    HMER_TRIG_FIR_HMI           = 1ull << (63-17),
> +    HMER_HYP_RESOURCE_ERR       = 1ull << (63-20),
> +    HMER_XSCOM_STATUS_MASK      = 7ull << (63-23),
> +    HMER_XSCOM_STATUS_LSH       = (63-23),

Is that last one correct?

> +};
> +
>  /*****************************************************************************/
>  
>  static inline target_ulong cpu_read_xer(CPUPPCState *env)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs Benjamin Herrenschmidt
@ 2015-11-16  5:09   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:09 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 7555 bytes --]

On Wed, Nov 11, 2015 at 11:27:19AM +1100, Benjamin Herrenschmidt wrote:
> The current set of spr_register_* macros only take the user and
> supervisor function pointers. To make the transition easy, we
> don't change that but we add "_hv" variants that can be used to
> register all 3 sets.
> 
> To simplify the transition, users of the "old" macro will set the
> hypervisor callback to be the same as the supervisor one. The new
> registration function only needs to be used for registers that are
> either hypervisor only or behave differently in HV mode.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/translate.c      | 26 ++++++++++++++++----------
>  target-ppc/translate_init.c | 35 +++++++++++++++++++++++++++++++----
>  2 files changed, 47 insertions(+), 14 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index e18d204..a2fe1b5 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4299,14 +4299,17 @@ static inline void gen_op_mfspr(DisasContext *ctx)
>      void (*read_cb)(DisasContext *ctx, int gprn, int sprn);
>      uint32_t sprn = SPR(ctx->opcode);
>  
> -#if !defined(CONFIG_USER_ONLY)
> -    if (ctx->hv)
> +#if defined(CONFIG_USER_ONLY)
> +    read_cb = ctx->spr_cb[sprn].uea_read;
> +#else
> +    if (ctx->pr) {
> +        read_cb = ctx->spr_cb[sprn].uea_read;
> +    } else if (ctx->hv) {
>          read_cb = ctx->spr_cb[sprn].hea_read;
> -    else if (!ctx->pr)
> +    } else if (!ctx->pr) {
>          read_cb = ctx->spr_cb[sprn].oea_read;
> -    else
> +    }
>  #endif
> -        read_cb = ctx->spr_cb[sprn].uea_read;
>      if (likely(read_cb != NULL)) {
>          if (likely(read_cb != SPR_NOACCESS)) {
>              (*read_cb)(ctx, rD(ctx->opcode), sprn);
> @@ -4450,14 +4453,17 @@ static void gen_mtspr(DisasContext *ctx)
>      void (*write_cb)(DisasContext *ctx, int sprn, int gprn);
>      uint32_t sprn = SPR(ctx->opcode);
>  
> -#if !defined(CONFIG_USER_ONLY)
> -    if (ctx->hv)
> +#if defined(CONFIG_USER_ONLY)
> +    write_cb = ctx->spr_cb[sprn].uea_write;
> +#else
> +    if (ctx->pr) {
> +        write_cb = ctx->spr_cb[sprn].uea_write;
> +    } else if (ctx->hv) {
>          write_cb = ctx->spr_cb[sprn].hea_write;
> -    else if (!ctx->pr)
> +    } else {
>          write_cb = ctx->spr_cb[sprn].oea_write;
> -    else
> +    }
>  #endif
> -        write_cb = ctx->spr_cb[sprn].uea_write;
>      if (likely(write_cb != NULL)) {
>          if (likely(write_cb != SPR_NOACCESS)) {
>              (*write_cb)(ctx, sprn, rS(ctx->opcode));
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index e88dc7f..30a03ce 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -578,17 +578,33 @@ static inline void vscr_init (CPUPPCState *env, uint32_t val)
>  #define spr_register_kvm(env, num, name, uea_read, uea_write,                  \
>                           oea_read, oea_write, one_reg_id, initial_value)       \
>      _spr_register(env, num, name, uea_read, uea_write, initial_value)
> +#define spr_register_kvm_hv(env, num, name, uea_read, uea_write,               \
> +                            oea_read, oea_write, hea_read, hea_write,          \
> +                            one_reg_id, initial_value)                         \
> +    _spr_register(env, num, name, uea_read, uea_write, initial_value)
>  #else
>  #if !defined(CONFIG_KVM)
>  #define spr_register_kvm(env, num, name, uea_read, uea_write,                  \
> -                         oea_read, oea_write, one_reg_id, initial_value) \
> +                         oea_read, oea_write, one_reg_id, initial_value)       \
> +    _spr_register(env, num, name, uea_read, uea_write,                         \
> +                  oea_read, oea_write, oea_read, oea_write, initial_value)
> +#define spr_register_kvm_hv(env, num, name, uea_read, uea_write,               \
> +                            oea_read, oea_write, hea_read, hea_write,          \
> +                            one_reg_id, initial_value)                         \
>      _spr_register(env, num, name, uea_read, uea_write,                         \
> -                  oea_read, oea_write, initial_value)
> +                  oea_read, oea_write, hea_read, hea_write, initial_value)
>  #else
>  #define spr_register_kvm(env, num, name, uea_read, uea_write,                  \
> -                         oea_read, oea_write, one_reg_id, initial_value) \
> +                         oea_read, oea_write, one_reg_id, initial_value)       \
> +    _spr_register(env, num, name, uea_read, uea_write,                         \
> +                  oea_read, oea_write, oea_read, oea_write,                    \
> +                  one_reg_id, initial_value)
> +#define spr_register_kvm_hv(env, num, name, uea_read, uea_write,               \
> +                            oea_read, oea_write, hea_read, hea_write,          \
> +                            one_reg_id, initial_value)                         \
>      _spr_register(env, num, name, uea_read, uea_write,                         \
> -                  oea_read, oea_write, one_reg_id, initial_value)
> +                  oea_read, oea_write, hea_read, hea_write,                    \
> +                  one_reg_id, initial_value)
>  #endif
>  #endif
>  
> @@ -597,6 +613,13 @@ static inline void vscr_init (CPUPPCState *env, uint32_t val)
>      spr_register_kvm(env, num, name, uea_read, uea_write,                      \
>                       oea_read, oea_write, 0, initial_value)
>  
> +#define spr_register_hv(env, num, name, uea_read, uea_write,                   \
> +                        oea_read, oea_write, hea_read, hea_write,              \
> +                        initial_value)                                         \
> +    spr_register_kvm_hv(env, num, name, uea_read, uea_write,                   \
> +                        oea_read, oea_write, hea_read, hea_write,              \
> +                        0, initial_value)
> +
>  static inline void _spr_register(CPUPPCState *env, int num,
>                                   const char *name,
>                                   void (*uea_read)(DisasContext *ctx, int gprn, int sprn),
> @@ -605,6 +628,8 @@ static inline void _spr_register(CPUPPCState *env, int num,
>  
>                                   void (*oea_read)(DisasContext *ctx, int gprn, int sprn),
>                                   void (*oea_write)(DisasContext *ctx, int sprn, int gprn),
> +                                 void (*hea_read)(DisasContext *opaque, int gprn, int sprn),
> +                                 void (*hea_write)(DisasContext *opaque, int sprn, int gprn),
>  #endif
>  #if defined(CONFIG_KVM)
>                                   uint64_t one_reg_id,
> @@ -632,6 +657,8 @@ static inline void _spr_register(CPUPPCState *env, int num,
>  #if !defined(CONFIG_USER_ONLY)
>      spr->oea_read = oea_read;
>      spr->oea_write = oea_write;
> +    spr->hea_read = hea_read;
> +    spr->hea_write = hea_write;
>  #endif
>  #if defined(CONFIG_KVM)
>      spr->one_reg_id = one_reg_id,

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 08/77] ppc: Add number of threads per core to the processor definition
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 08/77] ppc: Add number of threads per core to the processor definition Benjamin Herrenschmidt
@ 2015-11-16  5:16   ` David Gibson
  2015-11-20  0:29     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:16 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3026 bytes --]

On Wed, Nov 11, 2015 at 11:27:21AM +1100, Benjamin Herrenschmidt wrote:
> Also use it to clamp the max SMT mode and ensure that the cpu_dt_id
> are offset by that value in order to preserve consistency with the
> HW implementations.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/cpu-qom.h        | 1 +
>  target-ppc/translate_init.c | 8 +++++++-
>  2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
> index 6967a80..fef23fd 100644
> --- a/target-ppc/cpu-qom.h
> +++ b/target-ppc/cpu-qom.h
> @@ -68,6 +68,7 @@ typedef struct PowerPCCPUClass {
>      uint32_t flags;
>      int bfd_mach;
>      uint32_t l1_dcache_size, l1_icache_size;
> +    uint32_t threads_per_core;
>  #if defined(TARGET_PPC64)
>      const struct ppc_segment_page_sizes *sps;
>  #endif
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index c743eb1..1d402e1 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8193,6 +8193,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data)
>                   POWERPC_FLAG_BUS_CLK;
>      pcc->l1_dcache_size = 0x8000;
>      pcc->l1_icache_size = 0x10000;
> +    pcc->threads_per_core = 2;
>  }
>  
>  static void powerpc_get_compat(Object *obj, Visitor *v,
> @@ -8339,6 +8340,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>      pcc->l1_dcache_size = 0x8000;
>      pcc->l1_icache_size = 0x8000;
>      pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
> +    pcc->threads_per_core = 4;
>  }
>  
>  static void init_proc_POWER8(CPUPPCState *env)
> @@ -8419,6 +8421,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>      pcc->l1_dcache_size = 0x8000;
>      pcc->l1_icache_size = 0x8000;
>      pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
> +    pcc->threads_per_core = 8;
>  }
>  #endif /* defined (TARGET_PPC64) */
>  
> @@ -9074,6 +9077,9 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>  #endif
>  
>  #if !defined(CONFIG_USER_ONLY)
> +    if (max_smt > pcc->threads_per_core) {
> +        max_smt = pcc->threads_per_core;
> +    }
>      if (smp_threads > max_smt) {
>          error_setg(errp, "Cannot support more than %d threads on PPC with %s",
>                     max_smt, kvm_enabled() ? "KVM" : "TCG");
> @@ -9094,7 +9100,7 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>      }
>  
>  #if !defined(CONFIG_USER_ONLY)
> -    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
> +    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * pcc->threads_per_core
>          + (cs->cpu_index % smp_threads);
>  #endif
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper Benjamin Herrenschmidt
@ 2015-11-16  5:30   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:30 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 4216 bytes --]

On Wed, Nov 11, 2015 at 11:27:24AM +1100, Benjamin Herrenschmidt wrote:
> And move the code adjusting the MSR mask and calling kvmppc_set_papr()
> to it. This allows us to add a few more things such as disabling setting
> of MSR:HV and appropriate LPCR bits which will be used when fixing
> the exception model.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  hw/ppc/spapr.c              | 12 +++---------
>  target-ppc/cpu.h            |  1 +
>  target-ppc/translate_init.c | 37 ++++++++++++++++++++++++++++++++++++-
>  3 files changed, 40 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 37d071e..610629e 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1606,15 +1606,8 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu)
>      /* Set time-base frequency to 512 MHz */
>      cpu_ppc_tb_init(env, TIMEBASE_FREQ);
>  
> -    /* PAPR always has exception vectors in RAM not ROM. To ensure this,
> -     * MSR[IP] should never be set.
> -     */
> -    env->msr_mask &= ~(1 << 6);
> -
> -    /* Tell KVM that we're in PAPR mode */
> -    if (kvm_enabled()) {
> -        kvmppc_set_papr(cpu);
> -    }
> +    /* Enable PAPR mode in TCG or KVM */
> +    cpu_ppc_set_papr(cpu);
>  
>      if (cpu->max_compat) {
>          if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
> @@ -1791,6 +1784,7 @@ static void ppc_spapr_init(MachineState *machine)
>              fprintf(stderr, "Unable to find PowerPC CPU definition\n");
>              exit(1);
>          }
> +
>          spapr_cpu_init(spapr, cpu);
>      }
>  
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 611367f..357b6e7 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1229,6 +1229,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val);
>  void store_booke_tsr (CPUPPCState *env, target_ulong val);
>  void ppc_tlb_invalidate_all (CPUPPCState *env);
>  void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
> +void cpu_ppc_set_papr(PowerPCCPU *cpu);
>  #endif
>  #endif
>  
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 1d402e1..7bcfbc0 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8423,8 +8423,43 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>      pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
>      pcc->threads_per_core = 8;
>  }
> -#endif /* defined (TARGET_PPC64) */
>  
> +#if !defined(CONFIG_USER_ONLY)
> +
> +void cpu_ppc_set_papr(PowerPCCPU *cpu)
> +{
> +    CPUPPCState *env = &cpu->env;
> +    ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
> +
> +    /* PAPR always has exception vectors in RAM not ROM. To ensure this,
> +     * MSR[IP] should never be set.
> +     *
> +     * We also disallow setting of MSR_HV
> +     */
> +    env->msr_mask &= ~((1ull << MSR_EP) | MSR_HVB);
> +
> +    /* Set emulated LPCR to not send interrupts to hypervisor. Note that
> +     * under KVM, the actual HW LPCR will be set differently by KVM itself,
> +     * the settings below ensure proper operations with TCG in absence of
> +     * a real hypervisor
> +     */
> +    lpcr->default_value &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV);
> +    lpcr->default_value |= LPCR_LPES0 | LPCR_LPES1;
> +
> +    /* We should be followed by a CPU reset but update the active value
> +     * just in case...
> +     */
> +    env->spr[SPR_LPCR] = lpcr->default_value;
> +
> +    /* Tell KVM that we're in PAPR mode */
> +    if (kvm_enabled()) {
> +        kvmppc_set_papr(cpu);
> +    }
> +}
> +
> +#endif /* !defined(CONFIG_USER_ONLY) */
> +
> +#endif /* defined (TARGET_PPC64) */
>  
>  /*****************************************************************************/
>  /* Generic CPU instantiation routine                                         */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only Benjamin Herrenschmidt
@ 2015-11-16  5:34   ` David Gibson
  2015-11-16 10:21     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1811 bytes --]

On Wed, Nov 11, 2015 at 11:27:26AM +1100, Benjamin Herrenschmidt wrote:
> Not that anything remotely recent supports tlbia but ...
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/translate.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 10eb9e3..014fe5e 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4836,7 +4836,7 @@ static void gen_tlbia(DisasContext *ctx)
>  #if defined(CONFIG_USER_ONLY)
>      gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>  #else
> -    if (unlikely(ctx->pr)) {
> +    if (unlikely(ctx->pr || !ctx->hv)) {

If I'm reading your previous patch correctly, ctx->hv won't be set
when in problem state, so I think the ctx->pr check is redundant.

>          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>          return;
>      }
> @@ -4850,7 +4850,7 @@ static void gen_tlbiel(DisasContext *ctx)
>  #if defined(CONFIG_USER_ONLY)
>      gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>  #else
> -    if (unlikely(ctx->pr)) {
> +    if (unlikely(ctx->pr || !ctx->hv)) {
>          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>          return;
>      }
> @@ -4864,7 +4864,7 @@ static void gen_tlbie(DisasContext *ctx)
>  #if defined(CONFIG_USER_ONLY)
>      gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>  #else
> -    if (unlikely(ctx->pr)) {
> +    if (unlikely(ctx->pr || !ctx->hv)) {
>          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>          return;
>      }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 16/77] ppc: Get out of emulation on SMT "OR" ops
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 16/77] ppc: Get out of emulation on SMT "OR" ops Benjamin Herrenschmidt
@ 2015-11-16  5:40   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2333 bytes --]

On Wed, Nov 11, 2015 at 11:27:29AM +1100, Benjamin Herrenschmidt wrote:
> Otherwise tight loops at smt_low for example, which OPAL does,
> eat so much CPU that we can't boot a kernel anymore. With that,
> I can boot 8 CPUs just fine with powernv.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/translate.c | 19 +++++++++++++++++--
>  1 file changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 3974cd2..e8bbd59 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1396,6 +1396,19 @@ GEN_LOGICAL2(nand, tcg_gen_nand_tl, 0x0E, PPC_INTEGER);
>  /* nor & nor. */
>  GEN_LOGICAL2(nor, tcg_gen_nor_tl, 0x03, PPC_INTEGER);
>  
> +#if defined(TARGET_PPC64)
> +static void gen_pause(DisasContext *ctx)
> +{
> +    TCGv_i32 t0 = tcg_const_i32(0);
> +    tcg_gen_st_i32(t0, cpu_env,
> +                   -offsetof(PowerPCCPU, env) + offsetof(CPUState, halted));
> +    tcg_temp_free_i32(t0);
> +
> +    /* Stop translation, this gives other CPUs a chance to run */
> +    gen_exception_err(ctx, EXCP_HLT, 1);
> +}
> +#endif /* defined(TARGET_PPC64) */
> +
>  /* or & or. */
>  static void gen_or(DisasContext *ctx)
>  {
> @@ -1468,6 +1481,10 @@ static void gen_or(DisasContext *ctx)
>              tcg_gen_ori_tl(t0, t0, ((uint64_t)prio) << 50);
>              gen_store_spr(SPR_PPR, t0);
>              tcg_temp_free(t0);
> +            /* Pause us out of TCG otherwise spin loops with smt_low
> +             * eat too much CPU and the kernel hangs
> +             */
> +            gen_pause(ctx);
>          }
>  #endif
>      }
> @@ -1493,8 +1510,6 @@ static void gen_ori(DisasContext *ctx)
>      target_ulong uimm = UIMM(ctx->opcode);
>  
>      if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) {
> -        /* NOP */
> -        /* XXX: should handle special NOPs for POWER series */
>          return;
>      }
>      tcg_gen_ori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], uimm);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 17/77] ppc: Add PPC_64H instruction flag to POWER7 and POWER8
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 17/77] ppc: Add PPC_64H instruction flag to POWER7 and POWER8 Benjamin Herrenschmidt
@ 2015-11-16  5:41   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-16  5:41 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1883 bytes --]

On Wed, Nov 11, 2015 at 11:27:30AM +1100, Benjamin Herrenschmidt wrote:
> This will enable decoding of hrfid
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/translate_init.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 76f20ea..f11e7d0 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8303,7 +8303,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>                         PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
>                         PPC_MEM_SYNC | PPC_MEM_EIEIO |
>                         PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
> -                       PPC_64B | PPC_ALTIVEC |
> +                       PPC_64B | PPC_64H | PPC_ALTIVEC |
>                         PPC_SEGMENT_64B | PPC_SLBI |
>                         PPC_POPCNTB | PPC_POPCNTWD;
>      pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
> @@ -8380,7 +8380,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>                         PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
>                         PPC_MEM_SYNC | PPC_MEM_EIEIO |
>                         PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
> -                       PPC_64B | PPC_64BX | PPC_ALTIVEC |
> +                       PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
>                         PPC_SEGMENT_64B | PPC_SLBI |
>                         PPC_POPCNTB | PPC_POPCNTWD;
>      pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts
  2015-11-16  4:49   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-16 10:10     ` Benjamin Herrenschmidt
  2015-11-16 12:42       ` David Gibson
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-16 10:10 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

Thanks for reviewing !

On Mon, 2015-11-16 at 15:49 +1100, David Gibson wrote:
> >  static inline void hreg_compute_mem_idx(CPUPPCState *env)
> >  {
> > -    /* Precompute MMU index */
> > -    if (msr_pr == 0 && msr_hv != 0) {
> > -        env->mmu_idx = 2;
> > +    /* This is our encoding for server processors
> > +     *
> > +     *   0 = Guest User space virtual mode
> > +     *   1 = Guest Kernel space virtual mode
> > +     *   2 = Guest Kernel space real mode
> > +     *   3 = HV User space virtual mode
> > +     *   4 = HV Kernel space virtual mode
> > +     *   5 = HV Kernel space real mode
> > +     *
> > +     * The combination PR=1 IR&DR=0 is invalid, we will treat
> > +     * it as IR=DR=1
> 
> Hmm.. so being in problem state with translation off would certainly
> be a bad idea, but would it actually behave this way on CPU hardware?

No, it's not allowed in HW. I think (maybe in another patch) I enforce
it. Architecturally, setting PR=1 will force IR, DR and EE to 1.

> > +     *
> > +     * For BookE, we need 8 MMU modes as follow:
> > +     *
> > +     *  0 = AS 0 HV User space
> > +     *  1 = AS 0 HV Kernel space
> > +     *  2 = AS 1 HV User space
> > +     *  3 = AS 1 HV Kernel space
> > +     *  4 = AS 0 Guest User space
> > +     *  5 = AS 0 Guest Kernel space
> > +     *  6 = AS 1 Guest User space
> > +     *  7 = AS 1 Guest Kernel space
> > +     */
> 
> I'm wondering if it might be simpler to unify these and allow all 8
> theoretical possibilities (hv/guest * user/kernel * translationmode)
> for both server and BookE.

I don't see the point. Server doesn't have "AS" and the distinction
only appears in that single function... 

> > +    if (env->mmu_model & POWERPC_MMU_BOOKE) {
> > +        env->immu_idx = env->dmmu_idx = msr_pr ? 0 : 1;
> > +        env->immu_idx += msr_is ? 2 : 0;
> > +        env->dmmu_idx += msr_ds ? 2 : 0;
> > +        env->immu_idx += msr_gs ? 4 : 0;
> > +        env->dmmu_idx += msr_gs ? 4 : 0;
> >      } else {
> > -        env->mmu_idx = 1 - msr_pr;
> > +        /* First calucalte a base value independent of HV */
> > +        if (msr_pr != 0) {
> > +            /* User space, ignore IR and DR */
> > +            env->immu_idx = env->dmmu_idx = 0;
> > +        } else {
> > +            /* Kernel, setup a base I/D value */
> > +            env->immu_idx = msr_ir ? 1 : 2;
> > +            env->dmmu_idx = msr_dr ? 1 : 2;
> > +        }
> > +        /* Then offset it for HV */
> > +        if (msr_hv) {
> > +            env->immu_idx += 3;
> > +            env->dmmu_idx += 3;
> > +        }
> >      }
> >  }
> >  
> > @@ -82,9 +121,10 @@ static inline int hreg_store_msr(CPUPPCState
> > *env, target_ulong value,
> >      }
> >      if (((value >> MSR_IR) & 1) != msr_ir ||
> >          ((value >> MSR_DR) & 1) != msr_dr) {
> > -        /* Flush all tlb when changing translation mode */
> > -        tlb_flush(cs, 1);
> > -        excp = POWERPC_EXCP_NONE;
> > +        cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> > +    }
> > +    if ((env->mmu_model & POWERPC_MMU_BOOKE) &&
> > +        ((value >> MSR_GS) & 1) != msr_gs) {
> >          cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> >      }
> >      if (unlikely((env->flags & POWERPC_FLAG_TGPR) &&
> > diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> > index f4ac761..b969492 100644
> > --- a/target-ppc/machine.c
> > +++ b/target-ppc/machine.c
> > @@ -90,9 +90,11 @@ static int cpu_load_old(QEMUFile *f, void
> > *opaque, int version_id)
> >      qemu_get_betls(f, &env->nip);
> >      qemu_get_betls(f, &env->hflags);
> >      qemu_get_betls(f, &env->hflags_nmsr);
> > -    qemu_get_sbe32s(f, &env->mmu_idx);
> 
> Have I missed something, or do you still need a read here to read the
> mmux_idx, even though you'll ignore it, otherwise you'll get out of
> sync and break migration from an old stream.

I am not completely cognizant of the migration stuff, that's very
possible yes. Do I need to read into a dummy local ? Or is there
a way to just say "drop 4 bytes from stream" ?

Note that I have generally completely overlooked the migration impact
of my patches, this is something that I need to do but I wouldn't mind
your help identifying the parts.

> >      qemu_get_sbe32(f); /* Discard unused power_mode */
> >  
> > +    /* Ignore saved mmu_idx, recompute */
> > +    hreg_compute_mem_idx(env);
> > +
> >      return 0;
> >  }
> >  
> > diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> > index 308ad68..6d9f252 100644
> > --- a/target-ppc/translate.c
> > +++ b/target-ppc/translate.c
> > @@ -11220,8 +11220,9 @@ void ppc_cpu_dump_state(CPUState *cs, FILE
> > *f, fprintf_function cpu_fprintf,
> >                  env->nip, env->lr, env->ctr, cpu_read_xer(env),
> >                  cs->cpu_index);
> >      cpu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx
> > "  HF "
> > -                TARGET_FMT_lx " idx %d\n", env->msr, env-
> > >spr[SPR_HID0],
> > -                env->hflags, env->mmu_idx);
> > +                TARGET_FMT_lx " iidx %d didx %d\n",
> > +                env->msr, env->spr[SPR_HID0],
> > +                env->hflags, env->immu_idx, env->dmmu_idx);
> >  #if !defined(NO_TIMER_DUMP)
> >      cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
> >  #if !defined(CONFIG_USER_ONLY)
> > @@ -11426,7 +11427,7 @@ void gen_intermediate_code(CPUPPCState
> > *env, struct TranslationBlock *tb)
> >      ctx.spr_cb = env->spr_cb;
> >      ctx.pr = msr_pr;
> >      ctx.hv = !msr_pr && msr_hv;
> > -    ctx.mem_idx = env->mmu_idx;
> > +    ctx.mem_idx = env->dmmu_idx;
> >      ctx.insns_flags = env->insns_flags;
> >      ctx.insns_flags2 = env->insns_flags2;
> >      ctx.access_type = -1;
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes
  2015-11-16  5:00   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-16 10:16     ` Benjamin Herrenschmidt
  2015-11-19  6:09       ` David Gibson
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-16 10:16 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Mon, 2015-11-16 at 16:00 +1100, David Gibson wrote:
> 
> >  //#define DEBUG_MMU
> >  //#define DEBUG_BATS
> > @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> >      case POWERPC_MMU_2_03:
> >      case POWERPC_MMU_2_06:
> >      case POWERPC_MMU_2_07:
> > +        env->tlb_need_flush = 0;
> >  #endif /* defined(TARGET_PPC64) */
> >          tlb_flush(CPU(cpu), 1);
> >          break;
> 
> Any particular reason you're leaving this one as an immediate rather
> than deferred flush?

A couple yes. It's mostly unused on server CPUs (we don't do tlbia),
and it's used by ppc_cpu_reset(). In that latter case, I like having
everything really cleaned up ... 

> Should you be clearing the pending flush flag cpu_reset()?

That should happen as a result of the above.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions
  2015-11-16  5:01   ` David Gibson
@ 2015-11-16 10:17     ` Benjamin Herrenschmidt
  2015-11-17  0:11       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-16 10:17 UTC (permalink / raw)
  To: David Gibson; +Cc: Alexey Kardashevskiy, qemu-ppc, qemu-devel

On Mon, 2015-11-16 at 16:01 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:17AM +1100, Benjamin Herrenschmidt wrote:
> > We never released anything older than POWER8 DD2.0 and POWER8E DD2.1,
> > so let's use these versions, without that some firmware or Linux code
> > might fail to use some HW features that were non functional in earlier
> > internal only spins of the chip.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> Hmm, I'm just wondering if replacing the old CPU models could have
> implications for migration from old images.

I don't know, I'm not that familiar with migration. Alexey, what do you
reckon ? I'd think KVM images use the real PVR so should be a non-
issue, does anybody actually care about migration of old TCG images ?

> > ---
> >  target-ppc/cpu-models.c | 12 ++++++------
> >  target-ppc/cpu-models.h |  4 ++--
> >  2 files changed, 8 insertions(+), 8 deletions(-)
> > 
> > diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
> > index 4d5ab4b..349783e 100644
> > --- a/target-ppc/cpu-models.c
> > +++ b/target-ppc/cpu-models.c
> > @@ -1138,10 +1138,10 @@
> >                  "POWER7 v2.3")
> >      POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,            POWER7,
> >                  "POWER7+ v2.1")
> > -    POWERPC_DEF("POWER8E_v1.0",  CPU_POWERPC_POWER8E_v10,            POWER8,
> > -                "POWER8E v1.0")
> > -    POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10,             POWER8,
> > -                "POWER8 v1.0")
> > +    POWERPC_DEF("POWER8E_v2.1",  CPU_POWERPC_POWER8E_v21,            POWER8,
> > +                "POWER8E v2.1")
> > +    POWERPC_DEF("POWER8_v2.0",   CPU_POWERPC_POWER8_v20,             POWER8,
> > +                "POWER8 v2.0")
> >      POWERPC_DEF("970_v2.2",      CPU_POWERPC_970_v22,                970,
> >                  "PowerPC 970 v2.2")
> >      POWERPC_DEF("970fx_v1.0",    CPU_POWERPC_970FX_v10,              970,
> > @@ -1389,8 +1389,8 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
> >      { "POWER5gs", "POWER5+_v2.1" },
> >      { "POWER7", "POWER7_v2.3" },
> >      { "POWER7+", "POWER7+_v2.1" },
> > -    { "POWER8E", "POWER8E_v1.0" },
> > -    { "POWER8", "POWER8_v1.0" },
> > +    { "POWER8E", "POWER8E_v2.1" },
> > +    { "POWER8", "POWER8_v2.0" },
> >      { "970", "970_v2.2" },
> >      { "970fx", "970fx_v3.1" },
> >      { "970mp", "970mp_v1.1" },
> > diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
> > index 9d80e72..2992427 100644
> > --- a/target-ppc/cpu-models.h
> > +++ b/target-ppc/cpu-models.h
> > @@ -557,9 +557,9 @@ enum {
> >      CPU_POWERPC_POWER7P_BASE       = 0x004A0000,
> >      CPU_POWERPC_POWER7P_v21        = 0x004A0201,
> >      CPU_POWERPC_POWER8E_BASE       = 0x004B0000,
> > -    CPU_POWERPC_POWER8E_v10        = 0x004B0100,
> > +    CPU_POWERPC_POWER8E_v21        = 0x004B0201,
> >      CPU_POWERPC_POWER8_BASE        = 0x004D0000,
> > -    CPU_POWERPC_POWER8_v10         = 0x004D0100,
> > +    CPU_POWERPC_POWER8_v20         = 0x004D0200,
> >      CPU_POWERPC_970_v22            = 0x00390202,
> >      CPU_POWERPC_970FX_v10          = 0x00391100,
> >      CPU_POWERPC_970FX_v20          = 0x003C0200,
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only
  2015-11-16  5:34   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-16 10:21     ` Benjamin Herrenschmidt
  2015-11-18  0:06       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-16 10:21 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Mon, 2015-11-16 at 16:34 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:26AM +1100, Benjamin Herrenschmidt
> wrote:
> > Not that anything remotely recent supports tlbia but ...
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > ---
> >  target-ppc/translate.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> > index 10eb9e3..014fe5e 100644
> > --- a/target-ppc/translate.c
> > +++ b/target-ppc/translate.c
> > @@ -4836,7 +4836,7 @@ static void gen_tlbia(DisasContext *ctx)
> >  #if defined(CONFIG_USER_ONLY)
> >      gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> >  #else
> > -    if (unlikely(ctx->pr)) {
> > +    if (unlikely(ctx->pr || !ctx->hv)) {
> 
> If I'm reading your previous patch correctly, ctx->hv won't be set
> with in problem state, so I think the ctx->pr check is redundant.

Ah you are right. I do have second thoughts about that previous patch
now that you mention it however. In the real MSR, HV and PR are
independent, I wonder if I'm better off making the check explicit...

The reason I did it this way is that afaik, there is no such thing
as a usermode hypervisor resource in the architecture, so any
hypervisor resource is also a supervisor mode one, but having
ctx->hv be 0 when MSR:HV=1 + MSR:PR=1 might make it easy to write
incorrect code in other places when deciding for example how to direct
interrupts.

I'll need to think a bit more about this one.

> >          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> >          return;
> >      }
> > @@ -4850,7 +4850,7 @@ static void gen_tlbiel(DisasContext *ctx)
> >  #if defined(CONFIG_USER_ONLY)
> >      gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> >  #else
> > -    if (unlikely(ctx->pr)) {
> > +    if (unlikely(ctx->pr || !ctx->hv)) {
> >          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> >          return;
> >      }
> > @@ -4864,7 +4864,7 @@ static void gen_tlbie(DisasContext *ctx)
> >  #if defined(CONFIG_USER_ONLY)
> >      gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> >  #else
> > -    if (unlikely(ctx->pr)) {
> > +    if (unlikely(ctx->pr || !ctx->hv)) {
> >          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> >          return;
> >      }
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts
  2015-11-16 10:10     ` Benjamin Herrenschmidt
@ 2015-11-16 12:42       ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-16 12:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1626 bytes --]

On Mon, Nov 16, 2015 at 09:10:24PM +1100, Benjamin Herrenschmidt wrote:
> Thanks for reviewing !
> On Mon, 2015-11-16 at 15:49 +1100, David Gibson wrote:
[snip]
> > > diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> > > index f4ac761..b969492 100644
> > > --- a/target-ppc/machine.c
> > > +++ b/target-ppc/machine.c
> > > @@ -90,9 +90,11 @@ static int cpu_load_old(QEMUFile *f, void
> > > *opaque, int version_id)
> > >      qemu_get_betls(f, &env->nip);
> > >      qemu_get_betls(f, &env->hflags);
> > >      qemu_get_betls(f, &env->hflags_nmsr);
> > > -    qemu_get_sbe32s(f, &env->mmu_idx);
> > 
> > Have I missed something, or do you still need a read here to read the
> > mmux_idx, even though you'll ignore it, otherwise you'll get out of
> > sync and break migration from an old stream.
> 
> I am not completely cognizant of the migration stuff, that's very
> possible yes. Do I need to read into a dummy local ? Or is there
> a way to just say "drop 4 bytes from stream" ?

There's actually an example of this just below :)

> Note that I have generally completely overlooked the migration impact
> of my patches, this is something that I need to do but I wouldn't mind
> your help identifying the parts.

Ok, I'll do what I can.

> > >      qemu_get_sbe32(f); /* Discard unused power_mode */

          ^^^
	  example of skipping a redundant value in the migration
	  stream

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions
  2015-11-16 10:17     ` Benjamin Herrenschmidt
@ 2015-11-17  0:11       ` Alexey Kardashevskiy
  2015-11-17  0:40         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-11-17  0:11 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, David Gibson; +Cc: qemu-ppc, qemu-devel

On 11/16/2015 09:17 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2015-11-16 at 16:01 +1100, David Gibson wrote:
>> On Wed, Nov 11, 2015 at 11:27:17AM +1100, Benjamin Herrenschmidt wrote:
>>> We never released anything older than POWER8 DD2.0 and POWER8E DD2.1,
>>> so let's use these versions, without that some firmware or Linux code
>>> might fail to use some HW features that were non functional in earlier
>>> internal only spins of the chip.
>>>
>>> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>>
>> Hmm, I'm just wondering if replacing the old CPU models could have
>> implications for migration from old images.
>
> I don't know, I'm not that familiar with migration. Alexey, what do you
> reckon ? I'd think KVM images use the real PVR so should be a non-
> issue, does anybody actually cares about migration of old TCG images ?


I have never heard of anyone testing migration of TCG guests. KVM guests 
use host PVR anyway.

I just wonder: we "never released anything older than" these, but we still 
support them in the kernel - why don't we wipe them there too?

>
>>> ---
>>>   target-ppc/cpu-models.c | 12 ++++++------
>>>   target-ppc/cpu-models.h |  4 ++--
>>>   2 files changed, 8 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
>>> index 4d5ab4b..349783e 100644
>>> --- a/target-ppc/cpu-models.c
>>> +++ b/target-ppc/cpu-models.c
>>> @@ -1138,10 +1138,10 @@
>>>                   "POWER7 v2.3")
>>>       POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,            POWER7,
>>>                   "POWER7+ v2.1")
>>> -    POWERPC_DEF("POWER8E_v1.0",  CPU_POWERPC_POWER8E_v10,            POWER8,
>>> -                "POWER8E v1.0")
>>> -    POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10,             POWER8,
>>> -                "POWER8 v1.0")
>>> +    POWERPC_DEF("POWER8E_v2.1",  CPU_POWERPC_POWER8E_v21,            POWER8,
>>> +                "POWER8E v2.1")
>>> +    POWERPC_DEF("POWER8_v2.0",   CPU_POWERPC_POWER8_v20,             POWER8,
>>> +                "POWER8 v2.0")
>>>       POWERPC_DEF("970_v2.2",      CPU_POWERPC_970_v22,                970,
>>>                   "PowerPC 970 v2.2")
>>>       POWERPC_DEF("970fx_v1.0",    CPU_POWERPC_970FX_v10,              970,
>>> @@ -1389,8 +1389,8 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
>>>       { "POWER5gs", "POWER5+_v2.1" },
>>>       { "POWER7", "POWER7_v2.3" },
>>>       { "POWER7+", "POWER7+_v2.1" },
>>> -    { "POWER8E", "POWER8E_v1.0" },
>>> -    { "POWER8", "POWER8_v1.0" },
>>> +    { "POWER8E", "POWER8E_v2.1" },
>>> +    { "POWER8", "POWER8_v2.0" },
>>>       { "970", "970_v2.2" },
>>>       { "970fx", "970fx_v3.1" },
>>>       { "970mp", "970mp_v1.1" },
>>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>>> index 9d80e72..2992427 100644
>>> --- a/target-ppc/cpu-models.h
>>> +++ b/target-ppc/cpu-models.h
>>> @@ -557,9 +557,9 @@ enum {
>>>       CPU_POWERPC_POWER7P_BASE       = 0x004A0000,
>>>       CPU_POWERPC_POWER7P_v21        = 0x004A0201,
>>>       CPU_POWERPC_POWER8E_BASE       = 0x004B0000,
>>> -    CPU_POWERPC_POWER8E_v10        = 0x004B0100,
>>> +    CPU_POWERPC_POWER8E_v21        = 0x004B0201,
>>>       CPU_POWERPC_POWER8_BASE        = 0x004D0000,
>>> -    CPU_POWERPC_POWER8_v10         = 0x004D0100,
>>> +    CPU_POWERPC_POWER8_v20         = 0x004D0200,
>>>       CPU_POWERPC_970_v22            = 0x00390202,
>>>       CPU_POWERPC_970FX_v10          = 0x00391100,
>>>       CPU_POWERPC_970FX_v20          = 0x003C0200,
>>


-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC Benjamin Herrenschmidt
@ 2015-11-17  0:32   ` Alexey Kardashevskiy
  2015-11-17  0:40     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-11-17  0:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 11/11/2015 11:27 AM, Benjamin Herrenschmidt wrote:
> This adds a model of the POWER8 LPC controller. It is then used
> by the PowerNV code to attach a UART and RTC, which, with the right
> version of OPAL firmware, will provide a working console.
>
> This version of the LPC controller model doesn't yet implement
> support for the SerIRQ deserializer present in the Naples version
> of the chip though some preliminary work is there.
>

Is there one LPC controller per chip or one per machine?
In general it is quite nice when "-nodefaults" creates neither PHB
nor LPC, so the user can add them manually with parameters different from
the defaults.


-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-11-17  0:32   ` Alexey Kardashevskiy
@ 2015-11-17  0:40     ` Benjamin Herrenschmidt
  2015-12-01  6:43       ` [Qemu-devel] [Qemu-ppc] " David Gibson
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-17  0:40 UTC (permalink / raw)
  To: Alexey Kardashevskiy, qemu-ppc; +Cc: qemu-devel

On Tue, 2015-11-17 at 11:32 +1100, Alexey Kardashevskiy wrote:
> On 11/11/2015 11:27 AM, Benjamin Herrenschmidt wrote:
> > This adds a model of the POWER8 LPC controller. It is then used
> > by the PowerNV code to attach a UART and RTC, which, with the right
> > version of OPAL firmware, will provide a working console.
> > 
> > This version of the LPC controller model doesn't yet implement
> > support for the SerIRQ deserializer present in the Naples version
> > of the chip though some preliminary work is there.
> > 
> 
> Is there one LPC controller per chip or one per machine?

Per chip but we usually only wire one up per machine.

> In general it is quite nice when "-nodefaults" creates neither
> PHB nor LPC, so the user can add them manually with parameters
> different from the defaults.

In this case though, PHB and LPC bridges are all part of the P8 chip,
and I'm trying to represent that topology as best as possible.

I think "-nodefaults" for Pnv should only be about the devices we
attach to the LPC/PHB not the busses themselves.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions
  2015-11-17  0:11       ` Alexey Kardashevskiy
@ 2015-11-17  0:40         ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-17  0:40 UTC (permalink / raw)
  To: Alexey Kardashevskiy, David Gibson; +Cc: qemu-ppc, qemu-devel

On Tue, 2015-11-17 at 11:11 +1100, Alexey Kardashevskiy wrote:
> 
> I have never heard of anyone testing migration of TCG guests. KVM guests 
> use host PVR anyway.
> 
> I just wonder: we "never released anything older than", but we still
> support them in the kernel - why don't we wipe these there too?

We could... it was handy in the early days when some selected partners
had access to pre-release systems, to tell them to use upstream kernels
:)

In the case of qemu, however, it's more harmful than anything else.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only
  2015-11-16 10:21     ` Benjamin Herrenschmidt
@ 2015-11-18  0:06       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-18  0:06 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Mon, 2015-11-16 at 21:21 +1100, Benjamin Herrenschmidt wrote:
> 
> Ah you are right. I do have second thoughts about that previous patch
> now that you mention it however. In the real MSR, HV and PR are
> independent; I wonder if I'm better off making the check explicit...
> 
> The reason I did it this way is that afaik, there is no such thing
> as a usermode hypervisor resource in the architecture, so any
> hypervisor resource is also a supervisor mode one, but having
> ctx->hv be 0 when MSR:HV=1 + MSR:PR=1 might make it easy to write
> incorrect code in other places when deciding for example how to
> direct
> interrupts.
> 
> I'll need to think a bit more about this one.

So I took out that bit in the previous patch, since we already seem
to check ctx.pr explicitly in most places anyway. There was one where
we didn't which I fixed (in the SMT ops).

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 44/77] pci-bridge: Set a supported devfn_min for bridge
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 44/77] pci-bridge: Set a supported devfn_min for bridge Benjamin Herrenschmidt
@ 2015-11-18 12:31   ` Paolo Bonzini
  2015-11-18 12:41     ` [Qemu-devel] [PATCH for-2.5 " Paolo Bonzini
  0 siblings, 1 reply; 198+ messages in thread
From: Paolo Bonzini @ 2015-11-18 12:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel, Michael S. Tsirkin



On 11/11/2015 01:27, Benjamin Herrenschmidt wrote:
>      if (bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_SHPC_REQ)) {
> +        /* SHCP gets upset if we try to use slot 0 */
> +        br->sec_bus.devfn_min = PCI_FUNC_MAX;
>          dev->config[PCI_INTERRUPT_PIN] = 0x1;
>          memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
>                             shpc_bar_size(dev));

This needs backwards compatibility gunk unfortunately.  However we
should fix it in 2.5 because it's a bug.  I'll send a patch.

Paolo

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices Benjamin Herrenschmidt
@ 2015-11-18 12:34   ` Paolo Bonzini
  2015-11-18 20:06     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Paolo Bonzini @ 2015-11-18 12:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel



On 11/11/2015 01:27, Benjamin Herrenschmidt wrote:
> This allows a bus class to tell whether a given bus has room for
> any new device. max_dev isn't sufficient as the rules can depend
> on some arguments or can differ between instances of a bus. This
> will be used by PCI in subsequent patches
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  include/hw/qdev-core.h |  1 +
>  qdev-monitor.c         | 13 ++++++++-----
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 8057aed..6f3dd8d 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -202,6 +202,7 @@ struct BusClass {
>       */
>      char *(*get_fw_dev_path)(DeviceState *dev);
>      void (*reset)(BusState *bus);
> +    bool (*can_add_device)(BusState *bus, QemuOpts *opts);
>      BusRealize realize;
>      BusUnrealize unrealize;
>  
> diff --git a/qdev-monitor.c b/qdev-monitor.c
> index a35098f..4023357 100644
> --- a/qdev-monitor.c
> +++ b/qdev-monitor.c
> @@ -384,7 +384,7 @@ static inline bool qbus_is_full(BusState *bus)
>   * Return the bus if found, else %NULL.
>   */
>  static BusState *qbus_find_recursive(BusState *bus, const char *name,
> -                                     const char *bus_typename)
> +                                     const char *bus_typename, QemuOpts *opts)
>  {
>      BusChild *kid;
>      BusState *pick, *child, *ret;
> @@ -398,7 +398,10 @@ static BusState *qbus_find_recursive(BusState *bus, const char *name,
>      }
>  
>      if (match && !qbus_is_full(bus)) {
> -        return bus;             /* root matches and isn't full */
> +        BusClass *bc = BUS_GET_CLASS(bus);
> +        if (!bc->can_add_device || bc->can_add_device(bus, opts)) {
> +            return bus;             /* root matches and isn't full */
> +	}
>      }
>  
>      pick = match ? bus : NULL;
> @@ -406,7 +409,7 @@ static BusState *qbus_find_recursive(BusState *bus, const char *name,
>      QTAILQ_FOREACH(kid, &bus->children, sibling) {
>          DeviceState *dev = kid->child;
>          QLIST_FOREACH(child, &dev->child_bus, sibling) {
> -            ret = qbus_find_recursive(child, name, bus_typename);
> +		ret = qbus_find_recursive(child, name, bus_typename, opts);

Tabs for indentation.  There are other occurrences in the patch.

Apart from this,

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>

>              if (ret && !qbus_is_full(ret)) {
>                  return ret;     /* a descendant matches and isn't full */
>              }
> @@ -436,7 +439,7 @@ static BusState *qbus_find(const char *path, Error **errp)
>              assert(!path[0]);
>              elem[0] = len = 0;
>          }
> -        bus = qbus_find_recursive(sysbus_get_default(), elem, NULL);
> +        bus = qbus_find_recursive(sysbus_get_default(), elem, NULL, NULL);
>          if (!bus) {
>              error_setg(errp, "Bus '%s' not found", elem);
>              return NULL;
> @@ -542,7 +545,7 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
>              return NULL;
>          }
>      } else if (dc->bus_type != NULL) {
> -        bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type);
> +	    bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type, opts);
>          if (!bus || qbus_is_full(bus)) {
>              error_setg(errp, "No '%s' bus found for device '%s'",
>                         dc->bus_type, driver);
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 46/77] pci: Use the new pci_can_add_device() to enforce devfn_min/max
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 46/77] pci: Use the new pci_can_add_device() to enforce devfn_min/max Benjamin Herrenschmidt
@ 2015-11-18 12:35   ` Paolo Bonzini
  0 siblings, 0 replies; 198+ messages in thread
From: Paolo Bonzini @ 2015-11-18 12:35 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel, Michael S. Tsirkin



On 11/11/2015 01:27, Benjamin Herrenschmidt wrote:
> This adds a devfn_max field to PCIBus and adds a pci_can_add_device()
> function which, if no "addr" (aka devfn) is specified, will tell whether
> there is any slot free between devfn_min and devfn_max.
> 
> This will be used by some PCI root complex implementations that support
> only one direct child to avoid having qemu put dumb devices at different
> slot numbers.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

CCing maintainer.

Paolo

> ---
>  hw/pci/pci.c             | 22 ++++++++++++++++++++++
>  include/hw/pci/pci_bus.h |  1 +
>  2 files changed, 23 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 168b9cc..7003f7c 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -108,6 +108,27 @@ static uint16_t pcibus_numa_node(PCIBus *bus)
>      return NUMA_NODE_UNASSIGNED;
>  }
>  
> +static bool pci_can_add_device(BusState *bus, QemuOpts *opts)
> +{
> +    unsigned int devfn, max;
> +    PCIBus *pbus = PCI_BUS(bus);
> +
> +    /* If address is specified, say yes and let it fail if that doesn't work */
> +    if (qemu_opt_get(opts, "addr") != NULL) {
> +        return true;
> +    }
> +    max = ARRAY_SIZE(pbus->devices);
> +    if (pbus->devfn_max && pbus->devfn_max < max) {
> +       max = pbus->devfn_max;
> +    }
> +    for (devfn = pbus->devfn_min ; devfn < max; devfn += PCI_FUNC_MAX) {
> +        if (!pbus->devices[devfn]) {
> +            break;
> +        }
> +    }
> +    return devfn < max;
> +}
> +
>  static void pci_bus_class_init(ObjectClass *klass, void *data)
>  {
>      BusClass *k = BUS_CLASS(klass);
> @@ -119,6 +140,7 @@ static void pci_bus_class_init(ObjectClass *klass, void *data)
>      k->realize = pci_bus_realize;
>      k->unrealize = pci_bus_unrealize;
>      k->reset = pcibus_reset;
> +    k->can_add_device = pci_can_add_device;
>  
>      pbc->is_root = pcibus_is_root;
>      pbc->bus_num = pcibus_num;
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..02055d4 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -23,6 +23,7 @@ struct PCIBus {
>      PCIIOMMUFunc iommu_fn;
>      void *iommu_opaque;
>      uint8_t devfn_min;
> +    uint8_t devfn_max;
>      pci_set_irq_fn set_irq;
>      pci_map_irq_fn map_irq;
>      pci_route_irq_fn route_intx_to_irq;
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH for-2.5 44/77] pci-bridge: Set a supported devfn_min for bridge
  2015-11-18 12:31   ` Paolo Bonzini
@ 2015-11-18 12:41     ` Paolo Bonzini
  2015-11-18 14:21       ` Michael S. Tsirkin
  0 siblings, 1 reply; 198+ messages in thread
From: Paolo Bonzini @ 2015-11-18 12:41 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel, Michael S. Tsirkin

On 18/11/2015 13:31, Paolo Bonzini wrote:
> 
> 
> On 11/11/2015 01:27, Benjamin Herrenschmidt wrote:
>>      if (bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_SHPC_REQ)) {
>> +        /* SHCP gets upset if we try to use slot 0 */
>> +        br->sec_bus.devfn_min = PCI_FUNC_MAX;
>>          dev->config[PCI_INTERRUPT_PIN] = 0x1;
>>          memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
>>                             shpc_bar_size(dev));
> 
> This needs backwards compatibility gunk unfortunately.  However we
> should fix it in 2.5 because it's a bug.  I'll send a patch.

Actually it turns out that the forbidden configuration is blocked elsewhere:

$ x86_64-softmmu/qemu-system-x86_64 \
	-device pci-bridge,id=foo,chassis_nr=1 \
	-device virtio-scsi-pci,bus=foo
qemu-system-x86_64: -device virtio-scsi-pci,bus=foo: Unsupported PCI
slot 0 for standard hotplug controller. Valid slots are between 1 and 31.

so this patch is just allowing the above command line to work.  There's
no effect with or without the patch if addr=0, so the patch is good for
2.5 IMO.

Michael, can you queue it?

Paolo

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH for-2.5 44/77] pci-bridge: Set a supported devfn_min for bridge
  2015-11-18 12:41     ` [Qemu-devel] [PATCH for-2.5 " Paolo Bonzini
@ 2015-11-18 14:21       ` Michael S. Tsirkin
  2015-11-18 14:25         ` Paolo Bonzini
  0 siblings, 1 reply; 198+ messages in thread
From: Michael S. Tsirkin @ 2015-11-18 14:21 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-ppc, qemu-devel

On Wed, Nov 18, 2015 at 01:41:47PM +0100, Paolo Bonzini wrote:
> On 18/11/2015 13:31, Paolo Bonzini wrote:
> > 
> > 
> > On 11/11/2015 01:27, Benjamin Herrenschmidt wrote:
> >>      if (bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_SHPC_REQ)) {
> >> +        /* SHCP gets upset if we try to use slot 0 */
> >> +        br->sec_bus.devfn_min = PCI_FUNC_MAX;
> >>          dev->config[PCI_INTERRUPT_PIN] = 0x1;
> >>          memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar",
> >>                             shpc_bar_size(dev));
> > 
> > This needs backwards compatibility gunk unfortunately.  However we
> > should fix it in 2.5 because it's a bug.  I'll send a patch.
> 
> Actually it turns out that the forbidden configuration is blocked elsewhere:
> 
> $ x86_64-softmmu/qemu-system-x86_64 \
> 	-device pci-bridge,id=foo,chassis_nr=1 \
> 	-device virtio-scsi-pci,bus=foo
> qemu-system-x86_64: -device virtio-scsi-pci,bus=foo: Unsupported PCI
> slot 0 for standard hotplug controller. Valid slots are between 1 and 31.
> 
> so this patch is just allowing the above command line to work.  There's
> no effect with or without the patch if addr=0, so the patch is good for
> 2.5 IMO.
> 
> Michael, can you queue it?
> 
> Paolo

This depends on the devfn_min thing, right?
I'm yet to review it, generally I'd prefer to
avoid changing device allocation rules since
that makes it so easy to break compatibility.

Assuming addresses are all explicitly stated,
is there even a problem?
If not, maybe we can defer everything to after 2.5.

-- 
MST

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH for-2.5 44/77] pci-bridge: Set a supported devfn_min for bridge
  2015-11-18 14:21       ` Michael S. Tsirkin
@ 2015-11-18 14:25         ` Paolo Bonzini
  2015-11-18 16:38           ` Michael S. Tsirkin
  0 siblings, 1 reply; 198+ messages in thread
From: Paolo Bonzini @ 2015-11-18 14:25 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: qemu-ppc, qemu-devel



On 18/11/2015 15:21, Michael S. Tsirkin wrote:
> This depends on the devfn_min thing, right?
> I'm yet to review it, generally I'd prefer to
> avoid changing device allocation rules since
> that makes it so easy to break compatibility.
> 
> Assuming addresses are all explicitly stated,
> is there even a problem?

It makes it a bit easier to use pci-bridge; it is a bug, albeit one that
only affects direct usage through the command line of PCI bridges.

Paolo

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH for-2.5 44/77] pci-bridge: Set a supported devfn_min for bridge
  2015-11-18 14:25         ` Paolo Bonzini
@ 2015-11-18 16:38           ` Michael S. Tsirkin
  0 siblings, 0 replies; 198+ messages in thread
From: Michael S. Tsirkin @ 2015-11-18 16:38 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-ppc, qemu-devel

On Wed, Nov 18, 2015 at 03:25:25PM +0100, Paolo Bonzini wrote:
> 
> 
> On 18/11/2015 15:21, Michael S. Tsirkin wrote:
> > This depends on the devfn_min thing, right?
> > I'm yet to review it, generally I'd prefer to
> > avoid changing device allocation rules since
> > that makes it so easy to break compatibility.
> > 
> > Assuming addresses are all explicitly stated,
> > is there even a problem?
> 
> It makes it a bit easier to use pci-bridge; it is a bug, albeit one that
> only affects direct usage through the command line of PCI bridges.
> 
> Paolo

Yes but not a regression ... I'll look at how
invasive the change it depends on is.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices
  2015-11-18 12:34   ` Paolo Bonzini
@ 2015-11-18 20:06     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-18 20:06 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-ppc; +Cc: qemu-devel

On Wed, 2015-11-18 at 13:34 +0100, Paolo Bonzini wrote:

 ../..

> Tabs for indentation.  There are other occurrences in the patch.

Oops, sorry, I missed those. Do you need a respin?

> Apart from this,
> 
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> >              if (ret && !qbus_is_full(ret)) {
> >                  return ret;     /* a descendant matches and isn't
> > full */
> >              }
> > @@ -436,7 +439,7 @@ static BusState *qbus_find(const char *path,
> > Error **errp)
> >              assert(!path[0]);
> >              elem[0] = len = 0;
> >          }
> > -        bus = qbus_find_recursive(sysbus_get_default(), elem,
> > NULL);
> > +        bus = qbus_find_recursive(sysbus_get_default(), elem,
> > NULL, NULL);
> >          if (!bus) {
> >              error_setg(errp, "Bus '%s' not found", elem);
> >              return NULL;
> > @@ -542,7 +545,7 @@ DeviceState *qdev_device_add(QemuOpts *opts,
> > Error **errp)
> >              return NULL;
> >          }
> >      } else if (dc->bus_type != NULL) {
> > -        bus = qbus_find_recursive(sysbus_get_default(), NULL, dc-
> > >bus_type);
> > +	    bus = qbus_find_recursive(sysbus_get_default(), NULL,
> > dc->bus_type, opts);
> >          if (!bus || qbus_is_full(bus)) {
> >              error_setg(errp, "No '%s' bus found for device '%s'",
> >                         dc->bus_type, driver);
> > 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes
  2015-11-16 10:16     ` Benjamin Herrenschmidt
@ 2015-11-19  6:09       ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:09 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1135 bytes --]

On Mon, Nov 16, 2015 at 09:16:08PM +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2015-11-16 at 16:00 +1100, David Gibson wrote:
> > 
> > >  //#define DEBUG_MMU
> > >  //#define DEBUG_BATS
> > > @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> > >      case POWERPC_MMU_2_03:
> > >      case POWERPC_MMU_2_06:
> > >      case POWERPC_MMU_2_07:
> > > +        env->tlb_need_flush = 0;
> > >  #endif /* defined(TARGET_PPC64) */
> > >          tlb_flush(CPU(cpu), 1);
> > >          break;
> > 
> > Any particular reason you're leaving this one as an immediate rather
> > than deferred flush?
> 
> A couple yes. It's mostly unused on server CPUs (we don't do tlbia),
> and it's used by ppc_cpu_reset(). In that latter case, I like having
> everything really cleaned up ... 
> 
> > Should you be clearing the pending flush flag cpu_reset()?
> 
> That should happen as a result of the above.

Ok.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s Benjamin Herrenschmidt
@ 2015-11-19  6:11   ` David Gibson
  2015-11-19 10:21     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:11 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 8616 bytes --]

On Wed, Nov 11, 2015 at 11:27:20AM +1100, Benjamin Herrenschmidt wrote:
> We don't give them a KVM reg number yet as no current KVM version
> supports HV mode.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Looks sane, though I haven't explicitly checked each of the
implementations against the architecture.

> ---
>  target-ppc/translate_init.c | 140 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 137 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 30a03ce..c743eb1 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -1104,6 +1104,11 @@ static void gen_spr_amr (CPUPPCState *env)
>                       SPR_NOACCESS, SPR_NOACCESS,
>                       &spr_read_generic, &spr_write_generic,
>                       KVM_REG_PPC_UAMOR, 0);
> +    spr_register_hv(env, SPR_AMOR, "AMOR",
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    &spr_read_generic, &spr_write_generic,
> +                    0);
>  #endif /* !CONFIG_USER_ONLY */
>  }
>  #endif /* TARGET_PPC64 */
> @@ -7490,6 +7495,20 @@ static void gen_spr_book3s_dbg(CPUPPCState *env)
>                       KVM_REG_PPC_DABRX, 0x00000000);
>  }
>  
> +static void gen_spr_book3s_207_dbg(CPUPPCState *env)
> +{
> +    spr_register_hv(env, SPR_DAWR, "DAWR",
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    &spr_read_generic, &spr_write_generic,
> +                    0x00000000);
> +    spr_register_hv(env, SPR_DAWRX, "DAWRX",
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    &spr_read_generic, &spr_write_generic,
> +                    0x00000000);
> +}
> +
>  static void gen_spr_970_dbg(CPUPPCState *env)
>  {
>      /* Breakpoints */
> @@ -7654,15 +7673,116 @@ static void gen_spr_power5p_lpar(CPUPPCState *env)
>      spr_register_kvm(env, SPR_LPCR, "LPCR",
>                       SPR_NOACCESS, SPR_NOACCESS,
>                       &spr_read_generic, &spr_write_generic,
> -                     KVM_REG_PPC_LPCR, 0x00000000);
> +                     KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
>  }
>  
> +#if !defined(CONFIG_USER_ONLY)
> +static void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
> +{
> +    TCGv hmer = tcg_temp_new();
> +
> +    gen_load_spr(hmer, sprn);
> +    tcg_gen_and_tl(hmer, cpu_gpr[gprn], hmer);
> +    gen_store_spr(sprn, hmer);
> +    spr_store_dump_spr(sprn);
> +    tcg_temp_free(hmer);
> +}
> +#endif
> +
>  static void gen_spr_book3s_ids(CPUPPCState *env)
>  {
> +    /* FIXME: Will need to deal with thread vs core only SPRs */
> +
>      /* Processor identification */
> -    spr_register(env, SPR_PIR, "PIR",
> +    spr_register_hv(env, SPR_PIR, "PIR",
>                   SPR_NOACCESS, SPR_NOACCESS,
> -                 &spr_read_generic, &spr_write_pir,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, NULL,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HID0, "HID0",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_TSCR, "TSCR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HMER, "HMER",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_hmer,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HMEER, "HMEER",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_TFMR, "TFMR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_LPIDR, "LPIDR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HFSCR, "HFSCR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_MMCRC, "MMCRC",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_MMCRH, "MMCRH",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HSPRG0, "HSPRG0",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HSPRG1, "HSPRG1",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HSRR0, "HSRR0",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HSRR1, "HSRR1",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HDAR, "HDAR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HDSISR, "HDSISR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_RMOR, "RMOR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
> +    spr_register_hv(env, SPR_HRMOR, "HRMOR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
>                   0x00000000);
>  }
>  
> @@ -7868,6 +7988,17 @@ static void gen_spr_power8_fscr(CPUPPCState *env)
>                       KVM_REG_PPC_FSCR, initval);
>  }
>  
> +static void gen_spr_power8_rpr(CPUPPCState *env)
> +{
> +#if !defined(CONFIG_USER_ONLY)
> +    spr_register_hv(env, SPR_RPR, "RPR",
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    SPR_NOACCESS, SPR_NOACCESS,
> +                    &spr_read_generic, &spr_write_generic,
> +                    0x00000103070F1F3F);
> +#endif
> +}
> +
>  static void init_proc_book3s_64(CPUPPCState *env, int version)
>  {
>      gen_spr_ne_601(env);
> @@ -7919,9 +8050,12 @@ static void init_proc_book3s_64(CPUPPCState *env, int version)
>          gen_spr_power8_pmu_user(env);
>          gen_spr_power8_tm(env);
>          gen_spr_vtb(env);
> +        gen_spr_power8_rpr(env);
>      }
>      if (version < BOOK3S_CPU_POWER8) {
>          gen_spr_book3s_dbg(env);
> +    } else {
> +        gen_spr_book3s_207_dbg(env);
>      }
>  #if !defined(CONFIG_USER_ONLY)
>      switch (version) {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation Benjamin Herrenschmidt
@ 2015-11-19  6:19   ` David Gibson
  2015-11-19 10:23     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:19 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1680 bytes --]

On Wed, Nov 11, 2015 at 11:27:22AM +1100, Benjamin Herrenschmidt wrote:
> XXX This patch needs double checking... It fixed 32-bit userspace
> but I'm not sure it's right. I wonder whether msr_is_64bit() should
> be applied to env->msr, not msr, but I need to double check the
> architecture.

Hrm, I'm not really sure where I'd look in the arch, but
msr_is_64bit(env->msr) seems like it would make more sense to me.
The current logic means that rfi, ostensibly a 32-bit instruction, will
have different behaviour depending on the upper bits of SRR1, which
seems unexpected.

> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/excp_helper.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index c1d6605..00fae60 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -878,13 +878,13 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
>      CPUState *cs = CPU(ppc_env_get_cpu(env));
>  
>  #if defined(TARGET_PPC64)
> +    msr = msr & msrm;
>      if (msr_is_64bit(env, msr)) {
>          nip = (uint64_t)nip;
> -        msr &= (uint64_t)msrm;
>      } else {
>          nip = (uint32_t)nip;
> -        msr = (uint32_t)(msr & msrm);
>          if (keep_msrh) {
> +	    msr &= 0xffffffff;
>              msr |= env->msr & ~((uint64_t)0xFFFFFFFF);
>          }
>      }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 10/77] ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 10/77] ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV Benjamin Herrenschmidt
@ 2015-11-19  6:20   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2245 bytes --]

On Wed, Nov 11, 2015 at 11:27:23AM +1100, Benjamin Herrenschmidt wrote:
> This helper is only used by the various instructions that can alter
> MSR and not interrupts. Add a comment to that effect to the interrupt
> code as well in case somebody wants to change this
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/excp_helper.c | 8 ++++++--
>  target-ppc/helper_regs.h | 4 ++--
>  2 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 00fae60..83e6c07 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -662,8 +662,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          }
>      }
>  #endif
> -    /* XXX: we don't use hreg_store_msr here as already have treated
> -     *      any special case that could occur. Just store MSR and update hflags
> +    /* We don't use hreg_store_msr here as already have treated
> +     * any special case that could occur. Just store MSR and update hflags
> +     *
> +     * Note: We *MUST* not use hreg_store_msr() as-is anyway because it
> +     * will prevent setting of the HV bit which some exceptions might need
> +     * to do.
>       */
>      env->msr = new_msr & env->msr_mask;
>      hreg_compute_hflags(env);
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 57da931..12af61c 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -114,8 +114,8 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      excp = 0;
>      value &= env->msr_mask;
>  #if !defined(CONFIG_USER_ONLY)
> -    if (!alter_hv) {
> -        /* mtmsr cannot alter the hypervisor state */
> +    /* Neither mtmsr nor guest state can alter HV */
> +    if (!alter_hv || !(env->msr & MSR_HVB)) {
>          value &= ~MSR_HVB;
>          value |= env->msr & MSR_HVB;
>      }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 12/77] ppc: Better figure out if processor has HV mode
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 12/77] ppc: Better figure out if processor has HV mode Benjamin Herrenschmidt
@ 2015-11-19  6:22   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:22 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 4140 bytes --]

On Wed, Nov 11, 2015 at 11:27:25AM +1100, Benjamin Herrenschmidt wrote:
> We use an env. flag which is set to the initial value of MSR_HVB in
> the msr_mask. We also adjust the POWER8 mask to set SHV.
> 
> Also use this to adjust ctx.hv so that it is *set* when the processor
> doesn't have an HV mode (970 with Apple mode for example), thus enabling
> hypervisor instructions/SPRs.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/cpu.h            |  4 ++++
>  target-ppc/translate.c      |  4 +++-
>  target-ppc/translate_init.c | 21 ++++++++++++++++-----
>  3 files changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 357b6e7..062644e 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1113,6 +1113,10 @@ struct CPUPPCState {
>      hwaddr mpic_iack;
>      /* true when the external proxy facility mode is enabled */
>      bool mpic_proxy;
> +    /* set when the processor has an HV mode, thus HV priv
> +     * instructions and SPRs are diallowed if MSR:HV is 0
> +     */
> +    bool has_hv_mode;
>  #endif
>  
>      /* Those resources are used only during code translation */
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index a2fe1b5..10eb9e3 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -11465,8 +11465,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>      ctx.exception = POWERPC_EXCP_NONE;
>      ctx.spr_cb = env->spr_cb;
>      ctx.pr = msr_pr;
> -    ctx.hv = !msr_pr && msr_hv;
>      ctx.mem_idx = env->dmmu_idx;
> +#if !defined(CONFIG_USER_ONLY)
> +    ctx.hv = !msr_pr && (msr_hv || !env->has_hv_mode);
> +#endif
>      ctx.insns_flags = env->insns_flags;
>      ctx.insns_flags2 = env->insns_flags2;
>      ctx.access_type = -1;
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 7bcfbc0..76f20ea 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8391,7 +8391,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>                          PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
>                          PPC2_TM;
>      pcc->msr_mask = (1ull << MSR_SF) |
> -                    (1ull << MSR_TM) |
> +                    (1ull << MSR_SHV) |
> +		    (1ull << MSR_TM) |
>                      (1ull << MSR_VR) |
>                      (1ull << MSR_VSX) |
>                      (1ull << MSR_EE) |
> @@ -9748,10 +9749,7 @@ static void ppc_cpu_reset(CPUState *s)
>      pcc->parent_reset(s);
>  
>      msr = (target_ulong)0;
> -    if (0) {
> -        /* XXX: find a suitable condition to enable the hypervisor mode */
> -        msr |= (target_ulong)MSR_HVB;
> -    }
> +    msr |= (target_ulong)MSR_HVB;
>      msr |= (target_ulong)0 << MSR_AP; /* TO BE CHECKED */
>      msr |= (target_ulong)0 << MSR_SA; /* TO BE CHECKED */
>      msr |= (target_ulong)1 << MSR_EP;
> @@ -9852,6 +9850,19 @@ static void ppc_cpu_initfn(Object *obj)
>      env->bfd_mach = pcc->bfd_mach;
>      env->check_pow = pcc->check_pow;
>  
> +    /* Mark HV mode as supported if the CPU has an MSR_HV bit
> +     * in the msr_mask. The mask can later be cleared by PAPR
> +     * mode but the hv mode support will remain, thus enforcing
> +     * that we cannot use priv. instructions in guest in PAPR
> +     * mode. For 970 we currently simply don't set HV in msr_mask
> +     * thus simulating an "Apple mode" 970. If we ever want to
> +     * support 970 HV mode, we'll have to add a processor attribute
> +     * of some sort.
> +     */
> +#if !defined(CONFIG_USER_ONLY)
> +    env->has_hv_mode = !!(env->msr_mask & MSR_HVB);
> +#endif
> +
>  #if defined(TARGET_PPC64)
>      if (pcc->sps) {
>          env->sps = *pcc->sps;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation Benjamin Herrenschmidt
@ 2015-11-19  6:26   ` David Gibson
  2015-11-19 10:26     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1980 bytes --]

On Wed, Nov 11, 2015 at 11:27:28AM +1100, Benjamin Herrenschmidt wrote:
> From: Michael Neuling <mikey@neuling.org>
> 
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Looks correct, though my memory of C promotion rules is obviously a
bit stale, since I'm not immediately seeing why the original was wrong.

> ---
>  target-ppc/translate.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index bd5df40..3974cd2 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -4391,7 +4391,7 @@ static void gen_mtmsrd(DisasContext *ctx)
>          /* Special form that does not need any synchronisation */
>          TCGv t0 = tcg_temp_new();
>          tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE));
> -        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~((1 << MSR_RI) | (1 << MSR_EE)));
> +        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
>          tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
>          tcg_temp_free(t0);
>      } else {
> @@ -4422,7 +4422,7 @@ static void gen_mtmsr(DisasContext *ctx)
>          /* Special form that does not need any synchronisation */
>          TCGv t0 = tcg_temp_new();
>          tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE));
> -        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~((1 << MSR_RI) | (1 << MSR_EE)));
> +        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
>          tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
>          tcg_temp_free(t0);
>      } else {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model Benjamin Herrenschmidt
@ 2015-11-19  6:44   ` David Gibson
  2015-11-19 10:31     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:44 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 17131 bytes --]

On Wed, Nov 11, 2015 at 11:27:31AM +1100, Benjamin Herrenschmidt wrote:
> Properly implement LPES0/1 handling for HV vs. !HV mode and fix AIL
> implementation.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/cpu.h            |   2 +
>  target-ppc/excp_helper.c    | 175 ++++++++++++++++++++++----------------------
>  target-ppc/translate_init.c |   2 +-
>  3 files changed, 92 insertions(+), 87 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 062644e..8185812 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -162,6 +162,8 @@ enum powerpc_excp_t {
>      POWERPC_EXCP_970,
>      /* POWER7 exception model           */
>      POWERPC_EXCP_POWER7,
> +    /* POWER8 exception model           */
> +    POWERPC_EXCP_POWER8,
>  #endif /* defined(TARGET_PPC64) */
>  };
>  
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 83e6c07..716b27b 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -74,22 +74,14 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      CPUState *cs = CPU(cpu);
>      CPUPPCState *env = &cpu->env;
>      target_ulong msr, new_msr, vector;
> -    int srr0, srr1, asrr0, asrr1;
> -    int lpes0, lpes1, lev;
> +    int srr0, srr1, asrr0, asrr1, lev, ail;
> +    bool lpes0;
>  
> -    if (0) {
> -        /* XXX: find a suitable condition to enable the hypervisor mode */
> -        lpes0 = (env->spr[SPR_LPCR] >> 1) & 1;
> -        lpes1 = (env->spr[SPR_LPCR] >> 2) & 1;
> -    } else {
> -        /* Those values ensure we won't enter the hypervisor mode */
> -        lpes0 = 0;
> -        lpes1 = 1;
> -    }
>  
>      qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
>                    " => %08x (%02x)\n", env->nip, excp, env->error_code);
>  
> +
>      /* new srr1 value excluding must-be-zero bits */
>      if (excp_model == POWERPC_EXCP_BOOKE) {
>          msr = env->msr;
> @@ -97,8 +89,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          msr = env->msr & ~0x783f0000ULL;
>      }
>  
> -    /* new interrupt handler msr */
> -    new_msr = env->msr & ((target_ulong)1 << MSR_ME);
> +    /* new interrupt handler msr preserves existing HV and ME unless
> +     * explicitly overriden
> +     */
> +    new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);

Ouch.  The fact that MSR_ME is a bit number, but MSR_HVB is a mask is
certainly confusing, but that's a pre-existing problem.

>      /* target registers */
>      srr0 = SPR_SRR0;
> @@ -106,6 +100,33 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      asrr0 = -1;
>      asrr1 = -1;
>  
> +    /* Exception targetting modifiers
> +     *
> +     * LPES0 is supported on POWER7/8
> +     * LPES1 is not supported (old iSeries mode)
> +     *
> +     * On anything else, we behave as if LPES0 is 1
> +     * (externals don't alter MSR:HV)
> +     *
> +     * AIL is initialized here but can be cleared by
> +     * selected exceptions
> +     */
> +#if defined(TARGET_PPC64)
> +    if (excp_model == POWERPC_EXCP_POWER7 ||
> +        excp_model == POWERPC_EXCP_POWER8) {
> +        lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0);
> +        if (excp_model == POWERPC_EXCP_POWER8) {
> +            ail = (env->spr[SPR_LPCR] & LPCR_AIL) >> LPCR_AIL_SHIFT;
> +        } else {
> +            ail = 0;
> +        }
> +    } else
> +#endif /* defined(TARGET_PPC64) */
> +    {
> +        lpes0 = true;
> +        ail = 0;
> +    }
> +
>      switch (excp) {
>      case POWERPC_EXCP_NONE:
>          /* Should never happen */
> @@ -141,10 +162,8 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>              cs->halted = 1;
>              cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>          }
> -        if (0) {
> -            /* XXX: find a suitable condition to enable the hypervisor mode */
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
> +        new_msr |= (target_ulong)MSR_HVB;
> +        ail = 0;
>  
>          /* machine check exceptions don't have ME set */
>          new_msr &= ~((target_ulong)1 << MSR_ME);
> @@ -169,23 +188,20 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      case POWERPC_EXCP_DSI:       /* Data storage exception                   */
>          LOG_EXCP("DSI exception: DSISR=" TARGET_FMT_lx" DAR=" TARGET_FMT_lx
>                   "\n", env->spr[SPR_DSISR], env->spr[SPR_DAR]);
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_next;
>      case POWERPC_EXCP_ISI:       /* Instruction storage exception            */
>          LOG_EXCP("ISI exception: msr=" TARGET_FMT_lx ", nip=" TARGET_FMT_lx
>                   "\n", msr, env->nip);
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          msr |= env->error_code;
>          goto store_next;
>      case POWERPC_EXCP_EXTERNAL:  /* External input                           */
>          cs = CPU(cpu);
>  
> -        if (lpes0 == 1) {
> +        if (!lpes0) {
>              new_msr |= (target_ulong)MSR_HVB;
> +            new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
> +            srr0 = SPR_HSRR0;
> +            srr1 = SPR_HSRR1;
>          }
>          if (env->mpic_proxy) {
>              /* IACK the IRQ on delivery */
> @@ -193,9 +209,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          }
>          goto store_next;
>      case POWERPC_EXCP_ALIGN:     /* Alignment exception                      */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          /* XXX: this is false */
>          /* Get rS/rD and rA from faulting opcode */
>          env->spr[SPR_DSISR] |= (cpu_ldl_code(env, (env->nip - 4))
> @@ -210,9 +223,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>                  env->error_code = 0;
>                  return;
>              }
> -            if (lpes1 == 0) {
> -                new_msr |= (target_ulong)MSR_HVB;
> -            }
>              msr |= 0x00100000;
>              if (msr_fe0 == msr_fe1) {
>                  goto store_next;
> @@ -221,23 +231,14 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>              break;
>          case POWERPC_EXCP_INVAL:
>              LOG_EXCP("Invalid instruction at " TARGET_FMT_lx "\n", env->nip);
> -            if (lpes1 == 0) {
> -                new_msr |= (target_ulong)MSR_HVB;
> -            }
>              msr |= 0x00080000;
>              env->spr[SPR_BOOKE_ESR] = ESR_PIL;
>              break;
>          case POWERPC_EXCP_PRIV:
> -            if (lpes1 == 0) {
> -                new_msr |= (target_ulong)MSR_HVB;
> -            }
>              msr |= 0x00040000;
>              env->spr[SPR_BOOKE_ESR] = ESR_PPR;
>              break;
>          case POWERPC_EXCP_TRAP:
> -            if (lpes1 == 0) {
> -                new_msr |= (target_ulong)MSR_HVB;
> -            }
>              msr |= 0x00020000;
>              env->spr[SPR_BOOKE_ESR] = ESR_PTR;
>              break;
> @@ -249,27 +250,23 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          }
>          goto store_current;
>      case POWERPC_EXCP_FPU:       /* Floating-point unavailable exception     */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_current;
>      case POWERPC_EXCP_SYSCALL:   /* System call exception                    */
>          dump_syscall(env);
>          lev = env->error_code;
> +
> +        /* "PAPR mode" built-in hypercall emulation */
>          if ((lev == 1) && cpu_ppc_hypercall) {
>              cpu_ppc_hypercall(cpu);
>              return;
>          }
> -        if (lev == 1 || (lpes0 == 0 && lpes1 == 0)) {
> +        if (lev == 1) {
>              new_msr |= (target_ulong)MSR_HVB;
>          }
>          goto store_next;
>      case POWERPC_EXCP_APU:       /* Auxiliary processor unavailable          */
>          goto store_current;
>      case POWERPC_EXCP_DECR:      /* Decrementer exception                    */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_next;
>      case POWERPC_EXCP_FIT:       /* Fixed-interval timer interrupt           */
>          /* FIT on 4xx */
> @@ -338,21 +335,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          } else {
>              new_msr &= ~((target_ulong)1 << MSR_ME);
>          }
> -
> -        if (0) {
> -            /* XXX: find a suitable condition to enable the hypervisor mode */
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
> +        new_msr |= (target_ulong)MSR_HVB;
> +        ail = 0;
>          goto store_next;
>      case POWERPC_EXCP_DSEG:      /* Data segment exception                   */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_next;
>      case POWERPC_EXCP_ISEG:      /* Instruction segment exception            */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_next;
>      case POWERPC_EXCP_HDECR:     /* Hypervisor decrementer exception         */
>          srr0 = SPR_HSRR0;
> @@ -361,21 +349,20 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
>          goto store_next;
>      case POWERPC_EXCP_TRACE:     /* Trace exception                          */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_next;
>      case POWERPC_EXCP_HDSI:      /* Hypervisor data storage exception        */
>          srr0 = SPR_HSRR0;
>          srr1 = SPR_HSRR1;
>          new_msr |= (target_ulong)MSR_HVB;
>          new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
> +        ail = 0;

Do you need to set ail explicitly here, given the general ail logic below?

>          goto store_next;
>      case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
>          srr0 = SPR_HSRR0;
>          srr1 = SPR_HSRR1;
>          new_msr |= (target_ulong)MSR_HVB;
>          new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
> +        ail = 0;
>          goto store_next;
>      case POWERPC_EXCP_HDSEG:     /* Hypervisor data segment exception        */
>          srr0 = SPR_HSRR0;
> @@ -390,19 +377,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
>          goto store_next;
>      case POWERPC_EXCP_VPU:       /* Vector unavailable exception             */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_current;
>      case POWERPC_EXCP_VSXU:       /* VSX unavailable exception               */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_current;
>      case POWERPC_EXCP_FU:         /* Facility unavailable exception          */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          goto store_current;
>      case POWERPC_EXCP_PIT:       /* Programmable interval timer interrupt    */
>          LOG_EXCP("PIT exception\n");
> @@ -421,9 +399,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>                    "is not implemented yet !\n");
>          goto store_next;
>      case POWERPC_EXCP_IFTLB:     /* Instruction fetch TLB error              */
> -        if (lpes1 == 0) { /* XXX: check this */
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          switch (excp_model) {
>          case POWERPC_EXCP_602:
>          case POWERPC_EXCP_603:
> @@ -440,9 +415,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          }
>          break;
>      case POWERPC_EXCP_DLTLB:     /* Data load TLB miss                       */
> -        if (lpes1 == 0) { /* XXX: check this */
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          switch (excp_model) {
>          case POWERPC_EXCP_602:
>          case POWERPC_EXCP_603:
> @@ -459,9 +431,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          }
>          break;
>      case POWERPC_EXCP_DSTLB:     /* Data store TLB miss                      */
> -        if (lpes1 == 0) { /* XXX: check this */
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          switch (excp_model) {
>          case POWERPC_EXCP_602:
>          case POWERPC_EXCP_603:
> @@ -567,9 +536,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>                    "is not implemented yet !\n");
>          goto store_next;
>      case POWERPC_EXCP_PERFM:     /* Embedded performance monitor interrupt   */
> -        if (lpes1 == 0) {
> -            new_msr |= (target_ulong)MSR_HVB;
> -        }
>          /* XXX: TODO */
>          cpu_abort(cs,
>                    "Performance counter exception is not implemented yet !\n");
> @@ -613,6 +579,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      }
>      /* Save MSR */
>      env->spr[srr1] = msr;
> +
> +    /* Sanity check */
> +    if (!(env->msr_mask & MSR_HVB) && (srr0 == SPR_HSRR0)) {
> +        cpu_abort(cs, "Trying to deliver HV exception %d with no HV support\n", excp);
> +    }
> +
>      /* If any alternate SRR register are defined, duplicate saved values */
>      if (asrr0 != -1) {
>          env->spr[asrr0] = env->spr[srr0];
> @@ -621,13 +593,20 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>          env->spr[asrr1] = env->spr[srr1];
>      }
>  
> -    if (env->spr[SPR_LPCR] & LPCR_AIL) {
> -        new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> -    }
> -
> +    /* Sort out endianness of interrupt, this differs depending on the
> +     * CPU, the HV mode, etc...
> +     */
>  #ifdef TARGET_PPC64
>      if (excp_model == POWERPC_EXCP_POWER7) {
> -        if (env->spr[SPR_LPCR] & LPCR_ILE) {
> +        if (!(new_msr & MSR_HVB) && (env->spr[SPR_LPCR] & LPCR_ILE)) {
> +            new_msr |= (target_ulong)1 << MSR_LE;
> +        }
> +    } else if (excp_model == POWERPC_EXCP_POWER8) {
> +        if (new_msr & MSR_HVB) {
> +            if (env->spr[SPR_HID0] & HID0_HILE) {
> +                new_msr |= (target_ulong)1 << MSR_LE;
> +            }
> +        } else if (env->spr[SPR_LPCR] & LPCR_ILE) {
>              new_msr |= (target_ulong)1 << MSR_LE;
>          }
>      } else if (msr_ile) {
> @@ -646,6 +625,30 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>                    excp);
>      }
>      vector |= env->excp_prefix;
> +
> +    /* AIL only works if there is no HV transition and we are running with
> +     * translations enabled
> +     */
> +    if (!((msr >> MSR_IR) & 1) || !((msr >> MSR_DR) & 1) ||
> +        ((new_msr & MSR_HVB) && !(msr & MSR_HVB))) {
> +        ail = 0;
> +    }
> +    /* Handle AIL */
> +    if (ail) {
> +        new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> +        switch(ail) {
> +        case 2:
> +            vector |= 0x18000;
> +            break;
> +        case 3:
> +            vector |= 0xc000000000004000ull;
> +            break;
> +        default:
> +            cpu_abort(cs, "Invalid AIL combination %d\n", ail);
> +            break;
> +        }
> +    }
> +
>  #if defined(TARGET_PPC64)
>      if (excp_model == POWERPC_EXCP_BOOKE) {
>          if (env->spr[SPR_BOOKE_EPCR] & EPCR_ICM) {
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index f11e7d0..8a50273 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8412,7 +8412,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>  #if defined(CONFIG_SOFTMMU)
>      pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
>  #endif
> -    pcc->excp_model = POWERPC_EXCP_POWER7;
> +    pcc->excp_model = POWERPC_EXCP_POWER8;
>      pcc->bus_model = PPC_FLAGS_INPUT_POWER7;
>      pcc->bfd_mach = bfd_mach_ppc64;
>      pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 19/77] ppc: Fix POWER7 and POWER8 exception definitions
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 19/77] ppc: Fix POWER7 and POWER8 exception definitions Benjamin Herrenschmidt
@ 2015-11-19  6:46   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:46 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 453 bytes --]

On Wed, Nov 11, 2015 at 11:27:32AM +1100, Benjamin Herrenschmidt wrote:
> We were initializing unused ones and missing some
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 20/77] ppc: Fix generation if ISI/DSI vs. HV mode
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 20/77] ppc: Fix generation if ISI/DSI vs. HV mode Benjamin Herrenschmidt
@ 2015-11-19  6:50   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-19  6:50 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 5343 bytes --]

On Wed, Nov 11, 2015 at 11:27:33AM +1100, Benjamin Herrenschmidt wrote:
> Under some circumstances, we need to direct ISI and DSI interrupts
> at the hypervisor, turning them into HISI/HDSI, and using different
> SPRs (HDSISR and HDAR) depending on the combination of MSR_DR and
> the corresponding VPM bits in LPCR.
> 
> This moves part of the code into helpers that are fixed to select
> the right exception type and registers. On pre-P7 processors, LPCR
> is 0 which provides the old behaviour of directing the interrupts
> at the supervisor.
> 
> Thanks to Andrei Warkentin for finding a bug when HV=1
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/mmu-hash64.c | 66 +++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 47 insertions(+), 19 deletions(-)
> 
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 71e1d14..e489fa4 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -466,6 +466,44 @@ static hwaddr ppc_hash64_pte_raddr(ppc_slb_t *slb, ppc_hash_pte64_t pte,
>      return (rpn & ~mask) | (eaddr & mask);
>  }
>  
> +static void ppc_hash64_set_isi(CPUState *cs, CPUPPCState *env, uint64_t error_code)
> +{
> +    bool vpm;
> +
> +    if (msr_ir) {
> +        vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
> +    } else {
> +        vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0);
> +    }
> +    if (vpm && !msr_hv) {
> +        cs->exception_index = POWERPC_EXCP_HISI;
> +    } else {
> +        cs->exception_index = POWERPC_EXCP_ISI;
> +    }
> +    env->error_code = error_code;
> +}
> +
> +static void ppc_hash64_set_dsi(CPUState *cs, CPUPPCState *env, uint64_t dar, uint64_t dsisr)
> +{
> +    bool vpm;
> +
> +    if (msr_dr) {
> +        vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
> +    } else {
> +        vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0);
> +    }
> +    if (vpm && msr_hv) {
> +        cs->exception_index = POWERPC_EXCP_HDSI;
> +        env->spr[SPR_HDAR] = dar;
> +        env->spr[SPR_HDSISR] = dsisr;
> +    } else {
> +        cs->exception_index = POWERPC_EXCP_DSI;
> +        env->spr[SPR_DAR] = dar;
> +        env->spr[SPR_DSISR] = dsisr;
> +   }
> +    env->error_code = 0;
> +}
> +
>  int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
>                                  int rwx, int mmu_idx)
>  {
> @@ -475,7 +513,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
>      hwaddr pte_offset;
>      ppc_hash_pte64_t pte;
>      int pp_prot, amr_prot, prot;
> -    uint64_t new_pte1;
> +    uint64_t new_pte1, dsisr;
>      const int need_prot[] = {PAGE_READ, PAGE_WRITE, PAGE_EXEC};
>      hwaddr raddr;
>  
> @@ -509,26 +547,21 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
>  
>      /* 3. Check for segment level no-execute violation */
>      if ((rwx == 2) && (slb->vsid & SLB_VSID_N)) {
> -        cs->exception_index = POWERPC_EXCP_ISI;
> -        env->error_code = 0x10000000;
> +        ppc_hash64_set_isi(cs, env, 0x10000000);
>          return 1;
>      }
>  
>      /* 4. Locate the PTE in the hash table */
>      pte_offset = ppc_hash64_htab_lookup(env, slb, eaddr, &pte);
>      if (pte_offset == -1) {
> +        dsisr = 0x40000000;
>          if (rwx == 2) {
> -            cs->exception_index = POWERPC_EXCP_ISI;
> -            env->error_code = 0x40000000;
> +            ppc_hash64_set_isi(cs, env, dsisr);
>          } else {
> -            cs->exception_index = POWERPC_EXCP_DSI;
> -            env->error_code = 0;
> -            env->spr[SPR_DAR] = eaddr;
>              if (rwx == 1) {
> -                env->spr[SPR_DSISR] = 0x42000000;
> -            } else {
> -                env->spr[SPR_DSISR] = 0x40000000;
> +                dsisr |= 0x02000000;
>              }
> +            ppc_hash64_set_dsi(cs, env, eaddr, dsisr);
>          }
>          return 1;
>      }
> @@ -545,14 +578,9 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
>          /* Access right violation */
>          qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
>          if (rwx == 2) {
> -            cs->exception_index = POWERPC_EXCP_ISI;
> -            env->error_code = 0x08000000;
> +            ppc_hash64_set_isi(cs, env, 0x08000000);
>          } else {
> -            target_ulong dsisr = 0;
> -
> -            cs->exception_index = POWERPC_EXCP_DSI;
> -            env->error_code = 0;
> -            env->spr[SPR_DAR] = eaddr;
> +            dsisr = 0;
>              if (need_prot[rwx] & ~pp_prot) {
>                  dsisr |= 0x08000000;
>              }
> @@ -562,7 +590,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, target_ulong eaddr,
>              if (need_prot[rwx] & ~amr_prot) {
>                  dsisr |= 0x00200000;
>              }
> -            env->spr[SPR_DSISR] = dsisr;
> +            ppc_hash64_set_dsi(cs, env, eaddr, dsisr);
>          }
>          return 1;
>      }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform Benjamin Herrenschmidt
@ 2015-11-19  8:58   ` Stewart Smith
  2015-11-20  8:21   ` David Gibson
  1 sibling, 0 replies; 198+ messages in thread
From: Stewart Smith @ 2015-11-19  8:58 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> --- /dev/null
> +++ b/hw/ppc/pnv.c
> @@ -0,0 +1,600 @@
<snip>
> +static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t initrd_size)
> +{
<snip>
> +    _FDT((fdt_begin_node(fdt, "chosen")));
> +    _FDT((fdt_property(fdt, "linux,initrd-start",
> +                       &start_prop, sizeof(start_prop))));
> +    _FDT((fdt_property(fdt, "linux,initrd-end",
> +                       &end_prop, sizeof(end_prop))));
> +    _FDT((fdt_end_node(fdt)));

(this time replying to list as well)

This misses out the kernel command line, which means -append doesn't work.

This patch fixes that:

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 1787dd19c67a..82d6321228f6 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -379,7 +379,7 @@ static void powernv_create_cpu_node(void *fdt, CPUState *cs, int smt_threads)
     _FDT((fdt_end_node(fdt)));
 }
 
-static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t initrd_size)
+static void *powernv_create_fdt(PnvSystem *sys, const char *kernel_cmdline, uint32_t initrd_base, uint32_t initrd_size)
 {
     void *fdt;
     CPUState *cs;
@@ -423,6 +423,9 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
     g_free(buf);
 
     _FDT((fdt_begin_node(fdt, "chosen")));
+    if (kernel_cmdline) {
+        _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
+    }
     _FDT((fdt_property(fdt, "linux,initrd-start",
                        &start_prop, sizeof(start_prop))));
     _FDT((fdt_property(fdt, "linux,initrd-end",
@@ -781,7 +784,8 @@ static void ppc_powernv_init(MachineState *machine)
             initrd_base = 0;
             initrd_size = 0;
     }
-    fdt = powernv_create_fdt(sys, initrd_base, initrd_size);
+    fdt = powernv_create_fdt(sys, machine->kernel_cmdline,
+                             initrd_base, initrd_size);
     cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
 }
 

^ permalink raw reply related	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s
  2015-11-19  6:11   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-19 10:21     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-19 10:21 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Thu, 2015-11-19 at 17:11 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:20AM +1100, Benjamin Herrenschmidt
> wrote:
> > We don't give them a KVM reg number yet as no current KVM version
> > supports HV mode.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> Looks sane, though I haven't explicitly checked each of the
> implementations against the architecture.

Almost all of those are just "placeholders", i.e., we just read and write
whatever is passed into the SPR array. What might be interesting is to
what extent we actually use the values later on, which happens in bits
and pieces in separate patches, or in many cases, not at all.

I think it might be worth adding to the SPR array a bitmask of the
implemented bits though, so that we force the unimplemented ones to 0
rather than storing them, and possibly warn on attempts to write 1 to
an unimplemented one.

Something to put on the TODO list...

Cheers,
Ben.


> > ---
> >  target-ppc/translate_init.c | 140
> > +++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 137 insertions(+), 3 deletions(-)
> > 
> > diff --git a/target-ppc/translate_init.c b/target-
> > ppc/translate_init.c
> > index 30a03ce..c743eb1 100644
> > --- a/target-ppc/translate_init.c
> > +++ b/target-ppc/translate_init.c
> > @@ -1104,6 +1104,11 @@ static void gen_spr_amr (CPUPPCState *env)
> >                       SPR_NOACCESS, SPR_NOACCESS,
> >                       &spr_read_generic, &spr_write_generic,
> >                       KVM_REG_PPC_UAMOR, 0);
> > +    spr_register_hv(env, SPR_AMOR, "AMOR",
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    &spr_read_generic, &spr_write_generic,
> > +                    0);
> >  #endif /* !CONFIG_USER_ONLY */
> >  }
> >  #endif /* TARGET_PPC64 */
> > @@ -7490,6 +7495,20 @@ static void gen_spr_book3s_dbg(CPUPPCState
> > *env)
> >                       KVM_REG_PPC_DABRX, 0x00000000);
> >  }
> >  
> > +static void gen_spr_book3s_207_dbg(CPUPPCState *env)
> > +{
> > +    spr_register_hv(env, SPR_DAWR, "DAWR",
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    &spr_read_generic, &spr_write_generic,
> > +                    0x00000000);
> > +    spr_register_hv(env, SPR_DAWRX, "DAWRX",
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    &spr_read_generic, &spr_write_generic,
> > +                    0x00000000);
> > +}
> > +
> >  static void gen_spr_970_dbg(CPUPPCState *env)
> >  {
> >      /* Breakpoints */
> > @@ -7654,15 +7673,116 @@ static void
> > gen_spr_power5p_lpar(CPUPPCState *env)
> >      spr_register_kvm(env, SPR_LPCR, "LPCR",
> >                       SPR_NOACCESS, SPR_NOACCESS,
> >                       &spr_read_generic, &spr_write_generic,
> > -                     KVM_REG_PPC_LPCR, 0x00000000);
> > +                     KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
> >  }
> >  
> > +#if !defined(CONFIG_USER_ONLY)
> > +static void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
> > +{
> > +    TCGv hmer = tcg_temp_new();
> > +
> > +    gen_load_spr(hmer, sprn);
> > +    tcg_gen_and_tl(hmer, cpu_gpr[gprn], hmer);
> > +    gen_store_spr(sprn, hmer);
> > +    spr_store_dump_spr(sprn);
> > +    tcg_temp_free(hmer);
> > +}
> > +#endif
> > +
> >  static void gen_spr_book3s_ids(CPUPPCState *env)
> >  {
> > +    /* FIXME: Will need to deal with thread vs core only SPRs */
> > +
> >      /* Processor identification */
> > -    spr_register(env, SPR_PIR, "PIR",
> > +    spr_register_hv(env, SPR_PIR, "PIR",
> >                   SPR_NOACCESS, SPR_NOACCESS,
> > -                 &spr_read_generic, &spr_write_pir,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, NULL,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HID0, "HID0",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_TSCR, "TSCR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HMER, "HMER",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_hmer,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HMEER, "HMEER",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_TFMR, "TFMR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_LPIDR, "LPIDR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HFSCR, "HFSCR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_MMCRC, "MMCRC",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_MMCRH, "MMCRH",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HSPRG0, "HSPRG0",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HSPRG1, "HSPRG1",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HSRR0, "HSRR0",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HSRR1, "HSRR1",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HDAR, "HDAR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HDSISR, "HDSISR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_RMOR, "RMOR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> > +                 0x00000000);
> > +    spr_register_hv(env, SPR_HRMOR, "HRMOR",
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 SPR_NOACCESS, SPR_NOACCESS,
> > +                 &spr_read_generic, &spr_write_generic,
> >                   0x00000000);
> >  }
> >  
> > @@ -7868,6 +7988,17 @@ static void gen_spr_power8_fscr(CPUPPCState
> > *env)
> >                       KVM_REG_PPC_FSCR, initval);
> >  }
> >  
> > +static void gen_spr_power8_rpr(CPUPPCState *env)
> > +{
> > +#if !defined(CONFIG_USER_ONLY)
> > +    spr_register_hv(env, SPR_RPR, "RPR",
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    SPR_NOACCESS, SPR_NOACCESS,
> > +                    &spr_read_generic, &spr_write_generic,
> > +                    0x00000103070F1F3F);
> > +#endif
> > +}
> > +
> >  static void init_proc_book3s_64(CPUPPCState *env, int version)
> >  {
> >      gen_spr_ne_601(env);
> > @@ -7919,9 +8050,12 @@ static void init_proc_book3s_64(CPUPPCState
> > *env, int version)
> >          gen_spr_power8_pmu_user(env);
> >          gen_spr_power8_tm(env);
> >          gen_spr_vtb(env);
> > +        gen_spr_power8_rpr(env);
> >      }
> >      if (version < BOOK3S_CPU_POWER8) {
> >          gen_spr_book3s_dbg(env);
> > +    } else {
> > +        gen_spr_book3s_207_dbg(env);
> >      }
> >  #if !defined(CONFIG_USER_ONLY)
> >      switch (version) {
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation
  2015-11-19  6:19   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-19 10:23     ` Benjamin Herrenschmidt
  2015-11-20  0:26       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-19 10:23 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Thu, 2015-11-19 at 17:19 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:22AM +1100, Benjamin Herrenschmidt
> wrote:
> > XXX This patch needs double checking... It fixed 32-bit userspace
> > but I'm not sure it's right. I wonder whether msr_is_64bit() should
> > be applied to env->msr, not msr, but I need to double check the
> > architecture.
> 
> Hrm, I'm not really sure where I'd look in the arch, but
> msr_is_64bit(env->msr) seems like it would make more sense to me.
> The current logic means that rfi, ostensibly a 32-bit instruction
> will
> have different behaviour depending on the upper bits of SRR1, which
> seems unexpected.

I only just discovered that rfi is actually gone from arch 2.07 :-)

I'll dig a bit more tomorrow.

Cheers,
Ben.

> 
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > ---
> >  target-ppc/excp_helper.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> > index c1d6605..00fae60 100644
> > --- a/target-ppc/excp_helper.c
> > +++ b/target-ppc/excp_helper.c
> > @@ -878,13 +878,13 @@ static inline void do_rfi(CPUPPCState *env,
> > target_ulong nip, target_ulong msr,
> >      CPUState *cs = CPU(ppc_env_get_cpu(env));
> >  
> >  #if defined(TARGET_PPC64)
> > +    msr = msr & msrm;
> >      if (msr_is_64bit(env, msr)) {
> >          nip = (uint64_t)nip;
> > -        msr &= (uint64_t)msrm;
> >      } else {
> >          nip = (uint32_t)nip;
> > -        msr = (uint32_t)(msr & msrm);
> >          if (keep_msrh) {
> > +	    msr &= 0xffffffff;
> >              msr |= env->msr & ~((uint64_t)0xFFFFFFFF);
> >          }
> >      }
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation
  2015-11-19  6:26   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-19 10:26     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-19 10:26 UTC (permalink / raw)
  To: David Gibson; +Cc: Michael Neuling, qemu-ppc, qemu-devel

On Thu, 2015-11-19 at 17:26 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:28AM +1100, Benjamin Herrenschmidt
> wrote:
> > From: Michael Neuling <mikey@neuling.org>
> > 
> > Signed-off-by: Michael Neuling <mikey@neuling.org>
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Looks correct, though my memory of C promotion rules is obviously a
> bit stale, since I'm not immediately seeing why the original was
> wrong.

Well, at least it makes things work :-)

> > ---
> >  target-ppc/translate.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> > index bd5df40..3974cd2 100644
> > --- a/target-ppc/translate.c
> > +++ b/target-ppc/translate.c
> > @@ -4391,7 +4391,7 @@ static void gen_mtmsrd(DisasContext *ctx)
> >          /* Special form that does not need any synchronisation */
> >          TCGv t0 = tcg_temp_new();
> >          tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 <<
> > MSR_RI) | (1 << MSR_EE));
> > -        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~((1 << MSR_RI) | (1 <<
> > MSR_EE)));
> > +        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(target_ulong)((1 <<
> > MSR_RI) | (1 << MSR_EE)));
> >          tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
> >          tcg_temp_free(t0);
> >      } else {
> > @@ -4422,7 +4422,7 @@ static void gen_mtmsr(DisasContext *ctx)
> >          /* Special form that does not need any synchronisation */
> >          TCGv t0 = tcg_temp_new();
> >          tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 <<
> > MSR_RI) | (1 << MSR_EE));
> > -        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~((1 << MSR_RI) | (1 <<
> > MSR_EE)));
> > +        tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(target_ulong)((1 <<
> > MSR_RI) | (1 << MSR_EE)));
> >          tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
> >          tcg_temp_free(t0);
> >      } else {
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model
  2015-11-19  6:44   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-19 10:31     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-19 10:31 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Thu, 2015-11-19 at 17:44 +1100, David Gibson wrote:
> 
> > -    /* new interrupt handler msr */
> > -    new_msr = env->msr & ((target_ulong)1 << MSR_ME);
> > +    /* new interrupt handler msr preserves existing HV and ME unless
> > +     * explicitly overriden
> > +     */
> > +    new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
> 
> Ouch.  The fact that MSR_ME is a bit number, but MSR_HVB is a mask is
> certainly confusing, but that's a pre-existing problem.

That shit bit me more than once indeed, but it's a fix for another day.

 .../...

> > 
> >      case POWERPC_EXCP_HDSI:      /* Hypervisor data storage exception        */
> >          srr0 = SPR_HSRR0;
> >          srr1 = SPR_HSRR1;
> >          new_msr |= (target_ulong)MSR_HVB;
> >          new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
> > +        ail = 0;
> 
> Do you need to set ail explicitly here, given the general ail logic below?

Not on this indeed. I think that's a remnant of how that patch evolved.
We do need to clear unconditionally on other things like machine
checks, I'll give that another sweep.

Cheers,
Ben.


^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation
  2015-11-19 10:23     ` Benjamin Herrenschmidt
@ 2015-11-20  0:26       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-20  0:26 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Thu, 2015-11-19 at 21:23 +1100, Benjamin Herrenschmidt wrote:
> 
> I only just discovered that rfi is actually gone from arch 2.07 :-)
> 
> I'll dig a bit more tomorrow.

Ok, so I had a closer look and tore that stuff apart even more :-)

If you are curious, feel free to check out github. I've removed
the MSR mask completely, I can't figure out what it's supposed
to be about. I've quickly tested 64-bit powernv/pseries, 32-bit
userspace on 64-bit pseries kernel, and 32-bit Mac99 (ubuntu).

Cheers,
Ben.

> Cheers,
> Ben.
> 
> > 
> > > 
> > > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > > ---
> > >  target-ppc/excp_helper.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> > > index c1d6605..00fae60 100644
> > > --- a/target-ppc/excp_helper.c
> > > +++ b/target-ppc/excp_helper.c
> > > @@ -878,13 +878,13 @@ static inline void do_rfi(CPUPPCState *env,
> > > target_ulong nip, target_ulong msr,
> > >      CPUState *cs = CPU(ppc_env_get_cpu(env));
> > >  
> > >  #if defined(TARGET_PPC64)
> > > +    msr = msr & msrm;
> > >      if (msr_is_64bit(env, msr)) {
> > >          nip = (uint64_t)nip;
> > > -        msr &= (uint64_t)msrm;
> > >      } else {
> > >          nip = (uint32_t)nip;
> > > -        msr = (uint32_t)(msr & msrm);
> > >          if (keep_msrh) {
> > > +	    msr &= 0xffffffff;
> > >              msr |= env->msr & ~((uint64_t)0xFFFFFFFF);
> > >          }
> > >      }

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 08/77] ppc: Add number of threads per core to the processor definition
  2015-11-16  5:16   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-20  0:29     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-20  0:29 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Mon, 2015-11-16 at 16:16 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:21AM +1100, Benjamin Herrenschmidt
> wrote:
> > Also use it to clamp the max SMT mode and ensure that the cpu_dt_id
> > are offset by that value in order to preserve consistency with the
> > HW implementations.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

In fact this is broken ;-) All the CPUs without threads get 0 in the new
field, which trips at init time since we can't create 0 threads :-) It
broke, for example, 32-bit stuff. I've fixed that in github.

> > ---
> >  target-ppc/cpu-qom.h        | 1 +
> >  target-ppc/translate_init.c | 8 +++++++-
> >  2 files changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
> > index 6967a80..fef23fd 100644
> > --- a/target-ppc/cpu-qom.h
> > +++ b/target-ppc/cpu-qom.h
> > @@ -68,6 +68,7 @@ typedef struct PowerPCCPUClass {
> >      uint32_t flags;
> >      int bfd_mach;
> >      uint32_t l1_dcache_size, l1_icache_size;
> > +    uint32_t threads_per_core;
> >  #if defined(TARGET_PPC64)
> >      const struct ppc_segment_page_sizes *sps;
> >  #endif
> > diff --git a/target-ppc/translate_init.c b/target-
> > ppc/translate_init.c
> > index c743eb1..1d402e1 100644
> > --- a/target-ppc/translate_init.c
> > +++ b/target-ppc/translate_init.c
> > @@ -8193,6 +8193,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void
> > *data)
> >                   POWERPC_FLAG_BUS_CLK;
> >      pcc->l1_dcache_size = 0x8000;
> >      pcc->l1_icache_size = 0x10000;
> > +    pcc->threads_per_core = 2;
> >  }
> >  
> >  static void powerpc_get_compat(Object *obj, Visitor *v,
> > @@ -8339,6 +8340,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void
> > *data)
> >      pcc->l1_dcache_size = 0x8000;
> >      pcc->l1_icache_size = 0x8000;
> >      pcc->interrupts_big_endian =
> > ppc_cpu_interrupts_big_endian_lpcr;
> > +    pcc->threads_per_core = 4;
> >  }
> >  
> >  static void init_proc_POWER8(CPUPPCState *env)
> > @@ -8419,6 +8421,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void
> > *data)
> >      pcc->l1_dcache_size = 0x8000;
> >      pcc->l1_icache_size = 0x8000;
> >      pcc->interrupts_big_endian =
> > ppc_cpu_interrupts_big_endian_lpcr;
> > +    pcc->threads_per_core = 8;
> >  }
> >  #endif /* defined (TARGET_PPC64) */
> >  
> > @@ -9074,6 +9077,9 @@ static void ppc_cpu_realizefn(DeviceState
> > *dev, Error **errp)
> >  #endif
> >  
> >  #if !defined(CONFIG_USER_ONLY)
> > +    if (max_smt > pcc->threads_per_core) {
> > +        max_smt = pcc->threads_per_core;
> > +    }
> >      if (smp_threads > max_smt) {
> >          error_setg(errp, "Cannot support more than %d threads on
> > PPC with %s",
> >                     max_smt, kvm_enabled() ? "KVM" : "TCG");
> > @@ -9094,7 +9100,7 @@ static void ppc_cpu_realizefn(DeviceState
> > *dev, Error **errp)
> >      }
> >  
> >  #if !defined(CONFIG_USER_ONLY)
> > -    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
> > +    cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * pcc-
> > >threads_per_core
> >          + (cs->cpu_index % smp_threads);
> >  #endif
> >  
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie Benjamin Herrenschmidt
@ 2015-11-20  7:02   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-20  7:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1731 bytes --]

On Wed, Nov 11, 2015 at 11:27:27AM +1100, Benjamin Herrenschmidt wrote:
> Otherwise it will trip on the forms used in recent architecture.
> 
> Ideally, we should have different handlers for different architecture
> levels but our current implementation of TLB flushing is dumb enough
> that this will do for now.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/translate.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 014fe5e..bd5df40 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -9952,8 +9952,10 @@ GEN_HANDLER2(slbmfee, "slbmfee", 0x1F, 0x13, 0x1C, 0x001F0001, PPC_SEGMENT_64B),
>  GEN_HANDLER2(slbmfev, "slbmfev", 0x1F, 0x13, 0x1A, 0x001F0001, PPC_SEGMENT_64B),
>  #endif
>  GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA),
> -GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x03FF0001, PPC_MEM_TLBIE),
> -GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x03FF0001, PPC_MEM_TLBIE),
> +/* XXX Those instructions will need to be handled differently for
> + * different ISA versions */
> +GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
> +GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
>  GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
>  #if defined(TARGET_PPC64)
>  GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x03FFFC01, PPC_SLBI),

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts Benjamin Herrenschmidt
@ 2015-11-20  7:45   ` David Gibson
  2015-11-24  0:44     ` Benjamin Herrenschmidt
  2015-11-24  0:51     ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 198+ messages in thread
From: David Gibson @ 2015-11-20  7:45 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 9188 bytes --]

On Wed, Nov 11, 2015 at 11:27:34AM +1100, Benjamin Herrenschmidt wrote:
> Recent server processors use the Hypervisor Emulation Assistance
> interrupt for illegal instructions and *some* type of SPR accesses.
> 
> Also the code was always generating inval instructions even for priv
> violations due to setting the wrong flags
> 
> Finally, the checking for PR/HV was open coded everywhere.
> 
> This reworks it all, using little helper macros for checking, and
> adding the HV interrupt (which gets converted back to program check
> in the slow path of excp_helper.c on CPUs that don't want it).
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

[snip]
>  static void spr_noaccess(DisasContext *ctx, int gprn, int sprn)
> @@ -4340,7 +4350,7 @@ static inline void gen_op_mfspr(DisasContext *ctx)
>                  printf("Trying to read privileged spr %d (0x%03x) at "
>                         TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>              }
> -            gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> +            gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
>          }
>      } else {
>          /* Not defined */
> @@ -4348,7 +4358,25 @@ static inline void gen_op_mfspr(DisasContext *ctx)
>                   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>          printf("Trying to read invalid spr %d (0x%03x) at "
>                 TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);

So, I'm not 100% following the logic below, but it looks like the
existing code used SPR_NOACCESS to mark things which generated a
privilege exception compared to NULL for things which generated an
invalid instruction exception.  Using that encoding, can you simplify
the logic here?  Alternatively can you use the logic here to avoid the
SPR_NOACCESS encoding?

> -        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +
> +        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
> +         * it can generate a priv, a hv emu or a no-op
> +         */
> +        if (sprn & 0x10) {
> +            if (ctx->pr) {
> +                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +            }
> +        } else {
> +            if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 || sprn == 6) {
> +                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +            }
> +        }
> +#if !defined(CONFIG_USER_ONLY)
> +        /* HV priv */
> +        if (ctx->spr_cb[sprn].hea_read) {
> +            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +        }

If you're in PR mode, and it's an SPR with an hea_read function and
has the 0x10 bit set, won't this call gen_priv_exception twice?

I also see no path here which will call gen_inval_exception(), is that
right?  If you're in HV mode and it's a truly invalid SPRN, isn't that
what you'd want?

> +#endif
>      }
>  }



>  
> @@ -4395,13 +4423,9 @@ static void gen_mtcrf(DisasContext *ctx)
>  #if defined(TARGET_PPC64)
>  static void gen_mtmsrd(DisasContext *ctx)
>  {
> -#if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -#else
> -    if (unlikely(ctx->pr)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -        return;
> -    }
> +    CHK_SV;
> +
> +#if !defined(CONFIG_USER_ONLY)
>      if (ctx->opcode & 0x00010000) {
>          /* Special form that does not need any synchronisation */
>          TCGv t0 = tcg_temp_new();
> @@ -4420,20 +4444,16 @@ static void gen_mtmsrd(DisasContext *ctx)
>          /* Note that mtmsr is not always defined as context-synchronizing */
>          gen_stop_exception(ctx);
>      }
> -#endif
> +#endif /* !defined(CONFIG_USER_ONLY) */
>  }
> -#endif
> +#endif /* defined(TARGET_PPC64) */
>  
>  static void gen_mtmsr(DisasContext *ctx)
>  {
> -#if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -#else
> -    if (unlikely(ctx->pr)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> -        return;
> -    }
> -    if (ctx->opcode & 0x00010000) {
> +    CHK_SV;
> +
> +#if !defined(CONFIG_USER_ONLY)
> +   if (ctx->opcode & 0x00010000) {
>          /* Special form that does not need any synchronisation */
>          TCGv t0 = tcg_temp_new();
>          tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE));
> @@ -4488,7 +4508,7 @@ static void gen_mtspr(DisasContext *ctx)
>                       TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>              printf("Trying to write privileged spr %d (0x%03x) at "
>                     TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
> -            gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> +            gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
>          }
>      } else {
>          /* Not defined */
> @@ -4496,7 +4516,25 @@ static void gen_mtspr(DisasContext *ctx)
>                   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
>          printf("Trying to write invalid spr %d (0x%03x) at "
>                 TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
> -        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +
> +        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
> +         * it can generate a priv, a hv emu or a no-op
> +         */
> +        if (sprn & 0x10) {
> +            if (ctx->pr) {
> +                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +            }
> +        } else {
> +            if (ctx->pr || sprn == 0) {
> +                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +            }
> +        }
> +#if !defined(CONFIG_USER_ONLY)
> +        /* HV priv */
> +        if (ctx->spr_cb[sprn].hea_write) {
> +            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> +        }
> +#endif

Same concerns here as for mfspr.

[snip]
>  /* tlbiel */
>  static void gen_tlbiel(DisasContext *ctx)
>  {
>  #if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> +    GEN_PRIV;
>  #else
> -    if (unlikely(ctx->pr || !ctx->hv)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> -        return;
> -    }
> +    CHK_SV;

You have CHK_SV here, but the original code checks for HV, as does
your new code for tlbia and tlbie, is that right?

[snip]
>  /* tlbsync */
>  static void gen_tlbsync(DisasContext *ctx)
>  {
>  #if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> -#else
> -    if (unlikely(ctx->pr)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> -        return;
> -    }
> +    GEN_PRIV;
> +#else    
> +    CHK_HV;
> +

Old code didn't check for HV mode, but AFAICT it should have, so this
looks correct.

[snip]
> @@ -5941,18 +5921,16 @@ static void gen_mfapidi(DisasContext *ctx)
>  static void gen_tlbiva(DisasContext *ctx)
>  {
>  #if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> +    GEN_PRIV;
>  #else
>      TCGv t0;
> -    if (unlikely(ctx->pr)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> -        return;
> -    }
> +
> +    CHK_SV;

Is this the same thing as tlbivax, or some ancient instruction?  AFAICT the
ISA says tlbivax is hypervisor privileged.

>      t0 = tcg_temp_new();
>      gen_addr_reg_index(ctx, t0);
>      gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
>      tcg_temp_free(t0);
> -#endif
> +#endif /* defined(CONFIG_USER_ONLY) */
>  }

[snip]
>  static void gen_tlbivax_booke206(DisasContext *ctx)
>  {
>  #if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> +    GEN_PRIV;
>  #else
>      TCGv t0;
> -    if (unlikely(ctx->pr)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> -        return;
> -    }
>  
> +    CHK_SV;

ISA says tlbivax is hypervisor privileged when the CPU has a
hypervisor mode, which I guess booke206 probably doesn't?

>      t0 = tcg_temp_new();
>      gen_addr_reg_index(ctx, t0);
> -
>      gen_helper_booke206_tlbivax(cpu_env, t0);
>      tcg_temp_free(t0);
> -#endif
> +#endif /* defined(CONFIG_USER_ONLY) */
>  }
>  
>  static void gen_tlbilx_booke206(DisasContext *ctx)
>  {
>  #if defined(CONFIG_USER_ONLY)
> -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> +    GEN_PRIV;
>  #else
>      TCGv t0;
> -    if (unlikely(ctx->pr)) {
> -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> -        return;
> -    }
>  
> +    CHK_SV;

And apparently hv vs. sv privilege of tlbilx depends on the EPCR
register.  Again, may not be relevant for 2.06.

>      t0 = tcg_temp_new();
>      gen_addr_reg_index(ctx, t0);
>  
> @@ -6672,7 +6574,7 @@ static void gen_tlbilx_booke206(DisasContext *ctx)
>      }
>  
>      tcg_temp_free(t0);
> -#endif
> +#endif /* defined(CONFIG_USER_ONLY) */
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8 Benjamin Herrenschmidt
@ 2015-11-20  7:48   ` David Gibson
  2015-11-24  0:58     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-20  7:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 12981 bytes --]

On Wed, Nov 11, 2015 at 11:27:35AM +1100, Benjamin Herrenschmidt wrote:
> Those instructions are only available in hypervisor real mode and
> allow cache inhibited guarded access to devices in that mode.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/cpu.h            |  4 +++-
>  target-ppc/translate.c      | 56 +++++++++++++++++++++++++++++++++++----------
>  target-ppc/translate_init.c |  6 +++--
>  3 files changed, 51 insertions(+), 15 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 23479b1..3d22a4f 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1913,6 +1913,8 @@ enum {
>      PPC_POPCNTB        = 0x0000000000001000ULL,
>      /*   string load / store                                                 */
>      PPC_STRING         = 0x0000000000002000ULL,
> +    /*   real mode cache inhibited load / store                              */
> +    PPC_CILDST         = 0x0000000000004000ULL,
>  
>      /* Floating-point unit extensions                                        */
>      /*   Optional floating point instructions                                */
> @@ -2027,7 +2029,7 @@ enum {
>                          | PPC_MFAPIDI | PPC_TLBIVA | PPC_TLBIVAX \
>                          | PPC_4xx_COMMON | PPC_40x_ICBT | PPC_RFMCI \
>                          | PPC_RFDI | PPC_DCR | PPC_DCRX | PPC_DCRUX \
> -                        | PPC_POPCNTWD)
> +                        | PPC_POPCNTWD | PPC_CILDST)
>  
>      /* extended type values */
>  
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 3f657b1..4d01fd0 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -189,7 +189,7 @@ struct DisasContext {
>      uint32_t opcode;
>      uint32_t exception;
>      /* Routine used to access memory */
> -    bool pr, hv;
> +    bool pr, hv, dr;
>      int mem_idx;
>      int access_type;
>      /* Translation flags */
> @@ -380,9 +380,11 @@ typedef struct opcode_t {
>  #if defined(CONFIG_USER_ONLY)
>  #define CHK_HV GEN_PRIV
>  #define CHK_SV GEN_PRIV
> +#define CHK_HVDR GEN_PRIV

I'm guessing this is supposed to be CHK_HVRM as below.

>  #else
>  #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
>  #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
> +#define CHK_HVRM do { if (unlikely(ctx->pr || !ctx->hv || ctx->dr)) GEN_PRIV; } while(0)
>  #endif
>  
>  #define CHK_NONE
> @@ -2887,7 +2889,7 @@ static void glue(gen_, name##u)(DisasContext *ctx)
>  }
>  
>  #define GEN_LDUX(name, ldop, opc2, opc3, type)                                \
> -static void glue(gen_, name##ux)(DisasContext *ctx)                                   \
> +static void glue(gen_, name##ux)(DisasContext *ctx)                           \

Extraneous change.

>  {                                                                             \
>      TCGv EA;                                                                  \
>      if (unlikely(rA(ctx->opcode) == 0 ||                                      \
> @@ -2903,18 +2905,23 @@ static void glue(gen_, name##ux)(DisasContext *ctx)
>      tcg_temp_free(EA);                                                        \
>  }
>  
> -#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2)                        \
> +#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2, chk)                   \
>  static void glue(gen_, name##x)(DisasContext *ctx)                            \
>  {                                                                             \
>      TCGv EA;                                                                  \
> +    chk;                                                                      \
>      gen_set_access_type(ctx, ACCESS_INT);                                     \
>      EA = tcg_temp_new();                                                      \
>      gen_addr_reg_index(ctx, EA);                                              \
>      gen_qemu_##ldop(ctx, cpu_gpr[rD(ctx->opcode)], EA);                       \
>      tcg_temp_free(EA);                                                        \
>  }
> +
>  #define GEN_LDX(name, ldop, opc2, opc3, type)                                 \
> -    GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE)
> +    GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE, CHK_NONE)
> +
> +#define GEN_LDX_HVRM(name, ldop, opc2, opc3, type)                            \
> +    GEN_LDX_E(name, ldop, opc2, opc3, type, PPC_NONE, CHK_HVRM)
>  
>  #define GEN_LDS(name, ldop, op, type)                                         \
>  GEN_LD(name, ldop, op | 0x20, type);                                          \
> @@ -2940,6 +2947,12 @@ GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B);
>  /* ldx */
>  GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B);
>  
> +/* CI load/store variants */
> +GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST)
> +GEN_LDX_HVRM(lwzcix, ld32u, 0x15, 0x15, PPC_CILDST)
> +GEN_LDX_HVRM(lhzcix, ld16u, 0x15, 0x19, PPC_CILDST)
> +GEN_LDX_HVRM(lbzcix, ld8u, 0x15, 0x1a, PPC_CILDST)
> +
>  static void gen_ld(DisasContext *ctx)
>  {
>      TCGv EA;
> @@ -3058,10 +3071,11 @@ static void glue(gen_, name##ux)(DisasContext *ctx)
>      tcg_temp_free(EA);                                                        \
>  }
>  
> -#define GEN_STX_E(name, stop, opc2, opc3, type, type2)                        \
> +#define GEN_STX_E(name, stop, opc2, opc3, type, type2, chk)                   \
>  static void glue(gen_, name##x)(DisasContext *ctx)                            \
>  {                                                                             \
>      TCGv EA;                                                                  \
> +    chk;                                                                      \
>      gen_set_access_type(ctx, ACCESS_INT);                                     \
>      EA = tcg_temp_new();                                                      \
>      gen_addr_reg_index(ctx, EA);                                              \
> @@ -3069,7 +3083,10 @@ static void glue(gen_, name##x)(DisasContext *ctx)                            \
>      tcg_temp_free(EA);                                                        \
>  }
>  #define GEN_STX(name, stop, opc2, opc3, type)                                 \
> -    GEN_STX_E(name, stop, opc2, opc3, type, PPC_NONE)
> +    GEN_STX_E(name, stop, opc2, opc3, type, PPC_NONE, CHK_NONE)
> +
> +#define GEN_STX_HVRM(name, stop, opc2, opc3, type)                            \
> +    GEN_STX_E(name, stop, opc2, opc3, type, PPC_NONE, CHK_HVRM)
>  
>  #define GEN_STS(name, stop, op, type)                                         \
>  GEN_ST(name, stop, op | 0x20, type);                                          \
> @@ -3086,6 +3103,10 @@ GEN_STS(stw, st32, 0x04, PPC_INTEGER);
>  #if defined(TARGET_PPC64)
>  GEN_STUX(std, st64, 0x15, 0x05, PPC_64B);
>  GEN_STX(std, st64, 0x15, 0x04, PPC_64B);
> +GEN_STX_HVRM(stdcix, st64, 0x15, 0x1f, PPC_CILDST)
> +GEN_STX_HVRM(stwcix, st32, 0x15, 0x1c, PPC_CILDST)
> +GEN_STX_HVRM(sthcix, st16, 0x15, 0x1d, PPC_CILDST)
> +GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST)
>  
>  static void gen_std(DisasContext *ctx)
>  {
> @@ -3171,7 +3192,7 @@ static inline void gen_qemu_ld64ur(DisasContext *ctx, TCGv arg1, TCGv arg2)
>      TCGMemOp op = MO_Q | (ctx->default_tcg_memop_mask ^ MO_BSWAP);
>      tcg_gen_qemu_ld_i64(arg1, arg2, ctx->mem_idx, op);
>  }
> -GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX);
> +GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE);
>  #endif  /* TARGET_PPC64 */
>  
>  /* sthbrx */
> @@ -3197,7 +3218,7 @@ static inline void gen_qemu_st64r(DisasContext *ctx, TCGv arg1, TCGv arg2)
>      TCGMemOp op = MO_Q | (ctx->default_tcg_memop_mask ^ MO_BSWAP);
>      tcg_gen_qemu_st_i64(arg1, arg2, ctx->mem_idx, op);
>  }
> -GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX);
> +GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE);
>  #endif  /* TARGET_PPC64 */
>  
>  /***                    Integer load and store multiple                    ***/
> @@ -10156,7 +10177,7 @@ GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type),
>  GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x00000000, type),
>  #define GEN_LDUX(name, ldop, opc2, opc3, type)                                \
>  GEN_HANDLER(name##ux, 0x1F, opc2, opc3, 0x00000001, type),
> -#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2)                        \
> +#define GEN_LDX_E(name, ldop, opc2, opc3, type, type2, chk)                   \
>  GEN_HANDLER_E(name##x, 0x1F, opc2, opc3, 0x00000001, type, type2),
>  #define GEN_LDS(name, ldop, op, type)                                         \
>  GEN_LD(name, ldop, op | 0x20, type)                                           \
> @@ -10173,7 +10194,13 @@ GEN_LDUX(lwa, ld32s, 0x15, 0x0B, PPC_64B)
>  GEN_LDX(lwa, ld32s, 0x15, 0x0A, PPC_64B)
>  GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B)
>  GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B)
> -GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX)
> +GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE)
> +
> +/* HV/P7 and later only */
> +GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST)
> +GEN_LDX_HVRM(lwzcix, ld32u, 0x15, 0x18, PPC_CILDST)
> +GEN_LDX_HVRM(lhzcix, ld16u, 0x15, 0x19, PPC_CILDST)
> +GEN_LDX_HVRM(lbzcix, ld8u, 0x15, 0x1a, PPC_CILDST)
>  #endif
>  GEN_LDX(lhbr, ld16ur, 0x16, 0x18, PPC_INTEGER)
>  GEN_LDX(lwbr, ld32ur, 0x16, 0x10, PPC_INTEGER)
> @@ -10189,7 +10216,7 @@ GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type),
>  GEN_HANDLER(stop##u, opc, 0xFF, 0xFF, 0x00000000, type),
>  #define GEN_STUX(name, stop, opc2, opc3, type)                                \
>  GEN_HANDLER(name##ux, 0x1F, opc2, opc3, 0x00000001, type),
> -#define GEN_STX_E(name, stop, opc2, opc3, type, type2)                        \
> +#define GEN_STX_E(name, stop, opc2, opc3, type, type2, chk)                   \
>  GEN_HANDLER_E(name##x, 0x1F, opc2, opc3, 0x00000001, type, type2),
>  #define GEN_STS(name, stop, op, type)                                         \
>  GEN_ST(name, stop, op | 0x20, type)                                           \
> @@ -10203,7 +10230,11 @@ GEN_STS(stw, st32, 0x04, PPC_INTEGER)
>  #if defined(TARGET_PPC64)
>  GEN_STUX(std, st64, 0x15, 0x05, PPC_64B)
>  GEN_STX(std, st64, 0x15, 0x04, PPC_64B)
> -GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX)
> +GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE)
> +GEN_STX_HVRM(stdcix, st64, 0x15, 0x1f, PPC_CILDST)
> +GEN_STX_HVRM(stwcix, st32, 0x15, 0x1c, PPC_CILDST)
> +GEN_STX_HVRM(sthcix, st16, 0x15, 0x1d, PPC_CILDST)
> +GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST)
>  #endif
>  GEN_STX(sthbr, st16r, 0x16, 0x1C, PPC_INTEGER)
>  GEN_STX(stwbr, st32r, 0x16, 0x14, PPC_INTEGER)
> @@ -11369,6 +11400,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>      ctx.spr_cb = env->spr_cb;
>      ctx.pr = msr_pr;
>      ctx.mem_idx = env->dmmu_idx;
> +    ctx.dr = msr_dr;
>  #if !defined(CONFIG_USER_ONLY)
>      ctx.hv = !msr_pr && (msr_hv || !env->has_hv_mode);
>  #endif
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 5210b25..8d82bc8 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8320,7 +8320,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>                         PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
>                         PPC_64B | PPC_64H | PPC_ALTIVEC |
>                         PPC_SEGMENT_64B | PPC_SLBI |
> -                       PPC_POPCNTB | PPC_POPCNTWD;
> +                       PPC_POPCNTB | PPC_POPCNTWD |
> +                       PPC_CILDST;
>      pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
>                          PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
>                          PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
> @@ -8397,7 +8398,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>                         PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
>                         PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
>                         PPC_SEGMENT_64B | PPC_SLBI |
> -                       PPC_POPCNTB | PPC_POPCNTWD;
> +                       PPC_POPCNTB | PPC_POPCNTWD |
> +                       PPC_CILDST;
>      pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX |
>                          PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
>                          PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool Benjamin Herrenschmidt
@ 2015-11-20  7:49   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-20  7:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3556 bytes --]

On Wed, Nov 11, 2015 at 11:27:36AM +1100, Benjamin Herrenschmidt wrote:
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>


> ---
>  target-ppc/translate.c | 39 +++++++++++++++++++--------------------
>  1 file changed, 19 insertions(+), 20 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 4d01fd0..a5ab2eb 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -189,21 +189,20 @@ struct DisasContext {
>      uint32_t opcode;
>      uint32_t exception;
>      /* Routine used to access memory */
> -    bool pr, hv, dr;
> +    bool pr, hv, dr, le_mode;
>      int mem_idx;
>      int access_type;
>      /* Translation flags */
> -    int le_mode;
>      TCGMemOp default_tcg_memop_mask;
>  #if defined(TARGET_PPC64)
> -    int sf_mode;
> -    int has_cfar;
> +    bool sf_mode;
> +    bool has_cfar;
>  #endif
> -    int fpu_enabled;
> -    int altivec_enabled;
> -    int vsx_enabled;
> -    int spe_enabled;
> -    int tm_enabled;
> +    bool fpu_enabled;
> +    bool altivec_enabled;
> +    bool vsx_enabled;
> +    bool spe_enabled;
> +    bool tm_enabled;
>      ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
>      int singlestep_enabled;
>      uint64_t insns_flags;
> @@ -380,7 +379,7 @@ typedef struct opcode_t {
>  #if defined(CONFIG_USER_ONLY)
>  #define CHK_HV GEN_PRIV
>  #define CHK_SV GEN_PRIV
> -#define CHK_HVDR GEN_PRIV
> +#define CHK_HVRM GEN_PRIV
>  #else
>  #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
>  #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
> @@ -11407,31 +11406,31 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>      ctx.insns_flags = env->insns_flags;
>      ctx.insns_flags2 = env->insns_flags2;
>      ctx.access_type = -1;
> -    ctx.le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
> +    ctx.le_mode = !!(env->hflags & (1 << MSR_LE));
>      ctx.default_tcg_memop_mask = ctx.le_mode ? MO_LE : MO_BE;
>  #if defined(TARGET_PPC64)
>      ctx.sf_mode = msr_is_64bit(env, env->msr);
>      ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
>  #endif
> -    ctx.fpu_enabled = msr_fp;
> +    ctx.fpu_enabled = !!msr_fp;
>      if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
> -        ctx.spe_enabled = msr_spe;
> +        ctx.spe_enabled = !!msr_spe;
>      else
> -        ctx.spe_enabled = 0;
> +        ctx.spe_enabled = false;
>      if ((env->flags & POWERPC_FLAG_VRE) && msr_vr)
> -        ctx.altivec_enabled = msr_vr;
> +        ctx.altivec_enabled = !!msr_vr;
>      else
> -        ctx.altivec_enabled = 0;
> +        ctx.altivec_enabled = false;
>      if ((env->flags & POWERPC_FLAG_VSX) && msr_vsx) {
> -        ctx.vsx_enabled = msr_vsx;
> +        ctx.vsx_enabled = !!msr_vsx;
>      } else {
> -        ctx.vsx_enabled = 0;
> +        ctx.vsx_enabled = false;
>      }
>  #if defined(TARGET_PPC64)
>      if ((env->flags & POWERPC_FLAG_TM) && msr_tm) {
> -        ctx.tm_enabled = msr_tm;
> +        ctx.tm_enabled = !!msr_tm;
>      } else {
> -        ctx.tm_enabled = 0;
> +        ctx.tm_enabled = false;
>      }
>  #endif
>      if ((env->flags & POWERPC_FLAG_SE) && msr_se)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 24/77] ppc: Move exception generation code out of line
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 24/77] ppc: Move exception generation code out of line Benjamin Herrenschmidt
@ 2015-11-20  7:53   ` David Gibson
  2015-11-24  0:59     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-20  7:53 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2221 bytes --]

On Wed, Nov 11, 2015 at 11:27:37AM +1100, Benjamin Herrenschmidt wrote:
> There's no point inlining this, if you hit the exception case you exit
> anyway,

That doesn't quite seem relevant - IIUC this is affecting inlining in
the code generation path, rather than the code execution path.

> and not inlining saves about 100K of code size (and cache
> footprint).

That sounds like a win, though.

> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/translate.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index a5ab2eb..ac62942 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -279,7 +279,8 @@ void gen_update_current_nip(void *opaque)
>      tcg_gen_movi_tl(cpu_nip, ctx->nip);
>  }
>  
> -static inline void gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error)
> +static void __attribute__((noinline))
> +gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error)

I thought we generally avoided bare gcc attributes in qemu, but I
don't see a helper macro for it and I do see it used in a few other
places, so I guess it's ok.

>  {
>      TCGv_i32 t0, t1;
>      if (ctx->exception == POWERPC_EXCP_NONE) {
> @@ -293,7 +294,8 @@ static inline void gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t
>      ctx->exception = (excp);
>  }
>  
> -static inline void gen_exception(DisasContext *ctx, uint32_t excp)
> +static void __attribute__((noinline))
> +gen_exception(DisasContext *ctx, uint32_t excp)
>  {
>      TCGv_i32 t0;
>      if (ctx->exception == POWERPC_EXCP_NONE) {
> @@ -305,7 +307,8 @@ static inline void gen_exception(DisasContext *ctx, uint32_t excp)
>      ctx->exception = (excp);
>  }
>  
> -static inline void gen_debug_exception(DisasContext *ctx)
> +static void __attribute__((noinline))
> +gen_debug_exception(DisasContext *ctx)
>  {
>      TCGv_i32 t0;
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions Benjamin Herrenschmidt
@ 2015-11-20  8:06   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-20  8:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 16269 bytes --]

On Wed, Nov 11, 2015 at 11:27:38AM +1100, Benjamin Herrenschmidt wrote:
> This adds the ISA 2.06 and later power management instructions
> (doze, nap, sleep and rvwinkle) and associated wakeup cause testing
> in LPCR
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Looks fine, though I haven't checked against the ISA in detail.

> ---
>  target-ppc/cpu.h            | 26 ++++++++++++-
>  target-ppc/excp_helper.c    | 59 +++++++++++++++++++++++++++++
>  target-ppc/helper.h         |  1 +
>  target-ppc/translate.c      | 66 ++++++++++++++++++++++++++++++++
>  target-ppc/translate_init.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-
>  5 files changed, 241 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 3d22a4f..a7236cf 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -300,6 +300,15 @@ enum {
>  };
>  
>  /*****************************************************************************/
> +/* PM instructions */
> +typedef enum {
> +    PPC_PM_DOZE,
> +    PPC_PM_NAP,
> +    PPC_PM_SLEEP,
> +    PPC_PM_RVWINKLE,
> +} powerpc_pm_insn_t;
> +
> +/*****************************************************************************/
>  /* Input pins model                                                          */
>  typedef enum powerpc_input_t powerpc_input_t;
>  enum powerpc_input_t {
> @@ -490,6 +499,14 @@ struct ppc_slb_t {
>  #define LPCR_LPES1        (1ull << (63-61))
>  #define LPCR_AIL_SHIFT    (63-40)      /* Alternate interrupt location */
>  #define LPCR_AIL          (3ull << LPCR_AIL_SHIFT)
> +#define LPCR_P7_PECE0     (1ull << (63-49))
> +#define LPCR_P7_PECE1     (1ull << (63-50))
> +#define LPCR_P7_PECE2     (1ull << (63-51))
> +#define LPCR_P8_PECE0     (1ull << (63-47))
> +#define LPCR_P8_PECE1     (1ull << (63-48))
> +#define LPCR_P8_PECE2     (1ull << (63-49))
> +#define LPCR_P8_PECE3     (1ull << (63-50))
> +#define LPCR_P8_PECE4     (1ull << (63-51))
>  
>  #define msr_sf   ((env->msr >> MSR_SF)   & 1)
>  #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
> @@ -1126,6 +1143,11 @@ struct CPUPPCState {
>       * instructions and SPRs are diallowed if MSR:HV is 0
>       */
>      bool has_hv_mode;
> +    /* On P7/P8, set when in PM state, we need to handle resume
> +     * in a special way (such as routing some resume causes to
> +     * 0x100), so flag this here.
> +     */
> +    bool in_pm_state;
>  #endif
>  
>      /* Those resources are used only during code translation */
> @@ -2069,6 +2091,8 @@ enum {
>      PPC2_FP_CVT_S64    = 0x0000000000010000ULL,
>      /* Transactional Memory (ISA 2.07, Book II)                              */
>      PPC2_TM            = 0x0000000000020000ULL,
> +    /* Server PM instructgions (ISA 2.06, Book III)                          */
> +    PPC2_PM_ISA206     = 0x0000000000040000ULL,
>  
>  #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
>                          PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
> @@ -2076,7 +2100,7 @@ enum {
>                          PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
>                          PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
>                          PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
> -                        PPC2_FP_CVT_S64 | PPC2_TM)
> +                        PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206)
>  };
>  
>  /*****************************************************************************/
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 80a70f4..3f77df7 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -100,6 +100,44 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      asrr0 = -1;
>      asrr1 = -1;
>  
> +    /* check for special resume at 0x100 from doze/nap/sleep/winkle on P7/P8 */
> +    if (env->in_pm_state) {
> +        env->in_pm_state = false;
> +
> +        /* Pretend to be returning from doze always as we don't lose state */
> +        msr |= (0x1ull << (63 - 47));
> +
> +        /* Non-machine check are routed to 0x100 with a wakeup cause
> +         * encoded in SRR1
> +         */
> +        if (excp != POWERPC_EXCP_MCHECK) {
> +            switch(excp) {
> +            case POWERPC_EXCP_RESET:
> +                msr |= 0x4ull << (63-45);
> +                break;
> +            case POWERPC_EXCP_EXTERNAL:
> +                msr |= 0x8ull << (63-45);
> +                break;
> +            case POWERPC_EXCP_DECR:
> +                msr |= 0x6ull << (63-45);
> +                break;
> +            case POWERPC_EXCP_SDOOR:
> +                msr |= 0x5ull << (63-45);
> +                break;
> +            case POWERPC_EXCP_SDOOR_HV:
> +                msr |= 0x3ull << (63-45);
> +                break;
> +            case POWERPC_EXCP_HV_MAINT:
> +                msr |= 0xaull << (63-45);
> +                break;
> +            default:
> +                cpu_abort(cs, "Unsupported exception %d in Power Save mode\n",
> +                          excp);
> +            }
> +            excp = POWERPC_EXCP_RESET;
> +        }
> +    }
> +
>      /* Exception targetting modifiers
>       *
>       * LPES0 is supported on POWER7/8
> @@ -898,6 +936,27 @@ void helper_store_msr(CPUPPCState *env, target_ulong val)
>      }
>  }
>  
> +#if defined(TARGET_PPC64)
> +void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn)
> +{
> +    CPUState *cs;
> +
> +    cs = CPU(ppc_env_get_cpu(env));
> +    cs->halted = 1;
> +    env->in_pm_state = true;
> +
> +    /* Technically, nap doesn't set EE, but if we don't set it
> +     * then ppc_hw_interrupt() won't deliver. We could add some
> +     * other tests there based on LPCR but it's simpler to just
> +     * whack EE in. It will be cleared by the 0x100 at wakeup
> +     * anyway. It will still be observable by the guest in SRR1
> +     * but this doesn't seem to be a problem.
> +     */
> +    env->msr |= (1ull << MSR_EE);
> +    helper_raise_exception(env, EXCP_HLT);
> +}
> +#endif /* defined(TARGET_PPC64) */
> +
>  static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr,
>                            target_ulong msrm, int keep_msrh)
>  {
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index ff2d50b..8292dd8 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -13,6 +13,7 @@ DEF_HELPER_1(rfci, void, env)
>  DEF_HELPER_1(rfdi, void, env)
>  DEF_HELPER_1(rfmci, void, env)
>  #if defined(TARGET_PPC64)
> +DEF_HELPER_2(pminsn, void, env, i32)
>  DEF_HELPER_1(rfid, void, env)
>  DEF_HELPER_1(hrfid, void, env)
>  #endif
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index ac62942..f76a0c3 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3567,6 +3567,68 @@ static void gen_wait(DisasContext *ctx)
>      gen_exception_err(ctx, EXCP_HLT, 1);
>  }
>  
> +#if defined(TARGET_PPC64)
> +static void gen_doze(DisasContext *ctx)
> +{
> +#if defined(CONFIG_USER_ONLY)
> +    GEN_PRIV;
> +#else
> +    TCGv_i32 t;
> +
> +    CHK_HV;
> +    t = tcg_const_i32(PPC_PM_DOZE);
> +    gen_helper_pminsn(cpu_env, t);
> +    tcg_temp_free_i32(t);
> +    gen_stop_exception(ctx);
> +#endif /* defined(CONFIG_USER_ONLY) */
> +}
> +
> +static void gen_nap(DisasContext *ctx)
> +{
> +#if defined(CONFIG_USER_ONLY)
> +    GEN_PRIV;
> +#else
> +    TCGv_i32 t;
> +
> +    CHK_HV;
> +    t = tcg_const_i32(PPC_PM_NAP);
> +    gen_helper_pminsn(cpu_env, t);
> +    tcg_temp_free_i32(t);
> +    gen_stop_exception(ctx);
> +#endif /* defined(CONFIG_USER_ONLY) */
> +}
> +
> +static void gen_sleep(DisasContext *ctx)
> +{
> +#if defined(CONFIG_USER_ONLY)
> +    GEN_PRIV;
> +#else
> +    TCGv_i32 t;
> +
> +    CHK_HV;
> +    t = tcg_const_i32(PPC_PM_SLEEP);
> +    gen_helper_pminsn(cpu_env, t);
> +    tcg_temp_free_i32(t);
> +    gen_stop_exception(ctx);
> +#endif /* defined(CONFIG_USER_ONLY) */
> +}
> +
> +static void gen_rvwinkle(DisasContext *ctx)
> +{
> +#if defined(CONFIG_USER_ONLY)
> +    GEN_PRIV;
> +#else
> +    TCGv_i32 t;
> +
> +    CHK_HV;
> +    t = tcg_const_i32(PPC_PM_RVWINKLE);
> +    gen_helper_pminsn(cpu_env, t);
> +    tcg_temp_free_i32(t);
> +    gen_stop_exception(ctx);
> +#endif /* defined(CONFIG_USER_ONLY) */
> +}
> +#endif /* #if defined(TARGET_PPC64) */
> +
>  /***                         Floating-point load                           ***/
>  #define GEN_LDF(name, ldop, opc, type)                                        \
>  static void glue(gen_, name)(DisasContext *ctx)                                       \
> @@ -9828,6 +9890,10 @@ GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER),
>  GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW),
>  #if defined(TARGET_PPC64)
>  GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B),
> +GEN_HANDLER_E(doze, 0x13, 0x12, 0x0c, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
> +GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
> +GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
> +GEN_HANDLER_E(rvwinkle, 0x13, 0x12, 0x0f, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206),
>  GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H),
>  #endif
>  GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW),
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 8d82bc8..8a1ce85 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8297,10 +8297,45 @@ static bool ppc_pvr_match_power7(PowerPCCPUClass *pcc, uint32_t pvr)
>      return false;
>  }
>  
> +static bool cpu_has_work_POWER7(CPUState *cs)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (cs->halted) {
> +        if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
> +            return false;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P7_PECE0)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P7_PECE1)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HMI)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P7_PECE2)) {
> +            return true;
> +        }
> +        if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) {
> +            return true;
> +        }
> +        return false;
> +    } else {
> +        return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
> +    }
> +}
> +
>  POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
>      PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
> +    CPUClass *cc = CPU_CLASS(oc);
>  
>      dc->fw_name = "PowerPC,POWER7";
>      dc->desc = "POWER7";
> @@ -8309,6 +8344,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>      pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
>      pcc->init_proc = init_proc_POWER7;
>      pcc->check_pow = check_pow_nocheck;
> +    cc->has_work = cpu_has_work_POWER7;
>      pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
>                         PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
>                         PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
> @@ -8325,7 +8361,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
>      pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
>                          PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
>                          PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
> -                        PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64;
> +                        PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64 |
> +                        PPC2_PM_ISA206;
>      pcc->msr_mask = (1ull << MSR_SF) |
>                      (1ull << MSR_VR) |
>                      (1ull << MSR_VSX) |
> @@ -8375,10 +8412,53 @@ static bool ppc_pvr_match_power8(PowerPCCPUClass *pcc, uint32_t pvr)
>      return false;
>  }
>  
> +static bool cpu_has_work_POWER8(CPUState *cs)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (cs->halted) {
> +        if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
> +            return false;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P8_PECE2)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P8_PECE3)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_MCK)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HMI)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P8_PECE4)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DOORBELL)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P8_PECE0)) {
> +            return true;
> +        }
> +        if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HDOORBELL)) &&
> +            (env->spr[SPR_LPCR] & LPCR_P8_PECE1)) {
> +            return true;
> +        }
> +        if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) {
> +            return true;
> +        }
> +        return false;
> +    } else {
> +        return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
> +    }
> +}
> +
>  POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
>      PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
> +    CPUClass *cc = CPU_CLASS(oc);
>  
>      dc->fw_name = "PowerPC,POWER8";
>      dc->desc = "POWER8";
> @@ -8387,6 +8467,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>      pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
>      pcc->init_proc = init_proc_POWER8;
>      pcc->check_pow = check_pow_nocheck;
> +    cc->has_work = cpu_has_work_POWER8;
>      pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
>                         PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
>                         PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
> @@ -8406,7 +8487,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>                          PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
>                          PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
>                          PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
> -                        PPC2_TM;
> +                        PPC2_TM | PPC2_PM_ISA206;
>      pcc->msr_mask = (1ull << MSR_SF) |
>                      (1ull << MSR_SHV) |
>  		    (1ull << MSR_TM) |
> @@ -8464,6 +8545,13 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
>      lpcr->default_value &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV);
>      lpcr->default_value |= LPCR_LPES0 | LPCR_LPES1;
>  
> +    /* P7 and P8 has slightly different PECE bits, mostly because P8 adds
> +     * bit 47 and 48 which are reserved on P7. Here we set them all, which
> +     * will work as expected for both implementations
> +     */
> +    lpcr->default_value |= LPCR_P8_PECE0 | LPCR_P8_PECE1 | LPCR_P8_PECE2 |
> +                           LPCR_P8_PECE3 | LPCR_P8_PECE4;
> +
>      /* We should be followed by a CPU reset but update the active value
>       * just in case...
>       */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform Benjamin Herrenschmidt
  2015-11-19  8:58   ` [Qemu-devel] [Qemu-ppc] " Stewart Smith
@ 2015-11-20  8:21   ` David Gibson
  2015-11-24  1:45     ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-11-20  8:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 26092 bytes --]

On Wed, Nov 11, 2015 at 11:27:39AM +1100, Benjamin Herrenschmidt wrote:
> No devices yet, not even an interrupt controller, just to get
> started.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  default-configs/ppc64-softmmu.mak |   1 +
>  hw/ppc/Makefile.objs              |   2 +
>  hw/ppc/pnv.c                      | 600 ++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/pnv.h              |  36 +++
>  4 files changed, 639 insertions(+)
>  create mode 100644 hw/ppc/pnv.c
>  create mode 100644 include/hw/ppc/pnv.h

Many of my comments below may be made irrelevant by later patches in
the series.

> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index bb71b23..96574c8 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -40,6 +40,7 @@ CONFIG_I8259=y
>  CONFIG_XILINX=y
>  CONFIG_XILINX_ETHLITE=y
>  CONFIG_PSERIES=y
> +CONFIG_POWERNV=y
>  CONFIG_PREP=y
>  CONFIG_MAC=y
>  CONFIG_E500=y
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index c1ffc77..cd74c96 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -4,6 +4,8 @@ obj-y += ppc.o ppc_booke.o
>  obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
>  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> +# IBM PowerNV
> +obj-$(CONFIG_POWERNV) += pnv.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> new file mode 100644
> index 0000000..e68c9b1
> --- /dev/null
> +++ b/hw/ppc/pnv.c
> @@ -0,0 +1,600 @@
> +/*
> + * QEMU PowerPC PowerNV model
> + *
> + * Copyright (c) 2004-2007 Fabrice Bellard
> + * Copyright (c) 2007 Jocelyn Mayer
> + * Copyright (c) 2010 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + *
> + */
> +#include "sysemu/sysemu.h"
> +#include "hw/hw.h"
> +#include "hw/fw-path-provider.h"
> +#include "elf.h"
> +#include "net/net.h"
> +#include "sysemu/block-backend.h"
> +#include "sysemu/cpus.h"
> +#include "sysemu/kvm.h"
> +#include "sysemu/numa.h"
> +#include "kvm_ppc.h"
> +#include "mmu-hash64.h"
> +#include "qom/cpu.h"
> +
> +#include "hw/boards.h"
> +#include "hw/ppc/ppc.h"
> +#include "hw/ppc/pnv.h"
> +#include "hw/loader.h"
> +
> +#include "exec/address-spaces.h"
> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "trace.h"
> +#include "hw/nmi.h"
> +
> +#include "hw/compat.h"
> +
> +#include <libfdt.h>
> +
> +#define FDT_ADDR                0x01000000
> +#define FDT_MAX_SIZE            0x00100000
> +#define FW_MAX_SIZE             0x00400000
> +#define FW_FILE_NAME            "skiboot.lid"
> +#define KERNEL_FILE_NAME        "skiroot.lid"
> +#define KERNEL_LOAD_ADDR        0x20000000
> +
> +#define TIMEBASE_FREQ           512000000ULL
> +
> +#define MAX_CPUS                255
> +
> +#define PHANDLE_XICP            0x00001111
> +
> +typedef struct sPowerNVMachineState sPowerNVMachineState;
> +
> +#define TYPE_POWERNV_MACHINE      "powernv-machine"
> +#define POWERNV_MACHINE(obj) \
> +    OBJECT_CHECK(sPowerNVMachineState, (obj), TYPE_POWERNV_MACHINE)
> +
> +/**
> + * sPowerNVMachineState:
> + */
> +struct sPowerNVMachineState {
> +    /*< private >*/
> +    MachineState parent_obj;
> +    PnvSystem sys;
> +};
> +
> +static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
> +                                     size_t maxsize)
> +{
> +    size_t maxcells = maxsize / sizeof(uint32_t);
> +    int i, j, count;
> +    uint32_t *p = prop;
> +
> +    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
> +        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
> +
> +        if (!sps->page_shift) {
> +            break;
> +        }
> +        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
> +            if (sps->enc[count].page_shift == 0) {
> +                break;
> +            }
> +        }
> +        if ((p - prop) >= (maxcells - 3 - count * 2)) {
> +            break;
> +        }
> +        *(p++) = cpu_to_be32(sps->page_shift);
> +        *(p++) = cpu_to_be32(sps->slb_enc);
> +        *(p++) = cpu_to_be32(count);
> +        for (j = 0; j < count; j++) {
> +            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
> +            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
> +        }
> +    }
> +
> +    return (p - prop) * sizeof(uint32_t);
> +}
> +
> +#define _FDT(exp) \
> +    do { \
> +        int ret = (exp);                                           \
> +        if (ret < 0) {                                             \
> +            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
> +                    #exp, fdt_strerror(ret));                      \
> +            exit(1);                                               \
> +        }                                                          \
> +    } while (0)

We should probably make a file where helper routines used by both
spapr.c and pnv.c can live.

> +
> +static void powernv_populate_memory_node(void *fdt, int nodeid, hwaddr start,
> +                                         hwaddr size)
> +{
> +    /* Probablly bogus, need to match with what's going on in CPU nodes */
> +    uint32_t chip_id[] = {
> +        cpu_to_be32(0x0), cpu_to_be32(nodeid)
> +    };
> +    char *mem_name;
> +    uint64_t mem_reg_property[2];
> +
> +    mem_reg_property[0] = cpu_to_be64(start);
> +    mem_reg_property[1] = cpu_to_be64(size);
> +
> +    mem_name = g_strdup_printf("memory@"TARGET_FMT_lx, start);
> +    _FDT((fdt_begin_node(fdt, mem_name)));
> +    g_free(mem_name);
> +    _FDT((fdt_property_string(fdt, "device_type", "memory")));
> +    _FDT((fdt_property(fdt, "reg", mem_reg_property,
> +                       sizeof(mem_reg_property))));
> +    _FDT((fdt_property(fdt, "ibm,chip-id", chip_id, sizeof(chip_id))));
> +    _FDT((fdt_end_node(fdt)));
> +}
> +
> +static int powernv_populate_memory(void *fdt)
> +{
> +    hwaddr mem_start, node_size;
> +    int i, nb_nodes = nb_numa_nodes;
> +    NodeInfo *nodes = numa_info;
> +    NodeInfo ramnode;
> +
> +    /* No NUMA nodes, assume there is just one node with whole RAM */
> +    if (!nb_numa_nodes) {
> +        nb_nodes = 1;
> +        ramnode.node_mem = ram_size;
> +        nodes = &ramnode;
> +    }
> +
> +    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
> +        if (!nodes[i].node_mem) {
> +            continue;
> +        }
> +        if (mem_start >= ram_size) {
> +            node_size = 0;
> +        } else {
> +            node_size = nodes[i].node_mem;
> +            if (node_size > ram_size - mem_start) {
> +                node_size = ram_size - mem_start;
> +            }
> +        }
> +        for ( ; node_size; ) {
> +            hwaddr sizetmp = pow2floor(node_size);
> +
> +            /* mem_start != 0 here */
> +            if (ctzl(mem_start) < ctzl(sizetmp)) {
> +                sizetmp = 1ULL << ctzl(mem_start);
> +            }
> +
> +            powernv_populate_memory_node(fdt, i, mem_start, sizetmp);
> +            node_size -= sizetmp;
> +            mem_start += sizetmp;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void powernv_create_cpu_node(void *fdt, CPUState *cs, int smt_threads)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    uint32_t servers_prop[smt_threads];
> +    uint32_t gservers_prop[smt_threads * 2];
> +    int i, index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    char *nodename;
> +
> +    if ((index % smt_threads) != 0) {
> +        return;
> +    }
> +
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
> +
> +    _FDT((fdt_begin_node(fdt, nodename)));
> +
> +    g_free(nodename);
> +
> +    _FDT((fdt_property_cell(fdt, "reg", index)));
> +    _FDT((fdt_property_string(fdt, "device_type", "cpu")));
> +
> +    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
> +                            env->dcache_line_size)));
> +    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
> +                            env->dcache_line_size)));
> +    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
> +                            env->icache_line_size)));
> +    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
> +                            env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");

Hmm (note to self): we should probably change a bunch of these, in
both spapr.c and pnv.c, from explicit fprintf() calls to the modern
error_report() and similar.

> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> +    }
> +
> +    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
> +    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_property_string(fdt, "status", "okay")));
> +    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
> +                           segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
> +                           page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    /* XXX Just a hack for now */
> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", 0)));
> +
> +    if (cpu->cpu_version) {
> +        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
> +    }
> +
> +    /* Build interrupt servers and gservers properties */
> +    for (i = 0; i < smt_threads; i++) {
> +        servers_prop[i] = cpu_to_be32(index + i);
> +        /* Hack, direct the group queues back to cpu 0 */
> +        gservers_prop[i*2] = cpu_to_be32(index + i);
> +        gservers_prop[i*2 + 1] = 0;
> +    }
> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> +                       servers_prop, sizeof(servers_prop))));
> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> +                       gservers_prop, sizeof(gservers_prop))));
> +
> +    _FDT((fdt_end_node(fdt)));
> +}
> +
> +static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t initrd_size)
> +{

So... does it make sense for qemu to create a device tree for powernv,
rather than leaving it to Opal?

> +    void *fdt;
> +    CPUState *cs;
> +    int smt = kvmppc_smt_threads();
> +    uint32_t start_prop = cpu_to_be32(initrd_base);
> +    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
> +    char *buf;
> +    const char plat_compat[] = "qemu,powernv\0ibm,powernv";
> +
> +    fdt = g_malloc0(FDT_MAX_SIZE);
> +    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
> +    _FDT((fdt_finish_reservemap(fdt)));
> +
> +    /* Root node */
> +    _FDT((fdt_begin_node(fdt, "")));
> +    _FDT((fdt_property_string(fdt, "model", "IBM PowerNV (emulated by qemu)")));
> +    _FDT((fdt_property(fdt, "compatible", plat_compat, sizeof(plat_compat))));
> +
> +    /*
> +     * Add info to guest to indentify which host is it being run on
> +     * and what is the uuid of the guest
> +     */
> +    if (kvmppc_get_host_model(&buf)) {
> +        _FDT((fdt_property_string(fdt, "host-model", buf)));
> +        g_free(buf);
> +    }
> +    if (kvmppc_get_host_serial(&buf)) {
> +        _FDT((fdt_property_string(fdt, "host-serial", buf)));
> +        g_free(buf);
> +    }

Since you're emulating a "bare metal" machine, surely the host
properties aren't relevant here.

> +
> +    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
> +                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
> +                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
> +                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
> +                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
> +                          qemu_uuid[14], qemu_uuid[15]);
> +
> +    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
> +    g_free(buf);
> +
> +    _FDT((fdt_begin_node(fdt, "chosen")));
> +    _FDT((fdt_property(fdt, "linux,initrd-start",
> +                       &start_prop, sizeof(start_prop))));
> +    _FDT((fdt_property(fdt, "linux,initrd-end",
> +                       &end_prop, sizeof(end_prop))));
> +    _FDT((fdt_end_node(fdt)));
> +
> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
> +
> +    /* cpus */
> +    _FDT((fdt_begin_node(fdt, "cpus")));
> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> +
> +    CPU_FOREACH(cs) {
> +        powernv_create_cpu_node(fdt, cs, smt);
> +    }
> +
> +    _FDT((fdt_end_node(fdt)));
> +
> +    /* Memory */
> +    _FDT((powernv_populate_memory(fdt)));
> +
> +    /* /hypervisor node */
> +    if (kvm_enabled()) {
> +        uint8_t hypercall[16];
> +
> +        /* indicate KVM hypercall interface */
> +        _FDT((fdt_begin_node(fdt, "hypervisor")));
> +        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
> +        if (kvmppc_has_cap_fixup_hcalls()) {
> +            /*
> +             * Older KVM versions with older guest kernels were broken with the
> +             * magic page, don't allow the guest to map it.
> +             */
> +            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
> +                                 sizeof(hypercall));
> +            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
> +                              sizeof(hypercall))));
> +        }
> +        _FDT((fdt_end_node(fdt)));
> +    }

And a hypercall interface surely doesn't make sense for powernv.

> +
> +    _FDT((fdt_end_node(fdt))); /* close root node */
> +    _FDT((fdt_finish(fdt)));
> +
> +    return fdt;
> +}
> +
> +static void powernv_cpu_reset(void *opaque)
> +{
> +    PowerPCCPU *cpu = opaque;
> +    CPUState *cs = CPU(cpu);
> +    CPUPPCState *env = &cpu->env;
> +
> +    cpu_reset(cs);
> +
> +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
> +    env->spr[SPR_HIOR] = 0;
> +    env->gpr[3] = FDT_ADDR;
> +    env->nip = 0x10;
> +    env->msr |= MSR_HVB;
> +}

So, I believe the qemu-ishly correct way of doing this is to have the
cpu initialization in the cpu code, rather than the platform code, as
much as possible.  On PAPR we kind of get away with initialization in
the platform code on the grounds that it's a paravirt platform, but
powernv doesn't have that excuse.

But this may well be stuff that changes in later patches, so..

> +static const VMStateDescription vmstate_powernv = {
> +    .name = "powernv",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +};

It might be best to leave out the vmstate entirely until you're ready
to implement migration, rather than having a partial, probably not
working migration implementation.

> +
> +static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
> +{
> +    PnvChip *chip = &sys->chips[chip_no];
> +
> +    if (chip_no >= PNV_MAX_CHIPS) {
> +            return;
> +    }
> +
> +    /* XXX Improve chip numbering to better match HW */
> +    chip->chip_id = chip_no;

I think modern qemu conventions would suggest creating the chips as
QOM objects rather than having a fixed array.

> +}
> +
> +static void ppc_powernv_init(MachineState *machine)
> +{
> +    ram_addr_t ram_size = machine->ram_size;
> +    const char *cpu_model = machine->cpu_model;
> +    const char *kernel_filename = machine->kernel_filename;
> +    const char *initrd_filename = machine->initrd_filename;
> +    uint32_t initrd_base = 0;
> +    long initrd_size = 0;
> +    PowerPCCPU *cpu;
> +    CPUPPCState *env;
> +    MemoryRegion *sysmem = get_system_memory();
> +    MemoryRegion *ram = g_new(MemoryRegion, 1);
> +    sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
> +    PnvSystem *sys = &pnv_machine->sys;
> +    long fw_size;
> +    char *filename;
> +    void *fdt;
> +    int i;
> +
> +    /* init CPUs */
> +    if (cpu_model == NULL) {
> +        cpu_model = kvm_enabled() ? "host" : "POWER8";
> +    }
> +
> +    for (i = 0; i < smp_cpus; i++) {
> +        cpu = cpu_ppc_init(cpu_model);
> +        if (cpu == NULL) {
> +            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> +            exit(1);
> +        }
> +        env = &cpu->env;
> +
> +        /* Set time-base frequency to 512 MHz */
> +        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> +
> +        /* MSR[IP] doesn't exist nowadays */
> +        env->msr_mask &= ~(1 << 6);
> +
> +        qemu_register_reset(powernv_cpu_reset, cpu);
> +    }
> +
> +    /* allocate RAM */
> +    memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", ram_size);
> +    memory_region_add_subregion(sysmem, 0, ram);
> +
> +    /* XXX We should decide how many chips to create based on #cores and
> +     * Venice vs. Murano vs. Naples chip type etc..., for now, just create
> +     * one chip. Also creation of the CPUs should be done per-chip
> +     */
> +    sys->num_chips = 1;
> +
> +    /* Create only one PHB for now until I figure out what's wrong
> +     * when I create more (resource assignment failures in Linux)
> +     */
> +    pnv_create_chip(sys, 0);
> +
> +    if (bios_name == NULL) {
> +        bios_name = FW_FILE_NAME;
> +    }
> +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
> +    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
> +    if (fw_size < 0) {
> +        hw_error("qemu: could not load OPAL '%s'\n", filename);
> +        exit(1);
> +    }
> +    g_free(filename);
> +
> +
> +    if (kernel_filename == NULL) {
> +        kernel_filename = KERNEL_FILE_NAME;
> +    }
> +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS,
> kernel_filename);

The commit with the Opal image should go in before this, no?

> +    fw_size = load_image_targphys(filename, 0x20000000, 0x2000000);
> +    if (fw_size < 0) {
> +        hw_error("qemu: could not load kernel'%s'\n", filename);
> +        exit(1);
> +    }
> +    g_free(filename);
> +
> +    /* load initrd */
> +    if (initrd_filename) {
> +            /* Try to locate the initrd in the gap between the kernel
> +             * and the firmware. Add a bit of space just in case
> +             */
> +            initrd_base = 0x40000000;
> +            initrd_size = load_image_targphys(initrd_filename, initrd_base,
> +                                              0x10000000); // 128MB max
> +            if (initrd_size < 0) {
> +                    fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
> +                            initrd_filename);
> +                    exit(1);
> +            }
> +    } else {
> +            initrd_base = 0;
> +            initrd_size = 0;
> +    }
> +    fdt = powernv_create_fdt(sys, initrd_base, initrd_size);
> +    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
> +}
> +
> +static int powernv_kvm_type(const char *vm_type)
> +{
> +    /* Always force PR KVM */
> +    return 2;
> +}
> +
> +static void ppc_cpu_do_nmi_on_cpu(void *arg)
> +{
> +    CPUState *cs = arg;
> +
> +    cpu_synchronize_state(cs);
> +    ppc_cpu_do_system_reset(cs);
> +}
> +
> +static void powernv_nmi(NMIState *n, int cpu_index, Error **errp)
> +{
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
> +    }
> +}
> +
> +static void powernv_machine_class_init(ObjectClass *oc, void *data)
> +{
> +    MachineClass *mc = MACHINE_CLASS(oc);
> +    NMIClass *nc = NMI_CLASS(oc);
> +
> +    mc->init = ppc_powernv_init;
> +    mc->block_default_type = IF_SCSI;
> +    mc->max_cpus = MAX_CPUS;
> +    mc->no_parallel = 1;
> +    mc->default_boot_order = NULL;
> +    mc->kvm_type = powernv_kvm_type;
> +
> +    nc->nmi_monitor_handler = powernv_nmi;
> +}
> +
> +static const TypeInfo powernv_machine_info = {
> +    .name          = TYPE_POWERNV_MACHINE,
> +    .parent        = TYPE_MACHINE,
> +    .abstract      = true,
> +    .instance_size = sizeof(sPowerNVMachineState),
> +    .class_init    = powernv_machine_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_NMI },
> +        { }
> +    },
> +};
> +
> +static void powernv_machine_2_5_class_init(ObjectClass *oc, void *data)
> +{
> +    MachineClass *mc = MACHINE_CLASS(oc);
> +
> +    mc->name = "powernv-2.5";
> +    mc->desc = "PowerNV v2.5";
> +    mc->alias = "powernv";
> +}
> +
> +static const TypeInfo powernv_machine_2_5_info = {
> +    .name          = MACHINE_TYPE_NAME("powernv-2.5"),
> +    .parent        = TYPE_POWERNV_MACHINE,
> +    .class_init    = powernv_machine_2_5_class_init,
> +};
> +
> +static void powernv_machine_register_types(void)
> +{
> +    type_register_static(&powernv_machine_info);
> +    type_register_static(&powernv_machine_2_5_info);
> +}
> +
> +type_init(powernv_machine_register_types)
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> new file mode 100644
> index 0000000..9a48c16
> --- /dev/null
> +++ b/include/hw/ppc/pnv.h
> @@ -0,0 +1,36 @@
> +#ifndef _HW_LPC_H
> +#define _HW_LPC_H
> +/*
> + * QEMU PowerNV various definitions
> + *
> + * Copyright (c) 2014 BenH
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "hw/hw.h"
> +
> +/* Should we turn that into a QOjb of some sort ? */
> +typedef struct PnvChip {
> +    uint32_t         chip_id;
> +} PnvChip;
> +
> +typedef struct PnvSystem {
> +    uint32_t  num_chips;
> +#define PNV_MAX_CHIPS		1
> +    PnvChip   chips[PNV_MAX_CHIPS];
> +} PnvSystem;
> +
> +#endif /* _HW_PNV_LPC_H */
> +

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts
  2015-11-20  7:45   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-24  0:44     ` Benjamin Herrenschmidt
  2015-11-24  2:22       ` David Gibson
  2015-11-24  0:51     ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  0:44 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Fri, 2015-11-20 at 18:45 +1100, David Gibson wrote:
> 
> So, I'm not 100% following the logic below, but it looks like the
> existing code used SPR_NOACCESS to mark things which generated a
> privilege exception compared to NULL for things which generated an
> invalid instruction exception.  Using that encoding, can you simplify
> the logic here?  Alternatively can you use the logic here to avoid
> the SPR_NOACCESS encoding?

Well, so the SPR_NOACCESS has to do with how you react to a known SPR
that has explicit access permissions. The logic below is described in
the ISA for an unknown SPR number.

I don't know whether the access permissions of "known" SPRs always
honor the 0x10 bit trick, and changing that in qemu would be a
fairly large patch. So I'd rather stick to the logic here for
"unknown" SPRs which matches the ISA definition.

I'll update the patch though for arch 2.07 as it defines a few
reserved SPRs as no-ops.

However:

> > -        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +
> > +        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
> > +         * it can generate a priv, a hv emu or a no-op
> > +         */
> > +        if (sprn & 0x10) {
> > +            if (ctx->pr) {
> > +                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +            }
> > +        } else {
> > +            if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 ||
> > sprn == 6) {
> > +                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +            }
> > +        }
> > +#if !defined(CONFIG_USER_ONLY)
> > +        /* HV priv */
> > +        if (ctx->spr_cb[sprn].hea_read) {
> > +            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +        }

That last bit is bogus.

> If you're in PR mode, and it's an SPR with an hea_read function and
> has the 0x10 bit set, won't this call gen_priv_exception twice?

Yes, I've removed it. It should be handled by the SPR_NOACCESS.

> I also see no path here which will call gen_inval_exception(), is
> that
> right?  If you're in HV mode and it's a truly invalid SPRN, isn't
> that
> what you'd want?

No, the ISA says it's a nop.

Cheers,
Ben.

> > +#endif
> >      }
> >  }
> 
> 
> 
> >  
> > @@ -4395,13 +4423,9 @@ static void gen_mtcrf(DisasContext *ctx)
> >  #if defined(TARGET_PPC64)
> >  static void gen_mtmsrd(DisasContext *ctx)
> >  {
> > -#if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> > -#else
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> > -        return;
> > -    }
> > +    CHK_SV;
> > +
> > +#if !defined(CONFIG_USER_ONLY)
> >      if (ctx->opcode & 0x00010000) {
> >          /* Special form that does not need any synchronisation */
> >          TCGv t0 = tcg_temp_new();
> > @@ -4420,20 +4444,16 @@ static void gen_mtmsrd(DisasContext *ctx)
> >          /* Note that mtmsr is not always defined as context-
> > synchronizing */
> >          gen_stop_exception(ctx);
> >      }
> > -#endif
> > +#endif /* !defined(CONFIG_USER_ONLY) */
> >  }
> > -#endif
> > +#endif /* defined(TARGET_PPC64) */
> >  
> >  static void gen_mtmsr(DisasContext *ctx)
> >  {
> > -#if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> > -#else
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> > -        return;
> > -    }
> > -    if (ctx->opcode & 0x00010000) {
> > +    CHK_SV;
> > +
> > +#if !defined(CONFIG_USER_ONLY)
> > +   if (ctx->opcode & 0x00010000) {
> >          /* Special form that does not need any synchronisation */
> >          TCGv t0 = tcg_temp_new();
> >          tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 <<
> > MSR_RI) | (1 << MSR_EE));
> > @@ -4488,7 +4508,7 @@ static void gen_mtspr(DisasContext *ctx)
> >                       TARGET_FMT_lx "\n", sprn, sprn, ctx->nip -
> > 4);
> >              printf("Trying to write privileged spr %d (0x%03x) at
> > "
> >                     TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
> > -            gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
> > +            gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
> >          }
> >      } else {
> >          /* Not defined */
> > @@ -4496,7 +4516,25 @@ static void gen_mtspr(DisasContext *ctx)
> >                   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
> >          printf("Trying to write invalid spr %d (0x%03x) at "
> >                 TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
> > -        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +
> > +        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
> > +         * it can generate a priv, a hv emu or a no-op
> > +         */
> > +        if (sprn & 0x10) {
> > +            if (ctx->pr) {
> > +                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +            }
> > +        } else {
> > +            if (ctx->pr || sprn == 0) {
> > +                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +            }
> > +        }
> > +#if !defined(CONFIG_USER_ONLY)
> > +        /* HV priv */
> > +        if (ctx->spr_cb[sprn].hea_write) {
> > +            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > +        }
> > +#endif
> 
> Same concerns here as for mfspr.
> 
> [snip]
> >  /* tlbiel */
> >  static void gen_tlbiel(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> > -    if (unlikely(ctx->pr || !ctx->hv)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> > +    CHK_SV;
> 
> You have CHK_SV here, but the original code checks for HV, as does
> your new code for tlbia and tlbiel, is that right?
> 
> [snip]
> >  /* tlbsync */
> >  static void gen_tlbsync(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -#else
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> > +    GEN_PRIV;
> > +#else    
> > +    CHK_HV;
> > +
> 
> Old code didn't check for HV mode, but AFAICT it should have, so
> this
> looks correct.
> 
> [snip]
> > @@ -5941,18 +5921,16 @@ static void gen_mfapidi(DisasContext *ctx)
> >  static void gen_tlbiva(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> >      TCGv t0;
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> > +
> > +    CHK_SV;
> 
> Is this the same thing as tlbivax, or some ancient instruction?  AFAICT
> the
> ISA says tlbivax is hypervisor privileged.
> 
> >      t0 = tcg_temp_new();
> >      gen_addr_reg_index(ctx, t0);
> >      gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
> >      tcg_temp_free(t0);
> > -#endif
> > +#endif /* defined(CONFIG_USER_ONLY) */
> >  }
> 
> [snip]
> >  static void gen_tlbivax_booke206(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> >      TCGv t0;
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> >  
> > +    CHK_SV;
> 
> ISA says tlbivax is hypervisor privileged when the CPU has a
> hypervisor mode, which I guess booke206 probably doesn't?
> 
> >      t0 = tcg_temp_new();
> >      gen_addr_reg_index(ctx, t0);
> > -
> >      gen_helper_booke206_tlbivax(cpu_env, t0);
> >      tcg_temp_free(t0);
> > -#endif
> > +#endif /* defined(CONFIG_USER_ONLY) */
> >  }
> >  
> >  static void gen_tlbilx_booke206(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> >      TCGv t0;
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> >  
> > +    CHK_SV;
> 
> And apparently hv vs. sv privilege of tlbilx depends on the EPCR
> register.  Again, may not be relevant for 2.06.
> 
> >      t0 = tcg_temp_new();
> >      gen_addr_reg_index(ctx, t0);
> >  
> > @@ -6672,7 +6574,7 @@ static void gen_tlbilx_booke206(DisasContext
> > *ctx)
> >      }
> >  
> >      tcg_temp_free(t0);
> > -#endif
> > +#endif /* defined(CONFIG_USER_ONLY) */
> >  }
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts
  2015-11-20  7:45   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-24  0:44     ` Benjamin Herrenschmidt
@ 2015-11-24  0:51     ` Benjamin Herrenschmidt
  2015-11-24  2:22       ` David Gibson
  1 sibling, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  0:51 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Fri, 2015-11-20 at 18:45 +1100, David Gibson wrote:
> [snip]
> >  /* tlbiel */
> >  static void gen_tlbiel(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> > -    if (unlikely(ctx->pr || !ctx->hv)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> > +    CHK_SV;
> 
> You have CHK_SV here, but the original code checks for HV, as does
> your new code for tlbia and tlbiel, is that right?

Yes. tlbiel is supervisor accessible (for weird reasons).

> [snip]
> >  /* tlbsync */
> >  static void gen_tlbsync(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -#else
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> > +    GEN_PRIV;
> > +#else    
> > +    CHK_HV;
> > +
> 
> Old code didn't check for HV mode, but AFAICT it should have, so
> this looks correct.

Yes, this is a hypervisor instruction.

> [snip]
> > @@ -5941,18 +5921,16 @@ static void gen_mfapidi(DisasContext *ctx)
> >  static void gen_tlbiva(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> >      TCGv t0;
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> > +
> > +    CHK_SV;
> 
> Is this the same thing as tlbivax, or some ancient instruction?  AFAICT
> the ISA says tlbivax is hypervisor privileged.

"tlbiva" is the 4xx variant, there is no hypervisor mode on these
things.

> >      t0 = tcg_temp_new();
> >      gen_addr_reg_index(ctx, t0);
> >      gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
> >      tcg_temp_free(t0);
> > -#endif
> > +#endif /* defined(CONFIG_USER_ONLY) */
> >  }
> 
> [snip]
> >  static void gen_tlbivax_booke206(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> >      TCGv t0;
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> >  
> > +    CHK_SV;
> 
> ISA says tlbivax is hypervisor privileged when the CPU has a
> hypervisor mode, which I guess booke206 probably doesn't?

Right so here, the "problem" is that afaik, TCG doesn't implement
the BookE hypervisor mode. So with my limited BookE testing
ability I prefer sticking to a mechanical replacement that matches
the existing code. It can be fixed later if necessary.

> >      t0 = tcg_temp_new();
> >      gen_addr_reg_index(ctx, t0);
> > -
> >      gen_helper_booke206_tlbivax(cpu_env, t0);
> >      tcg_temp_free(t0);
> > -#endif
> > +#endif /* defined(CONFIG_USER_ONLY) */
> >  }
> >  
> >  static void gen_tlbilx_booke206(DisasContext *ctx)
> >  {
> >  #if defined(CONFIG_USER_ONLY)
> > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > +    GEN_PRIV;
> >  #else
> >      TCGv t0;
> > -    if (unlikely(ctx->pr)) {
> > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > -        return;
> > -    }
> >  
> > +    CHK_SV;
> 
> And apparently hv vs. sv privilege of tlbilx depends on the EPCR
> register.  Again, may not be relevant for 2.06.

Well, here too, I basically preserve existing BookE TCG behaviour,
whether it's correct or not. That can be fixed separately if somebody
cares about BookE HV mode.

> >      t0 = tcg_temp_new();
> >      gen_addr_reg_index(ctx, t0);
> >  
> > @@ -6672,7 +6574,7 @@ static void gen_tlbilx_booke206(DisasContext
> *ctx)
> >      }
> >  
> >      tcg_temp_free(t0);
> > -#endif
> > +#endif /* defined(CONFIG_USER_ONLY) */
> >  }
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8
  2015-11-20  7:48   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-24  0:58     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  0:58 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Fri, 2015-11-20 at 18:48 +1100, David Gibson wrote:
> 
> > @@ -380,9 +380,11 @@ typedef struct opcode_t {
> >  #if defined(CONFIG_USER_ONLY)
> >  #define CHK_HV GEN_PRIV
> >  #define CHK_SV GEN_PRIV
> > +#define CHK_HVDR GEN_PRIV
> 
> I'm guessing this is supposed to be CHK_HVRM as below.

Indeed. I had test-built it ... odd.

> >  #else
> >  #define CHK_HV do { if (unlikely(ctx->pr || !ctx->hv)) GEN_PRIV; } while(0)
> >  #define CHK_SV do { if (unlikely(ctx->pr))  GEN_PRIV; }  while(0)
> > +#define CHK_HVRM do { if (unlikely(ctx->pr || !ctx->hv || ctx->dr)) GEN_PRIV; } while(0)
> >  #endif
> >  
> >  #define CHK_NONE
> > @@ -2887,7 +2889,7 @@ static void glue(gen_, name##u)(DisasContext *ctx)
> >  }
> >  
> >  #define GEN_LDUX(name, ldop, opc2, opc3, type)                                \
> > -static void glue(gen_, name##ux)(DisasContext *ctx)                                   \
> > +static void glue(gen_, name##ux)(DisasContext *ctx)                           \
> 
> Extraneous change.

Fixed. Thanks.

Cheers,
Ben.


^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 24/77] ppc: Move exception generation code out of line
  2015-11-20  7:53   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-24  0:59     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  0:59 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Fri, 2015-11-20 at 18:53 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:37AM +1100, Benjamin Herrenschmidt
> wrote:
> > There's no point inlining this, if you hit the exception case you
> > exit anyway,
> 
> That doesn't quite seem relevant - IIUC this is affecting inlining in
> the code generation path, rather than the code execution path.

Well, the generation path is also slightly performance sensitive. My
point was making the error path inline wasn't buying us anything.

> > and not inlining saves about 100K of code size (and cache
> > footprint).
> 
> That sounds like a win, though.

Yup.

  .../...

> > -static inline void gen_exception_err(DisasContext *ctx, uint32_t
> > excp, uint32_t error)
> > +static void __attribute__((noinline))
> > +gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t
> > error)
> 
> I thought we generally avoided bare gcc attributes in qemu, but I
> don't see a helper macro for it and I do see it used in a few other
> places, so I guess its ok.

That was my guess too...

> >  {
> >      TCGv_i32 t0, t1;
> >      if (ctx->exception == POWERPC_EXCP_NONE) {
> > @@ -293,7 +294,8 @@ static inline void
> > gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t
> >      ctx->exception = (excp);
> >  }
> >  
> > -static inline void gen_exception(DisasContext *ctx, uint32_t excp)
> > +static void __attribute__((noinline))
> > +gen_exception(DisasContext *ctx, uint32_t excp)
> >  {
> >      TCGv_i32 t0;
> >      if (ctx->exception == POWERPC_EXCP_NONE) {
> > @@ -305,7 +307,8 @@ static inline void gen_exception(DisasContext
> > *ctx, uint32_t excp)
> >      ctx->exception = (excp);
> >  }
> >  
> > -static inline void gen_debug_exception(DisasContext *ctx)
> > +static void __attribute__((noinline))
> > +gen_debug_exception(DisasContext *ctx)
> >  {
> >      TCGv_i32 t0;
> >  
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform
  2015-11-20  8:21   ` David Gibson
@ 2015-11-24  1:45     ` Benjamin Herrenschmidt
  2015-11-24  2:43       ` David Gibson
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  1:45 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Fri, 2015-11-20 at 19:21 +1100, David Gibson wrote:
> On Wed, Nov 11, 2015 at 11:27:39AM +1100, Benjamin Herrenschmidt
> wrote:
> > No devices yet, not even an interrupt controller, just to get
> > started.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > ---
> >  default-configs/ppc64-softmmu.mak |   1 +
> >  hw/ppc/Makefile.objs              |   2 +
> >  hw/ppc/pnv.c                      | 600
> > ++++++++++++++++++++++++++++++++++++++
> >  include/hw/ppc/pnv.h              |  36 +++
> >  4 files changed, 639 insertions(+)
> >  create mode 100644 hw/ppc/pnv.c
> >  create mode 100644 include/hw/ppc/pnv.h
> 
> Many of my comments below may be made irrelevant by later patches in
> the series.

Heh, well, this is where the "meat" of the new platform starts showing
up :-)

 .../...

> > +#define _FDT(exp) \
> > +    do { \
> > +        int ret = (exp);                                           \
> > +        if (ret < 0) {                                             \
> > +            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
> > +                    #exp, fdt_strerror(ret));                      \
> > +            exit(1);                                               \
> > +        }                                                          \
> > +    } while (0)
> 
> We should probably make a file where helper routines used by both
> spapr.c and pnv.c can live.

Probably but I'd see that as a later cleanup rather than doing it
in this series...

 .../...

> > +    if (pcc->l1_dcache_size) {
> > +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> > +    } else {
> > +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> 
> Hmm (note to self) should probably change a bunch of these both in
> spapr.c and pnv.c from explicit fprintfs() to modern error_report()
> and similar.

That's a train I completely missed, but yes.

  .../...

> > +    }
> > +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> > +                       servers_prop, sizeof(servers_prop))));
> > +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> > +                       gservers_prop, sizeof(gservers_prop))));
> > +
> > +    _FDT((fdt_end_node(fdt)));
> > +}
> > +
> > +static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t initrd_size)
> > +{
> 
> So.. does it make sense for qemu to create a device tree for powernv,
> rather than leaving it to Opal?

Well, OPAL only creates a device-tree if you are on an FSP machine in
which case it expects a complex data structure (HDAT) coming from the
FSP to use as a source of info.

On OpenPower machines, which is closer to what we simulate here, we
do get a device-tree as an input in OPAL, it's generated by HostBoot.

Now, I am not running HostBoot in qemu because most of what it does
is completely irrelevant to an emulated system (training the various
links, initializing the memory buffer chips, etc...).

However, we do need to pass a number of pieces of platform information to
OPAL, which HB does via the device-tree, such as which cores are enabled,
the memory map configured for PCI, which PHBs are enabled, etc...  so
creating a DT in qemu makes sense; it mimics HB in essence.

OPAL will enrich that device-tree before starting Linux.

> > +    /*
> > +     * Add info to guest to indentify which host is it being run on
> > +     * and what is the uuid of the guest
> > +     */
> > +    if (kvmppc_get_host_model(&buf)) {
> > +        _FDT((fdt_property_string(fdt, "host-model", buf)));
> > +        g_free(buf);
> > +    }
> > +    if (kvmppc_get_host_serial(&buf)) {
> > +        _FDT((fdt_property_string(fdt, "host-serial", buf)));
> > +        g_free(buf);
> > +    }
> 
> Since you're emulating a "bare metal" machine, surely the host
> properties aren't relevant here.

They may or may not. But yes, I can take that out.

> > +    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
> > +                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
> > +                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
> > +                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
> > +                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
> > +                          qemu_uuid[14], qemu_uuid[15]);
> > +
> > +    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
> > +    g_free(buf);
> > +
> > +    _FDT((fdt_begin_node(fdt, "chosen")));
> > +    _FDT((fdt_property(fdt, "linux,initrd-start",
> > +                       &start_prop, sizeof(start_prop))));
> > +    _FDT((fdt_property(fdt, "linux,initrd-end",
> > +                       &end_prop, sizeof(end_prop))));
> > +    _FDT((fdt_end_node(fdt)));
> > +
> > +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
> > +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
> > +
> > +    /* cpus */
> > +    _FDT((fdt_begin_node(fdt, "cpus")));
> > +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> > +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> > +
> > +    CPU_FOREACH(cs) {
> > +        powernv_create_cpu_node(fdt, cs, smt);
> > +    }
> > +
> > +    _FDT((fdt_end_node(fdt)));
> > +
> > +    /* Memory */
> > +    _FDT((powernv_populate_memory(fdt)));
> > +
> > +    /* /hypervisor node */
> > +    if (kvm_enabled()) {
> > +        uint8_t hypercall[16];
> > +
> > +        /* indicate KVM hypercall interface */
> > +        _FDT((fdt_begin_node(fdt, "hypervisor")));
> > +        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
> > +        if (kvmppc_has_cap_fixup_hcalls()) {
> > +            /*
> > +             * Older KVM versions with older guest kernels were broken with the
> > +             * magic page, don't allow the guest to map it.
> > +             */
> > +            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
> > +                                 sizeof(hypercall));
> > +            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
> > +                              sizeof(hypercall))));
> > +        }
> > +        _FDT((fdt_end_node(fdt)));
> > +    }
> 
> And a hypercall interface surely doesn't make sense for powernv.

It's qemu paravirt, it exists on G5 too :-) It's for PR KVM, it allows us
to speed up some bits and pieces. But yeah, we don't yet really
"support" it at this point. However, we might.

> > +
> > +    _FDT((fdt_end_node(fdt))); /* close root node */
> > +    _FDT((fdt_finish(fdt)));
> > +
> > +    return fdt;
> > +}
> > +
> > +static void powernv_cpu_reset(void *opaque)
> > +{
> > +    PowerPCCPU *cpu = opaque;
> > +    CPUState *cs = CPU(cpu);
> > +    CPUPPCState *env = &cpu->env;
> > +
> > +    cpu_reset(cs);
> > +
> > +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
> > +    env->spr[SPR_HIOR] = 0;
> > +    env->gpr[3] = FDT_ADDR;
> > +    env->nip = 0x10;
> > +    env->msr |= MSR_HVB;
> > +}
> 
> So, I believe the qemu-ishly correct way of doing this is to have the
> cpu initialization in the cpu code, rather than the platform code, as
> much as possible.  On PAPR we kind of get away with initialization in
> the platform code on the grounds that it's a paravirt platform, but
> powernv doesn't have that excuse.
> 
> But this may well be stuff that changes in later patches, so..

Well no, not really. But here too, we mimic the state as it comes out of
HostBoot, which isn't quite the same thing. We need to provide the FDT
entry, etc...

The "real" reset state of a P8 isn't something we can easily
simulate... 

It runs some microcode from a SEEPROM with a small microcontroller
which initializes a core, which then runs some HB code off its L3
cache etc... really not something we want to do in qemu, at least
for now.

So the initial state here is somewhat in between full virt and
paravirt, we simulate a platform that has been partially initialized by
HostBoot, to the state it has when it enters OPAL.

> > +static const VMStateDescription vmstate_powernv = {
> > +    .name = "powernv",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +};
> 
> It might be best to leave out the vmstate entirely until you're ready
> to implement migration, rather than having a partial, probably not
> working migration implementation.

Ok.

> > +
> > +static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
> > +{
> > +    PnvChip *chip = &sys->chips[chip_no];
> > +
> > +    if (chip_no >= PNV_MAX_CHIPS) {
> > +            return;
> > +    }
> > +
> > +    /* XXX Improve chip numbering to better match HW */
> > +    chip->chip_id = chip_no;
> 
> I think modern qemu conventions would suggest creating the chips as
> QOM objects rather than having a fixed array.

Yeah, more code & much larger memory footprint for the same result :-)

I can look into it but it's low priority. I still want to rework some
of that chip stuff in future patches anyway.

> > +}
> > +
> > +static void ppc_powernv_init(MachineState *machine)
> > +{
> > +    ram_addr_t ram_size = machine->ram_size;
> > +    const char *cpu_model = machine->cpu_model;
> > +    const char *kernel_filename = machine->kernel_filename;
> > +    const char *initrd_filename = machine->initrd_filename;
> > +    uint32_t initrd_base = 0;
> > +    long initrd_size = 0;
> > +    PowerPCCPU *cpu;
> > +    CPUPPCState *env;
> > +    MemoryRegion *sysmem = get_system_memory();
> > +    MemoryRegion *ram = g_new(MemoryRegion, 1);
> > +    sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
> > +    PnvSystem *sys = &pnv_machine->sys;
> > +    long fw_size;
> > +    char *filename;
> > +    void *fdt;
> > +    int i;
> > +
> > +    /* init CPUs */
> > +    if (cpu_model == NULL) {
> > +        cpu_model = kvm_enabled() ? "host" : "POWER8";
> > +    }
> > +
> > +    for (i = 0; i < smp_cpus; i++) {
> > +        cpu = cpu_ppc_init(cpu_model);
> > +        if (cpu == NULL) {
> > +            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> > +            exit(1);
> > +        }
> > +        env = &cpu->env;
> > +
> > +        /* Set time-base frequency to 512 MHz */
> > +        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> > +
> > +        /* MSR[IP] doesn't exist nowadays */
> > +        env->msr_mask &= ~(1 << 6);
> > +
> > +        qemu_register_reset(powernv_cpu_reset, cpu);
> > +    }
> > +
> > +    /* allocate RAM */
> > +    memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", ram_size);
> > +    memory_region_add_subregion(sysmem, 0, ram);
> > +
> > +    /* XXX We should decide how many chips to create based on #cores and
> > +     * Venice vs. Murano vs. Naples chip type etc..., for now, just create
> > +     * one chip. Also creation of the CPUs should be done per-chip
> > +     */
> > +    sys->num_chips = 1;
> > +
> > +    /* Create only one PHB for now until I figure out what's wrong
> > +     * when I create more (resource assignment failures in Linux)
> > +     */
> > +    pnv_create_chip(sys, 0);
> > +
> > +    if (bios_name == NULL) {
> > +        bios_name = FW_FILE_NAME;
> > +    }
> > +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
> > +    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
> > +    if (fw_size < 0) {
> > +        hw_error("qemu: could not load OPAL '%s'\n", filename);
> > +        exit(1);
> > +    }
> > +    g_free(filename);
> > +
> > +
> > +    if (kernel_filename == NULL) {
> > +        kernel_filename = KERNEL_FILE_NAME;
> > +    }
> > +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS,
> > kernel_filename);
> 
> The commit with the Opal image should go in before this, no?

Now this is a bit of an open discussion at the moment :-)

The way OPAL is built on OPP machines today is by essentially building
a complete flash image which contains HostBoot, OPAL and the petitboot-
based bootloader which contains a Linux kernel etc...

We could create a target without HB and with a slimmed down Linux but
it would still probably be about 12MB I reckon, if not more. It feels a
bit "big" to ship as a binary as part of qemu...

We would also have to add code to qemu to "find" OPAL in that image,
and then add a model for the flash controller.

The other option is to bundle just OPAL itself. However that means
you can't go anywhere without a -kernel argument, which would then
be either a petitboot-based bootloader or your actual target kernel.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts
  2015-11-24  0:44     ` Benjamin Herrenschmidt
@ 2015-11-24  2:22       ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  2:22 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2688 bytes --]

On Tue, Nov 24, 2015 at 11:44:36AM +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2015-11-20 at 18:45 +1100, David Gibson wrote:
> > 
> > So, I'm not 100% following the logic below, but it looks like the
> > existing code used SPR_NOACCESS to mark things which generated a
> > privilege exception compared to NULL for things which generated an
> > invalid instruction exception.  Using that encoding, can you simplify
> > the logic here?  Alternatively can you use the logic here to avoid
> > the SPR_NOACCESS encoding?
> 
> Well, so the SPR_NOACCESS has to do with how you react to a known SPR
> that has explicit access permissions. The logic below is described in
> the ISA for an unknown SPR number.
> 
> I don't know whether the access permission of "known" SPRs always
> honor the 0x10 bit trick, and changing that in qemu would be a
> fairly large patch. So I'd rather stick to the logic here for
> "unknown" SPRs which matches the ISA definition.
> 
> I'll update the patch though for arch 2.07 as it defines a few
> reserved SPRs as no-ops.

Ok, that makes sense.

> However:
> 
> > > -        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > > +
> > > +        /* The behaviour depends on MSR:PR and SPR# bit 0x10,
> > > +         * it can generate a priv, a hv emu or a no-op
> > > +         */
> > > +        if (sprn & 0x10) {
> > > +            if (ctx->pr) {
> > > +                gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > > +            }
> > > +        } else {
> > > +            if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 ||
> > > sprn == 6) {
> > > +                gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > > +            }
> > > +        }
> > > +#if !defined(CONFIG_USER_ONLY)
> > > +        /* HV priv */
> > > +        if (ctx->spr_cb[sprn].hea_read) {
> > > +            gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR);
> > > +        }
> 
> That last bit is bogus.
> 
> > If you're in PR mode, and it's an SPR with an hea_read function and
> > has the 0x10 bit set, won't this call gen_priv_exception twice?
> 
> Yes, I've removed it. It should be handled by the SPR_NOACCESS.
> 
> > I also see no path here which will call gen_inval_exception(), is
> > that
> > right?  If you're in HV mode and it's a truly invalid SPRN, isn't
> > that
> > what you'd want?
> 
> No, the ISA says it's a nop.

Huh, ok.

Some comments referencing the ISA might be useful here.


-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts
  2015-11-24  0:51     ` Benjamin Herrenschmidt
@ 2015-11-24  2:22       ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  2:22 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 4516 bytes --]

On Tue, Nov 24, 2015 at 11:51:44AM +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2015-11-20 at 18:45 +1100, David Gibson wrote:
> > > [snip]
> > >  /* tlbiel */
> > >  static void gen_tlbiel(DisasContext *ctx)
> > >  {
> > >  #if defined(CONFIG_USER_ONLY)
> > > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > +    GEN_PRIV;
> > >  #else
> > > -    if (unlikely(ctx->pr || !ctx->hv)) {
> > > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > -        return;
> > > -    }
> > > +    CHK_SV;
> > 
> > You have CHK_SV here, but the original code checks for HV, as does
> > your new code for tlbia and tlbiel, is that right?
> 
> Yes. tlbiel is supervisor accessible (for weird reasons).
> 
> > [snip]
> > >  /* tlbsync */
> > >  static void gen_tlbsync(DisasContext *ctx)
> > >  {
> > >  #if defined(CONFIG_USER_ONLY)
> > > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > -#else
> > > -    if (unlikely(ctx->pr)) {
> > > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > -        return;
> > > -    }
> > > +    GEN_PRIV;
> > > +#else    
> > > +    CHK_HV;
> > > +
> > 
> > > Old code didn't check for HV mode, but AFAICT it should have, so
> > this looks correct.
> 
> Yes, this is a hypervisor instruction.
> 
> > [snip]
> > > @@ -5941,18 +5921,16 @@ static void gen_mfapidi(DisasContext *ctx)
> > >  static void gen_tlbiva(DisasContext *ctx)
> > >  {
> > >  #if defined(CONFIG_USER_ONLY)
> > > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > +    GEN_PRIV;
> > >  #else
> > >      TCGv t0;
> > > -    if (unlikely(ctx->pr)) {
> > > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > -        return;
> > > -    }
> > > +
> > > +    CHK_SV;
> > 
> > > Is this the same thing as tlbivax, or some ancient instruction?  AFAICT
> > the ISA says tlbivax is hypervisor privileged.
> 
> "tlbiva" is the 4xx variant, there is no hypervisor mode on these
> things.
> 
> > >      t0 = tcg_temp_new();
> > >      gen_addr_reg_index(ctx, t0);
> > >      gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
> > >      tcg_temp_free(t0);
> > > -#endif
> > > +#endif /* defined(CONFIG_USER_ONLY) */
> > >  }
> > 
> > [snip]
> > >  static void gen_tlbivax_booke206(DisasContext *ctx)
> > >  {
> > >  #if defined(CONFIG_USER_ONLY)
> > > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > +    GEN_PRIV;
> > >  #else
> > >      TCGv t0;
> > > -    if (unlikely(ctx->pr)) {
> > > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > -        return;
> > > -    }
> > >  
> > > +    CHK_SV;
> > 
> > ISA says tlbivax is hypervisor privileged when the CPU has a
> > hypervisor mode, which I guess booke206 probably doesn't?
> 
> Right so here, the "problem" is that afaik, TCG doesn't implement
> the BookE hypervisor mode. So with my limited BookE testing
> ability I prefer sticking to a mechanical replacement that matches
> the existing code. It can be fixed later if necessary.

Fair enough.

> > >      t0 = tcg_temp_new();
> > >      gen_addr_reg_index(ctx, t0);
> > > -
> > >      gen_helper_booke206_tlbivax(cpu_env, t0);
> > >      tcg_temp_free(t0);
> > > -#endif
> > > +#endif /* defined(CONFIG_USER_ONLY) */
> > >  }
> > >  
> > >  static void gen_tlbilx_booke206(DisasContext *ctx)
> > >  {
> > >  #if defined(CONFIG_USER_ONLY)
> > > -    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > +    GEN_PRIV;
> > >  #else
> > >      TCGv t0;
> > > -    if (unlikely(ctx->pr)) {
> > > -        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> > > -        return;
> > > -    }
> > >  
> > > +    CHK_SV;
> > 
> > And apparently hv vs. sv privilege of tlbilx depends on the EPCR
> > register.  Again, may not be relevant for 2.06.
> 
> Well, here too, I basically preserve existing BookE TCG behaviour,
> whether it's correct or not. That can be fixed separately if somebody
> cares about BookE HV mode.
> 
> > >      t0 = tcg_temp_new();
> > >      gen_addr_reg_index(ctx, t0);
> > >  
> > > @@ -6672,7 +6574,7 @@ static void gen_tlbilx_booke206(DisasContext
> > *ctx)
> > >      }
> > >  
> > >      tcg_temp_free(t0);
> > > -#endif
> > > +#endif /* defined(CONFIG_USER_ONLY) */
> > >  }
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform
  2015-11-24  1:45     ` Benjamin Herrenschmidt
@ 2015-11-24  2:43       ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  2:43 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 14232 bytes --]

On Tue, Nov 24, 2015 at 12:45:48PM +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2015-11-20 at 19:21 +1100, David Gibson wrote:
> > On Wed, Nov 11, 2015 at 11:27:39AM +1100, Benjamin Herrenschmidt
> > wrote:
> > > No devices yet, not even an interrupt controller, just to get
> > > started.
> > > 
> > > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > > ---
> > >  default-configs/ppc64-softmmu.mak |   1 +
> > >  hw/ppc/Makefile.objs              |   2 +
> > >  hw/ppc/pnv.c                      | 600
> > > ++++++++++++++++++++++++++++++++++++++
> > >  include/hw/ppc/pnv.h              |  36 +++
> > >  4 files changed, 639 insertions(+)
> > >  create mode 100644 hw/ppc/pnv.c
> > >  create mode 100644 include/hw/ppc/pnv.h
> > 
> > Many of my comments below may be made irrelevant by later patches in
> > the series.
> 
> Heh, well, this is where the "meat" of the new platform starts showing
> up :-)
> 
>  .../...
> 
> > > +#define _FDT(exp) \
> > > +    do { \
> > > +        int ret = (exp);                                           \
> > > +        if (ret < 0) {                                             \
> > > +            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
> > > +                    #exp, fdt_strerror(ret));                      \
> > > +            exit(1);                                               \
> > > +        }                                                          \
> > > +    } while (0)
> > 
> > We should probably make a file where helper routines used by both
> > spapr.c and pnv.c can live.
> 
> Probably but I'd see that as a later cleanup rather than doing it
> in this series...

Ok.

> 
>  .../...
> 
> > > +    if (pcc->l1_dcache_size) {
> > > +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> > > +    } else {
> > > +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> > 
> > Hmm (note to self) should probably change a bunch of these both in
> > spapr.c and pnv.c from explicit fprintfs() to modern error_report()
> > and similar.
> 
> That's a train I completely missed, but yes.
> 
>   .../...
> 
> > > +    }
> > > +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> > > +                       servers_prop, sizeof(servers_prop))));
> > > +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> > > +                       gservers_prop, sizeof(gservers_prop))));
> > > +
> > > +    _FDT((fdt_end_node(fdt)));
> > > +}
> > > +
> > > +static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t initrd_size)
> > > +{
> > 
> > So.. does it make sense for qemu to create a device tree for powernv,
> > rather than leaving it to Opal?
> 
> Well, OPAL only creates a device-tree if you are on an FSP machine in
> which case it expects a complex data structure (HDAT) coming from the
> FSP to use as a source of info.
> 
> On OpenPower machines, which is closer to what we simulate here, we
> do get a device-tree as an input in OPAL, it's generated by HostBoot.
> 
> Now, I am not running HostBoot in qemu because most of what it does
> is completely irrelevant to an emulated system (training the various
> links, initializing the memory buffer chips, etc...).
> 
> However, we do need to pass a number of pieces of platform information to
> OPAL, which HB does via the device-tree, such as which cores are enabled,
> the memory map configured for PCI, which PHBs are enabled, etc...  so
> creating a DT in qemu makes sense; it mimics HB in essence.
> 
> OPAL will enrich that device-tree before starting Linux.

Ok.  Some comments mentioning that you're simulating the exit state
from HostBoot would be good then.

> > > +    /*
> > > +     * Add info to guest to indentify which host is it being run on
> > > +     * and what is the uuid of the guest
> > > +     */
> > > +    if (kvmppc_get_host_model(&buf)) {
> > > +        _FDT((fdt_property_string(fdt, "host-model", buf)));
> > > +        g_free(buf);
> > > +    }
> > > +    if (kvmppc_get_host_serial(&buf)) {
> > > +        _FDT((fdt_property_string(fdt, "host-serial", buf)));
> > > +        g_free(buf);
> > > +    }
> > 
> > Since you're emulating a "bare metal" machine, surely the host
> > properties aren't relevant here.
> 
> They may or may not. But yes, I can take that out.
> 
> > > +    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
> > > +                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
> > > +                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
> > > +                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
> > > +                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
> > > +                          qemu_uuid[14], qemu_uuid[15]);
> > > +
> > > +    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
> > > +    g_free(buf);
> > > +
> > > +    _FDT((fdt_begin_node(fdt, "chosen")));
> > > +    _FDT((fdt_property(fdt, "linux,initrd-start",
> > > +                       &start_prop, sizeof(start_prop))));
> > > +    _FDT((fdt_property(fdt, "linux,initrd-end",
> > > +                       &end_prop, sizeof(end_prop))));
> > > +    _FDT((fdt_end_node(fdt)));
> > > +
> > > +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
> > > +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
> > > +
> > > +    /* cpus */
> > > +    _FDT((fdt_begin_node(fdt, "cpus")));
> > > +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> > > +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> > > +
> > > +    CPU_FOREACH(cs) {
> > > +        powernv_create_cpu_node(fdt, cs, smt);
> > > +    }
> > > +
> > > +    _FDT((fdt_end_node(fdt)));
> > > +
> > > +    /* Memory */
> > > +    _FDT((powernv_populate_memory(fdt)));
> > > +
> > > +    /* /hypervisor node */
> > > +    if (kvm_enabled()) {
> > > +        uint8_t hypercall[16];
> > > +
> > > +        /* indicate KVM hypercall interface */
> > > +        _FDT((fdt_begin_node(fdt, "hypervisor")));
> > > +        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
> > > +        if (kvmppc_has_cap_fixup_hcalls()) {
> > > +            /*
> > > +             * Older KVM versions with older guest kernels were broken with the
> > > +             * magic page, don't allow the guest to map it.
> > > +             */
> > > +            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
> > > +                                 sizeof(hypercall));
> > > +            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
> > > +                              sizeof(hypercall))));
> > > +        }
> > > +        _FDT((fdt_end_node(fdt)));
> > > +    }
> > 
> > And a hypercall interface surely doesn't make sense for powernv.
> 
> It's qemu paravirt, it exists on G5 too :-) It's for PR KVM, it allows us
> to speed up some bits and pieces. But yeah, we don't yet really
> "support" it at this point. However, we might.

Ah, yes, I forgot about that.

> > > +
> > > +    _FDT((fdt_end_node(fdt))); /* close root node */
> > > +    _FDT((fdt_finish(fdt)));
> > > +
> > > +    return fdt;
> > > +}
> > > +
> > > +static void powernv_cpu_reset(void *opaque)
> > > +{
> > > +    PowerPCCPU *cpu = opaque;
> > > +    CPUState *cs = CPU(cpu);
> > > +    CPUPPCState *env = &cpu->env;
> > > +
> > > +    cpu_reset(cs);
> > > +
> > > +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
> > > +    env->spr[SPR_HIOR] = 0;
> > > +    env->gpr[3] = FDT_ADDR;
> > > +    env->nip = 0x10;
> > > +    env->msr |= MSR_HVB;
> > > +}
> > 
> > So, I believe the qemu-ishly correct way of doing this is to have the
> > cpu initialization in the cpu code, rather than the platform code, as
> > much as possible.  On PAPR we kind of get away with initialization in
> > the platform code on the grounds that it's a paravirt platform, but
> > powernv doesn't have that excuse.
> > 
> > But this may well be stuff that changes in later patches, so..
> 
> Well no, not really. But here too, we mimic the state as it comes out of
> HostBoot, which isn't quite the same thing. We need to provide the FDT
> entry, etc...
> 
> The "real" reset state of a P8 isn't something we can easily
> simulate... 
> 
> It runs some microcode from a SEEPROM with a small microcontroller
> which initializes a core, which then runs some HB code off its L3
> cache etc... really not something we want to do in qemu, at least
> for now.
> 
> So the initial state here is somewhat in between full virt and
> paravirt, we simulate a platform that has been partially initialized by
> HostBoot, to the state it has when it enters OPAL.

Ok, that makes sense, but I think it needs a bit more explanation in
the code to that effect.

> > > +static const VMStateDescription vmstate_powernv = {
> > > +    .name = "powernv",
> > > +    .version_id = 1,
> > > +    .minimum_version_id = 1,
> > > +};
> > 
> > It might be best to leave out the vmstate entirely until you're ready
> > to implement migration, rather than having a partial, probably not
> > working migration implementation.
> 
> Ok.
> 
> > > +
> > > +static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
> > > +{
> > > +    PnvChip *chip = &sys->chips[chip_no];
> > > +
> > > +    if (chip_no >= PNV_MAX_CHIPS) {
> > > +            return;
> > > +    }
> > > +
> > > +    /* XXX Improve chip numbering to better match HW */
> > > +    chip->chip_id = chip_no;
> > 
> > I think modern qemu conventions would suggest creating the chips as
> > QOM objects rather than having a fixed array.
> 
> Yeah, more code & much larger memory footprint for the same result :-)
> 
> I can look into it but it's low priority. I still want to rework some
> of that chip stuff in future patches anyway.
> 
> > > +}
> > > +
> > > +static void ppc_powernv_init(MachineState *machine)
> > > +{
> > > +    ram_addr_t ram_size = machine->ram_size;
> > > +    const char *cpu_model = machine->cpu_model;
> > > +    const char *kernel_filename = machine->kernel_filename;
> > > +    const char *initrd_filename = machine->initrd_filename;
> > > +    uint32_t initrd_base = 0;
> > > +    long initrd_size = 0;
> > > +    PowerPCCPU *cpu;
> > > +    CPUPPCState *env;
> > > +    MemoryRegion *sysmem = get_system_memory();
> > > +    MemoryRegion *ram = g_new(MemoryRegion, 1);
> > > +    sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
> > > +    PnvSystem *sys = &pnv_machine->sys;
> > > +    long fw_size;
> > > +    char *filename;
> > > +    void *fdt;
> > > +    int i;
> > > +
> > > +    /* init CPUs */
> > > +    if (cpu_model == NULL) {
> > > +        cpu_model = kvm_enabled() ? "host" : "POWER8";
> > > +    }
> > > +
> > > +    for (i = 0; i < smp_cpus; i++) {
> > > +        cpu = cpu_ppc_init(cpu_model);
> > > +        if (cpu == NULL) {
> > > +            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> > > +            exit(1);
> > > +        }
> > > +        env = &cpu->env;
> > > +
> > > +        /* Set time-base frequency to 512 MHz */
> > > +        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> > > +
> > > +        /* MSR[IP] doesn't exist nowadays */
> > > +        env->msr_mask &= ~(1 << 6);
> > > +
> > > +        qemu_register_reset(powernv_cpu_reset, cpu);
> > > +    }
> > > +
> > > +    /* allocate RAM */
> > > +    memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", ram_size);
> > > +    memory_region_add_subregion(sysmem, 0, ram);
> > > +
> > > +    /* XXX We should decide how many chips to create based on #cores and
> > > +     * Venice vs. Murano vs. Naples chip type etc..., for now, just create
> > > +     * one chip. Also creation of the CPUs should be done per-chip
> > > +     */
> > > +    sys->num_chips = 1;
> > > +
> > > +    /* Create only one PHB for now until I figure out what's wrong
> > > +     * when I create more (resource assignment failures in Linux)
> > > +     */
> > > +    pnv_create_chip(sys, 0);
> > > +
> > > +    if (bios_name == NULL) {
> > > +        bios_name = FW_FILE_NAME;
> > > +    }
> > > +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
> > > +    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
> > > +    if (fw_size < 0) {
> > > +        hw_error("qemu: could not load OPAL '%s'\n", filename);
> > > +        exit(1);
> > > +    }
> > > +    g_free(filename);
> > > +
> > > +
> > > +    if (kernel_filename == NULL) {
> > > +        kernel_filename = KERNEL_FILE_NAME;
> > > +    }
> > > +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS,
> > > kernel_filename);
> > 
> > > The commit with the Opal image should go in before this, no?
> 
> Now this is a bit of an open discussion at the moment :-)
> 
> The way OPAL is built on OPP machines today is by essentially building
> a complete flash image which contains HostBoot, OPAL and the petitboot-
> based bootloader which contains a Linux kernel etc...
> 
> We could create a target without HB and with a slimmed down Linux but
> it would still probably be about 12MB I reckon, if not more. It feels a
> bit "big" to ship as a binary as part of qemu...
> 
> We would also have to add code to qemu to "find" OPAL in that image,
> and then add a model for the flash controller.
> 
> The other option is to bundle just OPAL itself. However that means
> you can't go anywhere without a -kernel argument, which would then
> be either a petitboot-based bootloader or your actual target kernel.

Hm, ok.  But in order for this to be usable, we need some way to get a
suitable image.  So medium term, I think it makes sense to include
both opal and PetitBoot, so you can boot the guest like a real
machine.

However, including only Opal and requiring -kernel would be a
reasonable interim step.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure Benjamin Herrenschmidt
@ 2015-11-24  3:20   ` David Gibson
  2015-11-24  8:49     ` Benjamin Herrenschmidt
  2015-11-24  8:55     ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  3:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 21451 bytes --]

On Wed, Nov 11, 2015 at 11:27:40AM +1100, Benjamin Herrenschmidt wrote:
> XSCOM is an interface to a sideband bus provided by the POWER8 chip
> pervasive unit, which gives access to a number of facilities in the
> chip that are needed by the OPAL firmware and to a lesser extent,
> Linux. This is among others how the PCI Host bridges get configured
> at boot or how the LPC bus is accessed.
> 
> This provides a simple bus and device type for devices sitting on
> XSCOM along with some facilities to optionally generate corresponding
> device-tree nodes
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  hw/ppc/Makefile.objs       |   2 +-
>  hw/ppc/pnv.c               |  11 ++
>  hw/ppc/pnv_xscom.c         | 415 +++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/pnv.h       |   2 +
>  include/hw/ppc/pnv_xscom.h |  73 ++++++++
>  5 files changed, 502 insertions(+), 1 deletion(-)
>  create mode 100644 hw/ppc/pnv_xscom.c
>  create mode 100644 include/hw/ppc/pnv_xscom.h
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index cd74c96..2a7dd42 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -5,7 +5,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
>  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>  # IBM PowerNV
> -obj-$(CONFIG_POWERNV) += pnv.o
> +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index e68c9b1..2eac877 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -41,6 +41,7 @@
>  #include "hw/ppc/ppc.h"
>  #include "hw/ppc/pnv.h"
>  #include "hw/loader.h"
> +#include "hw/ppc/pnv_xscom.h"
>  
>  #include "exec/address-spaces.h"
>  #include "qemu/config-file.h"
> @@ -310,6 +311,7 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
>      uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
>      char *buf;
>      const char plat_compat[] = "qemu,powernv\0ibm,powernv";
> +    unsigned int i;
>  
>      fdt = g_malloc0(FDT_MAX_SIZE);
>      _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
> @@ -367,6 +369,12 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
>      /* Memory */
>      _FDT((powernv_populate_memory(fdt)));
>  
> +    /* For each chip */
> +    for (i = 0; i < sys->num_chips; i++) {
> +        /* Populate XSCOM */
> +        _FDT((xscom_populate_fdt(sys->chips[i].xscom, fdt)));
> +    }
> +
>      /* /hypervisor node */
>      if (kvm_enabled()) {
>          uint8_t hypercall[16];
> @@ -424,6 +432,9 @@ static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no)
>  
>      /* XXX Improve chip numbering to better match HW */
>      chip->chip_id = chip_no;
> +
> +    /* Set up XSCOM bus */
> +    xscom_create(chip);

Hmm.. I'm thinking it probably makes sense to unify the representation
of "chip" and "xscom" since there's a 1:1 correspondence.  To be
QOMishly correct, I think the right way would be a SysBusDevice for
each chip, implementing the xscom MMIOs, then an array of link properties
under the Machine object to find the right scoms by chip id.

>  }
>  
>  static void ppc_powernv_init(MachineState *machine)
> diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
> new file mode 100644
> index 0000000..bb35422
> --- /dev/null
> +++ b/hw/ppc/pnv_xscom.c
> @@ -0,0 +1,415 @@
> +
> +/*
> + * QEMU PowerNV XSCOM bus definitions
> + *
> + * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
> + * Based on the s390 virtio bus code:
> + * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/* TODO: Add some infrastructure for "random stuff" and FIRs that
> + * various units might want to deal with without creating actual
> + * XSCOM devices.
> + *
> + * For example, HB LPC XSCOM in the PIBAM
> + */
> +#include "hw/hw.h"
> +#include "sysemu/sysemu.h"
> +#include "hw/boards.h"
> +#include "monitor/monitor.h"
> +#include "hw/loader.h"
> +#include "elf.h"
> +#include "hw/sysbus.h"
> +#include "sysemu/kvm.h"
> +#include "sysemu/device_tree.h"
> +#include "kvm_ppc.h"
> +
> +#include "hw/ppc/pnv_xscom.h"
> +
> +#include <libfdt.h>
> +
> +#define TYPE_XSCOM "xscom"
> +#define XSCOM(obj) OBJECT_CHECK(XScomState, (obj), TYPE_XSCOM)
> +
> +#define XSCOM_SIZE        0x800000000ull
> +#define XSCOM_BASE(chip)  (0x3fc0000000000ull + ((uint64_t)(chip)) * XSCOM_SIZE)
> +
> +//#define TRACE_SCOMS
> +
> +typedef struct XScomState {
> +    /*< private >*/
> +    SysBusDevice parent_obj;
> +    /*< public >*/
> +
> +    MemoryRegion mem;
> +    int32_t chip_id;
> +    XScomBus *bus;
> +} XScomState;
> +
> +static uint32_t xscom_to_pcb_addr(uint64_t addr)
> +{
> +        addr &= (XSCOM_SIZE - 1);
> +        return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf);

Wow, that's a pretty weird address transform.

> +}
> +
> +static void xscom_complete(uint64_t hmer_bits)
> +{
> +    CPUState *cs = current_cpu;
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +
> +    cpu_synchronize_state(cs);
> +    env->spr[SPR_HMER] |= hmer_bits;
> +
> +    /* XXX Need a CPU helper to set HMER, also handle generation
> +     * of HMIs

Not sure what you're referring to here.  Nothing more should be needed
to set the HMER - because you've called cpu_synchronize_state() it
will be marked dirty and flushed back to KVM before re-entry.

> +     */
> +}
> +
> +static XScomDevice *xscom_find_target(XScomState *s, uint32_t pcb_addr, uint32_t *range)
> +{
> +    BusChild *bc;
> +
> +    QTAILQ_FOREACH(bc, &s->bus->bus.children, sibling) {
> +        DeviceState *qd = bc->child;
> +        XScomDevice *xd = XSCOM_DEVICE(qd);
> +        unsigned int i;
> +
> +        for (i = 0; i < MAX_XSCOM_RANGES; i++) {
> +            if (xd->ranges[i].addr <= pcb_addr &&
> +                (xd->ranges[i].addr + xd->ranges[i].size) > pcb_addr) {
> +                *range = i;
> +                return xd;
> +            }
> +        }
> +    }

I'm wondering if it makes sense to construct a custom AddressSpace and
use the existing address space lookup logic from exec.c and memory.c
rather than implementing your own.

> +    return NULL;
> +}
> +
> +static bool xscom_dispatch_read(XScomState *s, uint32_t pcb_addr, uint64_t *out_val)
> +{
> +    uint32_t range, offset;
> +    struct XScomDevice *xd = xscom_find_target(s, pcb_addr, &range);
> +    XScomDeviceClass *xc;
> +
> +    if (!xd) {
> +        return false;
> +    }
> +    xc = XSCOM_DEVICE_GET_CLASS(xd);
> +    if (!xc->read) {
> +        return false;
> +    }
> +    offset = pcb_addr - xd->ranges[range].addr;
> +    return xc->read(xd, range, offset, out_val);
> +}
> +
> +static bool xscom_dispatch_write(XScomState *s, uint32_t pcb_addr, uint64_t val)
> +{
> +    uint32_t range, offset;
> +    struct XScomDevice *xd = xscom_find_target(s, pcb_addr, &range);
> +    XScomDeviceClass *xc;
> +
> +    if (!xd) {
> +        return false;
> +    }
> +    xc = XSCOM_DEVICE_GET_CLASS(xd);
> +    if (!xc->write) {
> +        return false;
> +    }
> +    offset = pcb_addr - xd->ranges[range].addr;
> +    return xc->write(xd, range, offset, val);
> +}
> +
> +static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width)
> +{
> +    XScomState *s = opaque;
> +    uint32_t pcba = xscom_to_pcb_addr(addr);
> +    uint64_t val;
> +
> +    assert(width == 8);
> +
> +#ifdef TRACE_SCOMS
> +    printf("XSCOM_READ(0x%x:0x%x)\n", s->chip_id, pcba);
> +#endif

You should be using the built-in trace infrastructure here - it's
really not that much of a pain.  Put
	trace_xscom_read(s->chip_id, pcba)
here, put a suitable format in trace-events, and run ./configure
--enable-trace-backends=stderr

> +
> +    /* Handle some SCOMs here before dispatch */
> +    switch(pcba) {
> +    case 0xf000f:
> +        val = 0x221EF04980000000;
> +        break;
> +    case 0x1010c00:     /* PIBAM FIR */
> +    case 0x1010c03:     /* PIBAM FIR MASK */
> +    case 0x2020007:     /* ADU stuff */
> +    case 0x2020009:     /* ADU stuff */
> +    case 0x202000f:     /* ADU stuff */
> +        val = 0;
> +        break;
> +    case 0x2013f00:     /* PBA stuff */
> +    case 0x2013f01:     /* PBA stuff */
> +    case 0x2013f02:     /* PBA stuff */
> +    case 0x2013f03:     /* PBA stuff */
> +    case 0x2013f04:     /* PBA stuff */
> +    case 0x2013f05:     /* PBA stuff */
> +    case 0x2013f06:     /* PBA stuff */
> +    case 0x2013f07:     /* PBA stuff */
> +        val = 0;
> +        break;
> +    default:
> +        if (!xscom_dispatch_read(s, pcba, &val)) {
> +            xscom_complete(HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
> +            return 0;
> +        }
> +    }
> +
> +    xscom_complete(HMER_XSCOM_DONE);
> +    return val;
> +}
> +
> +static void xscom_write(void *opaque, hwaddr addr, uint64_t val,
> +                        unsigned width)
> +{
> +    XScomState *s = opaque;
> +    uint32_t pcba = xscom_to_pcb_addr(addr);
> +
> +    assert(width == 8);
> +
> +#ifdef TRACE_SCOMS
> +    printf("XSCOM_WRITE(0x%x:0x%x, 0x%016llx)\n",
> +           s->chip_id, pcba, (unsigned long long)val);
> +#endif
> +    /* Handle some SCOMs here before dispatch */
> +    switch(pcba) {
> +        /* We ignore writes to these */
> +    case 0xf000f:       /* chip id is RO */
> +    case 0x1010c00:     /* PIBAM FIR */
> +    case 0x1010c01:     /* PIBAM FIR */
> +    case 0x1010c02:     /* PIBAM FIR */
> +    case 0x1010c03:     /* PIBAM FIR MASK */
> +    case 0x1010c04:     /* PIBAM FIR MASK */
> +    case 0x1010c05:     /* PIBAM FIR MASK */
> +    case 0x2020007:     /* ADU stuff */
> +    case 0x2020009:     /* ADU stuff */
> +    case 0x202000f:     /* ADU stuff */
> +        break;
> +    default:
> +        if (!xscom_dispatch_write(s, pcba, val)) {
> +            xscom_complete(HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
> +            return;
> +        }
> +    }
> +
> +    xscom_complete(HMER_XSCOM_DONE);
> +}
> +
> +static const MemoryRegionOps xscom_ops = {
> +    .read = xscom_read,
> +    .write = xscom_write,
> +    .valid.min_access_size = 8,
> +    .valid.max_access_size = 8,
> +    .impl.min_access_size = 8,
> +    .impl.max_access_size = 8,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +};
> +
> +static int xscom_init(SysBusDevice *dev)
> +{
> +    XScomState *s = XSCOM(dev);
> +
> +    s->chip_id = -1;
> +    return 0;
> +}
> +
> +static void xscom_realize(DeviceState *dev, Error **errp)
> +{
> +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
> +    XScomState *s = XSCOM(dev);
> +    char *name;
> +
> +    assert(s->chip_id >= 0);

So, this assert could be tripped if the user explicitly instantiated
an xscom device which they probably shouldn't do, but could.  So, it
probably makes sense to use error_setg() here instead of assert().

> +    name = g_strdup_printf("xscom-%x", s->chip_id);
> +    memory_region_init_io(&s->mem, OBJECT(s), &xscom_ops, s, name, XSCOM_SIZE);
> +    sysbus_init_mmio(sbd, &s->mem);
> +    sysbus_mmio_map(sbd, 0, XSCOM_BASE(s->chip_id));
> +}
> +
> +static Property xscom_properties[] = {
> +        DEFINE_PROP_INT32("chip_id", XScomState, chip_id, 0),
> +        DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void xscom_class_init(ObjectClass *klass, void *data)
> +{
> +    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->props = xscom_properties;
> +    dc->realize = xscom_realize;
> +    k->init = xscom_init;
> +}
> +
> +static const TypeInfo xscom_info = {
> +    .name          = TYPE_XSCOM,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(XScomState),
> +    .class_init    = xscom_class_init,
> +};
> +
> +static void xscom_bus_class_init(ObjectClass *klass, void *data)
> +{
> +}
> +
> +static const TypeInfo xscom_bus_info = {
> +    .name = TYPE_XSCOM_BUS,
> +    .parent = TYPE_BUS,
> +    .class_init = xscom_bus_class_init,
> +    .instance_size = sizeof(XScomBus),
> +};
> +
> +void xscom_create(PnvChip *chip)
> +{
> +    DeviceState *dev;
> +    XScomState *xdev;
> +    BusState *qbus;
> +    XScomBus *xb;
> +
> +    dev = qdev_create(NULL, TYPE_XSCOM);
> +    qdev_prop_set_uint32(dev, "chip_id", chip->chip_id);
> +    qdev_init_nofail(dev);
> +
> +    /* Create bus on bridge device */
> +    qbus = qbus_create(TYPE_XSCOM_BUS, dev, "xscom");
> +    xb = DO_UPCAST(XScomBus, bus, qbus);
> +    xb->chip_id = chip->chip_id;
> +    xdev = XSCOM(dev);
> +    xdev->bus = xb;
> +    chip->xscom = xb;

I believe the qbus_create() is usually invoked by the bridge's init
function, rather than externally.

> +}
> +
> +#define _FDT(exp) \
> +    do { \
> +        int ret = (exp);                                           \
> +        if (ret < 0) {                                             \
> +            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
> +                    #exp, fdt_strerror(ret));                      \
> +            exit(1);                                               \
> +        }                                                          \
> +    } while (0)
> +
> +
> +int xscom_populate_fdt(XScomBus *xb, void *fdt)
> +{
> +    BusChild *bc;
> +    char *name;
> +    const char compat[] = "ibm,power8-xscom\0ibm,xscom";
> +    uint64_t reg[] = { cpu_to_be64(XSCOM_BASE(xb->chip_id)),
> +                       cpu_to_be64(XSCOM_SIZE) };
> +
> +    name = g_strdup_printf("xscom@%llx", (unsigned long long)be64_to_cpu(reg[0]));
> +    _FDT((fdt_begin_node(fdt, name)));
> +    g_free(name);
> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", xb->chip_id)));
> +    _FDT((fdt_property_cell(fdt, "#address-cells", 1)));
> +    _FDT((fdt_property_cell(fdt, "#size-cells", 1)));
> +    _FDT((fdt_property(fdt, "reg", reg, sizeof(reg))));
> +    _FDT((fdt_property(fdt, "compatible", compat, sizeof(compat)))); 
> +    _FDT((fdt_property(fdt, "scom-controller", NULL, 0))); 
> +
> +    QTAILQ_FOREACH(bc, &xb->bus.children, sibling) {
> +        DeviceState *qd = bc->child;
> +        XScomDevice *xd = XSCOM_DEVICE(qd);
> +        XScomDeviceClass *xc = XSCOM_DEVICE_GET_CLASS(xd);
> +        uint32_t reg[MAX_XSCOM_RANGES * 2];
> +        unsigned int i, sz = 0;
> +        void *cp, *p;
> +
> +        /* Some XSCOM slaves may not be represented in the DT */
> +        if (!xc->dt_name) {
> +            continue;
> +        }
> +        name = g_strdup_printf("%s@%x", xc->dt_name, xd->ranges[0].addr);
> +        _FDT((fdt_begin_node(fdt, name)));
> +        g_free(name);
> +        for (i = 0; i < MAX_XSCOM_RANGES; i++) {
> +            if (xd->ranges[i].size == 0) {
> +                break;
> +            }
> +            reg[sz++] = cpu_to_be32(xd->ranges[i].addr);
> +            reg[sz++] = cpu_to_be32(xd->ranges[i].size);
> +        }
> +        _FDT((fdt_property(fdt, "reg", reg, sz * 4)));
> +        if (xc->devnode) {
> +            _FDT((xc->devnode(xd, fdt)));
> +        }
> +#define MAX_COMPATIBLE_PROP     1024
> +        cp = p = g_malloc0(MAX_COMPATIBLE_PROP);
> +        i = 0;
> +        while((p - cp) < MAX_COMPATIBLE_PROP) {
> +            int l;
> +            if (xc->dt_compatible[i] == NULL) {
> +                break;
> +            }
> +            l = strlen(xc->dt_compatible[i]);
> +            if (l >= (MAX_COMPATIBLE_PROP - i)) {
> +                break;
> +            }
> +            strcpy(p, xc->dt_compatible[i++]);
> +            p += l + 1;
> +        }
> +        _FDT((fdt_property(fdt, "compatible", cp, p - cp)));
> +        _FDT((fdt_end_node(fdt)));
> +    }
> +
> +    _FDT((fdt_end_node(fdt)));
> +
> +    return 0;
> +}
> +
> +static int xscom_qdev_init(DeviceState *qdev)
> +{
> +    XScomDevice *xdev = (XScomDevice *)qdev;
> +    XScomDeviceClass *xc = XSCOM_DEVICE_GET_CLASS(xdev);
> +
> +    if (xc->init) {
> +        return xc->init(xdev);
> +    }
> +    return 0;
> +}
> +
> +static void xscom_device_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *k = DEVICE_CLASS(klass);
> +    k->init = xscom_qdev_init;
> +    k->bus_type = TYPE_XSCOM_BUS;
> +}
> +
> +static const TypeInfo xscom_dev_info = {
> +    .name = TYPE_XSCOM_DEVICE,
> +    .parent = TYPE_DEVICE,
> +    .instance_size = sizeof(XScomDevice),
> +    .abstract = true,
> +    .class_size = sizeof(XScomDeviceClass),
> +    .class_init = xscom_device_class_init,
> +};
> +
> +static void xscom_register_types(void)
> +{
> +    type_register_static(&xscom_info);
> +    type_register_static(&xscom_bus_info);
> +    type_register_static(&xscom_dev_info);
> +}
> +
> +type_init(xscom_register_types)
> +
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index 9a48c16..cb157eb 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -20,10 +20,12 @@
>   */
>  
>  #include "hw/hw.h"
> +typedef struct XScomBus XScomBus;
>  
>  /* Should we turn that into a QOjb of some sort ? */
>  typedef struct PnvChip {
>      uint32_t         chip_id;
> +    XScomBus         *xscom;
>  } PnvChip;
>  
>  typedef struct PnvSystem {
> diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
> new file mode 100644
> index 0000000..99de078
> --- /dev/null
> +++ b/include/hw/ppc/pnv_xscom.h
> @@ -0,0 +1,73 @@
> +#ifndef _HW_XSCOM_H
> +#define _HW_XSCOM_H
> +/*
> + * QEMU PowerNV XSCOM bus definitions
> + *
> + * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au>
> + * Based on the s390 virtio bus definitions:
> + * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <hw/ppc/pnv.h>
> +
> +#define TYPE_XSCOM_DEVICE "xscom-device"
> +#define XSCOM_DEVICE(obj) \
> +     OBJECT_CHECK(XScomDevice, (obj), TYPE_XSCOM_DEVICE)
> +#define XSCOM_DEVICE_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(XScomDeviceClass, (klass), TYPE_XSCOM_DEVICE)
> +#define XSCOM_DEVICE_GET_CLASS(obj) \
> +     OBJECT_GET_CLASS(XScomDeviceClass, (obj), TYPE_XSCOM_DEVICE)
> +
> +#define TYPE_XSCOM_BUS "xscom-bus"
> +#define XSCOM_BUS(obj) OBJECT_CHECK(XScomBus, (obj), TYPE_XSCOM_BUS)
> +
> +typedef struct XScomDevice XScomDevice;
> +typedef struct XScomBus XScomBus;
> +
> +typedef struct XScomDeviceClass {
> +    DeviceClass parent_class;
> +
> +    const char *dt_name;
> +    const char **dt_compatible;
> +    int (*init)(XScomDevice *dev);
> +    int (*devnode)(XScomDevice *dev, void *fdt);
> +
> +    /* Actual XScom accesses */
> +    bool (*read)(XScomDevice *dev, uint32_t range, uint32_t offset, uint64_t *out_val);
> +    bool (*write)(XScomDevice *dev, uint32_t range, uint32_t offset, uint64_t val);
> +} XScomDeviceClass;
> +
> +typedef struct XScomRange {
> +    uint32_t addr;
> +    uint32_t size;
> +} XScomRange;
> +
> +struct XScomDevice {
> +    DeviceState qdev;
> +#define MAX_XSCOM_RANGES	4
> +    struct XScomRange ranges[MAX_XSCOM_RANGES];
> +};
> +
> +struct XScomBus {
> +    BusState bus;
> +    uint32_t chip_id;
> +};
> +
> +extern void xscom_create(PnvChip *chip);
> +extern int xscom_populate_fdt(XScomBus *xscom, void *fdt);
> +
> +
> +#endif /* _HW_XSCOM_H */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 28/77] ppc/xics: Rename existing XICS classe to XICS_SPAPR
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 28/77] ppc/xics: Rename existing XICS classe to XICS_SPAPR Benjamin Herrenschmidt
@ 2015-11-24  3:25   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  3:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 12032 bytes --]

On Wed, Nov 11, 2015 at 11:27:41AM +1100, Benjamin Herrenschmidt wrote:
> The common class doesn't change, the KVM one is SPAPR specific, this
> is a preliminary change to make it easier to support "native" XICS.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

So changing the variable names is fine, but I believe actually
changing the type name from "xics" to "xics-spapr" will break
migration for existing sPAPR guests.  There are some ways to wangle
that, but I think it's going to be much simpler to leave the existing
name as "xics" and add a new "xics-native".

You can still change the name of the TYPE_XICS macro to
TYPE_XICS_SPAPR if you want.

> ---
>  hw/intc/xics.c        | 26 +++++++++++++-------------
>  hw/intc/xics_kvm.c    |  6 +++---
>  hw/ppc/spapr.c        |  6 +++---
>  hw/ppc/spapr_events.c |  2 +-
>  hw/ppc/spapr_pci.c    |  8 ++++----
>  hw/ppc/spapr_vio.c    |  2 +-
>  include/hw/ppc/xics.h | 25 +++++++++++++------------
>  7 files changed, 38 insertions(+), 37 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index 9ff5796..bcea1f0 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -711,7 +711,7 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
>      return -1;
>  }
>  
> -int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
> +int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
>  {
>      ICSState *ics = &icp->ics[src];
>      int irq;
> @@ -742,7 +742,7 @@ int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
>   * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
>   * If align==true, aligns the first IRQ number to num.
>   */
> -int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
> +int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
>  {
>      int i, first = -1;
>      ICSState *ics = &icp->ics[src];
> @@ -787,7 +787,7 @@ static void ics_free(ICSState *ics, int srcno, int num)
>      }
>  }
>  
> -void xics_free(XICSState *icp, int irq, int num)
> +void xics_spapr_free(XICSState *icp, int irq, int num)
>  {
>      int src = xics_find_source(icp, irq);
>  
> @@ -1006,7 +1006,7 @@ static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
>      }
>  }
>  
> -static void xics_realize(DeviceState *dev, Error **errp)
> +static void xics_spapr_realize(DeviceState *dev, Error **errp)
>  {
>      XICSState *icp = XICS(dev);
>      Error *error = NULL;
> @@ -1045,7 +1045,7 @@ static void xics_realize(DeviceState *dev, Error **errp)
>      }
>  }
>  
> -static void xics_initfn(Object *obj)
> +static void xics_spapr_initfn(Object *obj)
>  {
>      XICSState *xics = XICS(obj);
>  
> @@ -1054,29 +1054,29 @@ static void xics_initfn(Object *obj)
>      xics->ics->icp = xics;
>  }
>  
> -static void xics_class_init(ObjectClass *oc, void *data)
> +static void xics_spapr_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
> -    XICSStateClass *xsc = XICS_CLASS(oc);
> +    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
>  
> -    dc->realize = xics_realize;
> +    dc->realize = xics_spapr_realize;
>      xsc->set_nr_irqs = xics_set_nr_irqs;
>      xsc->set_nr_servers = xics_set_nr_servers;
>  }
>  
> -static const TypeInfo xics_info = {
> -    .name          = TYPE_XICS,
> +static const TypeInfo xics_spapr_info = {
> +    .name          = TYPE_XICS_SPAPR,
>      .parent        = TYPE_XICS_COMMON,
>      .instance_size = sizeof(XICSState),
>      .class_size = sizeof(XICSStateClass),
> -    .class_init    = xics_class_init,
> -    .instance_init = xics_initfn,
> +    .class_init    = xics_spapr_class_init,
> +    .instance_init = xics_spapr_initfn,
>  };
>  
>  static void xics_register_types(void)
>  {
>      type_register_static(&xics_common_info);
> -    type_register_static(&xics_info);
> +    type_register_static(&xics_spapr_info);
>      type_register_static(&ics_info);
>      type_register_static(&icp_info);
>  }
> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
> index d58729c..03ae801 100644
> --- a/hw/intc/xics_kvm.c
> +++ b/hw/intc/xics_kvm.c
> @@ -490,8 +490,8 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
>      xsc->set_nr_servers = xics_kvm_set_nr_servers;
>  }
>  
> -static const TypeInfo xics_kvm_info = {
> -    .name          = TYPE_KVM_XICS,
> +static const TypeInfo xics_spapr_kvm_info = {
> +    .name          = TYPE_XICS_SPAPR_KVM,
>      .parent        = TYPE_XICS_COMMON,
>      .instance_size = sizeof(KVMXICSState),
>      .class_init    = xics_kvm_class_init,
> @@ -500,7 +500,7 @@ static const TypeInfo xics_kvm_info = {
>  
>  static void xics_kvm_register_types(void)
>  {
> -    type_register_static(&xics_kvm_info);
> +    type_register_static(&xics_spapr_kvm_info);
>      type_register_static(&ics_kvm_info);
>      type_register_static(&icp_kvm_info);
>  }
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 610629e..bf94426 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -119,7 +119,7 @@ static XICSState *xics_system_init(MachineState *machine,
>          Error *err = NULL;
>  
>          if (machine_kernel_irqchip_allowed(machine)) {
> -            icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err);
> +            icp = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, &err);
>          }
>          if (machine_kernel_irqchip_required(machine) && !icp) {
>              error_report("kernel_irqchip requested but unavailable: %s",
> @@ -128,7 +128,7 @@ static XICSState *xics_system_init(MachineState *machine,
>      }
>  
>      if (!icp) {
> -        icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, &error_abort);
> +        icp = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, &error_abort);
>      }
>  
>      return icp;
> @@ -1768,7 +1768,7 @@ static void ppc_spapr_init(MachineState *machine)
>      spapr->icp = xics_system_init(machine,
>                                    DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
>                                                 smp_threads),
> -                                  XICS_IRQS);
> +                                  XICS_IRQS_SPAPR);
>  
>      if (smc->dr_lmb_enabled) {
>          spapr_validate_node_memory(machine);
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 744ea62..3b3663e 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -587,7 +587,7 @@ out_no_events:
>  void spapr_events_init(sPAPRMachineState *spapr)
>  {
>      QTAILQ_INIT(&spapr->pending_events);
> -    spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false);
> +    spapr->check_exception_irq = xics_spapr_alloc(spapr->icp, 0, 0, false);
>      spapr->epow_notifier.notify = spapr_powerdown_req;
>      qemu_register_powerdown_notifier(&spapr->epow_notifier);
>      spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 55fa8db..8b613a8 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -313,7 +313,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>              return;
>          }
>  
> -        xics_free(spapr->icp, msi->first_irq, msi->num);
> +        xics_spapr_free(spapr->icp, msi->first_irq, msi->num);
>          if (msi_present(pdev)) {
>              spapr_msi_setmsg(pdev, 0, false, 0, num);
>          }
> @@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>      }
>  
>      /* Allocate MSIs */
> -    irq = xics_alloc_block(spapr->icp, 0, req_num, false,
> +    irq = xics_spapr_alloc_block(spapr->icp, 0, req_num, false,
>                             ret_intr_type == RTAS_TYPE_MSI);
>      if (!irq) {
>          error_report("Cannot allocate MSIs for device %x", config_addr);
> @@ -1360,7 +1360,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
>      for (i = 0; i < PCI_NUM_PINS; i++) {
>          uint32_t irq;
>  
> -        irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
> +        irq = xics_spapr_alloc_block(spapr->icp, 0, 1, true, false);
>          if (!irq) {
>              error_setg(errp, "spapr_allocate_lsi failed");
>              return;
> @@ -1717,7 +1717,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>      _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
>      _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
>      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
> -    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
> +    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
>  
>      /* Build the interrupt-map, this must matches what is done
>       * in pci_spapr_map_irq
> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
> index c51eb8e..7b718cc 100644
> --- a/hw/ppc/spapr_vio.c
> +++ b/hw/ppc/spapr_vio.c
> @@ -462,7 +462,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
>          dev->qdev.id = id;
>      }
>  
> -    dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false);
> +    dev->irq = xics_spapr_alloc(spapr->icp, 0, dev->irq, false);
>      if (!dev->irq) {
>          error_setg(errp, "can't allocate IRQ");
>          return;
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 355a966..8d33dfa 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -32,20 +32,20 @@
>  #define TYPE_XICS_COMMON "xics-common"
>  #define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON)
>  
> -#define TYPE_XICS "xics"
> -#define XICS(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS)
> +#define TYPE_XICS_SPAPR "xics-spapr"
> +#define XICS(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR)
>  
> -#define TYPE_KVM_XICS "xics-kvm"
> -#define KVM_XICS(obj) OBJECT_CHECK(KVMXICSState, (obj), TYPE_KVM_XICS)
> +#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
> +#define KVM_XICS(obj) OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
>  
>  #define XICS_COMMON_CLASS(klass) \
>       OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
> -#define XICS_CLASS(klass) \
> -     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS)
> +#define XICS_SPAPR_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR)
>  #define XICS_COMMON_GET_CLASS(obj) \
>       OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
> -#define XICS_GET_CLASS(obj) \
> -     OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS)
> +#define XICS_SPAPR_GET_CLASS(obj) \
> +     OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
>  
>  #define XICS_IPI        0x2
>  #define XICS_BUID       0x1
> @@ -157,13 +157,14 @@ struct ICSIRQState {
>      uint8_t flags;
>  };
>  
> -#define XICS_IRQS               1024
> +#define XICS_IRQS_SPAPR               1024
>  
>  qemu_irq xics_get_qirq(XICSState *icp, int irq);
>  void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
> -int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
> -int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
> -void xics_free(XICSState *icp, int irq, int num);
> +
> +int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
> +int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
> +void xics_spapr_free(XICSState *icp, int irq, int num);
>  
>  void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 29/77] ppc/xics: Move SPAPR specific code to a separate file
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 29/77] ppc/xics: Move SPAPR specific code to a separate file Benjamin Herrenschmidt
@ 2015-11-24  3:32   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  3:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 31670 bytes --]

On Wed, Nov 11, 2015 at 11:27:42AM +1100, Benjamin Herrenschmidt wrote:
> Leave the core ICP/ICS logic in xics.c and move the top level
> class wrapper, hypercall and RTAS handlers to xics_spapr.c
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Concept looks fine.

> ---
>  default-configs/ppc64-softmmu.mak |   1 +
>  hw/intc/Makefile.objs             |   1 +
>  hw/intc/xics.c                    | 390 ++----------------------------------
>  hw/intc/xics_spapr.c              | 401 ++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/xics.h             |  23 +++
>  5 files changed, 437 insertions(+), 379 deletions(-)
>  create mode 100644 hw/intc/xics_spapr.c
> 
> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index 96574c8..516a6e2 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -50,6 +50,7 @@ CONFIG_ETSEC=y
>  CONFIG_LIBDECNUMBER=y
>  # For pSeries
>  CONFIG_XICS=$(CONFIG_PSERIES)
> +CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>  CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
>  # For PReP
>  CONFIG_MC146818RTC=y
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index 004b0c2..e24cb03 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -26,6 +26,7 @@ obj-$(CONFIG_OMAP) += omap_intc.o
>  obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
>  obj-$(CONFIG_SH4) += sh_intc.o
>  obj-$(CONFIG_XICS) += xics.o
> +obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index bcea1f0..38cacd8 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -33,7 +33,7 @@
>  #include "qemu/error-report.h"
>  #include "qapi/visitor.h"
>  
> -static int get_cpu_index_by_dt_id(int cpu_dt_id)
> +int get_cpu_index_by_dt_id(int cpu_dt_id)
>  {
>      PowerPCCPU *cpu = ppc_get_vcpu_by_dt_id(cpu_dt_id);
>  
> @@ -224,7 +224,7 @@ static void icp_resend(XICSState *icp, int server)
>      ics_resend(icp->ics);
>  }
>  
> -static void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
> +void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
>  {
>      ICPState *ss = icp->ss + server;
>      uint8_t old_cppr;
> @@ -248,7 +248,7 @@ static void icp_set_cppr(XICSState *icp, int server, uint8_t cppr)
>      }
>  }
>  
> -static void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
> +void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
>  {
>      ICPState *ss = icp->ss + server;
>  
> @@ -258,7 +258,7 @@ static void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr)
>      }
>  }
>  
> -static uint32_t icp_accept(ICPState *ss)
> +uint32_t icp_accept(ICPState *ss)
>  {
>      uint32_t xirr = ss->xirr;
>  
> @@ -271,7 +271,7 @@ static uint32_t icp_accept(ICPState *ss)
>      return xirr;
>  }
>  
> -static void icp_eoi(XICSState *icp, int server, uint32_t xirr)
> +void icp_eoi(XICSState *icp, int server, uint32_t xirr)
>  {
>      ICPState *ss = icp->ss + server;
>  
> @@ -372,12 +372,6 @@ static const TypeInfo icp_info = {
>  /*
>   * ICS: Source layer
>   */
> -static int ics_valid_irq(ICSState *ics, uint32_t nr)
> -{
> -    return (nr >= ics->offset)
> -        && (nr < (ics->offset + ics->nr_irqs));
> -}
> -
>  static void resend_msi(ICSState *ics, int srcno)
>  {
>      ICSIRQState *irq = ics->irqs + srcno;
> @@ -462,8 +456,8 @@ static void write_xive_lsi(ICSState *ics, int srcno)
>      resend_lsi(ics, srcno);
>  }
>  
> -static void ics_write_xive(ICSState *ics, int nr, int server,
> -                           uint8_t priority, uint8_t saved_priority)
> +void ics_write_xive(ICSState *ics, int nr, int server,
> +		    uint8_t priority, uint8_t saved_priority)
>  {
>      int srcno = nr - ics->offset;
>      ICSIRQState *irq = ics->irqs + srcno;
> @@ -640,7 +634,7 @@ static const TypeInfo ics_info = {
>  /*
>   * Exported functions
>   */
> -static int xics_find_source(XICSState *icp, int irq)
> +int xics_find_source(XICSState *icp, int irq)
>  {
>      int sources = 1;
>      int src;
> @@ -668,7 +662,7 @@ qemu_irq xics_get_qirq(XICSState *icp, int irq)
>      return NULL;
>  }
>  
> -static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
> +void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
>  {
>      assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
>  
> @@ -687,310 +681,16 @@ void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
>      ics_set_irq_type(ics, irq - ics->offset, lsi);
>  }
>  
> -#define ICS_IRQ_FREE(ics, srcno)   \
> -    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
> -
> -static int ics_find_free_block(ICSState *ics, int num, int alignnum)
> -{
> -    int first, i;
> -
> -    for (first = 0; first < ics->nr_irqs; first += alignnum) {
> -        if (num > (ics->nr_irqs - first)) {
> -            return -1;
> -        }
> -        for (i = first; i < first + num; ++i) {
> -            if (!ICS_IRQ_FREE(ics, i)) {
> -                break;
> -            }
> -        }
> -        if (i == (first + num)) {
> -            return first;
> -        }
> -    }
> -
> -    return -1;
> -}
> -
> -int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
> -{
> -    ICSState *ics = &icp->ics[src];
> -    int irq;
> -
> -    if (irq_hint) {
> -        assert(src == xics_find_source(icp, irq_hint));
> -        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
> -            trace_xics_alloc_failed_hint(src, irq_hint);
> -            return -1;
> -        }
> -        irq = irq_hint;
> -    } else {
> -        irq = ics_find_free_block(ics, 1, 1);
> -        if (irq < 0) {
> -            trace_xics_alloc_failed_no_left(src);
> -            return -1;
> -        }
> -        irq += ics->offset;
> -    }
> -
> -    ics_set_irq_type(ics, irq - ics->offset, lsi);
> -    trace_xics_alloc(src, irq);
> -
> -    return irq;
> -}
> -
> -/*
> - * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
> - * If align==true, aligns the first IRQ number to num.
> - */
> -int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
> -{
> -    int i, first = -1;
> -    ICSState *ics = &icp->ics[src];
> -
> -    assert(src == 0);
> -    /*
> -     * MSIMesage::data is used for storing VIRQ so
> -     * it has to be aligned to num to support multiple
> -     * MSI vectors. MSI-X is not affected by this.
> -     * The hint is used for the first IRQ, the rest should
> -     * be allocated continuously.
> -     */
> -    if (align) {
> -        assert((num == 1) || (num == 2) || (num == 4) ||
> -               (num == 8) || (num == 16) || (num == 32));
> -        first = ics_find_free_block(ics, num, num);
> -    } else {
> -        first = ics_find_free_block(ics, num, 1);
> -    }
> -
> -    if (first >= 0) {
> -        for (i = first; i < first + num; ++i) {
> -            ics_set_irq_type(ics, i, lsi);
> -        }
> -    }
> -    first += ics->offset;
> -
> -    trace_xics_alloc_block(src, first, num, lsi, align);
> -
> -    return first;
> -}
> -
> -static void ics_free(ICSState *ics, int srcno, int num)
> -{
> -    int i;
> -
> -    for (i = srcno; i < srcno + num; ++i) {
> -        if (ICS_IRQ_FREE(ics, i)) {
> -            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
> -        }
> -        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
> -    }
> -}
> -
> -void xics_spapr_free(XICSState *icp, int irq, int num)
> -{
> -    int src = xics_find_source(icp, irq);
> -
> -    if (src >= 0) {
> -        ICSState *ics = &icp->ics[src];
> -
> -        /* FIXME: implement multiple sources */
> -        assert(src == 0);
> -
> -        trace_xics_ics_free(ics - icp->ics, irq, num);
> -        ics_free(ics, irq - ics->offset, num);
> -    }
> -}
> -
> -/*
> - * Guest interfaces
> - */
> -
> -static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                           target_ulong opcode, target_ulong *args)
> -{
> -    CPUState *cs = CPU(cpu);
> -    target_ulong cppr = args[0];
> -
> -    icp_set_cppr(spapr->icp, cs->cpu_index, cppr);
> -    return H_SUCCESS;
> -}
> -
> -static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                          target_ulong opcode, target_ulong *args)
> -{
> -    target_ulong server = get_cpu_index_by_dt_id(args[0]);
> -    target_ulong mfrr = args[1];
> -
> -    if (server >= spapr->icp->nr_servers) {
> -        return H_PARAMETER;
> -    }
> -
> -    icp_set_mfrr(spapr->icp, server, mfrr);
> -    return H_SUCCESS;
> -}
> -
> -static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                           target_ulong opcode, target_ulong *args)
> -{
> -    CPUState *cs = CPU(cpu);
> -    uint32_t xirr = icp_accept(spapr->icp->ss + cs->cpu_index);
> -
> -    args[0] = xirr;
> -    return H_SUCCESS;
> -}
> -
> -static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                             target_ulong opcode, target_ulong *args)
> -{
> -    CPUState *cs = CPU(cpu);
> -    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
> -    uint32_t xirr = icp_accept(ss);
> -
> -    args[0] = xirr;
> -    args[1] = cpu_get_host_ticks();
> -    return H_SUCCESS;
> -}
> -
> -static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                          target_ulong opcode, target_ulong *args)
> -{
> -    CPUState *cs = CPU(cpu);
> -    target_ulong xirr = args[0];
> -
> -    icp_eoi(spapr->icp, cs->cpu_index, xirr);
> -    return H_SUCCESS;
> -}
> -
> -static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                            target_ulong opcode, target_ulong *args)
> -{
> -    CPUState *cs = CPU(cpu);
> -    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
> -
> -    args[0] = ss->xirr;
> -    args[1] = ss->mfrr;
> -
> -    return H_SUCCESS;
> -}
> -
> -static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                          uint32_t token,
> -                          uint32_t nargs, target_ulong args,
> -                          uint32_t nret, target_ulong rets)
> -{
> -    ICSState *ics = spapr->icp->ics;
> -    uint32_t nr, server, priority;
> -
> -    if ((nargs != 3) || (nret != 1)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    nr = rtas_ld(args, 0);
> -    server = get_cpu_index_by_dt_id(rtas_ld(args, 1));
> -    priority = rtas_ld(args, 2);
> -
> -    if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
> -        || (priority > 0xff)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    ics_write_xive(ics, nr, server, priority, priority);
> -
> -    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> -}
> -
> -static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                          uint32_t token,
> -                          uint32_t nargs, target_ulong args,
> -                          uint32_t nret, target_ulong rets)
> -{
> -    ICSState *ics = spapr->icp->ics;
> -    uint32_t nr;
> -
> -    if ((nargs != 1) || (nret != 3)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    nr = rtas_ld(args, 0);
> -
> -    if (!ics_valid_irq(ics, nr)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> -    rtas_st(rets, 1, ics->irqs[nr - ics->offset].server);
> -    rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
> -}
> -
> -static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                         uint32_t token,
> -                         uint32_t nargs, target_ulong args,
> -                         uint32_t nret, target_ulong rets)
> -{
> -    ICSState *ics = spapr->icp->ics;
> -    uint32_t nr;
> -
> -    if ((nargs != 1) || (nret != 1)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    nr = rtas_ld(args, 0);
> -
> -    if (!ics_valid_irq(ics, nr)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
> -                   ics->irqs[nr - ics->offset].priority);
> -
> -    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> -}
> -
> -static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> -                        uint32_t token,
> -                        uint32_t nargs, target_ulong args,
> -                        uint32_t nret, target_ulong rets)
> -{
> -    ICSState *ics = spapr->icp->ics;
> -    uint32_t nr;
> -
> -    if ((nargs != 1) || (nret != 1)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    nr = rtas_ld(args, 0);
> -
> -    if (!ics_valid_irq(ics, nr)) {
> -        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> -        return;
> -    }
> -
> -    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
> -                   ics->irqs[nr - ics->offset].saved_priority,
> -                   ics->irqs[nr - ics->offset].saved_priority);
> -
> -    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> -}
> -
>  /*
>   * XICS
>   */
>  
> -static void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp)
> +void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp)
>  {
>      icp->nr_irqs = icp->ics->nr_irqs = nr_irqs;
>  }
>  
> -static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
> -                                Error **errp)
> +void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp)
>  {
>      int i;
>  
> @@ -1006,77 +706,9 @@ static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
>      }
>  }
>  
> -static void xics_spapr_realize(DeviceState *dev, Error **errp)
> -{
> -    XICSState *icp = XICS(dev);
> -    Error *error = NULL;
> -    int i;
> -
> -    if (!icp->nr_servers) {
> -        error_setg(errp, "Number of servers needs to be greater 0");
> -        return;
> -    }
> -
> -    /* Registration of global state belongs into realize */
> -    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
> -    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
> -    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
> -    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
> -
> -    spapr_register_hypercall(H_CPPR, h_cppr);
> -    spapr_register_hypercall(H_IPI, h_ipi);
> -    spapr_register_hypercall(H_XIRR, h_xirr);
> -    spapr_register_hypercall(H_XIRR_X, h_xirr_x);
> -    spapr_register_hypercall(H_EOI, h_eoi);
> -    spapr_register_hypercall(H_IPOLL, h_ipoll);
> -
> -    object_property_set_bool(OBJECT(icp->ics), true, "realized", &error);
> -    if (error) {
> -        error_propagate(errp, error);
> -        return;
> -    }
> -
> -    for (i = 0; i < icp->nr_servers; i++) {
> -        object_property_set_bool(OBJECT(&icp->ss[i]), true, "realized", &error);
> -        if (error) {
> -            error_propagate(errp, error);
> -            return;
> -        }
> -    }
> -}
> -
> -static void xics_spapr_initfn(Object *obj)
> -{
> -    XICSState *xics = XICS(obj);
> -
> -    xics->ics = ICS(object_new(TYPE_ICS));
> -    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
> -    xics->ics->icp = xics;
> -}
> -
> -static void xics_spapr_class_init(ObjectClass *oc, void *data)
> -{
> -    DeviceClass *dc = DEVICE_CLASS(oc);
> -    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
> -
> -    dc->realize = xics_spapr_realize;
> -    xsc->set_nr_irqs = xics_set_nr_irqs;
> -    xsc->set_nr_servers = xics_set_nr_servers;
> -}
> -
> -static const TypeInfo xics_spapr_info = {
> -    .name          = TYPE_XICS_SPAPR,
> -    .parent        = TYPE_XICS_COMMON,
> -    .instance_size = sizeof(XICSState),
> -    .class_size = sizeof(XICSStateClass),
> -    .class_init    = xics_spapr_class_init,
> -    .instance_init = xics_spapr_initfn,
> -};
> -
>  static void xics_register_types(void)
>  {
>      type_register_static(&xics_common_info);
> -    type_register_static(&xics_spapr_info);
>      type_register_static(&ics_info);
>      type_register_static(&icp_info);
>  }
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> new file mode 100644
> index 0000000..820fe79
> --- /dev/null
> +++ b/hw/intc/xics_spapr.c
> @@ -0,0 +1,401 @@
> +/*
> + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
> + *
> + * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
> + *
> + * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + *
> + */
> +
> +#include "hw/hw.h"
> +#include "trace.h"
> +#include "qemu/timer.h"
> +#include "hw/ppc/spapr.h"
> +#include "hw/ppc/xics.h"
> +#include "qemu/error-report.h"
> +#include "qapi/visitor.h"
> +
> +/*
> + * Guest interfaces
> + */
> +
> +static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                           target_ulong opcode, target_ulong *args)
> +{
> +    CPUState *cs = CPU(cpu);
> +    target_ulong cppr = args[0];
> +
> +    icp_set_cppr(spapr->icp, cs->cpu_index, cppr);
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                          target_ulong opcode, target_ulong *args)
> +{
> +    target_ulong server = get_cpu_index_by_dt_id(args[0]);
> +    target_ulong mfrr = args[1];
> +
> +    if (server >= spapr->icp->nr_servers) {
> +        return H_PARAMETER;
> +    }
> +
> +    icp_set_mfrr(spapr->icp, server, mfrr);
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                           target_ulong opcode, target_ulong *args)
> +{
> +    CPUState *cs = CPU(cpu);
> +    uint32_t xirr = icp_accept(spapr->icp->ss + cs->cpu_index);
> +
> +    args[0] = xirr;
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                             target_ulong opcode, target_ulong *args)
> +{
> +    CPUState *cs = CPU(cpu);
> +    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
> +    uint32_t xirr = icp_accept(ss);
> +
> +    args[0] = xirr;
> +    args[1] = cpu_get_host_ticks();
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                          target_ulong opcode, target_ulong *args)
> +{
> +    CPUState *cs = CPU(cpu);
> +    target_ulong xirr = args[0];
> +
> +    icp_eoi(spapr->icp, cs->cpu_index, xirr);
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                            target_ulong opcode, target_ulong *args)
> +{
> +    CPUState *cs = CPU(cpu);
> +    ICPState *ss = &spapr->icp->ss[cs->cpu_index];
> +
> +    args[0] = ss->xirr;
> +    args[1] = ss->mfrr;
> +
> +    return H_SUCCESS;
> +}
> +
> +static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                          uint32_t token,
> +                          uint32_t nargs, target_ulong args,
> +                          uint32_t nret, target_ulong rets)
> +{
> +    ICSState *ics = spapr->icp->ics;
> +    uint32_t nr, server, priority;
> +
> +    if ((nargs != 3) || (nret != 1)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    nr = rtas_ld(args, 0);
> +    server = get_cpu_index_by_dt_id(rtas_ld(args, 1));
> +    priority = rtas_ld(args, 2);
> +
> +    if (!ics_valid_irq(ics, nr) || (server >= ics->icp->nr_servers)
> +        || (priority > 0xff)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    ics_write_xive(ics, nr, server, priority, priority);
> +
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                          uint32_t token,
> +                          uint32_t nargs, target_ulong args,
> +                          uint32_t nret, target_ulong rets)
> +{
> +    ICSState *ics = spapr->icp->ics;
> +    uint32_t nr;
> +
> +    if ((nargs != 1) || (nret != 3)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    nr = rtas_ld(args, 0);
> +
> +    if (!ics_valid_irq(ics, nr)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +    rtas_st(rets, 1, ics->irqs[nr - ics->offset].server);
> +    rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
> +}
> +
> +static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                         uint32_t token,
> +                         uint32_t nargs, target_ulong args,
> +                         uint32_t nret, target_ulong rets)
> +{
> +    ICSState *ics = spapr->icp->ics;
> +    uint32_t nr;
> +
> +    if ((nargs != 1) || (nret != 1)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    nr = rtas_ld(args, 0);
> +
> +    if (!ics_valid_irq(ics, nr)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
> +                   ics->irqs[nr - ics->offset].priority);
> +
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> +                        uint32_t token,
> +                        uint32_t nargs, target_ulong args,
> +                        uint32_t nret, target_ulong rets)
> +{
> +    ICSState *ics = spapr->icp->ics;
> +    uint32_t nr;
> +
> +    if ((nargs != 1) || (nret != 1)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    nr = rtas_ld(args, 0);
> +
> +    if (!ics_valid_irq(ics, nr)) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
> +                   ics->irqs[nr - ics->offset].saved_priority,
> +                   ics->irqs[nr - ics->offset].saved_priority);
> +
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void xics_spapr_realize(DeviceState *dev, Error **errp)
> +{
> +    XICSState *icp = XICS(dev);
> +    Error *error = NULL;
> +    int i;
> +
> +    if (!icp->nr_servers) {
> +        error_setg(errp, "Number of servers needs to be greater 0");
> +        return;
> +    }
> +
> +    /* Registration of global state belongs into realize */
> +    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
> +    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
> +    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
> +    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
> +
> +    spapr_register_hypercall(H_CPPR, h_cppr);
> +    spapr_register_hypercall(H_IPI, h_ipi);
> +    spapr_register_hypercall(H_XIRR, h_xirr);
> +    spapr_register_hypercall(H_XIRR_X, h_xirr_x);
> +    spapr_register_hypercall(H_EOI, h_eoi);
> +    spapr_register_hypercall(H_IPOLL, h_ipoll);
> +
> +    object_property_set_bool(OBJECT(icp->ics), true, "realized", &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        return;
> +    }
> +
> +    for (i = 0; i < icp->nr_servers; i++) {
> +        object_property_set_bool(OBJECT(&icp->ss[i]), true, "realized", &error);
> +        if (error) {
> +            error_propagate(errp, error);
> +            return;
> +        }
> +    }
> +}
> +
> +static void xics_spapr_initfn(Object *obj)
> +{
> +    XICSState *xics = XICS(obj);
> +
> +    xics->ics = ICS(object_new(TYPE_ICS));
> +    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
> +    xics->ics->icp = xics;
> +}
> +
> +static void xics_spapr_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
> +
> +    dc->realize = xics_spapr_realize;
> +    xsc->set_nr_irqs = xics_set_nr_irqs;
> +    xsc->set_nr_servers = xics_set_nr_servers;
> +}
> +
> +static const TypeInfo xics_spapr_info = {
> +    .name          = TYPE_XICS_SPAPR,
> +    .parent        = TYPE_XICS_COMMON,
> +    .instance_size = sizeof(XICSState),
> +    .class_size = sizeof(XICSStateClass),
> +    .class_init    = xics_spapr_class_init,
> +    .instance_init = xics_spapr_initfn,
> +};
> +
> +#define ICS_IRQ_FREE(ics, srcno)   \
> +    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
> +
> +static int ics_find_free_block(ICSState *ics, int num, int alignnum)
> +{
> +    int first, i;
> +
> +    for (first = 0; first < ics->nr_irqs; first += alignnum) {
> +        if (num > (ics->nr_irqs - first)) {
> +            return -1;
> +        }
> +        for (i = first; i < first + num; ++i) {
> +            if (!ICS_IRQ_FREE(ics, i)) {
> +                break;
> +            }
> +        }
> +        if (i == (first + num)) {
> +            return first;
> +        }
> +    }
> +
> +    return -1;
> +}
> +
> +int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
> +{
> +    ICSState *ics = &icp->ics[src];
> +    int irq;
> +
> +    if (irq_hint) {
> +        assert(src == xics_find_source(icp, irq_hint));
> +        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
> +            trace_xics_alloc_failed_hint(src, irq_hint);
> +            return -1;
> +        }
> +        irq = irq_hint;
> +    } else {
> +        irq = ics_find_free_block(ics, 1, 1);
> +        if (irq < 0) {
> +            trace_xics_alloc_failed_no_left(src);
> +            return -1;
> +        }
> +        irq += ics->offset;
> +    }
> +
> +    ics_set_irq_type(ics, irq - ics->offset, lsi);
> +    trace_xics_alloc(src, irq);
> +
> +    return irq;
> +}
> +
> +/*
> + * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
> + * If align==true, aligns the first IRQ number to num.
> + */
> +int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
> +{
> +    int i, first = -1;
> +    ICSState *ics = &icp->ics[src];
> +
> +    assert(src == 0);
> +    /*
> +     * MSIMessage::data is used for storing VIRQ so
> +     * it has to be aligned to num to support multiple
> +     * MSI vectors. MSI-X is not affected by this.
> +     * The hint is used for the first IRQ, the rest should
> +     * be allocated continuously.
> +     */
> +    if (align) {
> +        assert((num == 1) || (num == 2) || (num == 4) ||
> +               (num == 8) || (num == 16) || (num == 32));
> +        first = ics_find_free_block(ics, num, num);
> +    } else {
> +        first = ics_find_free_block(ics, num, 1);
> +    }
> +
> +    if (first >= 0) {
> +        for (i = first; i < first + num; ++i) {
> +            ics_set_irq_type(ics, i, lsi);
> +        }
> +    }
> +    first += ics->offset;
> +
> +    trace_xics_alloc_block(src, first, num, lsi, align);
> +
> +    return first;
> +}
> +
> +static void ics_free(ICSState *ics, int srcno, int num)
> +{
> +    int i;
> +
> +    for (i = srcno; i < srcno + num; ++i) {
> +        if (ICS_IRQ_FREE(ics, i)) {
> +            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
> +        }
> +        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
> +    }
> +}
> +
> +void xics_spapr_free(XICSState *icp, int irq, int num)
> +{
> +    int src = xics_find_source(icp, irq);
> +
> +    if (src >= 0) {
> +        ICSState *ics = &icp->ics[src];
> +
> +        /* FIXME: implement multiple sources */
> +        assert(src == 0);
> +
> +        trace_xics_ics_free(ics - icp->ics, irq, num);
> +        ics_free(ics, irq - ics->offset, num);
> +    }
> +}
> +
> +static void xics_spapr_register_types(void)
> +{
> +    type_register_static(&xics_spapr_info);
> +}
> +
> +type_init(xics_spapr_register_types)
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 8d33dfa..8efff94 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -141,6 +141,12 @@ struct ICSState {
>      XICSState *icp;
>  };
>  
> +static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
> +{
> +    return (nr >= ics->offset)
> +        && (nr < (ics->offset + ics->nr_irqs));
> +}
> +
>  struct ICSIRQState {
>      uint32_t server;
>      uint8_t priority;
> @@ -168,4 +174,21 @@ void xics_spapr_free(XICSState *icp, int irq, int num);
>  
>  void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
>  
> +/* Internal XICS interfaces */
> +int get_cpu_index_by_dt_id(int cpu_dt_id);
> +
> +void icp_set_cppr(XICSState *icp, int server, uint8_t cppr);
> +void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr);
> +uint32_t icp_accept(ICPState *ss);
> +void icp_eoi(XICSState *icp, int server, uint32_t xirr);
> +
> +void ics_write_xive(ICSState *ics, int nr, int server,
> +                    uint8_t priority, uint8_t saved_priority);
> +
> +void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
> +
> +void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp);
> +void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
> +int xics_find_source(XICSState *icp, int irq);
> +
>  #endif /* __XICS_H__ */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 31/77] ppc/xics: Remove unused xics_set_irq_type()
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 31/77] ppc/xics: Remove unused xics_set_irq_type() Benjamin Herrenschmidt
@ 2015-11-24  3:34   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  3:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1782 bytes --]

On Wed, Nov 11, 2015 at 11:27:44AM +1100, Benjamin Herrenschmidt wrote:
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

In fact, this is a sufficiently good clean up that I think I'll merge
it shortly, regardless of what happens with the rest of the pnv series.

> ---
>  hw/intc/xics.c        | 11 -----------
>  include/hw/ppc/xics.h |  1 -
>  2 files changed, 12 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index 165ff0b..197df33 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -678,17 +678,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
>          lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
>  }
>  
> -void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
> -{
> -    int src = xics_find_source(icp, irq);
> -    ICSState *ics;
> -
> -    assert(src >= 0);
> -
> -    ics = &icp->ics[src];
> -    ics_set_irq_type(ics, irq - ics->offset, lsi);
> -}
> -
>  /*
>   * XICS
>   */
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index ad39c8c..8e7998f 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -166,7 +166,6 @@ struct ICSIRQState {
>  #define XICS_IRQS_SPAPR               1024
>  
>  qemu_irq xics_get_qirq(XICSState *icp, int irq);
> -void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
>  
>  int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
>  int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 32/77] ppc/xics: Replace "icp" with "xics" in most places
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 32/77] ppc/xics: Replace "icp" with "xics" in most places Benjamin Herrenschmidt
@ 2015-11-24  3:36   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-11-24  3:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 806 bytes --]

On Wed, Nov 11, 2015 at 11:27:45AM +1100, Benjamin Herrenschmidt wrote:
> The "ICP" is a different object than the "XICS". For historical reasons,
> we have a number of places where we name a variable "icp" while it contains
> a XICSState pointer. There *is* an ICPState structure too so this makes
> the code really confusing.
> 
> This is a mechanical replacement of all those instances to use the name
> "xics" instead. There should be no functional change.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Ah, good idea.  I think I'll take this one independently as well.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure
  2015-11-24  3:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-24  8:49     ` Benjamin Herrenschmidt
  2015-11-24  8:55     ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  8:49 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Tue, 2015-11-24 at 14:20 +1100, David Gibson wrote:
> Hmm.. I'm thinking it probably makes sense to unify the representation
> of "chip" and "xscom" since there's a 1:1 correspondence.  To be
> QOMishly correct, I think the right way would be a SysBusDevice for
> each chip, implementing the xscom MMIOs, then an array of link properties
> under the Machine object to find the right scoms by chip id.

But there is more than xscom's under each chip ... there's PHBs, LPC
controllers (though those are currently under XSCOM) and possibly other
completely unrelated things...

To be honest I've never been a big fan of that whole QOM business and
am a bit confused as to how that would all work together.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure
  2015-11-24  3:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2015-11-24  8:49     ` Benjamin Herrenschmidt
@ 2015-11-24  8:55     ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-24  8:55 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel

On Tue, 2015-11-24 at 14:20 +1100, David Gibson wrote:
> 
> > +static uint32_t xscom_to_pcb_addr(uint64_t addr)
> > +{
> > +        addr &= (XSCOM_SIZE - 1);
> > +        return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf);
> 
> Wow, that's a pretty weird address transform.

Indeed :-) That's how it is in HW. It's also a bit different between
chip generations.

> > +}
> > +
> > +static void xscom_complete(uint64_t hmer_bits)
> > +{
> > +    CPUState *cs = current_cpu;
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +    CPUPPCState *env = &cpu->env;
> > +
> > +    cpu_synchronize_state(cs);
> > +    env->spr[SPR_HMER] |= hmer_bits;
> > +
> > +    /* XXX Need a CPU helper to set HMER, also handle generation
> > +     * of HMIs
> 
> Not sure what you're referring to here.  Nothing more should be
> needed to set the HMER - because you've called
> cpu_synchronize_state() it will be marked dirty and flushed back to
> KVM before re-entry.

No it's not that. Setting HMER can potentially generate an HMI
interrupt (if enabled in HMEER). We never use the interrupt
corresponding to XSCOMs in FW though, so that's why I haven't bothered
yet.

> > +     */
> > +}
> > +
> > +static XScomDevice *xscom_find_target(XScomState *s, uint32_t pcb_addr, uint32_t *range)
> > +{
> > +    BusChild *bc;
> > +
> > +    QTAILQ_FOREACH(bc, &s->bus->bus.children, sibling) {
> > +        DeviceState *qd = bc->child;
> > +        XScomDevice *xd = XSCOM_DEVICE(qd);
> > +        unsigned int i;
> > +
> > +        for (i = 0; i < MAX_XSCOM_RANGES; i++) {
> > +            if (xd->ranges[i].addr <= pcb_addr &&
> > +                (xd->ranges[i].addr + xd->ranges[i].size) > pcb_addr) {
> > +                *range = i;
> > +                return xd;
> > +            }
> > +        }
> > +    }
> 
> I'm wondering if it makes sense to construct a custom AddressSpace
> and
> use the existing address space lookup logic from exec.c and memory.c
> rather than implementing your own.

Maybe but we'd then have to shift everything by 3 bits, which means the
"XSCOM addresses" would no longer match the doc unless we use some kind
of macro to do the shifting.

> > +    return NULL;
> > +}
> > +
> > +static bool xscom_dispatch_read(XScomState *s, uint32_t pcb_addr, uint64_t *out_val)
> > +{
> > +    uint32_t range, offset;
> > +    struct XScomDevice *xd = xscom_find_target(s, pcb_addr, &range);
> > +    XScomDeviceClass *xc;
> > +
> > +    if (!xd) {
> > +        return false;
> > +    }
> > +    xc = XSCOM_DEVICE_GET_CLASS(xd);
> > +    if (!xc->read) {
> > +        return false;
> > +    }
> > +    offset = pcb_addr - xd->ranges[range].addr;
> > +    return xc->read(xd, range, offset, out_val);
> > +}
> > +
> > +static bool xscom_dispatch_write(XScomState *s, uint32_t pcb_addr, uint64_t val)
> > +{
> > +    uint32_t range, offset;
> > +    struct XScomDevice *xd = xscom_find_target(s, pcb_addr, &range);
> > +    XScomDeviceClass *xc;
> > +
> > +    if (!xd) {
> > +        return false;
> > +    }
> > +    xc = XSCOM_DEVICE_GET_CLASS(xd);
> > +    if (!xc->write) {
> > +        return false;
> > +    }
> > +    offset = pcb_addr - xd->ranges[range].addr;
> > +    return xc->write(xd, range, offset, val);
> > +}
> > +
> > +static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width)
> > +{
> > +    XScomState *s = opaque;
> > +    uint32_t pcba = xscom_to_pcb_addr(addr);
> > +    uint64_t val;
> > +
> > +    assert(width == 8);
> > +
> > +#ifdef TRACE_SCOMS
> > +    printf("XSCOM_READ(0x%x:0x%x)\n", s->chip_id, pcba);
> > +#endif
> 
> You should be using the built in trace infrastructure here - it's
> really not that much of a pain.  Put
>         trace_xscom_read(s->chip_id, pcba)
> here, put a suitable format in trace-events, and ./configure
> --enable-trace-backends=stderr

I'll investigate this.

> > +
> > +    /* Handle some SCOMs here before dispatch */
> > +    switch(pcba) {
> > +    case 0xf000f:
> > +        val = 0x221EF04980000000;
> > +        break;
> > +    case 0x1010c00:     /* PIBAM FIR */
> > +    case 0x1010c03:     /* PIBAM FIR MASK */
> > +    case 0x2020007:     /* ADU stuff */
> > +    case 0x2020009:     /* ADU stuff */
> > +    case 0x202000f:     /* ADU stuff */
> > +        val = 0;
> > +        break;
> > +    case 0x2013f00:     /* PBA stuff */
> > +    case 0x2013f01:     /* PBA stuff */
> > +    case 0x2013f02:     /* PBA stuff */
> > +    case 0x2013f03:     /* PBA stuff */
> > +    case 0x2013f04:     /* PBA stuff */
> > +    case 0x2013f05:     /* PBA stuff */
> > +    case 0x2013f06:     /* PBA stuff */
> > +    case 0x2013f07:     /* PBA stuff */
> > +        val = 0;
> > +        break;
> > +    default:
> > +        if (!xscom_dispatch_read(s, pcba, &val)) {
> > +            xscom_complete(HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
> > +            return 0;
> > +        }
> > +    }
> > +
> > +    xscom_complete(HMER_XSCOM_DONE);
> > +    return val;
> > +}
> > +
> > +static void xscom_write(void *opaque, hwaddr addr, uint64_t val,
> > +                        unsigned width)
> > +{
> > +    XScomState *s = opaque;
> > +    uint32_t pcba = xscom_to_pcb_addr(addr);
> > +
> > +    assert(width == 8);
> > +
> > +#ifdef TRACE_SCOMS
> > +    printf("XSCOM_WRITE(0x%x:0x%x, 0x%016llx)\n",
> > +           s->chip_id, pcba, (unsigned long long)val);
> > +#endif
> > +    /* Handle some SCOMs here before dispatch */
> > +    switch(pcba) {
> > +        /* We ignore writes to these */
> > +    case 0xf000f:       /* chip id is RO */
> > +    case 0x1010c00:     /* PIBAM FIR */
> > +    case 0x1010c01:     /* PIBAM FIR */
> > +    case 0x1010c02:     /* PIBAM FIR */
> > +    case 0x1010c03:     /* PIBAM FIR MASK */
> > +    case 0x1010c04:     /* PIBAM FIR MASK */
> > +    case 0x1010c05:     /* PIBAM FIR MASK */
> > +    case 0x2020007:     /* ADU stuff */
> > +    case 0x2020009:     /* ADU stuff */
> > +    case 0x202000f:     /* ADU stuff */
> > +        break;
> > +    default:
> > +        if (!xscom_dispatch_write(s, pcba, val)) {
> > +            xscom_complete(HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
> > +            return;
> > +        }
> > +    }
> > +
> > +    xscom_complete(HMER_XSCOM_DONE);
> > +}
> > +
> > +static const MemoryRegionOps xscom_ops = {
> > +    .read = xscom_read,
> > +    .write = xscom_write,
> > +    .valid.min_access_size = 8,
> > +    .valid.max_access_size = 8,
> > +    .impl.min_access_size = 8,
> > +    .impl.max_access_size = 8,
> > +    .endianness = DEVICE_BIG_ENDIAN,
> > +};
> > +
> > +static int xscom_init(SysBusDevice *dev)
> > +{
> > +    XScomState *s = XSCOM(dev);
> > +
> > +    s->chip_id = -1;
> > +    return 0;
> > +}
> > +
> > +static void xscom_realize(DeviceState *dev, Error **errp)
> > +{
> > +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
> > +    XScomState *s = XSCOM(dev);
> > +    char *name;
> > +
> > +    assert(s->chip_id >= 0);
> 
> So, this assert could be tripped if the user explicitly instantiated
> an xscom device which they probably shouldn't do, but could.  So, it
> probably makes sense to use error_setg() here instead of assert().

No idea what error_setg() is, I'll look into it :)

> > +    name = g_strdup_printf("xscom-%x", s->chip_id);
> > +    memory_region_init_io(&s->mem, OBJECT(s), &xscom_ops, s, name, XSCOM_SIZE);
> > +    sysbus_init_mmio(sbd, &s->mem);
> > +    sysbus_mmio_map(sbd, 0, XSCOM_BASE(s->chip_id));
> > +}
> > +
> > +static Property xscom_properties[] = {
> > +        DEFINE_PROP_INT32("chip_id", XScomState, chip_id, 0),
> > +        DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> > +static void xscom_class_init(ObjectClass *klass, void *data)
> > +{
> > +    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
> > +    DeviceClass *dc = DEVICE_CLASS(klass);
> > +
> > +    dc->props = xscom_properties;
> > +    dc->realize = xscom_realize;
> > +    k->init = xscom_init;
> > +}
> > +
> > +static const TypeInfo xscom_info = {
> > +    .name          = TYPE_XSCOM,
> > +    .parent        = TYPE_SYS_BUS_DEVICE,
> > +    .instance_size = sizeof(XScomState),
> > +    .class_init    = xscom_class_init,
> > +};
> > +
> > +static void xscom_bus_class_init(ObjectClass *klass, void *data)
> > +{
> > +}
> > +
> > +static const TypeInfo xscom_bus_info = {
> > +    .name = TYPE_XSCOM_BUS,
> > +    .parent = TYPE_BUS,
> > +    .class_init = xscom_bus_class_init,
> > +    .instance_size = sizeof(XScomBus),
> > +};
> > +
> > +void xscom_create(PnvChip *chip)
> > +{
> > +    DeviceState *dev;
> > +    XScomState *xdev;
> > +    BusState *qbus;
> > +    XScomBus *xb;
> > +
> > +    dev = qdev_create(NULL, TYPE_XSCOM);
> > +    qdev_prop_set_uint32(dev, "chip_id", chip->chip_id);
> > +    qdev_init_nofail(dev);
> > +
> > +    /* Create bus on bridge device */
> > +    qbus = qbus_create(TYPE_XSCOM_BUS, dev, "xscom");
> > +    xb = DO_UPCAST(XScomBus, bus, qbus);
> > +    xb->chip_id = chip->chip_id;
> > +    xdev = XSCOM(dev);
> > +    xdev->bus = xb;
> > +    chip->xscom = xb;
> 
> I believe the qbus_create() is usually invoked by the bridge's init
> function, rather than externally.

Init or realize ?

Cheers,
Ben.


^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-11  4:41             ` Alexey Kardashevskiy
  2015-11-11  4:47               ` Benjamin Herrenschmidt
@ 2015-11-27 10:21               ` Alexander Graf
  2015-11-28  7:59                 ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 198+ messages in thread
From: Alexander Graf @ 2015-11-27 10:21 UTC (permalink / raw)
  To: Alexey Kardashevskiy, Benjamin Herrenschmidt, Eric Blake, qemu-ppc
  Cc: qemu-devel



On 11.11.15 05:41, Alexey Kardashevskiy wrote:
> On 11/11/2015 03:16 PM, Benjamin Herrenschmidt wrote:
>> On Wed, 2015-11-11 at 15:07 +1100, Alexey Kardashevskiy wrote:
>>>
>>> p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine
>>> powernv \
>>> -nographic -vga none -initrd t/le.cpio -kernel t/vml420le -bios \
>>> skiboot.lid -smp 1,threads=1
>>>
>>> just hangs at:
>>>
>>> [1491287872,5] INIT: Waiting for kernel...
>>> [1493257423,5] Assuming kernel at 0x20000000
>>> [1494710040,5] INIT: Kernel loaded, size: 0 bytes (0 = unknown
>>> preload)
>>> [1497506414,5] INIT: 64-bit LE kernel discovered
>>> [1500827972,5] INIT: 64-bit kernel entry at 0x20010000
>>> [1505594383,3] OCC: No HOMER detected, assuming no pstates
>>> [1507983930,3] ELOG: Error getting buffer to log error
>>> [1556792870,5] Free space in HEAP memory regions:
>>> [1559724738,5] Region ibm,firmware-heap free: 12778984
>>> [1561377946,5] Region ibm,firmware-allocs-memory@0000000000000000
>>>   free: 376992
>>> [1563789914,5] Total free: 13155976
>>> [1565066925,5] INIT: Starting kernel at 0x20010000, fdt at 0x30350610
>>> (size
>>> 0x2ce4)
>>
>> Hrm, works for me, I've been testing various LE kernels including a
>> full ubuntu distro in there, we need to debug that further. Does that
>> same kernel actually work on real HW ?
> 
> 
> Ok, as we figured out, CONFIG_PPC_EARLY_DEBUG is responsible for this as
> it does hypercalls in the very beginning.
> 
> 
>>> If I try LE disk image (ubuntu 14), it just crashes:
>>>
>>> p/qemu-powernv/ppc64-softmmu/qemu-system-ppc64 -m 2048 -machine
>>> powernv \
>>> -nographic -vga none img/u14_32GB_cuda7.qcow2 -bios skiboot.lid \
>>> -smp 1,threads=1
>>> qemu: hardware error: qemu: could not load kernel'(null)'
>>
>> Right, we don't load kernels from disk, you need to pass a -kernel that
> 
> Worth mentioning as well ;)
> 
>> typically is the openpower bootloader (Linux + petitboot). My plan is
>> to make the pnv platform automatically extract these things from a ROM
>> image of an openpower eval board (aka palmetto) which you can build
>> from github. It's a bit too big to include as a binary in qemu however
>> (about 16M).
> 
> git submodule?

How does real hardware store petitboot? If it's flash, you could pass it
in using -pflash and thus model things even more closely and allow users
to just take the ROM image as is.


Alex

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts Benjamin Herrenschmidt
  2015-11-16  4:49   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-11-27 10:29   ` Alexander Graf
  2015-11-27 12:15     ` Paolo Bonzini
  1 sibling, 1 reply; 198+ messages in thread
From: Alexander Graf @ 2015-11-27 10:29 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: Paolo Bonzini, qemu-devel



On 11.11.15 01:27, Benjamin Herrenschmidt wrote:
> We rework the way the MMU indices are calculated, providing separate
> indices for I and D side based on MSR:IR and MSR:DR respectively,
> and thus no longer need to flush the TLB on context changes. This also
> adds correct support for HV as a separate address space.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Paolo had a patch set poking at the same places a while back to speed up
the ppc target by almost 10%.

Paolo, what happened to those patches? Would you prefer to rebase them
on top of the HV bits or have Ben look into them while he's at it anyway? ;)


Alex

> ---
>  target-ppc/cpu.h         | 11 +++++++---
>  target-ppc/excp_helper.c | 11 ----------
>  target-ppc/helper_regs.h | 54 +++++++++++++++++++++++++++++++++++++++++-------
>  target-ppc/machine.c     |  4 +++-
>  target-ppc/translate.c   |  7 ++++---
>  5 files changed, 62 insertions(+), 25 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 9ef0859..aaa7117 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -462,6 +462,8 @@ struct ppc_slb_t {
>  #define MSR_EP   6  /* Exception prefix on 601                               */
>  #define MSR_IR   5  /* Instruction relocate                                  */
>  #define MSR_DR   4  /* Data relocate                                         */
> +#define MSR_IS   5  /* Instruction address space (BookE)                     */
> +#define MSR_DS   4  /* Data address space (BookE)                            */
>  #define MSR_PE   3  /* Protection enable on 403                              */
>  #define MSR_PX   2  /* Protection exclusive on 403                  x        */
>  #define MSR_PMM  2  /* Performance monitor mark on POWER            x        */
> @@ -505,6 +507,8 @@ struct ppc_slb_t {
>  #define msr_ep   ((env->msr >> MSR_EP)   & 1)
>  #define msr_ir   ((env->msr >> MSR_IR)   & 1)
>  #define msr_dr   ((env->msr >> MSR_DR)   & 1)
> +#define msr_is   ((env->msr >> MSR_IS)   & 1)
> +#define msr_ds   ((env->msr >> MSR_DS)   & 1)
>  #define msr_pe   ((env->msr >> MSR_PE)   & 1)
>  #define msr_px   ((env->msr >> MSR_PX)   & 1)
>  #define msr_pmm  ((env->msr >> MSR_PMM)  & 1)
> @@ -944,7 +948,7 @@ struct ppc_segment_page_sizes {
>  
>  /*****************************************************************************/
>  /* The whole PowerPC CPU context */
> -#define NB_MMU_MODES 3
> +#define NB_MMU_MODES    8
>  
>  #define PPC_CPU_OPCODES_LEN          0x40
>  #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
> @@ -1108,7 +1112,8 @@ struct CPUPPCState {
>      /* Those resources are used only in QEMU core */
>      target_ulong hflags;      /* hflags is a MSR & HFLAGS_MASK         */
>      target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */
> -    int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
> +    int immu_idx;         /* precomputed MMU index to speed up insn access */
> +    int dmmu_idx;         /* precomputed MMU index to speed up data accesses */
>  
>      /* Power management */
>      int (*check_pow)(CPUPPCState *env);
> @@ -1249,7 +1254,7 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
>  #define MMU_USER_IDX 0
>  static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  {
> -    return env->mmu_idx;
> +    return ifetch ? env->immu_idx : env->dmmu_idx;
>  }
>  
>  #include "exec/cpu-all.h"
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 4250106..3e39098 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>  
>      if (env->spr[SPR_LPCR] & LPCR_AIL) {
>          new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> -    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
> -        /* If we disactivated any translation, flush TLBs */
> -        tlb_flush(cs, 1);
>      }
>  
>  #ifdef TARGET_PPC64
> @@ -674,14 +671,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      /* Reset exception state */
>      cs->exception_index = POWERPC_EXCP_NONE;
>      env->error_code = 0;
> -
> -    if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
> -        (env->mmu_model == POWERPC_MMU_BOOKE206)) {
> -        /* XXX: The BookE changes address space when switching modes,
> -                we should probably implement that as different MMU indexes,
> -                but for the moment we do it the slow way and flush all.  */
> -        tlb_flush(cs, 1);
> -    }
>  }
>  
>  void ppc_cpu_do_interrupt(CPUState *cs)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 271fddf..f7edd5b 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -41,11 +41,50 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
>  
>  static inline void hreg_compute_mem_idx(CPUPPCState *env)
>  {
> -    /* Precompute MMU index */
> -    if (msr_pr == 0 && msr_hv != 0) {
> -        env->mmu_idx = 2;
> +    /* This is our encoding for server processors
> +     *
> +     *   0 = Guest User space virtual mode
> +     *   1 = Guest Kernel space virtual mode
> +     *   2 = Guest Kernel space real mode
> +     *   3 = HV User space virtual mode
> +     *   4 = HV Kernel space virtual mode
> +     *   5 = HV Kernel space real mode
> +     *
> +     * The combination PR=1 IR&DR=0 is invalid, we will treat
> +     * it as IR=DR=1
> +     *
> +     * For BookE, we need 8 MMU modes as follows:
> +     *
> +     *  0 = AS 0 HV User space
> +     *  1 = AS 0 HV Kernel space
> +     *  2 = AS 1 HV User space
> +     *  3 = AS 1 HV Kernel space
> +     *  4 = AS 0 Guest User space
> +     *  5 = AS 0 Guest Kernel space
> +     *  6 = AS 1 Guest User space
> +     *  7 = AS 1 Guest Kernel space
> +     */
> +    if (env->mmu_model & POWERPC_MMU_BOOKE) {
> +        env->immu_idx = env->dmmu_idx = msr_pr ? 0 : 1;
> +        env->immu_idx += msr_is ? 2 : 0;
> +        env->dmmu_idx += msr_ds ? 2 : 0;
> +        env->immu_idx += msr_gs ? 4 : 0;
> +        env->dmmu_idx += msr_gs ? 4 : 0;
>      } else {
> -        env->mmu_idx = 1 - msr_pr;
> +        /* First calculate a base value independent of HV */
> +        if (msr_pr != 0) {
> +            /* User space, ignore IR and DR */
> +            env->immu_idx = env->dmmu_idx = 0;
> +        } else {
> +            /* Kernel, setup a base I/D value */
> +            env->immu_idx = msr_ir ? 1 : 2;
> +            env->dmmu_idx = msr_dr ? 1 : 2;
> +        }
> +        /* Then offset it for HV */
> +        if (msr_hv) {
> +            env->immu_idx += 3;
> +            env->dmmu_idx += 3;
> +        }
>      }
>  }
>  
> @@ -82,9 +121,10 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      }
>      if (((value >> MSR_IR) & 1) != msr_ir ||
>          ((value >> MSR_DR) & 1) != msr_dr) {
> -        /* Flush all tlb when changing translation mode */
> -        tlb_flush(cs, 1);
> -        excp = POWERPC_EXCP_NONE;
> +        cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +    }
> +    if ((env->mmu_model & POWERPC_MMU_BOOKE) &&
> +        ((value >> MSR_GS) & 1) != msr_gs) {
>          cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>      }
>      if (unlikely((env->flags & POWERPC_FLAG_TGPR) &&
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index f4ac761..b969492 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -90,9 +90,11 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
>      qemu_get_betls(f, &env->nip);
>      qemu_get_betls(f, &env->hflags);
>      qemu_get_betls(f, &env->hflags_nmsr);
> -    qemu_get_sbe32s(f, &env->mmu_idx);
>      qemu_get_sbe32(f); /* Discard unused power_mode */
>  
> +    /* Ignore saved mmu_idx, recompute */
> +    hreg_compute_mem_idx(env);
> +
>      return 0;
>  }
>  
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 308ad68..6d9f252 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -11220,8 +11220,9 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>                  env->nip, env->lr, env->ctr, cpu_read_xer(env),
>                  cs->cpu_index);
>      cpu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
> -                TARGET_FMT_lx " idx %d\n", env->msr, env->spr[SPR_HID0],
> -                env->hflags, env->mmu_idx);
> +                TARGET_FMT_lx " iidx %d didx %d\n",
> +                env->msr, env->spr[SPR_HID0],
> +                env->hflags, env->immu_idx, env->dmmu_idx);
>  #if !defined(NO_TIMER_DUMP)
>      cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
>  #if !defined(CONFIG_USER_ONLY)
> @@ -11426,7 +11427,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
>      ctx.spr_cb = env->spr_cb;
>      ctx.pr = msr_pr;
>      ctx.hv = !msr_pr && msr_hv;
> -    ctx.mem_idx = env->mmu_idx;
> +    ctx.mem_idx = env->dmmu_idx;
>      ctx.insns_flags = env->insns_flags;
>      ctx.insns_flags2 = env->insns_flags2;
>      ctx.access_type = -1;
> 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts
  2015-11-27 10:29   ` Alexander Graf
@ 2015-11-27 12:15     ` Paolo Bonzini
  0 siblings, 0 replies; 198+ messages in thread
From: Paolo Bonzini @ 2015-11-27 12:15 UTC (permalink / raw)
  To: Alexander Graf, Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel



On 27/11/2015 11:29, Alexander Graf wrote:
> > We rework the way the MMU indices are calculated, providing separate
> > indices for I and D side based on MSR:IR and MSR:DR respectively,
> > and thus no longer need to flush the TLB on context changes. This also
> > adds correct support for HV as a separate address space.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> Paolo had a patch set poking at the same places a while back to speed up
> the ppc target by almost 10%.
> 
> Paolo, what happened to those patches? Would you prefer to rebase them
> on top of the HV bits or have Ben look into them while he's at it anyway? ;)

Ben decided to do it this way, which avoids a proliferation of MMU
modes.  The net effect of his patches is the same if not better.

Paolo

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-27 10:21               ` Alexander Graf
@ 2015-11-28  7:59                 ` Benjamin Herrenschmidt
  2015-11-28 10:53                   ` Alexander Graf
  2015-11-30 18:15                   ` Cédric Le Goater
  0 siblings, 2 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-28  7:59 UTC (permalink / raw)
  To: Alexander Graf, Alexey Kardashevskiy, Eric Blake, qemu-ppc; +Cc: qemu-devel

On Fri, 2015-11-27 at 11:21 +0100, Alexander Graf wrote:
> 
> How does real hardware store petitboot? If it's flash, you could pass it
> in using -pflash and thus model things even more closely and allow users
> to just take the ROM image as is.

It is a flash image, we could use an Open Power machine flash image "as-is"
provided we taught qemu to extract skiboot (aka OPAL) from it.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-28  7:59                 ` Benjamin Herrenschmidt
@ 2015-11-28 10:53                   ` Alexander Graf
  2015-11-29  0:38                     ` Benjamin Herrenschmidt
  2015-11-30 18:15                   ` Cédric Le Goater
  1 sibling, 1 reply; 198+ messages in thread
From: Alexander Graf @ 2015-11-28 10:53 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Alexey Kardashevskiy, qemu-ppc, qemu-devel



> Am 28.11.2015 um 08:59 schrieb Benjamin Herrenschmidt <benh@kernel.crashing.org>:
> 
>> On Fri, 2015-11-27 at 11:21 +0100, Alexander Graf wrote:
>> 
>> How does real hardware store petitboot? If it's flash, you could pass it
>> in using -pflash and thus model things even more closely and allow users
>> to just take the ROM image as is.
> 
> It is a flash image, we could use an Open Power machine flash image "as-is"
> provided we taught qemu to extract skiboot (aka OPAL) from it.

That's probably the best way. If it's memory mapped, you might not even have to extract anything - just jump to its location ;).

The -kernel syntax is useful to have, but should really be something "on top" of the normal system boot rather than its primary boot interface.


Alex

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-28 10:53                   ` Alexander Graf
@ 2015-11-29  0:38                     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-29  0:38 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Alexey Kardashevskiy, qemu-ppc, qemu-devel

On Sat, 2015-11-28 at 11:53 +0100, Alexander Graf wrote:
> 
> > Am 28.11.2015 um 08:59 schrieb Benjamin Herrenschmidt <benh@kernel.
> > crashing.org>:
> > 
> > > On Fri, 2015-11-27 at 11:21 +0100, Alexander Graf wrote:
> > > 
> > > How does real hardware store petitboot? If it's flash, you could
> > > pass it
> > > in using -pflash and thus model things even more closely and
> > > allow users
> > > to just take the ROM image as is.
> > 
> > It is a flash image, we could use an Open Power machine flash image
> > "as-is"
> > provided we taught qemu to extract skiboot (aka OPAL) from it.
> 
> That's probably the best way. If it's memory mapped, you might not
> even have to extract anything - just jump to its location ;).

It's not memory mapped sadly (well not on P8...)

The way a real P8 boots is first a little internal uC executes code
from an i2c seeprom to initialize a core and some cache. It then copies
a payload from flash into that cache. That payload is HostBoot, which
is a huge thing that initializes a pile of stuff such as the internal
busses, SMP links, links to memory buffers, trains memory etc... it
then constructs an embryo device-tree and loads & runs OPAL (aka
skiboot).

The model I chose for powernv is to simulate a boot at the hostboot ->
skiboot transition point.

> The -kernel syntax is useful to have, but should really be something
> "on top" of the normal system boot rather than its primary boot
> interface.

I know, I'm just not keen on committing a 16M "BIOS" to qemu tree ;-)

In any case, this is easy to change / fix. There's a lot more work on
the rest of the series first.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-28  7:59                 ` Benjamin Herrenschmidt
  2015-11-28 10:53                   ` Alexander Graf
@ 2015-11-30 18:15                   ` Cédric Le Goater
  2015-11-30 20:09                     ` Benjamin Herrenschmidt
  2015-12-07  1:25                     ` Stewart Smith
  1 sibling, 2 replies; 198+ messages in thread
From: Cédric Le Goater @ 2015-11-30 18:15 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Alexander Graf, Alexey Kardashevskiy,
	Eric Blake, qemu-ppc
  Cc: qemu-devel

On 11/28/2015 08:59 AM, Benjamin Herrenschmidt wrote:
> On Fri, 2015-11-27 at 11:21 +0100, Alexander Graf wrote:
>>
>> How does real hardware store petitboot? If it's flash, you could pass it
>> in using -pflash and thus model things even more closely and allow users
>> to just take the ROM image as is.
> 
> It is a flash image, we could use an Open Power machine flash image "as-is"
> provided we taught qemu to extract skiboot (aka OPAL) from it.

Couldn't we add an offset argument to load_image_targphys() or make that 
an extra routine ? If so, we could then load directly from an openpower 
pnor file. 

I gave it a quick (and dirty) try and a powernv guest runs fine up to 
petitboot with just :

	qemu-system-ppc64 -m 2G -M powernv -bios  ~/work/open-power/images/palmetto.pnor -nographic -nodefaults -serial stdio

The pnor file is compiled from github. The patch is below (without the dirty
cut and paste I did in loader.c). The offsets for the PAYLOAD and BOOTKERNEL
partitions are hard coded but I guess we don't need to read the flash partition
table in qemu, not yet.
 

Cheers,

C. 


Index: qemu-powernv.git/hw/ppc/pnv.c
===================================================================
--- qemu-powernv.git.orig/hw/ppc/pnv.c
+++ qemu-powernv.git/hw/ppc/pnv.c
@@ -69,7 +69,7 @@
 
 #define FDT_ADDR                0x01000000
 #define FDT_MAX_SIZE            0x00100000
-#define FW_MAX_SIZE             0x00400000
+#define FW_MAX_SIZE             0x04000000
 #define FW_FILE_NAME            "skiboot.lid"
 #define KERNEL_FILE_NAME        "skiroot.lid"
 #define KERNEL_LOAD_ADDR        0x20000000
@@ -902,7 +902,9 @@ static void ppc_powernv_init(MachineStat
 {
     ram_addr_t ram_size = machine->ram_size;
     const char *cpu_model = machine->cpu_model;
+#if 0
     const char *kernel_filename = machine->kernel_filename;
+#endif
     const char *initrd_filename = machine->initrd_filename;
     uint32_t initrd_base = 0;
     long initrd_size = 0;
@@ -998,19 +1000,20 @@ static void ppc_powernv_init(MachineStat
         bios_name = FW_FILE_NAME;
     }
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
-    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
+    fw_size = load_image_targphys_offset(filename, 0, FW_MAX_SIZE, 0x961000);
     if (fw_size < 0) {
         hw_error("qemu: could not load OPAL '%s'\n", filename);
         exit(1);
     }
+#if 0
     g_free(filename);
 
     if (kernel_filename == NULL) {
         kernel_filename = KERNEL_FILE_NAME;
     }
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, kernel_filename);
-    fw_size = load_image_targphys(filename, 0x20000000, 0x2000000);
+#endif
+    fw_size = load_image_targphys_offset(filename, 0x20000000, 0x2000000, 0xa61000);
     if (fw_size < 0) {
         hw_error("qemu: could not load kernel'%s'\n", filename);
         exit(1);

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-30 18:15                   ` Cédric Le Goater
@ 2015-11-30 20:09                     ` Benjamin Herrenschmidt
  2015-11-30 21:24                       ` Cédric Le Goater
  2015-12-07  1:25                     ` Stewart Smith
  1 sibling, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-30 20:09 UTC (permalink / raw)
  To: Cédric Le Goater, Alexander Graf, Alexey Kardashevskiy,
	Eric Blake, qemu-ppc
  Cc: qemu-devel

On Mon, 2015-11-30 at 19:15 +0100, Cédric Le Goater wrote:
> The pnor file is compiled from github. The patch is below (without the dirty
> cut and paste I did in loader.c). The offset for the PAYLOAD and BOOTKERNEL
> partitions are hard coded but I guess we don't need to read the flash partition
> table in qemu, not yet.

In practice we should read the partition tables, I don't like hard
coded offsets... But we should probably create a proper "flash driver"
that does a bunch of this, and also adds the BMC style flash interface
so OPAL can write to nvram.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-30 20:09                     ` Benjamin Herrenschmidt
@ 2015-11-30 21:24                       ` Cédric Le Goater
  2015-11-30 23:12                         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Cédric Le Goater @ 2015-11-30 21:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Alexander Graf, Alexey Kardashevskiy,
	Eric Blake, qemu-ppc
  Cc: qemu-devel

On 11/30/2015 09:09 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2015-11-30 at 19:15 +0100, Cédric Le Goater wrote:
>> The pnor file is compiled from github. The patch is below (without the dirty
>> cut and paste I did in loader.c). The offset for the PAYLOAD and BOOTKERNEL
>> partitions are hard coded but I guess we don't need to read the flash partition
>> table in qemu, not yet.
> 
> In practice we should read the partition tables, I don't like hard
> coded offsets... But we should probably create a proper "flash driver"
> that does a bunch of this, and also adds the BMC style flash interface
> so OPAL can write to nvram.

Yes, that would be better, but I can't yet gauge what it would take to
implement the LPC/AHB bridge to access the PNOR.

C.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-30 21:24                       ` Cédric Le Goater
@ 2015-11-30 23:12                         ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-30 23:12 UTC (permalink / raw)
  To: Cédric Le Goater, Alexander Graf, Alexey Kardashevskiy,
	Eric Blake, qemu-ppc
  Cc: qemu-devel

On Mon, 2015-11-30 at 22:24 +0100, Cédric Le Goater wrote:
> > In practice we should read the partition tables, I don't like hard
> > coded offsets... But we should probably create a proper "flash driver"
> > that does a bunch of this, and also adds the BMC style flash interface
> > so OPAL can write to nvram.
> 
> yes that would be better but I don't measure what it takes to implement 
> the LPC/AHB bridge to access the PNOR.

Yeah that's probably a little bit of work, but then there's also work
being done to protect the flash using IPMI commands to do the flashing,
so that could be an alternate option. I like emulating more of the BMC
though, including the flash controller, as it would improve our testing
coverage.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 33/77] ppc/xics: Make the ICSState a list
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 33/77] ppc/xics: Make the ICSState a list Benjamin Herrenschmidt
@ 2015-12-01  4:30   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  4:30 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 23626 bytes --]

On Wed, Nov 11, 2015 at 11:27:46AM +1100, Benjamin Herrenschmidt wrote:
> Instead of an array of fixed sized blocks, use a list, as we will need
> to have sources with variable number of interrupts. SPAPR only uses
> a single entry. Native will create more. If performance becomes an
> issue we can add some hashed lookup but for now this will do fine.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  hw/intc/xics.c        | 86 +++++++++++++++++++++++++++++++--------------------
>  hw/intc/xics_kvm.c    | 28 +++++++++--------
>  hw/intc/xics_spapr.c  | 75 ++++++++++++++++++++++++--------------------
>  hw/ppc/spapr_events.c |  2 +-
>  hw/ppc/spapr_pci.c    |  4 +--
>  hw/ppc/spapr_vio.c    |  2 +-
>  include/hw/ppc/xics.h | 10 +++---
>  7 files changed, 118 insertions(+), 89 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index d21471f..c4ac057 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -79,13 +79,16 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
>  static void xics_common_reset(DeviceState *d)
>  {
>      XICSState *xics = XICS_COMMON(d);
> +    ICSState *ics;
>      int i;
>  
>      for (i = 0; i < xics->nr_servers; i++) {
>          device_reset(DEVICE(&xics->ss[i]));
>      }
>  
> -    device_reset(DEVICE(xics->ics));
> +    QLIST_FOREACH(ics, &xics->ics, list) {
> +        device_reset(DEVICE(ics));
> +    }
>  }
>  
>  static void xics_prop_get_nr_irqs(Object *obj, Visitor *v,
> @@ -117,7 +120,6 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v,
>      }
>  
>      assert(info->set_nr_irqs);
> -    assert(xics->ics);
>      info->set_nr_irqs(xics, value, errp);
>  }
>  
> @@ -195,33 +197,35 @@ static void ics_reject(ICSState *ics, int nr);
>  static void ics_resend(ICSState *ics);
>  static void ics_eoi(ICSState *ics, int nr);
>  
> -static void icp_check_ipi(XICSState *xics, int server)
> +static void icp_check_ipi(ICPState *ss, int server)
>  {
> -    ICPState *ss = xics->ss + server;
> -
>      if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
>          return;
>      }
>  
>      trace_xics_icp_check_ipi(server, ss->mfrr);
>  
> -    if (XISR(ss)) {
> -        ics_reject(xics->ics, XISR(ss));
> +    if (XISR(ss) && ss->xirr_owner) {
> +        ics_reject(ss->xirr_owner, XISR(ss));
>      }
>  
>      ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
>      ss->pending_priority = ss->mfrr;
> +    ss->xirr_owner = NULL;
>      qemu_irq_raise(ss->output);
>  }
>  
>  static void icp_resend(XICSState *xics, int server)
>  {
>      ICPState *ss = xics->ss + server;
> +    ICSState *ics;
>  
>      if (ss->mfrr < CPPR(ss)) {
> -        icp_check_ipi(xics, server);
> +        icp_check_ipi(ss, server);
> +    }
> +    QLIST_FOREACH(ics, &xics->ics, list) {
> +        ics_resend(ics);
>      }
> -    ics_resend(xics->ics);
>  }
>  
>  void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
> @@ -239,7 +243,10 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
>              ss->xirr &= ~XISR_MASK; /* Clear XISR */
>              ss->pending_priority = 0xff;
>              qemu_irq_lower(ss->output);
> -            ics_reject(xics->ics, old_xisr);
> +            if (ss->xirr_owner) {
> +                ics_reject(ss->xirr_owner, old_xisr);
> +                ss->xirr_owner = NULL;
> +            }

To match the layout of the rest of things, this little fragment should
probably go into an icp_reject() function.

>          }
>      } else {
>          if (!XISR(ss)) {
> @@ -254,7 +261,7 @@ void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr)
>  
>      ss->mfrr = mfrr;
>      if (mfrr < CPPR(ss)) {
> -        icp_check_ipi(xics, server);
> +        icp_check_ipi(ss, server);
>      }
>  }
>  
> @@ -265,6 +272,7 @@ uint32_t icp_accept(ICPState *ss)
>      qemu_irq_lower(ss->output);
>      ss->xirr = ss->pending_priority << 24;
>      ss->pending_priority = 0xff;
> +    ss->xirr_owner = NULL;
>  
>      trace_xics_icp_accept(xirr, ss->xirr);
>  
> @@ -282,30 +290,40 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
>  void icp_eoi(XICSState *xics, int server, uint32_t xirr)
>  {
>      ICPState *ss = xics->ss + server;
> +    ICSState *ics;
> +    uint32_t irq;
>  
>      /* Send EOI -> ICS */
>      ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
>      trace_xics_icp_eoi(server, xirr, ss->xirr);
> -    ics_eoi(xics->ics, xirr & XISR_MASK);
> +    irq = xirr & XISR_MASK;
> +    QLIST_FOREACH(ics, &xics->ics, list) {
> +        if (ics_valid_irq(ics, irq)) {
> +            ics_eoi(ics, irq);
> +        }
> +    }
>      if (!XISR(ss)) {
>          icp_resend(xics, server);
>      }
>  }
>  
> -static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority)
> +static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
>  {
> +    XICSState *xics = ics->xics;
>      ICPState *ss = xics->ss + server;
>  
>      trace_xics_icp_irq(server, nr, priority);
>  
>      if ((priority >= CPPR(ss))
>          || (XISR(ss) && (ss->pending_priority <= priority))) {
> -        ics_reject(xics->ics, nr);
> +        ics_reject(ics, nr);
>      } else {
> -        if (XISR(ss)) {
> -            ics_reject(xics->ics, XISR(ss));
> +        if (XISR(ss) && ss->xirr_owner) {

I'm guessing the only case where we should get XISR(ss) &&
!ss->xirr_owner will be an IPI, is that right?

> +            ics_reject(ss->xirr_owner, XISR(ss));
> +            ss->xirr_owner = NULL;
>          }
>          ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
> +        ss->xirr_owner = ics;
>          ss->pending_priority = priority;
>          trace_xics_icp_raise(ss->xirr, ss->pending_priority);
>          qemu_irq_raise(ss->output);
> @@ -388,8 +406,7 @@ static void resend_msi(ICSState *ics, int srcno)
>      if (irq->status & XICS_STATUS_REJECTED) {
>          irq->status &= ~XICS_STATUS_REJECTED;
>          if (irq->priority != 0xff) {
> -            icp_irq(ics->xics, irq->server, srcno + ics->offset,
> -                    irq->priority);
> +            icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>          }
>      }
>  }
> @@ -402,7 +419,7 @@ static void resend_lsi(ICSState *ics, int srcno)
>          && (irq->status & XICS_STATUS_ASSERTED)
>          && !(irq->status & XICS_STATUS_SENT)) {
>          irq->status |= XICS_STATUS_SENT;
> -        icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
> +        icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>      }
>  }
>  
> @@ -417,7 +434,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int val)
>              irq->status |= XICS_STATUS_MASKED_PENDING;
>              trace_xics_masked_pending();
>          } else  {
> -            icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
> +            icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>          }
>      }
>  }
> @@ -456,7 +473,7 @@ static void write_xive_msi(ICSState *ics, int srcno)
>      }
>  
>      irq->status &= ~XICS_STATUS_MASKED_PENDING;
> -    icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority);
> +    icp_irq(ics, irq->server, srcno + ics->offset, irq->priority);
>  }
>  
>  static void write_xive_lsi(ICSState *ics, int srcno)
> @@ -642,28 +659,23 @@ static const TypeInfo ics_info = {
>  /*
>   * Exported functions
>   */
> -int xics_find_source(XICSState *xics, int irq)
> +ICSState *xics_find_source(XICSState *xics, int irq)
>  {
> -    int sources = 1;
> -    int src;
> +    ICSState *ics;
>  
> -    /* FIXME: implement multiple sources */
> -    for (src = 0; src < sources; ++src) {
> -        ICSState *ics = &xics->ics[src];
> +    QLIST_FOREACH(ics, &xics->ics, list) {
>          if (ics_valid_irq(ics, irq)) {
> -            return src;
> +            return ics;
>          }
>      }
> -
> -    return -1;
> +    return NULL;
>  }
>  
>  qemu_irq xics_get_qirq(XICSState *xics, int irq)
>  {
> -    int src = xics_find_source(xics, irq);
> +    ICSState *ics = xics_find_source(xics, irq);
>  
> -    if (src >= 0) {
> -        ICSState *ics = &xics->ics[src];
> +    if (ics) {
>          return ics->qirqs[irq - ics->offset];
>      }
>  
> @@ -684,7 +696,13 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
>  
>  void xics_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
>  {
> -    xics->nr_irqs = xics->ics->nr_irqs = nr_irqs;
> +    ICSState *ics = QLIST_FIRST(&xics->ics);
> +
> +    /* This needs to be deprecated ... */

Yeah..

I can't even remember what this was really for.  We probably need to
move it to a property on the ics objects, with a backwards compat shim
for PAPR to copy the xics "master" object value to the single ics.

> +    xics->nr_irqs = nr_irqs;
> +    if (ics) {
> +        ics->nr_irqs = nr_irqs;
> +    }
>  }
>  
>  void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp)
> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
> index 7d86157..a478d25 100644
> --- a/hw/intc/xics_kvm.c
> +++ b/hw/intc/xics_kvm.c
> @@ -356,11 +356,6 @@ static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
>      }
>  }
>  
> -static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
> -{
> -    xics->nr_irqs = xics->ics->nr_irqs = nr_irqs;
> -}
> -
>  static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
>                                      Error **errp)
>  {
> @@ -391,6 +386,7 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
>  {
>      KVMXICSState *xicskvm = KVM_XICS(dev);
>      XICSState *xics = XICS_COMMON(dev);
> +    ICSState *ics;
>      int i, rc;
>      Error *error = NULL;
>      struct kvm_create_device xics_create_device = {
> @@ -442,10 +438,12 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
>  
>      xicskvm->kernel_xics_fd = xics_create_device.fd;
>  
> -    object_property_set_bool(OBJECT(xics->ics), true, "realized", &error);
> -    if (error) {
> -        error_propagate(errp, error);
> -        goto fail;
> +    QLIST_FOREACH(ics, &xics->ics, list) {
> +        object_property_set_bool(OBJECT(ics), true, "realized", &error);
> +        if (error) {
> +            error_propagate(errp, error);
> +            goto fail;
> +        }
>      }
>  
>      assert(xics->nr_servers);
> @@ -473,10 +471,14 @@ fail:
>  static void xics_kvm_initfn(Object *obj)
>  {
>      XICSState *xics = XICS_COMMON(obj);
> +    ICSState *ics;
> +
> +    QLIST_INIT(&xics->ics);
>  
> -    xics->ics = ICS(object_new(TYPE_KVM_ICS));
> -    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
> -    xics->ics->xics = xics;
> +    ics = ICS(object_new(TYPE_KVM_ICS));
> +    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
> +    ics->xics = xics;
> +    QLIST_INSERT_HEAD(&xics->ics, ics, list);
>  }
>  
>  static void xics_kvm_class_init(ObjectClass *oc, void *data)
> @@ -486,7 +488,7 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
>  
>      dc->realize = xics_kvm_realize;
>      xsc->cpu_setup = xics_kvm_cpu_setup;
> -    xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
> +    xsc->set_nr_irqs = xics_set_nr_irqs;

Wow.. why were xics_kvm_set_nr_irqs and xics_set_nr_irqs ever
separate, I wonder.

>      xsc->set_nr_servers = xics_kvm_set_nr_servers;
>  }
>  
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index fb508cd..d75fcf0 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -111,10 +111,10 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                            uint32_t nargs, target_ulong args,
>                            uint32_t nret, target_ulong rets)
>  {
> -    ICSState *ics = spapr->xics->ics;
> +    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
>      uint32_t nr, server, priority;
>  
> -    if ((nargs != 3) || (nret != 1)) {
> +    if ((nargs != 3) || (nret != 1) || !ics) {
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>          return;
>      }

!ics should probably be a HW_ERROR (or even an assert).

> @@ -139,10 +139,10 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                            uint32_t nargs, target_ulong args,
>                            uint32_t nret, target_ulong rets)
>  {
> -    ICSState *ics = spapr->xics->ics;
> +    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
>      uint32_t nr;
>  
> -    if ((nargs != 1) || (nret != 3)) {
> +    if ((nargs != 1) || (nret != 3) || !ics) {
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>          return;
>      }
> @@ -164,10 +164,10 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                           uint32_t nargs, target_ulong args,
>                           uint32_t nret, target_ulong rets)
>  {
> -    ICSState *ics = spapr->xics->ics;
> +    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
>      uint32_t nr;
>  
> -    if ((nargs != 1) || (nret != 1)) {
> +    if ((nargs != 1) || (nret != 1) || !ics) {
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>          return;
>      }
> @@ -190,10 +190,10 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                          uint32_t nargs, target_ulong args,
>                          uint32_t nret, target_ulong rets)
>  {
> -    ICSState *ics = spapr->xics->ics;
> +    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
>      uint32_t nr;
>  
> -    if ((nargs != 1) || (nret != 1)) {
> +    if ((nargs != 1) || (nret != 1) || !ics) {
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>          return;
>      }
> @@ -215,6 +215,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>  static void xics_spapr_realize(DeviceState *dev, Error **errp)
>  {
>      XICSState *xics = XICS(dev);
> +    ICSState *ics;
>      Error *error = NULL;
>      int i;
>  
> @@ -236,10 +237,12 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
>      spapr_register_hypercall(H_EOI, h_eoi);
>      spapr_register_hypercall(H_IPOLL, h_ipoll);
>  
> -    object_property_set_bool(OBJECT(xics->ics), true, "realized", &error);
> -    if (error) {
> -        error_propagate(errp, error);
> -        return;
> +    QLIST_FOREACH(ics, &xics->ics, list) {
> +        object_property_set_bool(OBJECT(ics), true, "realized", &error);
> +        if (error) {
> +            error_propagate(errp, error);
> +            return;
> +        }
>      }
>  
>      for (i = 0; i < xics->nr_servers; i++) {
> @@ -254,10 +257,14 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
>  static void xics_spapr_initfn(Object *obj)
>  {
>      XICSState *xics = XICS(obj);
> +    ICSState *ics;
> +
> +    QLIST_INIT(&xics->ics);
>  
> -    xics->ics = ICS(object_new(TYPE_ICS));
> -    object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL);
> -    xics->ics->xics = xics;
> +    ics = ICS(object_new(TYPE_ICS));    
> +    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
> +    ics->xics = xics;
> +    QLIST_INSERT_HEAD(&xics->ics, ics, list);
>  }
>  
>  static void xics_spapr_class_init(ObjectClass *oc, void *data)
> @@ -303,29 +310,31 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
>      return -1;
>  }
>  
> -int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi)
> +int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi)
>  {
> -    ICSState *ics = &xics->ics[src];
> +    ICSState *ics = QLIST_FIRST(&xics->ics);
>      int irq;
>  
> +    if (!ics) {
> +        return -1;
> +    }
>      if (irq_hint) {
> -        assert(src == xics_find_source(xics, irq_hint));
>          if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
> -            trace_xics_alloc_failed_hint(src, irq_hint);
> +            trace_xics_alloc_failed_hint(0, irq_hint);
>              return -1;
>          }
>          irq = irq_hint;
>      } else {
>          irq = ics_find_free_block(ics, 1, 1);
>          if (irq < 0) {
> -            trace_xics_alloc_failed_no_left(src);
> +            trace_xics_alloc_failed_no_left(0);
>              return -1;
>          }
>          irq += ics->offset;
>      }
>  
>      ics_set_irq_type(ics, irq - ics->offset, lsi);
> -    trace_xics_alloc(src, irq);
> +    trace_xics_alloc(0, irq);

Actually we should really deprecate xics_spapr_alloc().  I hadn't
realised it at the time, but dynamically allocating resources like
this is a PITA for migration and cross-version compatibility.  So we
should avoid it, instead having the caller explicitly assign numbers
based on explicit parameters.

>      return irq;
>  }
> @@ -334,12 +343,15 @@ int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi)
>   * Allocate block of consecutive IRQs, and return the number of the first IRQ in the block.
>   * If align==true, aligns the first IRQ number to num.
>   */
> -int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, bool align)
> +int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align)
>  {
> +    ICSState *ics = QLIST_FIRST(&xics->ics);
>      int i, first = -1;
> -    ICSState *ics = &xics->ics[src];
>  
> -    assert(src == 0);
> +    if (!ics) {
> +        return -1;
> +    }
> +
>      /*
>       * MSIMesage::data is used for storing VIRQ so
>       * it has to be aligned to num to support multiple
> @@ -362,7 +374,7 @@ int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, bool ali
>      }
>      first += ics->offset;
>  
> -    trace_xics_alloc_block(src, first, num, lsi, align);
> +    trace_xics_alloc_block(0, first, num, lsi, align);
>  
>      return first;
>  }
> @@ -373,7 +385,7 @@ static void ics_free(ICSState *ics, int srcno, int num)
>  
>      for (i = srcno; i < srcno + num; ++i) {
>          if (ICS_IRQ_FREE(ics, i)) {
> -            trace_xics_ics_free_warn(ics - ics->xics->ics, i + ics->offset);
> +            trace_xics_ics_free_warn(0, i + ics->offset);
>          }
>          memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
>      }
> @@ -381,15 +393,10 @@ static void ics_free(ICSState *ics, int srcno, int num)
>  
>  void xics_spapr_free(XICSState *xics, int irq, int num)
>  {
> -    int src = xics_find_source(xics, irq);
> -
> -    if (src >= 0) {
> -        ICSState *ics = &xics->ics[src];
> -
> -        /* FIXME: implement multiple sources */
> -        assert(src == 0);
> +    ICSState *ics = xics_find_source(xics, irq);
>  
> -        trace_xics_ics_free(ics - xics->ics, irq, num);
> +    if (ics) {
> +        trace_xics_ics_free(0, irq, num);
>          ics_free(ics, irq - ics->offset, num);
>      }
>  }
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index c06deea..6335ead 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -587,7 +587,7 @@ out_no_events:
>  void spapr_events_init(sPAPRMachineState *spapr)
>  {
>      QTAILQ_INIT(&spapr->pending_events);
> -    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false);
> +    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, false);
>      spapr->epow_notifier.notify = spapr_powerdown_req;
>      qemu_register_powerdown_notifier(&spapr->epow_notifier);
>      spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index cf3192e..9b13f85 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>      }
>  
>      /* Allocate MSIs */
> -    irq = xics_spapr_alloc_block(spapr->xics, 0, req_num, false,
> +    irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
>                             ret_intr_type == RTAS_TYPE_MSI);
>      if (!irq) {
>          error_report("Cannot allocate MSIs for device %x", config_addr);
> @@ -1360,7 +1360,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
>      for (i = 0; i < PCI_NUM_PINS; i++) {
>          uint32_t irq;
>  
> -        irq = xics_spapr_alloc_block(spapr->xics, 0, 1, true, false);
> +        irq = xics_spapr_alloc_block(spapr->xics, 1, true, false);
>          if (!irq) {
>              error_setg(errp, "spapr_allocate_lsi failed");
>              return;
> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
> index fc731eb..1a84815 100644
> --- a/hw/ppc/spapr_vio.c
> +++ b/hw/ppc/spapr_vio.c
> @@ -462,7 +462,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
>          dev->qdev.id = id;
>      }
>  
> -    dev->irq = xics_spapr_alloc(spapr->xics, 0, dev->irq, false);
> +    dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false);
>      if (!dev->irq) {
>          error_setg(errp, "can't allocate IRQ");
>          return;
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index e670e89..12fc584 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -79,7 +79,7 @@ struct XICSState {
>      uint32_t nr_servers;
>      uint32_t nr_irqs;
>      ICPState *ss;
> -    ICSState *ics;
> +    QLIST_HEAD(, ICSState) ics;
>  };
>  
>  #define TYPE_ICP "icp"
> @@ -105,6 +105,7 @@ struct ICPState {
>      DeviceState parent_obj;
>      /*< public >*/
>      CPUState *cs;
> +    ICSState *xirr_owner;

Currently xirr_owner will be lost across migration, which will break
things.

Am I right in thinking that it's basically an optimization, and could
be reconstructed from the XISR value and list of ICS ranges?  If so we
can do that easily enough in a post_load function.

>      uint32_t xirr;
>      uint8_t pending_priority;
>      uint8_t mfrr;
> @@ -139,6 +140,7 @@ struct ICSState {
>      qemu_irq *qirqs;
>      ICSIRQState *irqs;
>      XICSState *xics;
> +    QLIST_ENTRY(ICSState) list;
>  };
>  
>  static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
> @@ -167,8 +169,8 @@ struct ICSIRQState {
>  
>  qemu_irq xics_get_qirq(XICSState *icp, int irq);
>  
> -int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
> -int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
> +int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi);
> +int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align);
>  void xics_spapr_free(XICSState *icp, int irq, int num);
>  
>  void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
> @@ -189,6 +191,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
>  
>  void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp);
>  void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
> -int xics_find_source(XICSState *icp, int irq);
> +ICSState *xics_find_source(XICSState *icp, int irq);
>  
>  #endif /* __XICS_H__ */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 34/77] ppc/xics: An ICS with offset 0 is assumed to be uninitialized
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 34/77] ppc/xics: An ICS with offset 0 is assumed to be uninitialized Benjamin Herrenschmidt
@ 2015-12-01  4:40   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  4:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1019 bytes --]

On Wed, Nov 11, 2015 at 11:27:47AM +1100, Benjamin Herrenschmidt wrote:
> This will make life easier for dealing with dynamically configured
> ICSes such as PHB3
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>


> ---
>  include/hw/ppc/xics.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 12fc584..51a63cb 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -145,7 +145,7 @@ struct ICSState {
>  
>  static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
>  {
> -    return (nr >= ics->offset)
> +    return (ics->offset != 0) && (nr >= ics->offset)
>          && (nr < (ics->offset + ics->nr_irqs));
>  }
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 35/77] ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 35/77] ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c Benjamin Herrenschmidt
@ 2015-12-01  4:46   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  4:46 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 4189 bytes --]

On Wed, Nov 11, 2015 at 11:27:48AM +1100, Benjamin Herrenschmidt wrote:
> It will not be used by the native implementation. This allows us to
> also remove the include of spapr.h from the common code
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  hw/intc/xics.c        | 12 ------------
>  hw/intc/xics_kvm.c    | 13 ++++++++++++-
>  hw/intc/xics_spapr.c  | 13 ++++++++++++-
>  include/hw/ppc/xics.h |  1 -
>  4 files changed, 24 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index c4ac057..0c355f4 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -28,7 +28,6 @@
>  #include "hw/hw.h"
>  #include "trace.h"
>  #include "qemu/timer.h"
> -#include "hw/ppc/spapr.h"
>  #include "hw/ppc/xics.h"
>  #include "qemu/error-report.h"
>  #include "qapi/visitor.h"
> @@ -694,17 +693,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
>   * XICS
>   */
>  
> -void xics_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
> -{
> -    ICSState *ics = QLIST_FIRST(&xics->ics);
> -
> -    /* This needs to be deprecated ... */
> -    xics->nr_irqs = nr_irqs;
> -    if (ics) {
> -        ics->nr_irqs = nr_irqs;
> -    }
> -}
> -
>  void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp)
>  {
>      int i;
> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
> index a478d25..6fbc715 100644
> --- a/hw/intc/xics_kvm.c
> +++ b/hw/intc/xics_kvm.c
> @@ -468,6 +468,17 @@ fail:
>      kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
>  }
>  
> +static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
> +{
> +    ICSState *ics = QLIST_FIRST(&xics->ics);
> +
> +    /* This needs to be deprecated ... */
> +    xics->nr_irqs = nr_irqs;
> +    if (ics) {
> +        ics->nr_irqs = nr_irqs;
> +    }
> +}
> +

Looks like you have a bit of unnecessary churn here, since you unified
xics_kvm_set_nr_irqs() with xics_set_nr_irqs() earlier in the series
only to split them again here.

>  static void xics_kvm_initfn(Object *obj)
>  {
>      XICSState *xics = XICS_COMMON(obj);
> @@ -488,7 +499,7 @@ static void xics_kvm_class_init(ObjectClass *oc, void *data)
>  
>      dc->realize = xics_kvm_realize;
>      xsc->cpu_setup = xics_kvm_cpu_setup;
> -    xsc->set_nr_irqs = xics_set_nr_irqs;
> +    xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
>      xsc->set_nr_servers = xics_kvm_set_nr_servers;
>  }
>  
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index d75fcf0..3092f8d 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -267,13 +267,24 @@ static void xics_spapr_initfn(Object *obj)
>      QLIST_INSERT_HEAD(&xics->ics, ics, list);
>  }
>  
> +static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
> +{
> +    ICSState *ics = QLIST_FIRST(&xics->ics);
> +
> +    /* This needs to be deprecated ... */
> +    xics->nr_irqs = nr_irqs;
> +    if (ics) {
> +        ics->nr_irqs = nr_irqs;
> +    }
> +}
> +
>  static void xics_spapr_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
>      XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
>  
>      dc->realize = xics_spapr_realize;
> -    xsc->set_nr_irqs = xics_set_nr_irqs;
> +    xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
>      xsc->set_nr_servers = xics_set_nr_servers;
>  }
>  
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 51a63cb..9e5b751 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -189,7 +189,6 @@ void ics_write_xive(ICSState *ics, int nr, int server,
>  
>  void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
>  
> -void xics_set_nr_irqs(XICSState *icp, uint32_t nr_irqs, Error **errp);
>  void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
>  ICSState *xics_find_source(XICSState *icp, int irq);
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 36/77] ppc/xics: Use a helper to add a new ICS
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 36/77] ppc/xics: Use a helper to add a new ICS Benjamin Herrenschmidt
@ 2015-12-01  4:47   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  4:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2131 bytes --]

On Wed, Nov 11, 2015 at 11:27:49AM +1100, Benjamin Herrenschmidt wrote:
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Hmm.. does it make sense for the helper to do the object_new() and
add_child() as well?


> ---
>  hw/intc/xics.c        | 6 ++++++
>  hw/intc/xics_spapr.c  | 3 +--
>  include/hw/ppc/xics.h | 1 +
>  3 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index 0c355f4..3cd696f 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -90,6 +90,12 @@ static void xics_common_reset(DeviceState *d)
>      }
>  }
>  
> +void xics_add_ics(XICSState *xics, ICSState *ics)
> +{
> +    ics->xics = xics;
> +    QLIST_INSERT_HEAD(&xics->ics, ics, list);
> +}
> +
>  static void xics_prop_get_nr_irqs(Object *obj, Visitor *v,
>                                    void *opaque, const char *name, Error **errp)
>  {
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index 3092f8d..fc331d8 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -263,8 +263,7 @@ static void xics_spapr_initfn(Object *obj)
>  
>      ics = ICS(object_new(TYPE_ICS));    
>      object_property_add_child(obj, "ics", OBJECT(ics), NULL);
> -    ics->xics = xics;
> -    QLIST_INSERT_HEAD(&xics->ics, ics, list);
> +    xics_add_ics(xics, ics);
>  }
>  
>  static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp)
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 9e5b751..5acb329 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -191,5 +191,6 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
>  
>  void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
>  ICSState *xics_find_source(XICSState *icp, int irq);
> +void xics_add_ics(XICSState *xics, ICSState *ics);
>  
>  #endif /* __XICS_H__ */

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 37/77] ppc/xics: Split ICS into base class and "simple" implementation
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 37/77] ppc/xics: Split ICS into base class and "simple" implementation Benjamin Herrenschmidt
@ 2015-12-01  5:13   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  5:13 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2886 bytes --]

On Wed, Nov 11, 2015 at 11:27:50AM +1100, Benjamin Herrenschmidt wrote:
> The existing implementation becomes the "ics-simple" subclass of ICS,
> so there should be no change in behaviour for SPAPR.
> 
> This will allow different implementations for the source controllers
> such as the MSI support of PHB3 on Power8 which uses in-memory state
> tables for example.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

[snip]

> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index fc331d8..f7d444a 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -112,7 +112,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                            uint32_t nret, target_ulong rets)
>  {
>      ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
> -    uint32_t nr, server, priority;
> +    uint32_t nr, src_no, server, priority;

Nit: elsewhere I use srcno without a _ for this value.

[snip]
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 5acb329..93a627b 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -116,6 +116,9 @@ struct ICPState {
>  #define TYPE_ICS "ics"
>  #define ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS)
>  
> +#define TYPE_ICS_SIMPLE "ics-simple"
> +#define ICS_SIMPLE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_SIMPLE)

Again, changing type names will break migration, I believe.  So better
to leave this as ics and add "ics-base" and "ics-native" or similar.

> +
>  #define TYPE_KVM_ICS "icskvm"
>  #define KVM_ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_KVM_ICS)
>  
> @@ -129,6 +132,9 @@ struct ICSStateClass {
>  
>      void (*pre_save)(ICSState *s);
>      int (*post_load)(ICSState *s, int version_id);
> +    void (*reject)(ICSState *s, uint32_t irq);
> +    void (*resend)(ICSState *s);
> +    void (*eoi)(ICSState *s, uint32_t irq);
>  };
>  
>  struct ICSState {
> @@ -184,10 +190,10 @@ uint32_t icp_accept(ICPState *ss);
>  uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr);
>  void icp_eoi(XICSState *icp, int server, uint32_t xirr);
>  
> -void ics_write_xive(ICSState *ics, int nr, int server,
> -                    uint8_t priority, uint8_t saved_priority);
> +void ics_simple_write_xive(ICSState *ics, int nr, int server,
> +                           uint8_t priority, uint8_t saved_priority);
>  
> -void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
> +void ics_simple_set_irq_type(ICSState *ics, int srcno, bool lsi);
>  
>  void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
>  ICSState *xics_find_source(XICSState *icp, int irq);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 38/77] ppc/xics: Add "native" XICS subclass
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 38/77] ppc/xics: Add "native" XICS subclass Benjamin Herrenschmidt
@ 2015-12-01  6:28   ` David Gibson
  2015-12-01  6:39   ` David Gibson
  1 sibling, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  6:28 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 14165 bytes --]

On Wed, Nov 11, 2015 at 11:27:51AM +1100, Benjamin Herrenschmidt wrote:
> This provides MMIO based ICP access as found on POWER8

Mostly looks sound.

I note that the links registers don't actually do anything at this
point; I assume that's intentional.  It probably deserves a comment
somewhere (in the commit message at least).

> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  default-configs/ppc64-softmmu.mak |   3 +-
>  hw/intc/Makefile.objs             |   1 +
>  hw/intc/xics_native.c             | 294 ++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/xics.h             |  14 ++
>  4 files changed, 311 insertions(+), 1 deletion(-)
>  create mode 100644 hw/intc/xics_native.c
> 
> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index 516a6e2..d30176e 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -49,8 +49,9 @@ CONFIG_PLATFORM_BUS=y
>  CONFIG_ETSEC=y
>  CONFIG_LIBDECNUMBER=y
>  # For pSeries
> -CONFIG_XICS=$(CONFIG_PSERIES)
> +CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV))
>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
> +CONFIG_XICS_NATIVE=$(CONFIG_POWERNV)
>  CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
>  # For PReP
>  CONFIG_MC146818RTC=y
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index e24cb03..104a169 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -27,6 +27,7 @@ obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
>  obj-$(CONFIG_SH4) += sh_intc.o
>  obj-$(CONFIG_XICS) += xics.o
>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> +obj-$(CONFIG_XICS_NATIVE) += xics_native.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c
> new file mode 100644
> index 0000000..3f488f3
> --- /dev/null
> +++ b/hw/intc/xics_native.c
> @@ -0,0 +1,294 @@
> +/*
> + * QEMU PowerPC hardware System Emulator
> + *
> + * Native version of ICS/ICP
> + *
> + * Copyright (c) 2010,2011 David Gibson, IBM Corporation.

It should probably have your copyright too; I'm pretty sure this is
not all my code ;).

> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + *
> + */
> +
> +#include "hw/hw.h"
> +#include "trace.h"
> +#include "qemu/timer.h"
> +#include "hw/ppc/xics.h"
> +#include "qemu/error-report.h"
> +#include "qapi/visitor.h"
> +
> +#include <libfdt.h>
> +
> +//#define DEBUG_MM(fmt...)      printf(fmt)
> +#define DEBUG_MM(fmt...)        do { } while(0)
> +
> +static void xics_native_initfn(Object *obj)
> +{
> +    XICSState *xics = XICS_NATIVE(obj);
> +
> +    QLIST_INIT(&xics->ics);
> +}
> +
> +static uint64_t icp_mm_read(void *opaque, hwaddr addr, unsigned width)
> +{
> +    XICSState *s = opaque;
> +    int32_t cpu_id, server;
> +    uint32_t val;
> +    ICPState *ss;
> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
> +
> +    cpu_id = (addr & (ICP_MM_SIZE - 1)) >> 12;
> +    server = get_cpu_index_by_dt_id(cpu_id);
> +    if (server < 0) {
> +        fprintf(stderr, "XICS: Bad ICP server %d\n", server);
> +        goto bad_access;
> +    }
> +    ss = &s->ss[server];
> +
> +    switch(addr & 0xffc) {
> +    case 0: /* poll */
> +        val = icp_ipoll(ss, NULL);
> +        if (byte0) {
> +            val >>= 24;
> +        } else if (width != 4) {
> +            goto bad_access;
> +        }
> +        break;
> +    case 4: /* xirr */
> +        if (byte0) {
> +            val = icp_ipoll(ss, NULL) >> 24;
> +        } else if (width == 4) {
> +            val = icp_accept(ss);
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 12:
> +        if (byte0) {
> +            val = ss->mfrr;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 16:
> +        if (width == 4) {
> +            val = ss->links[0];
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 20:
> +        if (width == 4) {
> +            val = ss->links[1];
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 24:
> +        if (width == 4) {
> +            val = ss->links[2];
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    default:
> +bad_access:
> +        fprintf(stderr, "XICS: Bad ICP access %llx/%d\n",
> +                (unsigned long long)addr, width);
> +        val = 0xffffffff;
> +    }
> +    DEBUG_MM("icp_mm_read(addr=%016llx,serv=0x%x/%d,off=%d,w=%d,val=0x%08x)\n",
> +             (unsigned long long)addr, cpu_id, server, (int)(addr & 0xffc),
> +             width, val);
> +
> +    return val;
> +}
> +
> +static void icp_mm_write(void *opaque, hwaddr addr, uint64_t val,
> +                        unsigned width)
> +{
> +    XICSState *s = opaque;
> +    int32_t cpu_id, server;
> +    ICPState *ss;
> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
> +
> +    cpu_id = (addr & (ICP_MM_SIZE - 1)) >> 12;
> +    server = get_cpu_index_by_dt_id(cpu_id);
> +    if (server < 0) {
> +        fprintf(stderr, "XICS: Bad ICP server %d\n", server);
> +        goto bad_access;
> +    }
> +    ss = &s->ss[server];
> +
> +    DEBUG_MM("icp_mm_write(addr=%016llx,serv=0x%x/%d,off=%d,w=%d,val=0x%08x)\n",
> +             (unsigned long long)addr, cpu_id, server,
> +             (int)(addr & 0xffc), width, (uint32_t)val);
> +
> +    switch(addr & 0xffc) {
> +    case 4: /* xirr */
> +        if (byte0) {
> +            icp_set_cppr(s, server, val);
> +        } else if (width == 4) {
> +            icp_eoi(s, server, val);
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 12:
> +        if (byte0) {
> +            icp_set_mfrr(s, server, val);
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 16:
> +        if (width == 4) {
> +            ss->links[0] = val;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 20:
> +        if (width == 4) {
> +            ss->links[1] = val;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 24:
> +        if (width == 4) {
> +            ss->links[2] = val;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    default:
> + bad_access:
> +        val = 0xffffffff;
> +    }
> +}
> +
> +static const MemoryRegionOps icp_mm_ops = {
> +    .read = icp_mm_read,
> +    .write = icp_mm_write,
> +    .valid.min_access_size = 1,
> +    .valid.max_access_size = 4,
> +    .impl.min_access_size = 1,
> +    .impl.max_access_size = 4,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +};
> +
> +#define _FDT(exp) \
> +    do { \
> +        int ret = (exp);                                           \
> +        if (ret < 0) {                                             \
> +            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
> +                    #exp, fdt_strerror(ret));                      \
> +            exit(1);                                               \
> +        }                                                          \
> +    } while (0)
> +
> +void xics_create_native_icp_node(XICSState *s, void *fdt,
> +                                 uint32_t base, uint32_t count)
> +{
> +    uint64_t addr;
> +    char *name;
> +    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
> +    uint32_t irange[2], i, rsize;
> +    uint64_t *reg;
> +
> +    addr = ICP_MM_BASE | (base << 12);
> +
> +    irange[0] = cpu_to_be32(base);
> +    irange[1] = cpu_to_be32(count);
> +
> +    rsize = sizeof(uint64_t) * 2 * count;
> +    reg = g_malloc(rsize);
> +    for (i = 0; i < count; i++) {
> +        reg[i * 2] = cpu_to_be64(addr | ((base + i) * 0x1000));
> +        reg[i * 2 + 1] = cpu_to_be64(0x1000);
> +    }
> +
> +    name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
> +
> +    /* interrupt controller */
> +    _FDT((fdt_begin_node(fdt, name)));
> +    g_free(name);
> +
> +    _FDT((fdt_property(fdt, "compatible", compat, sizeof(compat))));
> +    _FDT((fdt_property(fdt, "reg", reg, rsize)));
> +    _FDT((fdt_property_string(fdt, "device_type",
> +                              "PowerPC-External-Interrupt-Presentation")));
> +    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
> +    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
> +                       irange, sizeof(irange))));
> +    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 1)));
> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0)));
> +    _FDT((fdt_end_node(fdt)));
> +}
> +
> +static void xics_native_realize(DeviceState *dev, Error **errp)
> +{
> +    XICSState *s = XICS_NATIVE(dev);
> +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
> +    Error *error = NULL;
> +    int i;
> +
> +    if (!s->nr_servers) {
> +        error_setg(errp, "Number of servers needs to be greater 0");
> +        return;
> +    }
> +
> +    /* Register MMIO regions */
> +    memory_region_init_io(&s->icp_mmio, OBJECT(s), &icp_mm_ops, s, "icp",
> +                          ICP_MM_SIZE);
> +    sysbus_init_mmio(sbd, &s->icp_mmio);
> +    sysbus_mmio_map(sbd, 0, ICP_MM_BASE);

As a rule it's better to expose a device's MMIO address as a property
and have whatever creates the device assign it, rather than
hard-coding knowledge of the address inside the device.

> +
> +    for (i = 0; i < s->nr_servers; i++) {
> +        object_property_set_bool(OBJECT(&s->ss[i]), true, "realized", &error);
> +        if (error) {
> +            error_propagate(errp, error);
> +            return;
> +        }
> +    }
> +}
> +
> +static void xics_native_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    XICSStateClass *xsc = XICS_NATIVE_CLASS(oc);
> +
> +    dc->realize = xics_native_realize;
> +    xsc->set_nr_servers = xics_set_nr_servers;

I thought you weren't going to be using the set_nr_servers stuff in
the native mode xics?

> +}
> +
> +static const TypeInfo xics_native_info = {
> +    .name          = TYPE_XICS_NATIVE,
> +    .parent        = TYPE_XICS_COMMON,
> +    .instance_size = sizeof(XICSState),
> +    .class_size = sizeof(XICSStateClass),
> +    .class_init    = xics_native_class_init,
> +    .instance_init = xics_native_initfn,
> +};
> +
> +static void xics_native_register_types(void)
> +{
> +    type_register_static(&xics_native_info);
> +}
> +type_init(xics_native_register_types)
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 93a627b..f32f409 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -38,6 +38,9 @@
>  #define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
>  #define KVM_XICS(obj) OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
>  
> +#define TYPE_XICS_NATIVE "xics-native"
> +#define XICS_NATIVE(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_NATIVE)
> +
>  #define XICS_COMMON_CLASS(klass) \
>       OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
>  #define XICS_SPAPR_CLASS(klass) \
> @@ -46,6 +49,8 @@
>       OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
>  #define XICS_SPAPR_GET_CLASS(obj) \
>       OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
> +#define XICS_NATIVE_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE)
>  
>  #define XICS_IPI        0x2
>  #define XICS_BUID       0x1
> @@ -80,6 +85,7 @@ struct XICSState {
>      uint32_t nr_irqs;
>      ICPState *ss;
>      QLIST_HEAD(, ICSState) ics;
> +    MemoryRegion icp_mmio;
>  };
>  
>  #define TYPE_ICP "icp"
> @@ -111,8 +117,13 @@ struct ICPState {
>      uint8_t mfrr;
>      qemu_irq output;
>      bool cap_irq_xics_enabled;
> +    uint32_t links[3];
>  };
>  
> +/* This should be an XSCOM BAR ... the size is arbitrary as well */
> +#define ICP_MM_BASE     0x0003FFFF80000000
> +#define ICP_MM_SIZE     0x0000000010000000
> +
>  #define TYPE_ICS "ics"
>  #define ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS)
>  
> @@ -181,6 +192,9 @@ void xics_spapr_free(XICSState *icp, int irq, int num);
>  
>  void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
>  
> +void xics_create_native_icp_node(XICSState *s, void *fdt,
> +                                 uint32_t base, uint32_t count);
> +
>  /* Internal XICS interfaces */
>  int get_cpu_index_by_dt_id(int cpu_dt_id);
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 39/77] ppc/xics: Add xics to the monitor "info pic" command
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 39/77] ppc/xics: Add xics to the monitor "info pic" command Benjamin Herrenschmidt
@ 2015-12-01  6:32   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  6:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 5784 bytes --]

On Wed, Nov 11, 2015 at 11:27:52AM +1100, Benjamin Herrenschmidt wrote:
> Useful to debug interrupt problems.

Ugh.. I can see the use of this, but we really want to just deprecate
info pic entirely, it's an awful, awful interface.

I think the right way to do this is to allow some state introspection
via the QOM interfaces on the xics devices themselves, but I'm not
immediately sure how to go about that.

> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  hmp-commands-info.hx  |  2 ++
>  hw/intc/xics.c        | 38 ++++++++++++++++++++++++++++++++++++++
>  hw/ppc/ppc.c          | 14 ++++++++++++++
>  include/hw/ppc/ppc.h  |  2 ++
>  include/hw/ppc/xics.h |  2 ++
>  monitor.c             |  3 +++
>  6 files changed, 61 insertions(+)
> 
> diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
> index 9b71351..2f1dc86 100644
> --- a/hmp-commands-info.hx
> +++ b/hmp-commands-info.hx
> @@ -203,6 +203,8 @@ ETEXI
>          .mhandler.cmd = sun4m_hmp_info_pic,
>  #elif defined(TARGET_LM32)
>          .mhandler.cmd = lm32_hmp_info_pic,
> +#elif defined(TARGET_PPC)
> +        .mhandler.cmd = ppc_hmp_info_pic,
>  #else
>          .mhandler.cmd = hmp_info_pic,
>  #endif
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index 4b33e6d..d027a24 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -31,6 +31,9 @@
>  #include "hw/ppc/xics.h"
>  #include "qemu/error-report.h"
>  #include "qapi/visitor.h"
> +#include "monitor/monitor.h"
> +
> +static XICSState *g_xics;
>  
>  int get_cpu_index_by_dt_id(int cpu_dt_id)
>  {
> @@ -170,6 +173,9 @@ static void xics_common_initfn(Object *obj)
>      object_property_add(obj, "nr_servers", "int",
>                          xics_prop_get_nr_servers, xics_prop_set_nr_servers,
>                          NULL, NULL, NULL);
> +
> +    /* For exclusive use of monitor command */
> +    g_xics = XICS_COMMON(obj);
>  }
>  
>  static void xics_common_class_init(ObjectClass *oc, void *data)
> @@ -614,6 +620,38 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
>      return 0;
>  }
>  
> +void xics_hmp_info_pic(Monitor *mon, const QDict *qdict)
> +{
> +    ICSState *ics;
> +    uint32_t i;
> +
> +    for (i = 0; i < g_xics->nr_servers; i++) {
> +        ICPState *icp = &g_xics->ss[i];
> +
> +        if (!icp->output) {
> +            continue;
> +        }
> +        monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
> +                       i, icp->xirr, icp->xirr_owner,
> +                       icp->pending_priority, icp->mfrr);
> +    }
> +    QLIST_FOREACH(ics, &g_xics->ics, list) {
> +        monitor_printf(mon, "ICS %4x..%4x %p\n",
> +                       ics->offset, ics->offset + ics->nr_irqs - 1, ics);
> +        for (i = 0; i < ics->nr_irqs; i++) {
> +            ICSIRQState *irq = ics->irqs + i;
> +
> +            if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
> +                continue;
> +            }
> +            monitor_printf(mon, "  %4x %s %02x %02x\n",
> +                           ics->offset + i,
> +                           (irq->flags & XICS_FLAGS_IRQ_LSI) ? "LSI" : "MSI",
> +                           irq->priority, irq->status);
> +        }
> +    }
> +}
> +
>  static const VMStateDescription vmstate_ics_simple_irq = {
>      .name = "ics/irq",
>      .version_id = 2,
> diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
> index 2c604ef..3b14f09 100644
> --- a/hw/ppc/ppc.c
> +++ b/hw/ppc/ppc.c
> @@ -24,6 +24,7 @@
>  #include "hw/hw.h"
>  #include "hw/ppc/ppc.h"
>  #include "hw/ppc/ppc_e500.h"
> +#include "hw/i386/pc.h"
>  #include "qemu/timer.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/cpus.h"
> @@ -35,6 +36,10 @@
>  #include "kvm_ppc.h"
>  #include "trace.h"
>  
> +#if defined(TARGET_PPC64)
> +#include "hw/ppc/xics.h"
> +#endif
> +
>  //#define PPC_DEBUG_IRQ
>  //#define PPC_DEBUG_TB
>  
> @@ -1337,3 +1342,12 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
>  
>      return NULL;
>  }
> +
> +void ppc_hmp_info_pic(Monitor *mon, const QDict *qdict)
> +{
> +    /* Call in turn every PIC around. OpenPIC doesn't have one yet */
> +#ifdef TARGET_PPC64
> +    xics_hmp_info_pic(mon, qdict);
> +#endif
> +    hmp_info_pic(mon, qdict);
> +}
> diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
> index 14efd0c..d5c648d 100644
> --- a/include/hw/ppc/ppc.h
> +++ b/include/hw/ppc/ppc.h
> @@ -1,6 +1,8 @@
>  #ifndef HW_PPC_H
>  #define HW_PPC_H 1
>  
> +void ppc_hmp_info_pic(Monitor *mon, const QDict *qdict);
> +
>  void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level);
>  
>  /* PowerPC hardware exceptions management helpers */
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index f32f409..1cf7037 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -213,4 +213,6 @@ void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers, Error **errp);
>  ICSState *xics_find_source(XICSState *icp, int irq);
>  void xics_add_ics(XICSState *xics, ICSState *ics);
>  
> +void xics_hmp_info_pic(Monitor *mon, const QDict *qdict);
> +
>  #endif /* __XICS_H__ */
> diff --git a/monitor.c b/monitor.c
> index 3295840..988477e 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -76,6 +76,9 @@
>  #include "qapi-event.h"
>  #include "qmp-introspect.h"
>  #include "sysemu/block-backend.h"
> +#if defined(TARGET_PPC)
> +#include "hw/ppc/ppc.h"
> +#endif
>  
>  /* for hmp_info_irq/pic */
>  #if defined(TARGET_SPARC)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 38/77] ppc/xics: Add "native" XICS subclass
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 38/77] ppc/xics: Add "native" XICS subclass Benjamin Herrenschmidt
  2015-12-01  6:28   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-12-01  6:39   ` David Gibson
  1 sibling, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  6:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 845 bytes --]

On Wed, Nov 11, 2015 at 11:27:51AM +1100, Benjamin Herrenschmidt wrote:
> This provides MMIO based ICP access as found on POWER8
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

[snip]
> +static void xics_native_realize(DeviceState *dev, Error **errp)
> +{
> +    XICSState *s = XICS_NATIVE(dev);
> +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);

Ah.. here's another wrinkle.  So the current XICS is defined as a
SysBusDevice, but that's not actually correct because it doesn't have
MMIO.  But your xics native does have MMIO, so my understanding is
that SysBusDevice is appropriate.  Not sure how to handle that.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 40/77] ppc/pnv: Wire up XICS native with PowerNV platform
  2015-11-11  0:27 ` [Qemu-devel] [PATCH 40/77] ppc/pnv: Wire up XICS native with PowerNV platform Benjamin Herrenschmidt
@ 2015-12-01  6:41   ` David Gibson
  0 siblings, 0 replies; 198+ messages in thread
From: David Gibson @ 2015-12-01  6:41 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 5396 bytes --]

On Wed, Nov 11, 2015 at 11:27:53AM +1100, Benjamin Herrenschmidt wrote:
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Looks sound enough, but will probably need some rework due to other
suggested changes in the new xics stuff.

> ---
>  hw/ppc/pnv.c          | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/pnv.h  |  2 ++
>  include/hw/ppc/xics.h |  2 ++
>  3 files changed, 73 insertions(+)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 2eac877..a7a9b0f 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -41,6 +41,7 @@
>  #include "hw/ppc/ppc.h"
>  #include "hw/ppc/pnv.h"
>  #include "hw/loader.h"
> +#include "hw/ppc/xics.h"
>  #include "hw/ppc/pnv_xscom.h"
>  
>  #include "exec/address-spaces.h"
> @@ -81,6 +82,59 @@ struct sPowerNVMachineState {
>      PnvSystem sys;
>  };
>  
> +static XICSState *try_create_xics(const char *type, int nr_servers,
> +                                  int nr_irqs, Error **errp)
> +{
> +    Error *err = NULL;
> +    DeviceState *dev;
> +
> +    dev = qdev_create(NULL, type);
> +    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
> +    object_property_set_bool(OBJECT(dev), true, "realized", &err);
> +    if (err) {
> +        error_propagate(errp, err);
> +        object_unparent(OBJECT(dev));
> +        return NULL;
> +    }
> +
> +    return XICS_COMMON(dev);
> +}
> +
> +static XICSState *xics_system_init(int nr_servers, int nr_irqs)
> +{
> +    XICSState *xics = NULL;
> +
> +#if 0 /* Some fixing needed to handle native ICS in KVM mode */
> +    if (kvm_enabled()) {
> +        QemuOpts *machine_opts = qemu_get_machine_opts();
> +        bool irqchip_allowed = qemu_opt_get_bool(machine_opts,
> +                                                "kernel_irqchip", true);
> +        bool irqchip_required = qemu_opt_get_bool(machine_opts,
> +                                                  "kernel_irqchip", false);
> +        if (irqchip_allowed) {
> +                icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs,
> +                                      &error_abort);
> +        }
> +
> +        if (irqchip_required && !icp) {
> +            perror("Failed to create in-kernel XICS\n");
> +            abort();
> +        }
> +    }
> +#endif
> +
> +    if (!xics) {
> +        xics = try_create_xics(TYPE_XICS_NATIVE, nr_servers, nr_irqs,
> +                               &error_abort);
> +    }
> +
> +    if (!xics) {
> +        perror("Failed to create XICS\n");
> +        abort();
> +    }
> +    return xics;
> +}
> +
>  static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
>                                       size_t maxsize)
>  {
> @@ -366,6 +420,13 @@ static void *powernv_create_fdt(PnvSystem *sys, uint32_t initrd_base, uint32_t i
>  
>      _FDT((fdt_end_node(fdt)));
>  
> +    /* ICPs */
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        uint32_t base_server = ppc_get_vcpu_dt_id(cpu);
> +        xics_create_native_icp_node(sys->xics, fdt, base_server, smt);
> +    }
> +
>      /* Memory */
>      _FDT((powernv_populate_memory(fdt)));
>  
> @@ -451,11 +512,17 @@ static void ppc_powernv_init(MachineState *machine)
>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>      sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine);
>      PnvSystem *sys = &pnv_machine->sys;
> +    XICSState *xics;
>      long fw_size;
>      char *filename;
>      void *fdt;
>      int i;
>  
> +    /* Set up Interrupt Controller before we create the VCPUs */
> +    xics = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
> +                            XICS_IRQS_POWERNV);
> +    sys->xics = xics;
> +
>      /* init CPUs */
>      if (cpu_model == NULL) {
>          cpu_model = kvm_enabled() ? "host" : "POWER8";
> @@ -475,6 +542,8 @@ static void ppc_powernv_init(MachineState *machine)
>          /* MSR[IP] doesn't exist nowadays */
>          env->msr_mask &= ~(1 << 6);
>  
> +        xics_cpu_setup(xics, cpu);
> +
>          qemu_register_reset(powernv_cpu_reset, cpu);
>      }
>  
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index cb157eb..80617b4 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -21,6 +21,7 @@
>  
>  #include "hw/hw.h"
>  typedef struct XScomBus XScomBus;
> +typedef struct XICSState XICSState;
>  
>  /* Should we turn that into a QOjb of some sort ? */
>  typedef struct PnvChip {
> @@ -29,6 +30,7 @@ typedef struct PnvChip {
>  } PnvChip;
>  
>  typedef struct PnvSystem {
> +    XICSState *xics;
>      uint32_t  num_chips;
>  #define PNV_MAX_CHIPS		1
>      PnvChip   chips[PNV_MAX_CHIPS];
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 1cf7037..85d2fb9 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -183,6 +183,8 @@ struct ICSIRQState {
>  };
>  
>  #define XICS_IRQS_SPAPR               1024
> +#define XICS_IRQS_POWERNV             (1 << 19)
> +
>  
>  qemu_irq xics_get_qirq(XICSState *icp, int irq);
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-11-17  0:40     ` Benjamin Herrenschmidt
@ 2015-12-01  6:43       ` David Gibson
  2015-12-02  2:24         ` Alexey Kardashevskiy
  0 siblings, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-12-01  6:43 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Alexey Kardashevskiy, qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1649 bytes --]

On Tue, Nov 17, 2015 at 11:40:04AM +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2015-11-17 at 11:32 +1100, Alexey Kardashevskiy wrote:
> > On 11/11/2015 11:27 AM, Benjamin Herrenschmidt wrote:
> > > This adds a model of the POWER8 LPC controller. It is then used
> > > by the PowerNV code to attach a UART and RTC, which, with the right
> > > version of OPAL firmware, will provide a working console.
> > > 
> > > This version of the LPC controller model doesn't yet implement
> > > support for the SerIRQ deserializer present in the Naples version
> > > of the chip though some preliminary work is there.
> > > 
> > 
> > Is this LPC controller one per a chip or per a machine?
> 
> Per chip but we usually only wire one up per machine.
> 
> > In general it is quite nice when "-nodefaults" does not create
> > neither PHB nor LPC so the user can add them manually with parameters
> > different than defaults.
> 
> In this case though, PHB and LPC bridges are all part of the P8 chip,
> and I'm trying to represent that topology as best as possible.
> 
> I think "-nodefaults" for Pnv should only be about the devices we
> attach to the LPC/PHB not the busses themselves.

Exactly what is and isn't covered by -nodefaults is a bit of a mess -
part of the topic of my talk at KVM Forum.

But on the whole I agree with you, since the LPC is part of the P8
chip, I think it makes sense to include it even with -nodefaults.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-12-01  6:43       ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2015-12-02  2:24         ` Alexey Kardashevskiy
  2015-12-02  5:29           ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-12-02  2:24 UTC (permalink / raw)
  To: David Gibson, Benjamin Herrenschmidt; +Cc: qemu-ppc, qemu-devel

On 12/01/2015 05:43 PM, David Gibson wrote:
> On Tue, Nov 17, 2015 at 11:40:04AM +1100, Benjamin Herrenschmidt wrote:
>> On Tue, 2015-11-17 at 11:32 +1100, Alexey Kardashevskiy wrote:
>>> On 11/11/2015 11:27 AM, Benjamin Herrenschmidt wrote:
>>>> This adds a model of the POWER8 LPC controller. It is then used
>>>> by the PowerNV code to attach a UART and RTC, which, with the right
>>>> version of OPAL firmware, will provide a working console.
>>>>
>>>> This version of the LPC controller model doesn't yet implement
>>>> support for the SerIRQ deserializer present in the Naples version
>>>> of the chip though some preliminary work is there.
>>>>
>>>
>>> Is this LPC controller one per a chip or per a machine?
>>
>> Per chip but we usually only wire one up per machine.
>>
>>> In general it is quite nice when "-nodefaults" does not create
>>> neither PHB nor LPC so the user can add them manually with parameters
>>> different than defaults.
>>
>> In this case though, PHB and LPC bridges are all part of the P8 chip,
>> and I'm trying to represent that topology as best as possible.
>>
>> I think "-nodefaults" for Pnv should only be about the devices we
>> attach to the LPC/PHB not the busses themselves.
>
> Exactly what is and isn't covered by -nodefaults is a bit of a mess -
> part of the topic of my talk at KVM Forum.
>
> But on the whole I agree with you, since the LPC is part of the P8
> chip, I think it makes sense to include it even with -nodefaults.

POWER8 chips all have 8 threads per core but we do not always assume -smt 
...,threads=8, how are LPC or PHB different? PHB is more interesting - how 
is the user supposed to add more? And there always will be the default one 
whose properties are set in a separate way (via -global, not -device). I 
found it sometimes really annoying to debug the existing pseries which 
always adds a default PHB (I know, this was to make libvirt happy but this 
is not the case here).

Out of curiosity - if we have 2 chips, will the system work if the second 
chip does not get any LPC or PHB attached?


-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-12-02  2:24         ` Alexey Kardashevskiy
@ 2015-12-02  5:29           ` Benjamin Herrenschmidt
  2015-12-03  1:04             ` Alexey Kardashevskiy
  0 siblings, 1 reply; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-12-02  5:29 UTC (permalink / raw)
  To: Alexey Kardashevskiy, David Gibson; +Cc: qemu-ppc, qemu-devel

On Wed, 2015-12-02 at 13:24 +1100, Alexey Kardashevskiy wrote:
> > But on the whole I agree with you, since the LPC is part of the P8
> > chip, I think it makes sense to include it even with -nodefaults.
> 
> POWER8 chips all have 8 threads per core but we do not always assume -smt 
> ...,threads=8, how are LPC or PHB different? 

First, for pseries which is paravirtualized it's a different can of
worms completely. For powernv, we *should* represent all 8 threads,
we just can't yet due to TCG limitations.

> PHB is more interesting - how is the user supposed to add more?

That's an open question. Since we model a real P8 chip we can only
model the PHBs as they exist on it, which is up to 3 per chip at
very specific XSCOM addresses. We could try to model some non-existing
P8 chip with more but bad things will happen when the FW tries to assign
interrupt numbers for example.

We simulate a machine that has been primed by HostBoot before OPAL
starts. So we rely on what the device-tree tells us about which PHBs were
enabled but apart from that, we have to stick to the limitations.

> And there always will be the default one 
> which properties are set in a separate way (via -global, not -device). I 
> found it sometime really annoying to debug the existing pseries which 
> always adds a default PHB (I know, this was to make libvirt happy but this 
> is not the case here).
> 
> Out of curiosity - if we have 2 chips, will the system work if the second 
> chip does not get any LPC or PHB attached?

This is something I need to look into, there's a lot of work needed to
properly model "chips" that I haven't done yet, but what is there is
sufficient for a lot of usages already.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-12-02  5:29           ` Benjamin Herrenschmidt
@ 2015-12-03  1:04             ` Alexey Kardashevskiy
  2015-12-03  1:45               ` David Gibson
  2015-12-03 22:54               ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 198+ messages in thread
From: Alexey Kardashevskiy @ 2015-12-03  1:04 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, David Gibson; +Cc: qemu-ppc, qemu-devel

On 12/02/2015 04:29 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2015-12-02 at 13:24 +1100, Alexey Kardashevskiy wrote:
>>> But on the whole I agree with you, since the LPC is part of the P8
>>> chip, I think it makes sense to include it even with -nodefaults.
>>
>> POWER8 chips all have 8 threads per core but we do not always assume -smt
>> ...,threads=8, how are LPC or PHB different?
>
> First, for pseries which is paravirtualized it's a different can of
> worms completely. For powernv, we *should* represent all 8 threads,
> we just can't yet due to TCG limitations.

Out of curiosity - for pseries we should not? I know it works with various 
numbers of threads but isn't that because we also control the guest Linux 
kernel and, for example, the Other OS (AIX) might be upset by a 
non-multiple-of-2 number of threads?


>> PHB is more interesting - how is the user supposed to add more?
>
> That's an open question. Since we model a real P8 chip we can only
> model the PHBs as they exist on it, which is up to 3 per chip at
> very specific XSCOM addresses. We could try to model some non-existing
> P8 chip with more but bad things will happen when the FW try to assign
> interrupt numbers for example.
 >
> We simulate a machine that has been primed by HostBoot before OPAL
> starts. So we rely on what the device-tree tells us of what PHB were
> enabled but appart from that, we have to stick to the limitations.
 >
>> And there always will be the default one
>> which properties are set in a separate way (via -global, not -device). I
>> found it sometime really annoying to debug the existing pseries which
>> always adds a default PHB (I know, this was to make libvirt happy but this
>> is not the case here).
>>
>> Out of curiosity - if we have 2 chips, will the system work if the second
>> chip does not get any LPC or PHB attached?
>
> This is something I need to look into, there's a lot of work needed to
> properly model "chips" that I haven't done yet, but what is there is
> sufficient for a lot of usages already.

For now, if possible, I'd suggest implementing -nodefaults with no defaults 
whatsoever and creating a config somewhere in the qemu tree to pass via 
-readconfig to get a reasonably configured machine, so people will know what 
is expected to work but there will still be the possibility for experiments 
(don't we secretly hope that other vendors will start designing/manufacturing 
their ppc64 chips?). It could be one config file per actual POWER8 chip 
(we have two already).


-- 
Alexey

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-12-03  1:04             ` Alexey Kardashevskiy
@ 2015-12-03  1:45               ` David Gibson
  2015-12-03 22:58                 ` Benjamin Herrenschmidt
  2015-12-03 22:54               ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 198+ messages in thread
From: David Gibson @ 2015-12-03  1:45 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 4706 bytes --]

On Thu, Dec 03, 2015 at 12:04:58PM +1100, Alexey Kardashevskiy wrote:
> On 12/02/2015 04:29 PM, Benjamin Herrenschmidt wrote:
> >On Wed, 2015-12-02 at 13:24 +1100, Alexey Kardashevskiy wrote:
> >>>But on the whole I agree with you, since the LPC is part of the P8
> >>>chip, I think it makes sense to include it even with -nodefaults.
> >>
> >>POWER8 chips all have 8 threads per core but we do not always assume -smt
> >>...,threads=8, how are LPC or PHB different?
> >
> >First, for pseries which is paravirtualized it's a different can of
> >worms completely. For powernv, we *should* represent all 8 threads,
> >we just can't yet due to TCG limitations.
> 
> Out of curiosity - for pseries we should not? I know it works with various
> numbers of threads but is not that because we also control guest linux
> kernel and, for example, the Other OS (AIX) might be upset on
> non-multiply-of-2 number of threads?

There are several different cases here and I'm not sure which you're
thinking about.

1) Guest has different number of threads-per-core than the host

This one is just fine - PAPR defines how the guest should get the
number of threads from the device tree, and qemu sets that correctly.

2) Guest threads-per-core not a power of two

The PAPR thread mechanism allows this to be communicated to the guest,
and I don't know if PAPR explicitly permits or prohibits this
situation.  Guests could get confused by it, although that's arguably
a guest bug.

3) "Partially filled core", e.g. guest has 8 threads-per-core declared
   but only one vcpu available

This is the only one I can see as relying on Linux guest behaviour.
We kind of get away with this by accident with a Linux guest - it will
try to bring up all 8 threads, but fail non fatally.  We shouldn't
allow this situation, although we do right now.  Bharata posted a
patch which would prevent this in qemu, and I have a BZ to make
libvirt not allow this construction either.

> >>PHB is more interesting - how is the user supposed to add more?
> >
> >That's an open question. Since we model a real P8 chip we can only
> >model the PHBs as they exist on it, which is up to 3 per chip at
> >very specific XSCOM addresses. We could try to model some non-existing
> >P8 chip with more but bad things will happen when the FW try to assign
> >interrupt numbers for example.
> >
> >We simulate a machine that has been primed by HostBoot before OPAL
> >starts. So we rely on what the device-tree tells us of what PHB were
> >enabled but appart from that, we have to stick to the limitations.
> >
> >>And there always will be the default one
> >>which properties are set in a separate way (via -global, not -device). I
> >>found it sometime really annoying to debug the existing pseries which
> >>always adds a default PHB (I know, this was to make libvirt happy but this
> >>is not the case here).
> >>
> >>Out of curiosity - if we have 2 chips, will the system work if the second
> >>chip does not get any LPC or PHB attached?
> >
> >This is something I need to look into, there's a lot of work needed to
> >properly model "chips" that I haven't done yet, but what is there is
> >sufficient for a lot of usages already.
> 
> For now, if possible, I'd suggest implementing -nodefaults with no defaults
> whatsoever and create a config somewhere in the qemu tree to pass it via
> -readconfig to get reasonably configured machine so people will know what is
> expected to work but there will still be possibility for experiments (do not
> we secretly hope that other vendors will start designing/manufacturing their
> ppc64 chips?). It could be a config file per an actual POWER8 chip (we have
> two already).

I can see some benefit to that approach, but it does stray away from
current qemu practice (in general, not just compared to spapr).
Hmm.. not sure.

What I do think would be a good idea is to represent a POWER8 "chip"
as an instantiable qdev device, which will create the scoms and PHBs
under itself as per the hardware.  We can add device properties as
needed to make that construction more flexible.

We probably don't want to link the number of CPUs to the chip qdevs,
partly because that doesn't really fit the qemu model, but also
because we'll probably want some extra flexibility.  e.g. making a UP
system for experimentation, even though a single chip would have
multiple cores (IIUC) - SMP TCG is super slow, so we probably want that.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-12-03  1:04             ` Alexey Kardashevskiy
  2015-12-03  1:45               ` David Gibson
@ 2015-12-03 22:54               ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-12-03 22:54 UTC (permalink / raw)
  To: Alexey Kardashevskiy, David Gibson; +Cc: qemu-ppc, qemu-devel

On Thu, 2015-12-03 at 12:04 +1100, Alexey Kardashevskiy wrote:
> On 12/02/2015 04:29 PM, Benjamin Herrenschmidt wrote:
> > On Wed, 2015-12-02 at 13:24 +1100, Alexey Kardashevskiy wrote:
> > > > But on the whole I agree with you, since the LPC is part of the P8
> > > > chip, I think it makes sense to include it even with -nodefaults.
> > > 
> > > POWER8 chips all have 8 threads per core but we do not always assume -smt
> > > ...,threads=8, how are LPC or PHB different?
> > 
> > First, for pseries which is paravirtualized it's a different can of
> > worms completely. For powernv, we *should* represent all 8 threads,
> > we just can't yet due to TCG limitations.
> 
> Out of curiosity - for pseries we should not? 

It's not that we should not; rather, it makes sense to offer the
choice. It would indeed be better to have the ability to support them,
however.

> I know it works with various 
> numbers of threads but is not that because we also control guest linux 
> kernel and, for example, the Other OS (AIX) might be upset on 
> non-multiply-of-2 number of threads?

Quite possibly. I have some plans to add threads support, just haven't
got to it yet :-)
> 
> > > PHB is more interesting - how is the user supposed to add more?
> > 
> > That's an open question. Since we model a real P8 chip we can only
> > model the PHBs as they exist on it, which is up to 3 per chip at
> > very specific XSCOM addresses. We could try to model some non-existing
> > P8 chip with more but bad things will happen when the FW try to assign
> > interrupt numbers for example.
>  >
> > We simulate a machine that has been primed by HostBoot before OPAL
> > starts. So we rely on what the device-tree tells us of what PHB were
> > enabled but appart from that, we have to stick to the limitations.
>  >
> > > And there always will be the default one
> > > which properties are set in a separate way (via -global, not -device). I
> > > found it sometime really annoying to debug the existing pseries which
> > > always adds a default PHB (I know, this was to make libvirt happy but this
> > > is not the case here).
> > > 
> > > Out of curiosity - if we have 2 chips, will the system work if the second
> > > chip does not get any LPC or PHB attached?
> > 
> > This is something I need to look into, there's a lot of work needed to
> > properly model "chips" that I haven't done yet, but what is there is
> > sufficient for a lot of usages already.
> 
> For now, if possible, I'd suggest implementing -nodefaults with no defaults 
> whatsoever and create a config somewhere in the qemu tree to pass it via 
> -readconfig to get reasonably configured machine so people will know what 
> is expected to work but there will still be possibility for experiments (do 
> not we secretly hope that other vendors will start designing/manufacturing 
> their ppc64 chips?). It could be a config file per an actual POWER8 chip 
> (we have two already).

There could be, but we'd need ways to specify a bunch of things, it's
not that trivial. IE, the xscom addresses of the 3 ranges for each PHB
for example etc.. For now let's keep things simple.

I also am in no hurry to support migration with powernv, so we have
quite a bit of flexibility to change things.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC
  2015-12-03  1:45               ` David Gibson
@ 2015-12-03 22:58                 ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2015-12-03 22:58 UTC (permalink / raw)
  To: David Gibson, Alexey Kardashevskiy; +Cc: qemu-ppc, qemu-devel

On Thu, 2015-12-03 at 12:45 +1100, David Gibson wrote:
> 
> There are several different cases here and I'm not sure which you're
> thinking about.
> 
> 1) Guest has different number of threads-per-core than the host
> 
> This one is just fine - PAPR defines how the guest should get the
> number of threads from the device tree, and qemu sets that correctly.
> 
> 2) Guest threads-per-core not a power of two
> 
> The PAPR thread mechanism allows this to be communicated to the guest,
> and I don't know if PAPR explicitly permits or prohibitis this
> situation.  Guests could get confused by it, although that's arguably
> a guest bug.

Linux only supports powers of 2

 .../...

> > For now, if possible, I'd suggest implementing -nodefaults with no defaults
> > whatsoever and create a config somewhere in the qemu tree to pass it via
> > -readconfig to get reasonably configured machine so people will know what is
> > expected to work but there will still be possibility for experiments (do not
> > we secretly hope that other vendors will start designing/manufacturing their
> > ppc64 chips?). It could be a config file per an actual POWER8 chip (we have
> > two already).
> 
> I can see some benefit to that approach, but it does stray away from
> current qemu practice (in general, not just compared to spapr).
> Hmm.. not sure.

I don't see an urgent need. We can do that in a separate series if we
want to. As I said in another email, I don't want powernv to be "set in
stone" in terms of ABIs & migration for a while, so we have latitude to
play with the model for a while before we lock things down. At least
until after P9.

> What I do think would be a good idea is to represent a POWER8 "chip"
> as a instantiable qdev device, which will create the scoms and PHBs
> under itself as per the hardware.  We can add device properties as
> needed to make that construction more flexible.

It would also instantiate the CPUs. I want to change that in the
current model, just haven't got to it yet.

> We probably don't want to link the number of CPUs to the chip qdevs,
> partly because that doesn't really fit the qemu model, but also
> because we'll probably want some extra flexibility.  e.g. making a UP
> system for experimentation, even though a single chip would have
> multiple cores (IIUC) - SMP TCG is super slow, so we probably want that.

We want to eventually represent the EX units properly on XSCOM (the
cores). So we will need that tie between chip and CPU. But we don't
have to mark all cores in a chip as "enabled". It's ok to have chips
with only one good core for example.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-11-30 18:15                   ` Cédric Le Goater
  2015-11-30 20:09                     ` Benjamin Herrenschmidt
@ 2015-12-07  1:25                     ` Stewart Smith
  2015-12-07 22:48                       ` Cédric Le Goater
  1 sibling, 1 reply; 198+ messages in thread
From: Stewart Smith @ 2015-12-07  1:25 UTC (permalink / raw)
  To: Cédric Le Goater, Benjamin Herrenschmidt, Alexander Graf,
	Alexey Kardashevskiy, Eric Blake, qemu-ppc
  Cc: qemu-devel

Cédric Le Goater <clg@fr.ibm.com> writes:
> On 11/28/2015 08:59 AM, Benjamin Herrenschmidt wrote:
>> On Fri, 2015-11-27 at 11:21 +0100, Alexander Graf wrote:
>>>
>>> How does real hardware store petitboot? If it's flash, you could pass it
>>> in using -pflash and thus model things even more closely and allow users
>>> to just take the ROM image as is.
>> 
>> It is a flash image, we could use an Open Power machine flash image "as-is"
>> provided we taught qemu to extract skiboot (aka OPAL) from it.
>
> Couldn't we add an offset argument to load_image_targphys() or make that 
> an extra routine ? If so, we could then load directly from an openpower 
> pnor file. 
>
> I gave it a quick (and dirty) try and a powernv guest runs fine up to 
> petitboot with just :
>
> 	qemu-system-ppc64 -m 2G -M powernv -bios  ~/work/open-power/images/palmetto.pnor -nographic -nodefaults -serial stdio
>
> The pnor file is compiled from github. The patch is below (without the dirty
> cut and paste I did in loader.c). The offset for the PAYLOAD and BOOTKERNEL
> partitions are hard coded but I guess we don't need to read the flash partition
> table in qemu, not yet.

One downside to this is that if we don't fall back to being able to load
skiboot.lid it becomes more annoying to boot a gcov enabled skiboot as
typical PNOR layout only gives 1MB for skiboot, and gcov builds bloat
that a *lot*.

We probably don't want NVRAM writes going back to a single system-wide
PNOR image too, so while using a pnor file is great for simulating what
hardware does, it may not work as the solution for the long-term model.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform
  2015-12-07  1:25                     ` Stewart Smith
@ 2015-12-07 22:48                       ` Cédric Le Goater
  0 siblings, 0 replies; 198+ messages in thread
From: Cédric Le Goater @ 2015-12-07 22:48 UTC (permalink / raw)
  To: Stewart Smith, Benjamin Herrenschmidt, Alexander Graf,
	Alexey Kardashevskiy, Eric Blake, qemu-ppc
  Cc: qemu-devel

On 12/07/2015 02:25 AM, Stewart Smith wrote:
> Cédric Le Goater <clg@fr.ibm.com> writes:
>> On 11/28/2015 08:59 AM, Benjamin Herrenschmidt wrote:
>>> On Fri, 2015-11-27 at 11:21 +0100, Alexander Graf wrote:
>>>>
>>>> How does real hardware store petitboot? If it's flash, you could pass it
>>>> in using -pflash and thus model things even more closely and allow users
>>>> to just take the ROM image as is.
>>>
>>> It is a flash image, we could use an Open Power machine flash image "as-is"
>>> provided we taught qemu to extract skiboot (aka OPAL) from it.
>>
>> Couldn't we add an offset argument to load_image_targphys() or make that 
>> an extra routine ? If so, we could then load directly from an openpower 
>> pnor file. 
>>
>> I gave it a quick (and dirty) try and a powernv guest runs fine up to 
>> petitboot with just :
>>
>> 	qemu-system-ppc64 -m 2G -M powernv -bios  ~/work/open-power/images/palmetto.pnor -nographic -nodefaults -serial stdio
>>
>> The pnor file is compiled from github. The patch is below (without the dirty
>> cut and paste I did in loader.c). The offset for the PAYLOAD and BOOTKERNEL
>> partitions are hard coded but I guess we don't need to read the flash partition
>> table in qemu, not yet.
> 
> One downside to this is that if we don't fall back to being able to load
> skiboot.lid it becomes more annoying to boot a gcov enabled skiboot as
> typical PNOR layout only gives 1MB for skiboot, and gcov builds bloat
> that a *lot*.

I guess we could imagine having a bigger partition for skiboot 
in the case of gcov ?  This will require a custom pnor build. Might be 
too complex. Or we could use a -pflash option to load the pnor and an
optional -bios for skiboot if we want a custom one.

> We probably don't want NVRAM writes going back to a single system wide
> PNOR image too, so while using pnor file is great for simulating what
> hardware does, may not work as the solution for long term model.

we can use a memory backend to start with, which is also much simpler 
than having to handle the block backend like the cfi pflash is doing. 
a guest could use its own private pnor if a block backend is needed.

C. 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 71/77] ppc: Add dummy ACOP SPR
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 71/77] ppc: Add dummy ACOP SPR Benjamin Herrenschmidt
@ 2016-03-02 20:22   ` Thomas Huth
  0 siblings, 0 replies; 198+ messages in thread
From: Thomas Huth @ 2016-03-02 20:22 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 11.11.2015 01:28, Benjamin Herrenschmidt wrote:
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/cpu.h            | 1 +
>  target-ppc/translate_init.c | 4 ++++
>  2 files changed, 5 insertions(+)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index bf8892a..aa328a7 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1361,6 +1361,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_SRR1              (0x01B)
>  #define SPR_CFAR              (0x01C)
>  #define SPR_AMR               (0x01D)
> +#define SPR_ACOP              (0x01F)
>  #define SPR_BOOKE_PID         (0x030)
>  #define SPR_BOOKE_DECAR       (0x036)
>  #define SPR_BOOKE_CSRR0       (0x03A)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index b5fd076..4ec532c 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8226,6 +8226,10 @@ static void gen_spr_power8_book4(CPUPPCState *env)
>                      &spr_read_generic, SPR_NOACCESS,
>                      &spr_read_generic, &spr_write_generic,
>                      0);
> +    spr_register(env, SPR_ACOP, "ACOP",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0);
>  #endif
>  }

I think this patch should be merged with the next one (where you change
the ACOP hunk in translate_init.c again).

 Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs Benjamin Herrenschmidt
@ 2016-03-02 20:30   ` Thomas Huth
  2016-03-04  0:59     ` Benjamin Herrenschmidt
  2016-03-09 20:04     ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
  0 siblings, 2 replies; 198+ messages in thread
From: Thomas Huth @ 2016-03-02 20:30 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 11.11.2015 01:28, Benjamin Herrenschmidt wrote:
> WORT and PID this time
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target-ppc/cpu.h            |  2 ++
>  target-ppc/translate_init.c | 16 ++++++++++++----
>  2 files changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index aa328a7..6179fbc 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1363,6 +1363,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_AMR               (0x01D)
>  #define SPR_ACOP              (0x01F)
>  #define SPR_BOOKE_PID         (0x030)
> +#define SPR_BOOKS_PID         (0x030)
>  #define SPR_BOOKE_DECAR       (0x036)
>  #define SPR_BOOKE_CSRR0       (0x03A)
>  #define SPR_BOOKE_CSRR1       (0x03B)
> @@ -1716,6 +1717,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>  #define SPR_POWER_SPMC1       (0x37C)
>  #define SPR_POWER_SPMC2       (0x37D)
>  #define SPR_POWER_MMCRS       (0x37E)
> +#define SPR_WORT              (0x37F)
>  #define SPR_PPR               (0x380)
>  #define SPR_750_GQR0          (0x390)
>  #define SPR_440_DNV0          (0x390)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 4ec532c..bfdf028 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8226,10 +8226,18 @@ static void gen_spr_power8_book4(CPUPPCState *env)
>                      &spr_read_generic, SPR_NOACCESS,
>                      &spr_read_generic, &spr_write_generic,
>                      0);
> -    spr_register(env, SPR_ACOP, "ACOP",
> -                 SPR_NOACCESS, SPR_NOACCESS,
> -                 &spr_read_generic, &spr_write_generic,
> -                 0);
> +    spr_register_kvm(env, SPR_ACOP, "ACOP",
> +                     SPR_NOACCESS, SPR_NOACCESS,
> +                     &spr_read_generic, &spr_write_generic,
> +                     KVM_REG_PPC_ACOP, 0);
> +    spr_register_kvm(env, SPR_BOOKS_PID, "PID",
> +                     SPR_NOACCESS, SPR_NOACCESS,
> +                     &spr_read_generic, &spr_write_generic,
> +                     KVM_REG_PPC_PID, 0);
> +    spr_register_kvm(env, SPR_WORT, "WORT",
> +                     SPR_NOACCESS, SPR_NOACCESS,
> +                     &spr_read_generic, &spr_write_generic,
> +                     KVM_REG_PPC_WORT, 0);
>  #endif
>  }

AFAICT all patches where you define new SPRs with spr_register_kvm[_hv]
are also important independently of the rest of your patch series -
otherwise these registers are currently lost during migration since they
are not sync'ed with the KVM part in the kernel right now.

So if you've got some spare time, could you maybe extract all those
patches that define new SPRs with spr_register_kvm[_hv] and send them as
a separate patch series? That could help to fix future migration issues,
and also would decrease the size of your really huge "Add native POWER8
platform" patch series a little bit!

 Thanks,
  Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-02 20:30   ` Thomas Huth
@ 2016-03-04  0:59     ` Benjamin Herrenschmidt
  2016-03-09 20:04     ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
  1 sibling, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2016-03-04  0:59 UTC (permalink / raw)
  To: Thomas Huth, qemu-ppc; +Cc: qemu-devel

On Wed, 2016-03-02 at 21:30 +0100, Thomas Huth wrote:
> So if you've got some spare time, could you maybe extract all those
> patches that define new SPRs with spr_register_kvm[_hv] and send them as
> a separate patch series? That could help to fix future migration issues,
> and also would decrease the size of your really huge "Add native POWER8
> platform" patch series a little bit!

Time is the problem :-) My tree is bitrotting right now, I am completely
caught up with a few other things.

I'm trying to get somebody to pick up that work.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-02 20:30   ` Thomas Huth
  2016-03-04  0:59     ` Benjamin Herrenschmidt
@ 2016-03-09 20:04     ` Cédric Le Goater
  2016-03-09 21:17       ` Thomas Huth
  1 sibling, 1 reply; 198+ messages in thread
From: Cédric Le Goater @ 2016-03-09 20:04 UTC (permalink / raw)
  To: Thomas Huth, Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 03/02/2016 09:30 PM, Thomas Huth wrote:
> On 11.11.2015 01:28, Benjamin Herrenschmidt wrote:
>> WORT and PID this time
>>
>> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> ---
>>  target-ppc/cpu.h            |  2 ++
>>  target-ppc/translate_init.c | 16 ++++++++++++----
>>  2 files changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>> index aa328a7..6179fbc 100644
>> --- a/target-ppc/cpu.h
>> +++ b/target-ppc/cpu.h
>> @@ -1363,6 +1363,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>>  #define SPR_AMR               (0x01D)
>>  #define SPR_ACOP              (0x01F)
>>  #define SPR_BOOKE_PID         (0x030)
>> +#define SPR_BOOKS_PID         (0x030)
>>  #define SPR_BOOKE_DECAR       (0x036)
>>  #define SPR_BOOKE_CSRR0       (0x03A)
>>  #define SPR_BOOKE_CSRR1       (0x03B)
>> @@ -1716,6 +1717,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch)
>>  #define SPR_POWER_SPMC1       (0x37C)
>>  #define SPR_POWER_SPMC2       (0x37D)
>>  #define SPR_POWER_MMCRS       (0x37E)
>> +#define SPR_WORT              (0x37F)
>>  #define SPR_PPR               (0x380)
>>  #define SPR_750_GQR0          (0x390)
>>  #define SPR_440_DNV0          (0x390)
>> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
>> index 4ec532c..bfdf028 100644
>> --- a/target-ppc/translate_init.c
>> +++ b/target-ppc/translate_init.c
>> @@ -8226,10 +8226,18 @@ static void gen_spr_power8_book4(CPUPPCState *env)
>>                      &spr_read_generic, SPR_NOACCESS,
>>                      &spr_read_generic, &spr_write_generic,
>>                      0);
>> -    spr_register(env, SPR_ACOP, "ACOP",
>> -                 SPR_NOACCESS, SPR_NOACCESS,
>> -                 &spr_read_generic, &spr_write_generic,
>> -                 0);
>> +    spr_register_kvm(env, SPR_ACOP, "ACOP",
>> +                     SPR_NOACCESS, SPR_NOACCESS,
>> +                     &spr_read_generic, &spr_write_generic,
>> +                     KVM_REG_PPC_ACOP, 0);
>> +    spr_register_kvm(env, SPR_BOOKS_PID, "PID",
>> +                     SPR_NOACCESS, SPR_NOACCESS,
>> +                     &spr_read_generic, &spr_write_generic,
>> +                     KVM_REG_PPC_PID, 0);
>> +    spr_register_kvm(env, SPR_WORT, "WORT",
>> +                     SPR_NOACCESS, SPR_NOACCESS,
>> +                     &spr_read_generic, &spr_write_generic,
>> +                     KVM_REG_PPC_WORT, 0);
>>  #endif
>>  }
> 
> AFAICT all patches where you define new SPRs with spr_register_kvm[_hv]
> are also important independently of the rest of your patch series -
> otherwise these registers are currently lost during migration since they
> are not sync'ed with the KVM part in the kernel right now.
> 
> So if you've got some spare time, could you maybe extract all those
> patches that define new SPRs with spr_register_kvm[_hv] and send them as
> a separate patch series? That could help to fix future migration issues,
> and also would decrease the size of your really huge "Add native POWER8
> platform" patch series a little bit!

I have been maintaining a port of Ben's patchset on the latest qemu for other 
parts which should come after pnv is merged so I have a framework to test such 
sub-patchsets. I also have time to work on them but clearly not the expertise
in all areas !

What would be nice is to identify the most obvious ones, non controversial
that could be merged after a few iterations. I have a vague idea, the ones 
Reviewed-by David obviously being good candidates, the definition of new SPRs 
(even the dummy ones ?). That would remove 15/20 patches from the list below
I believe. Maybe more. I will go through the email thread to see what can 
be taken.

Would follow the xics patches (plenty of conflicts lately), mmu patches 
(but David has been working on this so it is shrinking), some pci fixes 
and helpers (should ping maintainers). We would then be at a point where 
only the core pnv patches are left. I think I am being very optimistic :) 

I can start with a first series. Suggestions welcomed. List below.


 065f7f9b1e15 ppc: Fix CFAR updates
 f385d52fe9f2 ppc: Add slbfee. instruction
 2bf0d4f4239a ppc: Add dummy logmpp instruction
 8102a4cc9c30 ppc: Print HSRR0/HSRR1 in "info registers"
 730a9b4dc941 ppc: Add KVM numbers to some P8 SPRs
 48adf38e9cab ppc: A couple more dummy POWER8 Book4 regs
 bce7c738ec1b ppc: Add dummy ACOP SPR
 34f1af75e75e ppc: Add dummy CIABR SPR
 64f92045e284 ppc: Add dummy POWER8 PSPB SPR
 ab003604993b ppc: Add dummy POWER8 MPPR register
 b81cf8c41288 ppc: Add dummy write to VTB
 490af9c31d8b ppc: Add a few more P8 PMU SPRs
 84886e8c188d ppc: Add POWER8 IAMR register
 5046c10f9e58 ppc: Fix writing to AMR/UAMOR
 c188b86cac90 ppc: Initialize AMOR in PAPR mode
 ee62b5581094 ppc: Add dummy SPR_IC for POWER8
 83e02b2026ef ppc: SPURR & PURR are HV writeable and privileged
 906a513acb1a ppc: LPCR is a HV resource
 27f45e80a104 ppc: Add placeholder SPRs for DPDES and DHDES on P8
 c1fc6d316f87 ppc: Initial HDEC support
 9f5d6c607af5 ppc: Enforce setting MSR:EE,IR and DR when MSR:PR is set
 a356d76a11bb ppc: Fix conditions for delivering external interrupts to a guest
 6ba5e576d1cc ppc/pnv+spapr: Add "ibm,pa-features" property to the device-tree
 cf768bc434e5 ppc: Fix 64K pages support in full emulation
 dbd6e54fd9c5 ppc: Add proper real mode translation support
 7250469a21ce ppc: Cosmetic, align some comments
 d94d8c4e7b3f ppc: Use a helper to filter writes to LPCR
 0694e0d87fe3 ppc: Update LPCR definitions
 20596a008f27 ppc/pnv: Create a default PCI layout
 4b393cbdb438 ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge
 fa93c24cd3dc pci: Don't call pci_irq_handler() for a negative intx
 8872d909a8fb pci: Use the new pci_can_add_device() to enforce devfn_min/max
 9204957062b8 qdev: Add a hook for a bus to device if it can add devices
 00b55e345926 pci-bridge: Set a supported devfn_min for bridge
 e53d84a2716a ppc/pnv: Add OCC model stub with interrupt support
 0b6c38406589 ppc/pnv: Add cut down PSI bridge model and hookup external interrupt
 2649daef3f73 ppc/pnv: Add LPC controller and hook it up with a UART and RTC
 81fff5adcc87 ppc/pnv: Wire up XICS native with PowerNV platform
 aed1f862f0cc ppc/xics: Add xics to the monitor "info pic" command
 8a9d65a58764 ppc/xics: Add "native" XICS subclass
 2f29b84050d0 ppc/xics: Split ICS into base class and "simple" implementation
 f16e1abac820 ppc/xics: Use a helper to add a new ICS
 8311defd2daa ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c
 80ff29cbe02c ppc/xics: An ICS with offset 0 is assumed to be uninitialized
 b051717058cd ppc/xics: Make the ICSState a list
 c1498ed62ab9 ppc/xics: Replace "icp" with "xics" in most places
 de147c5277ee ppc/xics: Remove unused xics_set_irq_type()
 e8322cdda768 ppc/xics: Implement H_IPOLL using an accessor
 14597dd4d7c5 ppc/xics: Move SPAPR specific code to a separate file
 ea78b4729fe7 ppc/xics: Rename existing XICS classe to XICS_SPAPR
 b68322388931 ppc/pnv: Add XSCOM infrastructure
 e567fa64df0a ppc/pnv: Add skeletton PowerNV platform
 a926d8369e26 ppc: Add P7/P8 Power Management instructions
 bba3bf8362cf ppc: Move exception generation code out of line
 15411d6de866 ppc: Turn a bunch of booleans from int to bool
 554cf2e208d4 ppc: Add real mode CI load/store instructions for P7 and P8
 e75b02f8786e ppc: Rework generation of priv and inval interrupts
 ec7fde6eb1a6 ppc: Fix generation if ISI/DSI vs. HV mode
 91d6d6b8c720 ppc: Fix POWER7 and POWER8 exception definitions
 886802ba3a7b ppc: Rework POWER7 & POWER8 exception model
 72a37f87f790 ppc: Add PPC_64H instruction flag to POWER7 and POWER8
 036b3623c02d ppc: Get out of emulation on SMT "OR" ops
 5bf1a7bf1e45 ppc: Fix sign extension issue in mtmsr(d) emulation
 fbac2f43e655 ppc: Change 'invalid' bit mask of tlbiel and tlbie
 f308c5b1006f ppc: tlbie, tlbia and tlbisync are HV only
 317658992d97 ppc: Better figure out if processor has HV mode
 e25b26973a4b ppc: Create cpu_ppc_set_papr() helper
 14bffe9681df ppc: Fix rfi/rfid/hrfi/... emulation
 fb21ea327c61 ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV
 35fd16185bea ppc: Add number of threads per core to the processor definition
 e408897526e9 ppc: Add a bunch of hypervisor SPRs to Book3s
 5b287e66c751 ppc: Add macros to register hypervisor mode SPRs
 295ccb0507dc ppc: Update SPR definitions
 ed011d4fb179 ppc: Do some batching of TCG tlb flushes
 048a18af1c8d ppc: Use split I/D mmu modes to avoid flushes on interrupts
 681fc8d20391 ppc: Remove MMU_MODEn_SUFFIX definitions
 065f7f9b1e15 ppc: Fix CFAR updates
 2d3b7c0164e1 Merge remote-tracking branch 'remotes/amit-virtio-rng/tags/rng-for-2.6-1' into staging

From:

 https://github.com/legoater/qemu/commits/powernv-ipmi

$ git diff 2d3b7c0164e1..065f7f9b1e15  | diffstat 
 default-configs/ppc64-softmmu.mak   |    5 
 hmp-commands-info.hx                |    2 
 hw/intc/Makefile.objs               |    2 
 hw/intc/xics.c                      |  727 +++++++-----------------
 hw/intc/xics_kvm.c                  |   91 +--
 hw/intc/xics_native.c               |  295 +++++++++
 hw/intc/xics_spapr.c                |  429 ++++++++++++++
 hw/pci-bridge/pci_bridge_dev.c      |    2 
 hw/pci-host/Makefile.objs           |    2 
 hw/pci-host/pnv_phb3.c              | 1084 ++++++++++++++++++++++++++++++++++++
 hw/pci-host/pnv_phb3_msi.c          |  354 +++++++++++
 hw/pci-host/pnv_phb3_pbcq.c         |  315 ++++++++++
 hw/pci-host/pnv_phb3_rc.c           |  130 ++++
 hw/pci/pci.c                        |   26 
 hw/ppc/Makefile.objs                |    2 
 hw/ppc/pnv.c                        |  864 ++++++++++++++++++++++++++++
 hw/ppc/pnv_lpc.c                    |  527 +++++++++++++++++
 hw/ppc/pnv_occ.c                    |  126 ++++
 hw/ppc/pnv_psi.c                    |  594 +++++++++++++++++++
 hw/ppc/pnv_xscom.c                  |  415 +++++++++++++
 hw/ppc/ppc.c                        |   31 -
 hw/ppc/spapr.c                      |   28 
 hw/ppc/spapr_events.c               |    8 
 hw/ppc/spapr_hcall.c                |   14 
 hw/ppc/spapr_pci.c                  |   12 
 hw/ppc/spapr_vio.c                  |    2 
 include/hw/pci-host/pnv_phb3.h      |  145 ++++
 include/hw/pci-host/pnv_phb3_regs.h |  505 ++++++++++++++++
 include/hw/pci-host/spapr.h         |    2 
 include/hw/pci/pci_bus.h            |    1 
 include/hw/ppc/pnv.h                |   67 ++
 include/hw/ppc/pnv_xscom.h          |   73 ++
 include/hw/ppc/ppc.h                |    2 
 include/hw/ppc/spapr.h              |    2 
 include/hw/ppc/spapr_vio.h          |    2 
 include/hw/ppc/xics.h               |   81 ++
 include/hw/qdev-core.h              |    1 
 linux-user/main.c                   |    1 
 monitor.c                           |    3 
 qdev-monitor.c                      |   13 
 target-ppc/cpu-qom.h                |    1 
 target-ppc/cpu.h                    |  148 ++++
 target-ppc/excp_helper.c            |  369 +++++++-----
 target-ppc/helper.h                 |    6 
 target-ppc/helper_regs.h            |   75 ++
 target-ppc/machine.c                |    5 
 target-ppc/mmu-hash32.c             |    4 
 target-ppc/mmu-hash64.c             |  344 +++++++++--
 target-ppc/mmu-hash64.h             |    1 
 target-ppc/mmu_helper.c             |   17 
 target-ppc/timebase_helper.c        |   10 
 target-ppc/translate.c              | 1013 ++++++++++++++++++---------------
 target-ppc/translate_init.c         |  738 ++++++++++++++++++++++--
 53 files changed, 8350 insertions(+), 1366 deletions(-)

Thanks,

C. 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-09 20:04     ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
@ 2016-03-09 21:17       ` Thomas Huth
  2016-03-10 18:01         ` Thomas Huth
  0 siblings, 1 reply; 198+ messages in thread
From: Thomas Huth @ 2016-03-09 21:17 UTC (permalink / raw)
  To: Cédric Le Goater, Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 09.03.2016 21:04, Cédric Le Goater wrote:
> On 03/02/2016 09:30 PM, Thomas Huth wrote:
>> On 11.11.2015 01:28, Benjamin Herrenschmidt wrote:
>>> WORT and PID this time
>>>
...
>> AFAICT all patches where you define new SPRs with spr_register_kvm[_hv]
>> are also important independently of the rest of your patch series -
>> otherwise these registers are currently lost during migration since they
>> are not sync'ed with the KVM part in the kernel right now.
>>
>> So if you've got some spare time, could you maybe extract all those
>> patches that define new SPRs with spr_register_kvm[_hv] and send them as
>> a separate patch series? That could help to fix future migration issues,
>> and also would decrease the size of your really huge "Add native POWER8
>> platform" patch series a little bit!
> 
> I have been maintaining a port of Ben's patchset on the latest qemu for other 
> parts which should come after pnv is merged so I have a framework to test such 
> sub-patchsets. I also have time to work on them but clearly not the expertise
> in all areas !

That would be great if you could take care of this!

> What would be nice is to identify the most obvious ones, non controversial
> that could be merged after a few iterations. I have a vague idea, the ones 
> Reviewed-by David obviously being good candidates, the definition of new SPRs 
> (even the dummy ones ?).

I'd really like to see the KVM SPRs patches first - since they are fixing
potential problems with migration of the _current_ KVM machines already!
And being bug fixes, maybe these patches could even be included for QEMU
2.6 already? (i.e. before the hard freeze at the end of March)

So my wish-list for a first small patch series looks like this:

5b287e66c7513209  ppc: Add macros to register hypervisor mode SPRs
34f1af75e75e7ba0  ppc: Add dummy CIABR SPR
48adf38e9cab4663  ppc: A couple more dummy POWER8 Book4 regs
730a9b4dc9414818  ppc: Add KVM numbers to some P8 SPRs

There are a couple of other patches touching the SPRs initialization,
but they are not important with regards to migration... so not sure
whether it makes sense to include them now already...

 Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-09 21:17       ` Thomas Huth
@ 2016-03-10 18:01         ` Thomas Huth
  2016-03-10 22:27           ` Cédric Le Goater
  0 siblings, 1 reply; 198+ messages in thread
From: Thomas Huth @ 2016-03-10 18:01 UTC (permalink / raw)
  To: Cédric Le Goater, Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 09.03.2016 22:17, Thomas Huth wrote:
> On 09.03.2016 21:04, Cédric Le Goater wrote:
....
>> I have been maintaining a port of Ben's patchset on the latest qemu for other 
>> parts which should come after pnv is merged so I have a framework to test such 
>> sub-patchsets. I also have time to work on them but clearly not the expertise
>> in all areas !
> 
> That would be great if you could take care of this!
> 
>> What would be nice is to identify the most obvious ones, non controversial
>> that could be merged after a few iterations. I have a vague idea, the ones 
>> Reviewed-by David obviously being good candidates, the definition of new SPRs 
>> (even the dummy ones ?).
> 
> I really like to see the KVM SPRs patches first - since they are fixing
> potential problems with migration of the _current_ KVM machines already!
> And being bug fixes, maybe these patches could even be included for QEMU
> 2.6 already? (i.e. before the hard freeze at the end of March)
> 
> So my wish-list for a first small patch series looks like this:
> 
> 5b287e66c7513209  ppc: Add macros to register hypervisor mode SPRs
> 34f1af75e75e7ba0  ppc: Add dummy CIABR SPR
> 48adf38e9cab4663  ppc: A couple more dummy POWER8 Book4 regs
> 730a9b4dc9414818  ppc: Add KVM numbers to some P8 SPRs
> 
> There are a couple of other patches touching the SPRs initialization,
> but they are not important with regards to migration... so not sure
> whether it makes sense to include them now already...

FWIW, I just saw today (by doing some more experiments with
kvm-unit-tests) that the IAMR register is also not migrated yet ... so
it would be nice if you could include the related patches for IAMR, too,
and wire the KVM part up with KVM_REG_PPC_IAMR...

 Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-10 18:01         ` Thomas Huth
@ 2016-03-10 22:27           ` Cédric Le Goater
  2016-03-11 10:04             ` Thomas Huth
  0 siblings, 1 reply; 198+ messages in thread
From: Cédric Le Goater @ 2016-03-10 22:27 UTC (permalink / raw)
  To: Thomas Huth, Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

On 03/10/2016 07:01 PM, Thomas Huth wrote:
> On 09.03.2016 22:17, Thomas Huth wrote:
>> On 09.03.2016 21:04, Cédric Le Goater wrote:
> ....
>>> I have been maintaining a port of Ben's patchset on the latest qemu for other 
>>> parts which should come after pnv is merged so I have a framework to test such 
>>> sub-patchsets. I also have time to work on them but clearly not the expertise
>>> in all areas !
>>
>> That would be great if you could take care of this!
>>
>>> What would be nice is to identify the most obvious ones, non controversial
>>> that could be merged after a few iterations. I have a vague idea, the ones 
>>> Reviewed-by David obviously being good candidates, the definition of new SPRs 
>>> (even the dummy ones ?).
>>
>> I really like to see the KVM SPRs patches first - since they are fixing
>> potential problems with migration of the _current_ KVM machines already!
>> And being bug fixes, maybe these patches could even be included for QEMU
>> 2.6 already? (i.e. before the hard freeze at the end of March)
>>
>> So my wish-list for a first small patch series looks like this:
>>
>> 5b287e66c7513209  ppc: Add macros to register hypervisor mode SPRs
>> 34f1af75e75e7ba0  ppc: Add dummy CIABR SPR
>> 48adf38e9cab4663  ppc: A couple more dummy POWER8 Book4 regs
>> 730a9b4dc9414818  ppc: Add KVM numbers to some P8 SPRs
>>
>> There are a couple of other patches touching the SPRs initialization,
>> but they are not important with regards to migration... so not sure
>> whether it makes sense to include them now already...
> 
> FWIW, I just saw today (by doing some more experiments with
> kvm-unit-tests) that the IAMR register is also not migrated yet ... so
> it would be nice if you could include the related patches for IAMR, too,
> and wire the KVM part up with KVM_REG_PPC_IAMR...

OK. So we should be targeting something like :

	ppc: Update SPR definitions
	ppc: Add macros to register hypervisor mode SPRs
	ppc: Add a bunch of hypervisor SPRs to Book3s

	ppc: LPCR is a HV resource
	ppc: SPURR & PURR are HV writeable and privileged
	ppc: Add dummy SPR_IC for POWER8
	ppc: Initialize AMOR in PAPR mode
	ppc: Fix writing to AMR/UAMOR
	ppc: Add POWER8 IAMR register
	ppc: Add a few more P8 PMU SPRs
	ppc: Add dummy write to VTB
	ppc: Add dummy POWER8 MPPR register
	ppc: Add dummy POWER8 PSPB SPR
	ppc: Add dummy CIABR SPR
	ppc: Add dummy ACOP SPR
	ppc: A couple more dummy POWER8 Book4 regs
	ppc: Add KVM numbers to some P8 SPRs



Also, there seems to be an issue with qemu's HEAD on ppc64el with the
random device :

	-object rng-random,filename=/dev/urandom,id=gid0 -device spapr-rng,rng=gid0

qemu "hangs". This is a vague description for a symptom ... Does that ring
a bell or do I need to dig in to get more info ? 

Thanks,

C.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-10 22:27           ` Cédric Le Goater
@ 2016-03-11 10:04             ` Thomas Huth
  2016-03-11 14:22               ` Cédric Le Goater
  0 siblings, 1 reply; 198+ messages in thread
From: Thomas Huth @ 2016-03-11 10:04 UTC (permalink / raw)
  To: Cédric Le Goater, qemu-ppc; +Cc: qemu-devel

On 10.03.2016 23:27, Cédric Le Goater wrote:
> On 03/10/2016 07:01 PM, Thomas Huth wrote:
>> On 09.03.2016 22:17, Thomas Huth wrote:
>>> On 09.03.2016 21:04, Cédric Le Goater wrote:
>> ....
>>>> I have been maintaining a port of Ben's patchset on the latest qemu for other 
>>>> parts which should come after pnv is merged so I have a framework to test such 
>>>> sub-patchsets. I also have time to work on them but clearly not the expertise
>>>> in all areas !
>>>
>>> That would be great if you could take care of this!
>>>
>>>> What would be nice is to identify the most obvious ones, non controversial
>>>> that could be merged after a few iterations. I have a vague idea, the ones 
>>>> Reviewed-by David obviously being good candidates, the definition of new SPRs 
>>>> (even the dummy ones ?).
>>>
>>> I really like to see the KVM SPRs patches first - since they are fixing
>>> potential problems with migration of the _current_ KVM machines already!
>>> And being bug fixes, maybe these patches could even be included for QEMU
>>> 2.6 already? (i.e. before the hard freeze at the end of March)
>>>
>>> So my wish-list for a first small patch series looks like this:
>>>
>>> 5b287e66c7513209  ppc: Add macros to register hypervisor mode SPRs
>>> 34f1af75e75e7ba0  ppc: Add dummy CIABR SPR
>>> 48adf38e9cab4663  ppc: A couple more dummy POWER8 Book4 regs
>>> 730a9b4dc9414818  ppc: Add KVM numbers to some P8 SPRs
>>>
>>> There are a couple of other patches touching the SPRs initialization,
>>> but they are not important with regards to migration... so not sure
>>> whether it makes sense to include them now already...
>>
>> FWIW, I just saw today (by doing some more experiments with
>> kvm-unit-tests) that the IAMR register is also not migrated yet ... so
>> it would be nice if you could include the related patches for IAMR, too,
>> and wire the KVM part up with KVM_REG_PPC_IAMR...
> 
> OK. So we should be targeting something like :
> 
> 	ppc: Update SPR definitions
> 	ppc: Add macros to register hypervisor mode SPRs
> 	ppc: Add a bunch of hypervisor SPRs to Book3s
> 
> 	ppc: LPCR is a HV resource
> 	ppc: SPURR & PURR are HV writeable and privileged
> 	ppc: Add dummy SPR_IC for POWER8
> 	ppc: Initialize AMOR in PAPR mode
> 	ppc: Fix writing to AMR/UAMOR
> 	ppc: Add POWER8 IAMR register
> 	ppc: Add a few more P8 PMU SPRs
> 	ppc: Add dummy write to VTB
> 	ppc: Add dummy POWER8 MPPR register
> 	ppc: Add dummy POWER8 PSPB SPR
> 	ppc: Add dummy CIABR SPR
> 	ppc: Add dummy ACOP SPR
> 	ppc: A couple more dummy POWER8 Book4 regs
> 	ppc: Add KVM numbers to some P8 SPRs

Sounds good - but you likely can drop the "Add a few more P8 PMU SPRs"
from your list since it has already been queued by David (see
https://github.com/dgibson/qemu/commits/ppc-for-2.6), and the PSPB patch
is also not required anymore since I submitted a similar patch to David
already when I discovered that it is lost during migration.

> Also, there seem to be an issue with qemu's HEAD on ppc64el with the
> random device :
> 
> 	-object rng-random,filename=/dev/urandom,id=gid0 -device spapr-rng,rng=gid0
> 
> qemu "hangs". This is a vague description for a symptom ... Does that ring
> a bell or do I need to dig in to get more info ? 

Works for me™ ... could you supply more information? Where does it hang?
Which exact level of QEMU are you using? ... and please open a new mail
thread for this, since it's off-topic to this mail thread.

 Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-11 10:04             ` Thomas Huth
@ 2016-03-11 14:22               ` Cédric Le Goater
  2016-03-11 14:46                 ` Thomas Huth
  0 siblings, 1 reply; 198+ messages in thread
From: Cédric Le Goater @ 2016-03-11 14:22 UTC (permalink / raw)
  To: Thomas Huth, qemu-ppc; +Cc: qemu-devel

On 03/11/2016 11:04 AM, Thomas Huth wrote:
> On 10.03.2016 23:27, Cédric Le Goater wrote:
>> On 03/10/2016 07:01 PM, Thomas Huth wrote:
>>> On 09.03.2016 22:17, Thomas Huth wrote:
>>>> On 09.03.2016 21:04, Cédric Le Goater wrote:
>>> ....
>>>>> I have been maintaining a port of Ben's patchset on the latest qemu for other 
>>>>> parts which should come after pnv is merged so I have a framework to test such 
>>>>> sub-patchsets. I also have time to work on them but clearly not the expertise
>>>>> in all areas !
>>>>
>>>> That would be great if you could take care of this!
>>>>
>>>>> What would be nice is to identify the most obvious ones, non controversial
>>>>> that could be merged after a few iterations. I have a vague idea, the ones 
>>>>> Reviewed-by David obviously being good candidates, the definition of new SPRs 
>>>>> (even the dummy ones ?).
>>>>
>>>> I really like to see the KVM SPRs patches first - since they are fixing
>>>> potential problems with migration of the _current_ KVM machines already!
>>>> And being bug fixes, maybe these patches could even be included for QEMU
>>>> 2.6 already? (i.e. before the hard freeze at the end of March)
>>>>
>>>> So my wish-list for a first small patch series looks like this:
>>>>
>>>> 5b287e66c7513209  ppc: Add macros to register hypervisor mode SPRs
>>>> 34f1af75e75e7ba0  ppc: Add dummy CIABR SPR
>>>> 48adf38e9cab4663  ppc: A couple more dummy POWER8 Book4 regs
>>>> 730a9b4dc9414818  ppc: Add KVM numbers to some P8 SPRs
>>>>
>>>> There are a couple of other patches touching the SPRs initialization,
>>>> but they are not important with regards to migration... so not sure
>>>> whether it makes sense to include them now already...
>>>
>>> FWIW, I just saw today (by doing some more experiments with
>>> kvm-unit-tests) that the IAMR register is also not migrated yet ... so
>>> it would be nice if you could include the related patches for IAMR, too,
>>> and wire the KVM part up with KVM_REG_PPC_IAMR...
>>
>> OK. So we should be targeting something like :
>>
>> 	ppc: Update SPR definitions
>> 	ppc: Add macros to register hypervisor mode SPRs
>> 	ppc: Add a bunch of hypervisor SPRs to Book3s
>>
>> 	ppc: LPCR is a HV resource
>> 	ppc: SPURR & PURR are HV writeable and privileged
>> 	ppc: Add dummy SPR_IC for POWER8
>> 	ppc: Initialize AMOR in PAPR mode
>> 	ppc: Fix writing to AMR/UAMOR
>> 	ppc: Add POWER8 IAMR register
>> 	ppc: Add a few more P8 PMU SPRs
>> 	ppc: Add dummy write to VTB
>> 	ppc: Add dummy POWER8 MPPR register
>> 	ppc: Add dummy POWER8 PSPB SPR
>> 	ppc: Add dummy CIABR SPR
>> 	ppc: Add dummy ACOP SPR
>> 	ppc: A couple more dummy POWER8 Book4 regs
>> 	ppc: Add KVM numbers to some P8 SPRs
> 
> Sounds good - but you likely can drop the "Add a few more P8 PMU SPRs"
> from your list since it has already been queued by David already (see
> https://github.com/dgibson/qemu/commits/ppc-for-2.6), and the PSPB patch
> is also not required anymore since I submitted a similar patch to David
> already when I discovered that it is lost during migration.

Here is a first port on Dave's 2.6 branch. I tried to keep the
patchset minimal but I had to pull a few extra patches to keep
them more or less in sync with the original version from Ben.

	ppc: Update SPR definitions
	ppc: Add macros to register hypervisor mode SPRs
	ppc: Add a bunch of hypervisor SPRs to Book3s
	ppc: Add number of threads per core to the processor definition
	ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV
	ppc: Create cpu_ppc_set_papr() helper
	ppc: Better figure out if processor has HV mode
	ppc: Add placeholder SPRs for DPDES and DHDES on P8
	ppc: SPURR & PURR are HV writeable and privileged
	ppc: Add dummy SPR_IC for POWER8
	ppc: Initialize AMOR in PAPR mode
	ppc: Fix writing to AMR/UAMOR
	ppc: Add POWER8 IAMR register
	ppc: Add dummy write to VTB
	ppc: Add dummy POWER8 MPPR register
	ppc: Add dummy CIABR SPR
	ppc: Add dummy ACOP SPR
	ppc: A couple more dummy POWER8 Book4 regs
	ppc: Add KVM numbers to some P8 SPRs

Available here:

	https://github.com/legoater/qemu/commits/for-2.6

If you want to take a look, I did some quick tests on KVM and
TCG on a ppc64le Ubuntu host but no migration.

These two will probably need a merge as the first breaks the
compile, and I modified the second.

	ppc: Fix writing to AMR/UAMOR
	ppc: Add POWER8 IAMR register

I guess I will send them to the list for review afterwards.

Thanks,

C.

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-11 14:22               ` Cédric Le Goater
@ 2016-03-11 14:46                 ` Thomas Huth
  2016-03-14 14:53                   ` Cédric Le Goater
  0 siblings, 1 reply; 198+ messages in thread
From: Thomas Huth @ 2016-03-11 14:46 UTC (permalink / raw)
  To: Cédric Le Goater, qemu-ppc; +Cc: qemu-devel

On 11.03.2016 15:22, Cédric Le Goater wrote:
> On 03/11/2016 11:04 AM, Thomas Huth wrote:
>> On 10.03.2016 23:27, Cédric Le Goater wrote:
>>> On 03/10/2016 07:01 PM, Thomas Huth wrote:
>>>> On 09.03.2016 22:17, Thomas Huth wrote:
>>>>> On 09.03.2016 21:04, Cédric Le Goater wrote:
>>>> ....
>>>>>> I have been maintaining a port of Ben's patchset on the latest qemu for other 
>>>>>> parts which should come after pnv is merged so I have a framework to test such 
>>>>>> sub-patchsets. I also have time to work on them but clearly not the expertise
>>>>>> in all areas !
>>>>>
>>>>> That would be great if you could take care of this!
>>>>>
>>>>>> What would be nice is to identify the most obvious ones, non controversial
>>>>>> that could be merged after a few iterations. I have a vague idea, the ones 
>>>>>> Reviewed-by David obviously being good candidates, the definition of new SPRs 
>>>>>> (even the dummy ones ?).
>>>>>
>>>>> I really like to see the KVM SPRs patches first - since they are fixing
>>>>> potential problems with migration of the _current_ KVM machines already!
>>>>> And being bug fixes, maybe these patches could even be included for QEMU
>>>>> 2.6 already? (i.e. before the hard freeze at the end of March)
>>>>>
>>>>> So my wish-list for a first small patch series looks like this:
>>>>>
>>>>> 5b287e66c7513209  ppc: Add macros to register hypervisor mode SPRs
>>>>> 34f1af75e75e7ba0  ppc: Add dummy CIABR SPR
>>>>> 48adf38e9cab4663  ppc: A couple more dummy POWER8 Book4 regs
>>>>> 730a9b4dc9414818  ppc: Add KVM numbers to some P8 SPRs
>>>>>
>>>>> There are a couple of other patches touching the SPRs initialization,
>>>>> but they are not important with regards to migration... so not sure
>>>>> whether it makes sense to include them now already...
>>>>
>>>> FWIW, I just saw today (by doing some more experiments with
>>>> kvm-unit-tests) that the IAMR register is also not migrated yet ... so
>>>> it would be nice if you could include the related patches for IAMR, too,
>>>> and wire the KVM part up with KVM_REG_PPC_IAMR...
>>>
>>> OK. So we should be targeting something like :
>>>
>>> 	ppc: Update SPR definitions
>>> 	ppc: Add macros to register hypervisor mode SPRs
>>> 	ppc: Add a bunch of hypervisor SPRs to Book3s
>>>
>>> 	ppc: LPCR is a HV resource
>>> 	ppc: SPURR & PURR are HV writeable and privileged
>>> 	ppc: Add dummy SPR_IC for POWER8
>>> 	ppc: Initialize AMOR in PAPR mode
>>> 	ppc: Fix writing to AMR/UAMOR
>>> 	ppc: Add POWER8 IAMR register
>>> 	ppc: Add a few more P8 PMU SPRs
>>> 	ppc: Add dummy write to VTB
>>> 	ppc: Add dummy POWER8 MPPR register
>>> 	ppc: Add dummy POWER8 PSPB SPR
>>> 	ppc: Add dummy CIABR SPR
>>> 	ppc: Add dummy ACOP SPR
>>> 	ppc: A couple more dummy POWER8 Book4 regs
>>> 	ppc: Add KVM numbers to some P8 SPRs
>>
>> Sounds good - but you likely can drop the "Add a few more P8 PMU SPRs"
>> from your list since it has already been queued by David already (see
>> https://github.com/dgibson/qemu/commits/ppc-for-2.6), and the PSPB patch
>> is also not required anymore since I submitted a similar patch to David
>> already when I discovered that it is lost during migration.
> 
> Here is a first port on Dave's 2.6 branch. I tried to keep the
> patchset minimal but I had to pull a few extra patches to keep
> them more or less in sync with the original version from Ben.
> 
> 	ppc: Update SPR definitions
> 	ppc: Add macros to register hypervisor mode SPRs
> 	ppc: Add a bunch of hypervisor SPRs to Book3s
> 	ppc: Add number of threads per core to the processor definition
> 	ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV
> 	ppc: Create cpu_ppc_set_papr() helper
> 	ppc: Better figure out if processor has HV mode
> 	ppc: Add placeholder SPRs for DPDES and DHDES on P8
> 	ppc: SPURR & PURR are HV writeable and privileged
> 	ppc: Add dummy SPR_IC for POWER8
> 	ppc: Initialize AMOR in PAPR mode
> 	ppc: Fix writing to AMR/UAMOR
> 	ppc: Add POWER8 IAMR register
> 	ppc: Add dummy write to VTB
> 	ppc: Add dummy POWER8 MPPR register
> 	ppc: Add dummy CIABR SPR
> 	ppc: Add dummy ACOP SPR
> 	ppc: A couple more dummy POWER8 Book4 regs
> 	ppc: Add KVM numbers to some P8 SPRs
> 
> Available here:
> 
> 	https://github.com/legoater/qemu/commits/for-2.6
> 
> If you want to take a look, I did some quick test on KVM and
> TCG on a ppc64le ubuntu host but no migration.
> 
> These two will probably need a merge as the first breaks the
> compile, and I modified the second.
> 
> 	ppc: Fix writing to AMR/UAMOR
> 	ppc: Add POWER8 IAMR register

Modifying patches is fine as long as you mention the changes in the
patch description (in square brackets) and add your Sob - looking at
your tree, you already did this for the "ppc: Add POWER8 IAMR register"
patch, so that should be OK.

I'd maybe also squash the "Add KVM numbers to some P8 SPRs" patch into
the "Add a bunch of hypervisor SPRs to Book3s" to reduce the code churn
a little bit (note that the patch description for the "Add a bunch of
hypervisor SPRs to Book3s" patch is misleading - DAWR and DAWRX need to
be synchronized for migration, since the guest can set these registers via
the H_SET_MODE hypercall).

You could maybe also merge "Add dummy ACOP SPR" with "A couple more
dummy POWER8 Book4 regs" since the latter patch completely reworks the
definition of the ACOP register.

 Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-11 14:46                 ` Thomas Huth
@ 2016-03-14 14:53                   ` Cédric Le Goater
  2016-03-14 15:43                     ` Thomas Huth
  0 siblings, 1 reply; 198+ messages in thread
From: Cédric Le Goater @ 2016-03-14 14:53 UTC (permalink / raw)
  To: Thomas Huth, qemu-ppc; +Cc: qemu-devel

Hello Thomas,

On 03/11/2016 03:46 PM, Thomas Huth wrote:
> I'd maybe also squash the "Add KVM numbers to some P8 SPRs" patch into
> the "Add a bunch of hypervisor SPRs to Book3s" to reduce the code churn
> a little bit (note that the patch description for the "Add a bunch of
> hypervisor SPRs to Book3s" patch is misleading - DAWR and DAWRX need to
> be synchronized for migration, since the guest can set this register via
> the H_SET_MODE hypercall).

So shall I just remove the comment :

  "We don't give them a KVM reg number yet as no current KVM version
   supports HV mode."

from patch "Add a bunch of hypervisor SPRs to Book3s" after squashing in 
"Add KVM numbers to some P8 SPRs" ?


Thanks,

C. 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-14 14:53                   ` Cédric Le Goater
@ 2016-03-14 15:43                     ` Thomas Huth
  2016-03-14 15:50                       ` Cédric Le Goater
  0 siblings, 1 reply; 198+ messages in thread
From: Thomas Huth @ 2016-03-14 15:43 UTC (permalink / raw)
  To: Cédric Le Goater, qemu-ppc; +Cc: qemu-devel

On 14.03.2016 15:53, Cédric Le Goater wrote:
> Hello Thomas,
> 
> On 03/11/2016 03:46 PM, Thomas Huth wrote:
>> I'd maybe also squash the "Add KVM numbers to some P8 SPRs" patch into
>> the "Add a bunch of hypervisor SPRs to Book3s" to reduce the code churn
>> a little bit (note that the patch description for the "Add a bunch of
>> hypervisor SPRs to Book3s" patch is misleading - DAWR and DAWRX need to
>> be synchronized for migration, since the guest can set this register via
>> the H_SET_MODE hypercall).
> 
> So shall I just remove the comment :
> 
>   "We don't give them a KVM reg number yet as no current KVM version
>    supports HV mode."
> 
> from patch "Add a bunch of hypervisor SPRs to Book3s" after squashing in 
> "Add KVM numbers to some P8 SPRs" ?

Either that, or change it into something like:

"We don't give a KVM reg number to most of the registers yet as no
current KVM version supports HV mode. For DAWR and DAWRX, the KVM reg
number is needed since these registers can be set by the guest via the
H_SET_MODE hypercall."

?

 Thomas

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs
  2016-03-14 15:43                     ` Thomas Huth
@ 2016-03-14 15:50                       ` Cédric Le Goater
  0 siblings, 0 replies; 198+ messages in thread
From: Cédric Le Goater @ 2016-03-14 15:50 UTC (permalink / raw)
  To: Thomas Huth, qemu-ppc; +Cc: qemu-devel

On 03/14/2016 04:43 PM, Thomas Huth wrote:
> On 14.03.2016 15:53, Cédric Le Goater wrote:
>> Hello Thomas,
>>
>> On 03/11/2016 03:46 PM, Thomas Huth wrote:
>>> I'd maybe also squash the "Add KVM numbers to some P8 SPRs" patch into
>>> the "Add a bunch of hypervisor SPRs to Book3s" to reduce the code churn
>>> a little bit (note that the patch description for the "Add a bunch of
>>> hypervisor SPRs to Book3s" patch is misleading - DAWR and DAWRX need to
>>> be synchronized for migration, since the guest can set this register via
>>> the H_SET_MODE hypercall).
>>
>> So shall I just remove the comment :
>>
>>   "We don't give them a KVM reg number yet as no current KVM version
>>    supports HV mode."
>>
>> from patch "Add a bunch of hypervisor SPRs to Book3s" after squashing in 
>> "Add KVM numbers to some P8 SPRs" ?
> 
> Either that, or change it into something like:
> 
> "We don't give them a KVM reg number to most of the registers yet as no
> current KVM version supports HV mode. For DAWR and DAWRX, the KVM reg
> number is needed since this register can be set by the guest via the
> H_SET_MODE hypercall."

OK. Good. I should be sending a first patchset to the list soon, after a few
checks.

Thanks,

C. 

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge
  2015-11-11  0:28 ` [Qemu-devel] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge Benjamin Herrenschmidt
@ 2017-03-17  8:24   ` Cédric Le Goater
  2017-03-17 22:15     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 198+ messages in thread
From: Cédric Le Goater @ 2017-03-17  8:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc; +Cc: qemu-devel

Ben,

Quick question,

> +/* This is called whenever the PHB LSI, MSI source ID register or
> + * the PBCQ irq filters are written.
> + */
> +void pnv_phb3_remap_irqs(PnvPhb3State *phb)
> +{
> +    uint32_t local, global, count, mask, comp;
> +    uint64_t baren;
> +
> +    /* First check if we are enabled. Unlike real HW we don't separate TX and RX
> +     * so we enable if both are set
> +     */
> +    baren = phb->pbcq->nest_regs[PBCQ_NEST_BAR_EN];
> +    if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
> +        !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
> +        phb->lsi_ics->offset = 0;
> +        return;
> +    }
> +
> +    /* Grab local LSI source ID */
> +    local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
> +
> +    /* Grab global one and compare */
> +    global = GETFIELD(PBCQ_NEST_LSI_SRC,
> +                      phb->pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
> +    if (global != local) {
> +        /* This happens during initialization, let's come back when we
> +         * are properly configured
> +         */
> +        phb->lsi_ics->offset = 0;
> +        return;
> +    }
> +
> +    /* Get the base on the powerbus */
> +    comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
> +                    phb->pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
> +    mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
> +                    phb->pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
> +    count = ((~mask) + 1) & 0x7ffff;
> +    phb->total_irq = count;
> +
> +    /* Sanity checks */
> +    if ((global + 8) > count) {
> +        DBG_MAP(phb, "LSIs out of reach: LSI base=%d total irq=%d",
> +                global, count);
> +    }
> +    if (count > 2048) {
> +        DBG_MAP(phb, "More interrupts than supported: %d", count);
> +    }
> +    if ((comp & mask) != comp) {
> +        DBG_MAP(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
> +                comp, mask);
> +        comp &= mask;
> +    }
> +    /* Setup LSI offset */
> +    phb->lsi_ics->offset = comp + global;
> +
> +    /* Setup MSI offset */
> +    pnv_phb3_msi_update_config(phb->msis, comp, count);

I changed that to :

    pnv_phb3_msi_update_config(phb->msis, comp, count - PHB_NUM_LSI);

else the IRQ numbers overlap with the LSI and I think this is why we were
uselessly looping on the EOI.

Correct ? 

C.
 
> +
> +    DBG_MAP(phb, "Initialized for %d interrupts @0x%x, LSI off=%d",
> +            count, comp, global);
> +}
> +

^ permalink raw reply	[flat|nested] 198+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge
  2017-03-17  8:24   ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
@ 2017-03-17 22:15     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 198+ messages in thread
From: Benjamin Herrenschmidt @ 2017-03-17 22:15 UTC (permalink / raw)
  To: Cédric Le Goater, qemu-ppc; +Cc: qemu-devel

On Fri, 2017-03-17 at 09:24 +0100, Cédric Le Goater wrote:
> I changed that to :
> 
>     pnv_phb3_msi_update_config(phb->msis, comp, count - PHB_NUM_LSI);
> 
> else the IRQ numbers overlap with the LSI and I think this why we
> were 
> uselessly looping on the EOI.
> 
> Correct ? 

Quite possibly, though to be honest, I have swapped a lot of that
out of my brain :-)

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 198+ messages in thread

end of thread, other threads:[~2017-03-17 22:15 UTC | newest]

Thread overview: 198+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-11  0:27 [Qemu-devel] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 01/77] ppc: Remove MMU_MODEn_SUFFIX definitions Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts Benjamin Herrenschmidt
2015-11-16  4:49   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-16 10:10     ` Benjamin Herrenschmidt
2015-11-16 12:42       ` David Gibson
2015-11-27 10:29   ` Alexander Graf
2015-11-27 12:15     ` Paolo Bonzini
2015-11-11  0:27 ` [Qemu-devel] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes Benjamin Herrenschmidt
2015-11-16  5:00   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-16 10:16     ` Benjamin Herrenschmidt
2015-11-19  6:09       ` David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 04/77] target-ppc: Use sensible POWER8/POWER8E versions Benjamin Herrenschmidt
2015-11-11  0:59   ` [Qemu-devel] [Qemu-ppc] " Stewart Smith
2015-11-16  5:01   ` David Gibson
2015-11-16 10:17     ` Benjamin Herrenschmidt
2015-11-17  0:11       ` Alexey Kardashevskiy
2015-11-17  0:40         ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 05/77] ppc: Update SPR definitions Benjamin Herrenschmidt
2015-11-16  5:06   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs Benjamin Herrenschmidt
2015-11-16  5:09   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 07/77] ppc: Add a bunch of hypervisor SPRs to Book3s Benjamin Herrenschmidt
2015-11-19  6:11   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-19 10:21     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 08/77] ppc: Add number of threads per core to the processor definition Benjamin Herrenschmidt
2015-11-16  5:16   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-20  0:29     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation Benjamin Herrenschmidt
2015-11-19  6:19   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-19 10:23     ` Benjamin Herrenschmidt
2015-11-20  0:26       ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 10/77] ppc: Fix hreg_store_msr() so that non-HV mode cannot alter MSR:HV Benjamin Herrenschmidt
2015-11-19  6:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper Benjamin Herrenschmidt
2015-11-16  5:30   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 12/77] ppc: Better figure out if processor has HV mode Benjamin Herrenschmidt
2015-11-19  6:22   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 13/77] ppc: tlbie, tlbia and tlbisync are HV only Benjamin Herrenschmidt
2015-11-16  5:34   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-16 10:21     ` Benjamin Herrenschmidt
2015-11-18  0:06       ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 14/77] ppc: Change 'invalid' bit mask of tlbiel and tlbie Benjamin Herrenschmidt
2015-11-20  7:02   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 15/77] ppc: Fix sign extension issue in mtmsr(d) emulation Benjamin Herrenschmidt
2015-11-19  6:26   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-19 10:26     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 16/77] ppc: Get out of emulation on SMT "OR" ops Benjamin Herrenschmidt
2015-11-16  5:40   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 17/77] ppc: Add PPC_64H instruction flag to POWER7 and POWER8 Benjamin Herrenschmidt
2015-11-16  5:41   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 18/77] ppc: Rework POWER7 & POWER8 exception model Benjamin Herrenschmidt
2015-11-19  6:44   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-19 10:31     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 19/77] ppc: Fix POWER7 and POWER8 exception definitions Benjamin Herrenschmidt
2015-11-19  6:46   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 20/77] ppc: Fix generation if ISI/DSI vs. HV mode Benjamin Herrenschmidt
2015-11-19  6:50   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 21/77] ppc: Rework generation of priv and inval interrupts Benjamin Herrenschmidt
2015-11-20  7:45   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-24  0:44     ` Benjamin Herrenschmidt
2015-11-24  2:22       ` David Gibson
2015-11-24  0:51     ` Benjamin Herrenschmidt
2015-11-24  2:22       ` David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 22/77] ppc: Add real mode CI load/store instructions for P7 and P8 Benjamin Herrenschmidt
2015-11-20  7:48   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-24  0:58     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 23/77] ppc: Turn a bunch of booleans from int to bool Benjamin Herrenschmidt
2015-11-20  7:49   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 24/77] ppc: Move exception generation code out of line Benjamin Herrenschmidt
2015-11-20  7:53   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-24  0:59     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 25/77] ppc: Add P7/P8 Power Management instructions Benjamin Herrenschmidt
2015-11-20  8:06   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 26/77] ppc/pnv: Add skeletton PowerNV platform Benjamin Herrenschmidt
2015-11-19  8:58   ` [Qemu-devel] [Qemu-ppc] " Stewart Smith
2015-11-20  8:21   ` David Gibson
2015-11-24  1:45     ` Benjamin Herrenschmidt
2015-11-24  2:43       ` David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 27/77] ppc/pnv: Add XSCOM infrastructure Benjamin Herrenschmidt
2015-11-24  3:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-24  8:49     ` Benjamin Herrenschmidt
2015-11-24  8:55     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 28/77] ppc/xics: Rename existing XICS classe to XICS_SPAPR Benjamin Herrenschmidt
2015-11-24  3:25   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 29/77] ppc/xics: Move SPAPR specific code to a separate file Benjamin Herrenschmidt
2015-11-24  3:32   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 30/77] ppc/xics: Implement H_IPOLL using an accessor Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 31/77] ppc/xics: Remove unused xics_set_irq_type() Benjamin Herrenschmidt
2015-11-24  3:34   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 32/77] ppc/xics: Replace "icp" with "xics" in most places Benjamin Herrenschmidt
2015-11-24  3:36   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 33/77] ppc/xics: Make the ICSState a list Benjamin Herrenschmidt
2015-12-01  4:30   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 34/77] ppc/xics: An ICS with offset 0 is assumed to be uninitialized Benjamin Herrenschmidt
2015-12-01  4:40   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 35/77] ppc/xics: Move xics_set_nr_irqs() to xics_spapr.c and xics_kvm.c Benjamin Herrenschmidt
2015-12-01  4:46   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 36/77] ppc/xics: Use a helper to add a new ICS Benjamin Herrenschmidt
2015-12-01  4:47   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 37/77] ppc/xics: Split ICS into base class and "simple" implementation Benjamin Herrenschmidt
2015-12-01  5:13   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 38/77] ppc/xics: Add "native" XICS subclass Benjamin Herrenschmidt
2015-12-01  6:28   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-12-01  6:39   ` David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 39/77] ppc/xics: Add xics to the monitor "info pic" command Benjamin Herrenschmidt
2015-12-01  6:32   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 40/77] ppc/pnv: Wire up XICS native with PowerNV platform Benjamin Herrenschmidt
2015-12-01  6:41   ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-11-11  0:27 ` [Qemu-devel] [PATCH 41/77] ppc/pnv: Add LPC controller and hook it up with a UART and RTC Benjamin Herrenschmidt
2015-11-17  0:32   ` Alexey Kardashevskiy
2015-11-17  0:40     ` Benjamin Herrenschmidt
2015-12-01  6:43       ` [Qemu-devel] [Qemu-ppc] " David Gibson
2015-12-02  2:24         ` Alexey Kardashevskiy
2015-12-02  5:29           ` Benjamin Herrenschmidt
2015-12-03  1:04             ` Alexey Kardashevskiy
2015-12-03  1:45               ` David Gibson
2015-12-03 22:58                 ` Benjamin Herrenschmidt
2015-12-03 22:54               ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 42/77] ppc/pnv: Add cut down PSI bridge model and hookup external interrupt Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 43/77] ppc/pnv: Add OCC model stub with interrupt support Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 44/77] pci-bridge: Set a supported devfn_min for bridge Benjamin Herrenschmidt
2015-11-18 12:31   ` Paolo Bonzini
2015-11-18 12:41     ` [Qemu-devel] [PATCH for-2.5 " Paolo Bonzini
2015-11-18 14:21       ` Michael S. Tsirkin
2015-11-18 14:25         ` Paolo Bonzini
2015-11-18 16:38           ` Michael S. Tsirkin
2015-11-11  0:27 ` [Qemu-devel] [PATCH 45/77] qdev: Add a hook for a bus to device if it can add devices Benjamin Herrenschmidt
2015-11-18 12:34   ` Paolo Bonzini
2015-11-18 20:06     ` Benjamin Herrenschmidt
2015-11-11  0:27 ` [Qemu-devel] [PATCH 46/77] pci: Use the new pci_can_add_device() to enforce devfn_min/max Benjamin Herrenschmidt
2015-11-18 12:35   ` Paolo Bonzini
2015-11-11  0:28 ` [Qemu-devel] [PATCH 47/77] pci: Don't call pci_irq_handler() for a negative intx Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 48/77] ppc/pnv: Add model for Power8 PHB3 PCIe Host bridge Benjamin Herrenschmidt
2017-03-17  8:24   ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
2017-03-17 22:15     ` Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 49/77] ppc/pnv: Create a default PCI layout Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 50/77] ppc: Update LPCR definitions Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 51/77] ppc: Use a helper to filter writes to LPCR Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 52/77] ppc: Cosmetic, align some comments Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 53/77] ppc: Add proper real mode translation support Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 54/77] ppc: Fix 64K pages support in full emulation Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 55/77] ppc/pnv+spapr: Add "ibm, pa-features" property to the device-tree Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 56/77] ppc: Fix conditions for delivering external interrupts to a guest Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 57/77] ppc: Enforce setting MSR:EE, IR and DR when MSR:PR is set Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 58/77] ppc: Initial HDEC support Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 59/77] ppc: Add placeholder SPRs for DPDES and DHDES on P8 Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 60/77] ppc: LPCR is a HV resource Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 61/77] ppc: SPURR & PURR are HV writeable and privileged Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 62/77] ppc: Add dummy SPR_IC for POWER8 Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 63/77] ppc: Initialize AMOR in PAPR mode Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 64/77] ppc: Fix writing to AMR/UAMOR Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 65/77] ppc: Add POWER8 IAMR register Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 66/77] ppc: Add a few more P8 PMU SPRs Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 67/77] ppc: Add dummy write to VTB Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 68/77] ppc: Add dummy POWER8 MPPR register Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 69/77] ppc: Add dummy POWER8 PSPB SPR Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 70/77] ppc: Add dummy CIABR SPR Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 71/77] ppc: Add dummy ACOP SPR Benjamin Herrenschmidt
2016-03-02 20:22   ` Thomas Huth
2015-11-11  0:28 ` [Qemu-devel] [PATCH 72/77] ppc: A couple more dummy POWER8 Book4 regs Benjamin Herrenschmidt
2016-03-02 20:30   ` Thomas Huth
2016-03-04  0:59     ` Benjamin Herrenschmidt
2016-03-09 20:04     ` [Qemu-devel] [Qemu-ppc] " Cédric Le Goater
2016-03-09 21:17       ` Thomas Huth
2016-03-10 18:01         ` Thomas Huth
2016-03-10 22:27           ` Cédric Le Goater
2016-03-11 10:04             ` Thomas Huth
2016-03-11 14:22               ` Cédric Le Goater
2016-03-11 14:46                 ` Thomas Huth
2016-03-14 14:53                   ` Cédric Le Goater
2016-03-14 15:43                     ` Thomas Huth
2016-03-14 15:50                       ` Cédric Le Goater
2015-11-11  0:28 ` [Qemu-devel] [PATCH 73/77] ppc: Add KVM numbers to some P8 SPRs Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 74/77] ppc: Print HSRR0/HSRR1 in "info registers" Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 75/77] ppc: Add dummy logmpp instruction Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 76/77] ppc: Add slbfee. instruction Benjamin Herrenschmidt
2015-11-11  0:28 ` [Qemu-devel] [PATCH 77/77] ppc: Fix CFAR updates Benjamin Herrenschmidt
2015-11-11  0:42 ` [Qemu-devel] [Qemu-ppc] [PATCH 00/77] ppc: Add "native" POWER8 platform Benjamin Herrenschmidt
2015-11-11  0:50 ` [Qemu-devel] " Eric Blake
2015-11-11  0:56   ` Benjamin Herrenschmidt
2015-11-11  3:27     ` [Qemu-devel] [Qemu-ppc] " Alexey Kardashevskiy
2015-11-11  3:38       ` Benjamin Herrenschmidt
2015-11-11  4:07         ` Alexey Kardashevskiy
2015-11-11  4:16           ` Benjamin Herrenschmidt
2015-11-11  4:41             ` Alexey Kardashevskiy
2015-11-11  4:47               ` Benjamin Herrenschmidt
2015-11-27 10:21               ` Alexander Graf
2015-11-28  7:59                 ` Benjamin Herrenschmidt
2015-11-28 10:53                   ` Alexander Graf
2015-11-29  0:38                     ` Benjamin Herrenschmidt
2015-11-30 18:15                   ` Cédric Le Goater
2015-11-30 20:09                     ` Benjamin Herrenschmidt
2015-11-30 21:24                       ` Cédric Le Goater
2015-11-30 23:12                         ` Benjamin Herrenschmidt
2015-12-07  1:25                     ` Stewart Smith
2015-12-07 22:48                       ` Cédric Le Goater
2015-11-11  0:57 ` Stewart Smith

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.