All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER
@ 2019-02-26  3:05 Suraj Jitindar Singh
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG Suraj Jitindar Singh
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26  3:05 UTC (permalink / raw)
  To: qemu-ppc; +Cc: david, clg, qemu-devel, Suraj Jitindar Singh

Add spapr_cap SPAPR_CAP_LARGE_DECREMENTER to be used to control the
availability and size of the large decrementer made available to the
guest.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
---
 hw/ppc/spapr.c         |  2 ++
 hw/ppc/spapr_caps.c    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |  5 ++++-
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b6a571b6f1..acf62a2b9f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2077,6 +2077,7 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_irq_map,
         &vmstate_spapr_cap_nested_kvm_hv,
         &vmstate_spapr_dtb,
+        &vmstate_spapr_cap_large_decr,
         NULL
     }
 };
@@ -4288,6 +4289,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;
     spapr_caps_add_properties(smc, &error_abort);
     smc->irq = &spapr_irq_xics;
     smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 64f98ae68d..1545a02729 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -182,6 +182,34 @@ static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name,
     spapr->eff.caps[cap->index] = val;
 }
 
+static void spapr_cap_get_uint8(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    sPAPRCapabilityInfo *cap = opaque;
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    uint8_t val = spapr_get_cap(spapr, cap->index);
+
+    visit_type_uint8(v, name, &val, errp);
+}
+
+static void spapr_cap_set_uint8(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    sPAPRCapabilityInfo *cap = opaque;
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    Error *local_err = NULL;
+    uint8_t val;
+
+    visit_type_uint8(v, name, &val, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    spapr->cmd_line_caps[cap->index] = true;
+    spapr->eff.caps[cap->index] = val;
+}
+
 static void cap_htm_apply(sPAPRMachineState *spapr, uint8_t val, Error **errp)
 {
     if (!val) {
@@ -390,6 +418,13 @@ static void cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
     }
 }
 
+static void cap_large_decr_apply(sPAPRMachineState *spapr,
+                                 uint8_t val, Error **errp)
+{
+    if (val)
+        error_setg(errp, "No large decrementer support, try cap-large-decr=0");
+}
+
 sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -468,6 +503,15 @@ sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_nested_kvm_hv_apply,
     },
+    [SPAPR_CAP_LARGE_DECREMENTER] = {
+        .name = "large-decr",
+        .description = "Size of Large Decrementer for the Guest (bits) 0=disabled",
+        .index = SPAPR_CAP_LARGE_DECREMENTER,
+        .get = spapr_cap_get_uint8,
+        .set = spapr_cap_set_uint8,
+        .type = "int",
+        .apply = cap_large_decr_apply,
+    },
 };
 
 static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr,
@@ -596,6 +640,7 @@ SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
 SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
 SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
+SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 
 void spapr_caps_init(sPAPRMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 59073a7579..8efc5e0779 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -74,8 +74,10 @@ typedef enum {
 #define SPAPR_CAP_HPT_MAXPAGESIZE       0x06
 /* Nested KVM-HV */
 #define SPAPR_CAP_NESTED_KVM_HV         0x07
+/* Large Decrementer */
+#define SPAPR_CAP_LARGE_DECREMENTER     0x08
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_KVM_HV + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_LARGE_DECREMENTER + 1)
 
 /*
  * Capability Values
@@ -828,6 +830,7 @@ extern const VMStateDescription vmstate_spapr_cap_cfpc;
 extern const VMStateDescription vmstate_spapr_cap_sbbc;
 extern const VMStateDescription vmstate_spapr_cap_ibs;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
+extern const VMStateDescription vmstate_spapr_cap_large_decr;
 
 static inline uint8_t spapr_get_cap(sPAPRMachineState *spapr, int cap)
 {
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG
  2019-02-26  3:05 [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER Suraj Jitindar Singh
@ 2019-02-26  3:05 ` Suraj Jitindar Singh
  2019-02-26  3:53   ` David Gibson
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM Suraj Jitindar Singh
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26  3:05 UTC (permalink / raw)
  To: qemu-ppc; +Cc: david, clg, qemu-devel, Suraj Jitindar Singh

Prior to POWER9 the decrementer was a 32-bit register which decremented
with each tick of the timebase. From POWER9 onwards the decrementer can
be set to operate in a mode called large decrementer where it acts as a
n-bit decrementing register which is visible as a 64-bit register, that
is the value of the decrementer is sign extended to 64 bits (where n is
implementation dependant).

The mode in which the decrementer operates is controlled by the LPCR_LD
bit in the logical paritition control register (LPCR).

>From POWER9 onwards the HDEC (hypervisor decrementer) was enlarged to
h-bits, also sign extended to 64 bits (where h is implementation
dependant). Note this isn't configurable and is always enabled.

For TCG we allow the user to configure a custom large decrementer size,
so long as it's at least 32 and less than the size of the HDEC (the
restrictions imposed by the ISA).

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/ppc/ppc.c                    | 78 ++++++++++++++++++++++++++++-------------
 hw/ppc/spapr.c                  |  8 +++++
 hw/ppc/spapr_caps.c             | 38 +++++++++++++++++++-
 target/ppc/cpu-qom.h            |  1 +
 target/ppc/cpu.h                | 11 +++---
 target/ppc/mmu-hash64.c         |  2 +-
 target/ppc/translate.c          |  2 +-
 target/ppc/translate_init.inc.c |  1 +
 8 files changed, 109 insertions(+), 32 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index d1e3d4cd20..853afeed6a 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -744,10 +744,10 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env)
     return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED);
 }
 
-static inline uint32_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
+static inline uint64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
 {
     ppc_tb_t *tb_env = env->tb_env;
-    uint32_t decr;
+    uint64_t decr;
     int64_t diff;
 
     diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
@@ -758,27 +758,42 @@ static inline uint32_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
     }  else {
         decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
     }
-    LOG_TB("%s: %08" PRIx32 "\n", __func__, decr);
+    LOG_TB("%s: %016" PRIx64 "\n", __func__, decr);
 
     return decr;
 }
 
-uint32_t cpu_ppc_load_decr (CPUPPCState *env)
+target_ulong cpu_ppc_load_decr (CPUPPCState *env)
 {
     ppc_tb_t *tb_env = env->tb_env;
+    uint64_t decr;
 
     if (kvm_enabled()) {
         return env->spr[SPR_DECR];
     }
 
-    return _cpu_ppc_load_decr(env, tb_env->decr_next);
+    decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
+
+    /*
+     * If large decrementer is enabled then the decrementer is signed extened
+     * to 64 bits, otherwise it is a 32 bit value.
+     */
+    if (env->spr[SPR_LPCR] & LPCR_LD)
+        return decr;
+    return (uint32_t) decr;
 }
 
-uint32_t cpu_ppc_load_hdecr (CPUPPCState *env)
+target_ulong cpu_ppc_load_hdecr (CPUPPCState *env)
 {
     ppc_tb_t *tb_env = env->tb_env;
+    uint64_t decr;
 
-    return _cpu_ppc_load_decr(env, tb_env->hdecr_next);
+    decr =  _cpu_ppc_load_decr(env, tb_env->hdecr_next);
+
+    /* If POWER9 or later then hdecr is sign extended to 64 bits otherwise 32 */
+    if (env->mmu_model & POWERPC_MMU_3_00)
+        return decr;
+    return (uint32_t) decr;
 }
 
 uint64_t cpu_ppc_load_purr (CPUPPCState *env)
@@ -832,13 +847,21 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
                                  QEMUTimer *timer,
                                  void (*raise_excp)(void *),
                                  void (*lower_excp)(PowerPCCPU *),
-                                 uint32_t decr, uint32_t value)
+                                 target_ulong decr, target_ulong value,
+                                 int nr_bits)
 {
     CPUPPCState *env = &cpu->env;
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t now, next;
+    bool negative;
+
+    /* Truncate value to decr_width and sign extend for simplicity */
+    value &= ((1ULL << nr_bits) - 1);
+    negative = !!(value & (1ULL << (nr_bits - 1)));
+    if (negative)
+        value |= (0xFFFFFFFFULL << nr_bits);
 
-    LOG_TB("%s: %08" PRIx32 " => %08" PRIx32 "\n", __func__,
+    LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n", __func__,
                 decr, value);
 
     if (kvm_enabled()) {
@@ -860,15 +883,15 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
      * an edge interrupt, so raise it here too.
      */
     if ((value < 3) ||
-        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && (value & 0x80000000)) ||
-        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && (value & 0x80000000)
-          && !(decr & 0x80000000))) {
+        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) ||
+        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative
+          && !(decr & (1ULL << (nr_bits - 1))))) {
         (*raise_excp)(cpu);
         return;
     }
 
     /* On MSB level based systems a 0 for the MSB stops interrupt delivery */
-    if (!(value & 0x80000000) && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
+    if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
         (*lower_excp)(cpu);
     }
 
@@ -881,21 +904,24 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
     timer_mod(timer, next);
 }
 
-static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, uint32_t decr,
-                                       uint32_t value)
+static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
+                                       target_ulong value, int nr_bits)
 {
     ppc_tb_t *tb_env = cpu->env.tb_env;
 
     __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer,
                          tb_env->decr_timer->cb, &cpu_ppc_decr_lower, decr,
-                         value);
+                         value, nr_bits);
 }
 
-void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value)
+void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value)
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    int nr_bits = 32;
+    if ((env->spr[SPR_LPCR] & LPCR_LD) && (env->large_decr_bits > 32))
+        nr_bits = env->large_decr_bits;
 
-    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value);
+    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits);
 }
 
 static void cpu_ppc_decr_cb(void *opaque)
@@ -905,23 +931,25 @@ static void cpu_ppc_decr_cb(void *opaque)
     cpu_ppc_decr_excp(cpu);
 }
 
-static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, uint32_t hdecr,
-                                        uint32_t value)
+static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr,
+                                        target_ulong value, int nr_bits)
 {
     ppc_tb_t *tb_env = cpu->env.tb_env;
 
     if (tb_env->hdecr_timer != NULL) {
         __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer,
                              tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower,
-                             hdecr, value);
+                             hdecr, value, nr_bits);
     }
 }
 
-void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value)
+void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value)
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    int nr_bits = (pcc->hdecr_bits > 32) ? pcc->hdecr_bits : 32;
 
-    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value);
+    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, nr_bits);
 }
 
 static void cpu_ppc_hdecr_cb(void *opaque)
@@ -951,8 +979,8 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
      * if a decrementer exception is pending when it enables msr_ee at startup,
      * it's not ready to handle it...
      */
-    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
-    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
+    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
+    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
     cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
 }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index acf62a2b9f..966bc74e68 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -557,6 +557,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                           pcc->radix_page_info->count *
                           sizeof(radix_AP_encodings[0]))));
     }
+
+    /*
+     * We set this property to let the guest know that it can use the large
+     * decrementer and its width in bits.
+     */
+    if (env->large_decr_bits)
+        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
+                              env->large_decr_bits)));
 }
 
 static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 1545a02729..44542fdbb2 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -421,8 +421,43 @@ static void cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
 static void cap_large_decr_apply(sPAPRMachineState *spapr,
                                  uint8_t val, Error **errp)
 {
-    if (val)
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+
+    if (!val)
+        return; /* Disabled by default */
+
+    if (tcg_enabled()) {
+        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
+                              spapr->max_compat_pvr)) {
+            error_setg(errp,
+                "Large decrementer only supported on POWER9, try -cpu POWER9");
+            return;
+        }
+        if ((val < 32) || (val > pcc->hdecr_bits)) {
+            error_setg(errp,
+                "Large decrementer size unsupported, try -cap-large-decr=%d",
+                pcc->hdecr_bits);
+            return;
+        }
+    } else {
         error_setg(errp, "No large decrementer support, try cap-large-decr=0");
+    }
+}
+
+static void cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
+                                     PowerPCCPU *cpu,
+                                     uint8_t val, Error **errp)
+{
+    CPUPPCState *env = &cpu->env;
+    target_ulong lpcr = env->spr[SPR_LPCR];
+
+    if (val)
+        lpcr |= LPCR_LD;
+    else
+        lpcr &= ~LPCR_LD;
+    ppc_store_lpcr(cpu, lpcr);
+    env->large_decr_bits = val;
 }
 
 sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
@@ -511,6 +546,7 @@ sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .set = spapr_cap_set_uint8,
         .type = "int",
         .apply = cap_large_decr_apply,
+        .cpu_apply = cap_large_decr_cpu_apply,
     },
 };
 
diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
index ae51fe754e..cced705e30 100644
--- a/target/ppc/cpu-qom.h
+++ b/target/ppc/cpu-qom.h
@@ -190,6 +190,7 @@ typedef struct PowerPCCPUClass {
 #endif
     const PPCHash64Options *hash64_opts;
     struct ppc_radix_page_info *radix_page_info;
+    uint32_t hdecr_bits;
     void (*init_proc)(CPUPPCState *env);
     int  (*check_pow)(CPUPPCState *env);
     int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx);
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 26604ddf98..8da333e9da 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1171,6 +1171,9 @@ struct CPUPPCState {
     uint32_t tm_vscr;
     uint64_t tm_dscr;
     uint64_t tm_tar;
+
+    /* Large Decrementer */
+    int large_decr_bits;
 };
 
 #define SET_FIT_PERIOD(a_, b_, c_, d_)          \
@@ -1321,10 +1324,10 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env);
 void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value);
 void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value);
 bool ppc_decr_clear_on_delivery(CPUPPCState *env);
-uint32_t cpu_ppc_load_decr (CPUPPCState *env);
-void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value);
-uint32_t cpu_ppc_load_hdecr (CPUPPCState *env);
-void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value);
+target_ulong cpu_ppc_load_decr (CPUPPCState *env);
+void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value);
+target_ulong cpu_ppc_load_hdecr (CPUPPCState *env);
+void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value);
 uint64_t cpu_ppc_load_purr (CPUPPCState *env);
 uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env);
 uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env);
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index c431303eff..a2b1ec5040 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -1109,7 +1109,7 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val)
     case POWERPC_MMU_3_00: /* P9 */
         lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD |
                       (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL |
-                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR |
+                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | LPCR_LD |
                       (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE | LPCR_EEE |
                       LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE | LPCR_TC |
                       LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE | LPCR_HDICE);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 819221f246..b156be4d98 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -7417,7 +7417,7 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
 #if !defined(NO_TIMER_DUMP)
     cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
 #if !defined(CONFIG_USER_ONLY)
-                " DECR %08" PRIu32
+                " DECR " TARGET_FMT_lu
 #endif
                 "\n",
                 cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 58542c0fe0..4e0bf1f47a 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -8926,6 +8926,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
     /* segment page size remain the same */
     pcc->hash64_opts = &ppc_hash64_opts_POWER7;
     pcc->radix_page_info = &POWER9_radix_page_info;
+    pcc->hdecr_bits = 56;
 #endif
     pcc->excp_model = POWERPC_EXCP_POWER9;
     pcc->bus_model = PPC_FLAGS_INPUT_POWER9;
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM
  2019-02-26  3:05 [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER Suraj Jitindar Singh
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG Suraj Jitindar Singh
@ 2019-02-26  3:05 ` Suraj Jitindar Singh
  2019-02-26  3:55   ` David Gibson
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9 Suraj Jitindar Singh
  2019-02-26  3:39 ` [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER David Gibson
  3 siblings, 1 reply; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26  3:05 UTC (permalink / raw)
  To: qemu-ppc; +Cc: david, clg, qemu-devel, Suraj Jitindar Singh

Implement support to allow KVM guests to take advantage of the large
decrementer introduced on POWER9 cpus.

To determine if the host can support the requested large decrementer
size, we check it matches that specified in the ibm,dec-bits device-tree
property. We also need to enable it in KVM by setting the LPCR_LD bit in
the LPCR. Note that to do this we need to try and set the bit, then read
it back to check the host allowed us to set it, if so we can use it but
if we were unable to set it the host cannot support it and we must not
use the large decrementer.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/ppc/spapr_caps.c  | 17 +++++++++++++++--
 target/ppc/kvm.c     | 39 +++++++++++++++++++++++++++++++++++++++
 target/ppc/kvm_ppc.h | 12 ++++++++++++
 3 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 44542fdbb2..e07568fb94 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -440,8 +440,16 @@ static void cap_large_decr_apply(sPAPRMachineState *spapr,
                 pcc->hdecr_bits);
             return;
         }
-    } else {
-        error_setg(errp, "No large decrementer support, try cap-large-decr=0");
+    } else if (kvm_enabled()) {
+        int kvm_nr_bits = kvmppc_get_cap_large_decr();
+
+        if (!kvm_nr_bits) {
+            error_setg(errp, "No large decrementer support, try cap-large-decr=0");
+        } else if (val != kvm_nr_bits) {
+            error_setg(errp,
+                "Large decrementer size unsupported, try -cap-large-decr=%d",
+                kvm_nr_bits);
+        }
     }
 }
 
@@ -452,6 +460,11 @@ static void cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
     CPUPPCState *env = &cpu->env;
     target_ulong lpcr = env->spr[SPR_LPCR];
 
+    if (kvm_enabled()) {
+        if (kvmppc_enable_cap_large_decr(cpu, !!val))
+            error_setg(errp, "No large decrementer support, try cap-large-decr=0");
+    }
+
     if (val)
         lpcr |= LPCR_LD;
     else
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index d01852fe31..3f650c8fc4 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -91,6 +91,7 @@ static int cap_ppc_safe_cache;
 static int cap_ppc_safe_bounds_check;
 static int cap_ppc_safe_indirect_branch;
 static int cap_ppc_nested_kvm_hv;
+static int cap_large_decr;
 
 static uint32_t debug_inst_opcode;
 
@@ -124,6 +125,7 @@ static bool kvmppc_is_pr(KVMState *ks)
 
 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 static void kvmppc_get_cpu_characteristics(KVMState *s);
+static int kvmppc_get_dec_bits(void);
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
@@ -151,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
+    cap_large_decr = kvmppc_get_dec_bits();
     /*
      * Note: setting it to false because there is not such capability
      * in KVM at this moment.
@@ -1927,6 +1930,15 @@ uint64_t kvmppc_get_clockfreq(void)
     return kvmppc_read_int_cpu_dt("clock-frequency");
 }
 
+static int kvmppc_get_dec_bits(void)
+{
+    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
+
+    if (nr_bits > 0)
+        return nr_bits;
+    return 0;
+}
+
 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
  {
      PowerPCCPU *cpu = ppc_env_get_cpu(env);
@@ -2442,6 +2454,33 @@ bool kvmppc_has_cap_spapr_vfio(void)
     return cap_spapr_vfio;
 }
 
+int kvmppc_get_cap_large_decr(void)
+{
+    return cap_large_decr;
+}
+
+int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
+{
+    CPUState *cs = CPU(cpu);
+    uint64_t lpcr;
+
+    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
+    /* Do we need to modify the LPCR? */
+    if (!!(lpcr & LPCR_LD) != !!enable) {
+        if (enable)
+            lpcr |= LPCR_LD;
+        else
+            lpcr &= ~LPCR_LD;
+        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
+        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
+
+        if (!!(lpcr & LPCR_LD) != !!enable)
+            return -1;
+    }
+
+    return 0;
+}
+
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
 {
     uint32_t host_pvr = mfpvr();
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index bdfaa4e70a..a79835bd14 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -64,6 +64,8 @@ int kvmppc_get_cap_safe_bounds_check(void);
 int kvmppc_get_cap_safe_indirect_branch(void);
 bool kvmppc_has_cap_nested_kvm_hv(void);
 int kvmppc_set_cap_nested_kvm_hv(int enable);
+int kvmppc_get_cap_large_decr(void);
+int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
 int kvmppc_enable_hwrng(void);
 int kvmppc_put_books_sregs(PowerPCCPU *cpu);
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
@@ -332,6 +334,16 @@ static inline int kvmppc_set_cap_nested_kvm_hv(int enable)
     return -1;
 }
 
+static inline int kvmppc_get_cap_large_decr(void)
+{
+    return 0;
+}
+
+static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
+{
+    return -1;
+}
+
 static inline int kvmppc_enable_hwrng(void)
 {
     return -1;
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9
  2019-02-26  3:05 [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER Suraj Jitindar Singh
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG Suraj Jitindar Singh
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM Suraj Jitindar Singh
@ 2019-02-26  3:05 ` Suraj Jitindar Singh
  2019-02-26  3:59   ` David Gibson
  2019-02-26  3:39 ` [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER David Gibson
  3 siblings, 1 reply; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26  3:05 UTC (permalink / raw)
  To: qemu-ppc; +Cc: david, clg, qemu-devel, Suraj Jitindar Singh

Enable the large decrementer by default on POWER9 cpu models. The
default value applied is that provided in the cpu class.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
---
 hw/ppc/spapr_caps.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index e07568fb94..f48aa367e3 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -566,11 +566,18 @@ sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr,
                                                const char *cputype)
 {
+    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(object_class_by_name(cputype));
     sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
     sPAPRCapabilities caps;
 
     caps = smc->default_caps;
 
+    caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = pcc->hdecr_bits;
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;
+    }
+
     if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_07,
                                0, spapr->max_compat_pvr)) {
         caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER
  2019-02-26  3:05 [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER Suraj Jitindar Singh
                   ` (2 preceding siblings ...)
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9 Suraj Jitindar Singh
@ 2019-02-26  3:39 ` David Gibson
  2019-02-26  6:26   ` Suraj Jitindar Singh
  3 siblings, 1 reply; 15+ messages in thread
From: David Gibson @ 2019-02-26  3:39 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 5647 bytes --]

On Tue, Feb 26, 2019 at 02:05:28PM +1100, Suraj Jitindar Singh wrote:
> Add spapr_cap SPAPR_CAP_LARGE_DECREMENTER to be used to control the
> availability and size of the large decrementer made available to the
> guest.
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> ---
>  hw/ppc/spapr.c         |  2 ++
>  hw/ppc/spapr_caps.c    | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |  5 ++++-
>  3 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index b6a571b6f1..acf62a2b9f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2077,6 +2077,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_irq_map,
>          &vmstate_spapr_cap_nested_kvm_hv,
>          &vmstate_spapr_dtb,
> +        &vmstate_spapr_cap_large_decr,
>          NULL
>      }
>  };
> @@ -4288,6 +4289,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>      smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;

This looks basically fine, but the name kind of suggests it's a
boolean, whereas it's actually a number of bits.  I wonder if just
calling it "decrementer bits" would be clearer, with it defaulting to
32.

>      spapr_caps_add_properties(smc, &error_abort);
>      smc->irq = &spapr_irq_xics;
>      smc->dr_phb_enabled = true;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 64f98ae68d..1545a02729 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -182,6 +182,34 @@ static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name,
>      spapr->eff.caps[cap->index] = val;
>  }
>  
> +static void spapr_cap_get_uint8(Object *obj, Visitor *v, const char *name,
> +                                void *opaque, Error **errp)
> +{
> +    sPAPRCapabilityInfo *cap = opaque;
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> +    uint8_t val = spapr_get_cap(spapr, cap->index);
> +
> +    visit_type_uint8(v, name, &val, errp);
> +}
> +
> +static void spapr_cap_set_uint8(Object *obj, Visitor *v, const char *name,
> +                                void *opaque, Error **errp)
> +{
> +    sPAPRCapabilityInfo *cap = opaque;
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> +    Error *local_err = NULL;
> +    uint8_t val;
> +
> +    visit_type_uint8(v, name, &val, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +
> +    spapr->cmd_line_caps[cap->index] = true;
> +    spapr->eff.caps[cap->index] = val;
> +}
> +
>  static void cap_htm_apply(sPAPRMachineState *spapr, uint8_t val, Error **errp)
>  {
>      if (!val) {
> @@ -390,6 +418,13 @@ static void cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
>      }
>  }
>  
> +static void cap_large_decr_apply(sPAPRMachineState *spapr,
> +                                 uint8_t val, Error **errp)
> +{
> +    if (val)
> +        error_setg(errp, "No large decrementer support, try cap-large-decr=0");
> +}
> +
>  sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>      [SPAPR_CAP_HTM] = {
>          .name = "htm",
> @@ -468,6 +503,15 @@ sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_nested_kvm_hv_apply,
>      },
> +    [SPAPR_CAP_LARGE_DECREMENTER] = {
> +        .name = "large-decr",
> +        .description = "Size of Large Decrementer for the Guest (bits) 0=disabled",
> +        .index = SPAPR_CAP_LARGE_DECREMENTER,
> +        .get = spapr_cap_get_uint8,
> +        .set = spapr_cap_set_uint8,
> +        .type = "int",
> +        .apply = cap_large_decr_apply,
> +    },
>  };
>  
>  static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr,
> @@ -596,6 +640,7 @@ SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
>  SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>  SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> +SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  
>  void spapr_caps_init(sPAPRMachineState *spapr)
>  {
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 59073a7579..8efc5e0779 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -74,8 +74,10 @@ typedef enum {
>  #define SPAPR_CAP_HPT_MAXPAGESIZE       0x06
>  /* Nested KVM-HV */
>  #define SPAPR_CAP_NESTED_KVM_HV         0x07
> +/* Large Decrementer */
> +#define SPAPR_CAP_LARGE_DECREMENTER     0x08
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_KVM_HV + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_LARGE_DECREMENTER + 1)
>  
>  /*
>   * Capability Values
> @@ -828,6 +830,7 @@ extern const VMStateDescription vmstate_spapr_cap_cfpc;
>  extern const VMStateDescription vmstate_spapr_cap_sbbc;
>  extern const VMStateDescription vmstate_spapr_cap_ibs;
>  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> +extern const VMStateDescription vmstate_spapr_cap_large_decr;
>  
>  static inline uint8_t spapr_get_cap(sPAPRMachineState *spapr, int cap)
>  {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG Suraj Jitindar Singh
@ 2019-02-26  3:53   ` David Gibson
  2019-02-26 23:28     ` Suraj Jitindar Singh
  0 siblings, 1 reply; 15+ messages in thread
From: David Gibson @ 2019-02-26  3:53 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 16410 bytes --]

On Tue, Feb 26, 2019 at 02:05:29PM +1100, Suraj Jitindar Singh wrote:
> Prior to POWER9 the decrementer was a 32-bit register which decremented
> with each tick of the timebase. From POWER9 onwards the decrementer can
> be set to operate in a mode called large decrementer where it acts as a
> n-bit decrementing register which is visible as a 64-bit register, that
> is the value of the decrementer is sign extended to 64 bits (where n is
> implementation dependant).
> 
> The mode in which the decrementer operates is controlled by the LPCR_LD
> bit in the logical paritition control register (LPCR).
> 
> >From POWER9 onwards the HDEC (hypervisor decrementer) was enlarged to
> h-bits, also sign extended to 64 bits (where h is implementation
> dependant). Note this isn't configurable and is always enabled.
> 
> For TCG we allow the user to configure a custom large decrementer size,
> so long as it's at least 32 and less than the size of the HDEC (the
> restrictions imposed by the ISA).
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  hw/ppc/ppc.c                    | 78 ++++++++++++++++++++++++++++-------------
>  hw/ppc/spapr.c                  |  8 +++++
>  hw/ppc/spapr_caps.c             | 38 +++++++++++++++++++-
>  target/ppc/cpu-qom.h            |  1 +
>  target/ppc/cpu.h                | 11 +++---
>  target/ppc/mmu-hash64.c         |  2 +-
>  target/ppc/translate.c          |  2 +-
>  target/ppc/translate_init.inc.c |  1 +
>  8 files changed, 109 insertions(+), 32 deletions(-)
> 
> diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
> index d1e3d4cd20..853afeed6a 100644
> --- a/hw/ppc/ppc.c
> +++ b/hw/ppc/ppc.c
> @@ -744,10 +744,10 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env)
>      return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED);
>  }
>  
> -static inline uint32_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
> +static inline uint64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)

Hrm.  Given how we use this - and how muldiv64 works, wouldn't it make
more sense to have it return int64_t (i.e. signed).

>  {
>      ppc_tb_t *tb_env = env->tb_env;
> -    uint32_t decr;
> +    uint64_t decr;
>      int64_t diff;
>  
>      diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> @@ -758,27 +758,42 @@ static inline uint32_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
>      }  else {
>          decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
>      }
> -    LOG_TB("%s: %08" PRIx32 "\n", __func__, decr);
> +    LOG_TB("%s: %016" PRIx64 "\n", __func__, decr);
>  
>      return decr;
>  }
>  
> -uint32_t cpu_ppc_load_decr (CPUPPCState *env)
> +target_ulong cpu_ppc_load_decr (CPUPPCState *env)
>  {
>      ppc_tb_t *tb_env = env->tb_env;
> +    uint64_t decr;
>  
>      if (kvm_enabled()) {
>          return env->spr[SPR_DECR];
>      }
>  
> -    return _cpu_ppc_load_decr(env, tb_env->decr_next);
> +    decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
> +
> +    /*
> +     * If large decrementer is enabled then the decrementer is signed extened
> +     * to 64 bits, otherwise it is a 32 bit value.
> +     */
> +    if (env->spr[SPR_LPCR] & LPCR_LD)
> +        return decr;
> +    return (uint32_t) decr;
>  }
>  
> -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env)
> +target_ulong cpu_ppc_load_hdecr (CPUPPCState *env)
>  {
>      ppc_tb_t *tb_env = env->tb_env;
> +    uint64_t decr;
>  
> -    return _cpu_ppc_load_decr(env, tb_env->hdecr_next);
> +    decr =  _cpu_ppc_load_decr(env, tb_env->hdecr_next);
> +
> +    /* If POWER9 or later then hdecr is sign extended to 64 bits otherwise 32 */
> +    if (env->mmu_model & POWERPC_MMU_3_00)

You already have a pcc->hdecr_bits.  Wouldn't it make more sense to
check against that than the cpu model directly?

> +        return decr;
> +    return (uint32_t) decr;
>  }
>  
>  uint64_t cpu_ppc_load_purr (CPUPPCState *env)
> @@ -832,13 +847,21 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
>                                   QEMUTimer *timer,
>                                   void (*raise_excp)(void *),
>                                   void (*lower_excp)(PowerPCCPU *),
> -                                 uint32_t decr, uint32_t value)
> +                                 target_ulong decr, target_ulong value,
> +                                 int nr_bits)
>  {
>      CPUPPCState *env = &cpu->env;
>      ppc_tb_t *tb_env = env->tb_env;
>      uint64_t now, next;
> +    bool negative;
> +
> +    /* Truncate value to decr_width and sign extend for simplicity */
> +    value &= ((1ULL << nr_bits) - 1);
> +    negative = !!(value & (1ULL << (nr_bits - 1)));

Could you simplify this by just using
	negative = (int64_t)decr < 0;
before you mask?  Or would that be wrong in some edge case or other?

> +    if (negative)
> +        value |= (0xFFFFFFFFULL << nr_bits);
>  
> -    LOG_TB("%s: %08" PRIx32 " => %08" PRIx32 "\n", __func__,
> +    LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n", __func__,
>                  decr, value);
>  
>      if (kvm_enabled()) {
> @@ -860,15 +883,15 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
>       * an edge interrupt, so raise it here too.
>       */
>      if ((value < 3) ||
> -        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && (value & 0x80000000)) ||
> -        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && (value & 0x80000000)
> -          && !(decr & 0x80000000))) {
> +        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) ||
> +        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative
> +          && !(decr & (1ULL << (nr_bits - 1))))) {
>          (*raise_excp)(cpu);
>          return;
>      }
>  
>      /* On MSB level based systems a 0 for the MSB stops interrupt delivery */
> -    if (!(value & 0x80000000) && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
> +    if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
>          (*lower_excp)(cpu);
>      }
>  
> @@ -881,21 +904,24 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
>      timer_mod(timer, next);
>  }
>  
> -static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, uint32_t decr,
> -                                       uint32_t value)
> +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
> +                                       target_ulong value, int nr_bits)
>  {
>      ppc_tb_t *tb_env = cpu->env.tb_env;
>  
>      __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer,
>                           tb_env->decr_timer->cb, &cpu_ppc_decr_lower, decr,
> -                         value);
> +                         value, nr_bits);
>  }
>  
> -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value)
> +void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value)
>  {
>      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> +    int nr_bits = 32;
> +    if ((env->spr[SPR_LPCR] & LPCR_LD) && (env->large_decr_bits > 32))
> +        nr_bits = env->large_decr_bits;

This would be simpler if you initialized large_decr_bits to 32 on
cpus that don't have large decr, wouldn't it?

>  
> -    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value);
> +    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits);
>  }
>  
>  static void cpu_ppc_decr_cb(void *opaque)
> @@ -905,23 +931,25 @@ static void cpu_ppc_decr_cb(void *opaque)
>      cpu_ppc_decr_excp(cpu);
>  }
>  
> -static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, uint32_t hdecr,
> -                                        uint32_t value)
> +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr,
> +                                        target_ulong value, int nr_bits)
>  {
>      ppc_tb_t *tb_env = cpu->env.tb_env;
>  
>      if (tb_env->hdecr_timer != NULL) {
>          __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer,
>                               tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower,
> -                             hdecr, value);
> +                             hdecr, value, nr_bits);
>      }
>  }
>  
> -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value)
> +void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value)
>  {
>      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +    int nr_bits = (pcc->hdecr_bits > 32) ? pcc->hdecr_bits : 32;

Similarly with hdecr_bits.

> -    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value);
> +    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, nr_bits);
>  }
>  
>  static void cpu_ppc_hdecr_cb(void *opaque)
> @@ -951,8 +979,8 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
>       * if a decrementer exception is pending when it enables msr_ee at startup,
>       * it's not ready to handle it...
>       */
> -    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
> -    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
> +    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
> +    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
>      cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
>  }
>  
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index acf62a2b9f..966bc74e68 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -557,6 +557,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>                            pcc->radix_page_info->count *
>                            sizeof(radix_AP_encodings[0]))));
>      }
> +
> +    /*
> +     * We set this property to let the guest know that it can use the large
> +     * decrementer and its width in bits.
> +     */
> +    if (env->large_decr_bits)
> +        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
> +                              env->large_decr_bits)));
>  }
>  
>  static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 1545a02729..44542fdbb2 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -421,8 +421,43 @@ static void cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
>  static void cap_large_decr_apply(sPAPRMachineState *spapr,
>                                   uint8_t val, Error **errp)
>  {
> -    if (val)
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +
> +    if (!val)
> +        return; /* Disabled by default */
> +
> +    if (tcg_enabled()) {
> +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> +                              spapr->max_compat_pvr)) {

IIUC this is strictly redundant with the check against hdecr_bits,
yes, but will  result in a more helpful error message.  Is that right?

> +            error_setg(errp,
> +                "Large decrementer only supported on POWER9, try -cpu POWER9");
> +            return;
> +        }
> +        if ((val < 32) || (val > pcc->hdecr_bits)) {
> +            error_setg(errp,
> +                "Large decrementer size unsupported, try -cap-large-decr=%d",
> +                pcc->hdecr_bits);
> +            return;
> +        }
> +    } else {
>          error_setg(errp, "No large decrementer support, try cap-large-decr=0");
> +    }
> +}
> +
> +static void cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
> +                                     PowerPCCPU *cpu,
> +                                     uint8_t val, Error **errp)
> +{
> +    CPUPPCState *env = &cpu->env;
> +    target_ulong lpcr = env->spr[SPR_LPCR];
> +
> +    if (val)
> +        lpcr |= LPCR_LD;
> +    else
> +        lpcr &= ~LPCR_LD;
> +    ppc_store_lpcr(cpu, lpcr);
> +    env->large_decr_bits = val;
>  }
>  
>  sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> @@ -511,6 +546,7 @@ sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .set = spapr_cap_set_uint8,
>          .type = "int",
>          .apply = cap_large_decr_apply,
> +        .cpu_apply = cap_large_decr_cpu_apply,
>      },
>  };
>  
> diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
> index ae51fe754e..cced705e30 100644
> --- a/target/ppc/cpu-qom.h
> +++ b/target/ppc/cpu-qom.h
> @@ -190,6 +190,7 @@ typedef struct PowerPCCPUClass {
>  #endif
>      const PPCHash64Options *hash64_opts;
>      struct ppc_radix_page_info *radix_page_info;
> +    uint32_t hdecr_bits;
>      void (*init_proc)(CPUPPCState *env);
>      int  (*check_pow)(CPUPPCState *env);
>      int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx);
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 26604ddf98..8da333e9da 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1171,6 +1171,9 @@ struct CPUPPCState {
>      uint32_t tm_vscr;
>      uint64_t tm_dscr;
>      uint64_t tm_tar;
> +
> +    /* Large Decrementer */
> +    int large_decr_bits;
>  };
>  
>  #define SET_FIT_PERIOD(a_, b_, c_, d_)          \
> @@ -1321,10 +1324,10 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env);
>  void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value);
>  void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value);
>  bool ppc_decr_clear_on_delivery(CPUPPCState *env);
> -uint32_t cpu_ppc_load_decr (CPUPPCState *env);
> -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value);
> -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env);
> -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value);
> +target_ulong cpu_ppc_load_decr (CPUPPCState *env);
> +void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value);
> +target_ulong cpu_ppc_load_hdecr (CPUPPCState *env);
> +void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value);
>  uint64_t cpu_ppc_load_purr (CPUPPCState *env);
>  uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env);
>  uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env);
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index c431303eff..a2b1ec5040 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -1109,7 +1109,7 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val)
>      case POWERPC_MMU_3_00: /* P9 */
>          lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD |
>                        (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL |
> -                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR |
> +                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | LPCR_LD |
>                        (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE | LPCR_EEE |
>                        LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE | LPCR_TC |
>                        LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE | LPCR_HDICE);
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 819221f246..b156be4d98 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -7417,7 +7417,7 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>  #if !defined(NO_TIMER_DUMP)
>      cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
>  #if !defined(CONFIG_USER_ONLY)
> -                " DECR %08" PRIu32
> +                " DECR " TARGET_FMT_lu
>  #endif
>                  "\n",
>                  cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
> diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
> index 58542c0fe0..4e0bf1f47a 100644
> --- a/target/ppc/translate_init.inc.c
> +++ b/target/ppc/translate_init.inc.c
> @@ -8926,6 +8926,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
>      /* segment page size remain the same */
>      pcc->hash64_opts = &ppc_hash64_opts_POWER7;
>      pcc->radix_page_info = &POWER9_radix_page_info;
> +    pcc->hdecr_bits = 56;
>  #endif
>      pcc->excp_model = POWERPC_EXCP_POWER9;
>      pcc->bus_model = PPC_FLAGS_INPUT_POWER9;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM Suraj Jitindar Singh
@ 2019-02-26  3:55   ` David Gibson
  2019-02-26 23:34     ` Suraj Jitindar Singh
  0 siblings, 1 reply; 15+ messages in thread
From: David Gibson @ 2019-02-26  3:55 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 6288 bytes --]

On Tue, Feb 26, 2019 at 02:05:30PM +1100, Suraj Jitindar Singh wrote:
> Implement support to allow KVM guests to take advantage of the large
> decrementer introduced on POWER9 cpus.
> 
> To determine if the host can support the requested large decrementer
> size, we check it matches that specified in the ibm,dec-bits device-tree
> property. We also need to enable it in KVM by setting the LPCR_LD bit in
> the LPCR. Note that to do this we need to try and set the bit, then read
> it back to check the host allowed us to set it, if so we can use it but
> if we were unable to set it the host cannot support it and we must not
> use the large decrementer.
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Although changes might be necessary to match it to things I've
suggested in the earlier patches in the series.

Is the KVM side support for this already merged?  If so, as of when?

> ---
>  hw/ppc/spapr_caps.c  | 17 +++++++++++++++--
>  target/ppc/kvm.c     | 39 +++++++++++++++++++++++++++++++++++++++
>  target/ppc/kvm_ppc.h | 12 ++++++++++++
>  3 files changed, 66 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 44542fdbb2..e07568fb94 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -440,8 +440,16 @@ static void cap_large_decr_apply(sPAPRMachineState *spapr,
>                  pcc->hdecr_bits);
>              return;
>          }
> -    } else {
> -        error_setg(errp, "No large decrementer support, try cap-large-decr=0");
> +    } else if (kvm_enabled()) {
> +        int kvm_nr_bits = kvmppc_get_cap_large_decr();
> +
> +        if (!kvm_nr_bits) {
> +            error_setg(errp, "No large decrementer support, try cap-large-decr=0");
> +        } else if (val != kvm_nr_bits) {
> +            error_setg(errp,
> +                "Large decrementer size unsupported, try -cap-large-decr=%d",
> +                kvm_nr_bits);
> +        }
>      }
>  }
>  
> @@ -452,6 +460,11 @@ static void cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
>      CPUPPCState *env = &cpu->env;
>      target_ulong lpcr = env->spr[SPR_LPCR];
>  
> +    if (kvm_enabled()) {
> +        if (kvmppc_enable_cap_large_decr(cpu, !!val))
> +            error_setg(errp, "No large decrementer support, try cap-large-decr=0");
> +    }
> +
>      if (val)
>          lpcr |= LPCR_LD;
>      else
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index d01852fe31..3f650c8fc4 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -91,6 +91,7 @@ static int cap_ppc_safe_cache;
>  static int cap_ppc_safe_bounds_check;
>  static int cap_ppc_safe_indirect_branch;
>  static int cap_ppc_nested_kvm_hv;
> +static int cap_large_decr;
>  
>  static uint32_t debug_inst_opcode;
>  
> @@ -124,6 +125,7 @@ static bool kvmppc_is_pr(KVMState *ks)
>  
>  static int kvm_ppc_register_host_cpu_type(MachineState *ms);
>  static void kvmppc_get_cpu_characteristics(KVMState *s);
> +static int kvmppc_get_dec_bits(void);
>  
>  int kvm_arch_init(MachineState *ms, KVMState *s)
>  {
> @@ -151,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
>      kvmppc_get_cpu_characteristics(s);
>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
> +    cap_large_decr = kvmppc_get_dec_bits();
>      /*
>       * Note: setting it to false because there is not such capability
>       * in KVM at this moment.
> @@ -1927,6 +1930,15 @@ uint64_t kvmppc_get_clockfreq(void)
>      return kvmppc_read_int_cpu_dt("clock-frequency");
>  }
>  
> +static int kvmppc_get_dec_bits(void)
> +{
> +    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
> +
> +    if (nr_bits > 0)
> +        return nr_bits;
> +    return 0;
> +}
> +
>  static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
>   {
>       PowerPCCPU *cpu = ppc_env_get_cpu(env);
> @@ -2442,6 +2454,33 @@ bool kvmppc_has_cap_spapr_vfio(void)
>      return cap_spapr_vfio;
>  }
>  
> +int kvmppc_get_cap_large_decr(void)
> +{
> +    return cap_large_decr;
> +}
> +
> +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
> +{
> +    CPUState *cs = CPU(cpu);
> +    uint64_t lpcr;
> +
> +    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> +    /* Do we need to modify the LPCR? */
> +    if (!!(lpcr & LPCR_LD) != !!enable) {
> +        if (enable)
> +            lpcr |= LPCR_LD;
> +        else
> +            lpcr &= ~LPCR_LD;
> +        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> +        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> +
> +        if (!!(lpcr & LPCR_LD) != !!enable)
> +            return -1;
> +    }
> +
> +    return 0;
> +}
> +
>  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
>  {
>      uint32_t host_pvr = mfpvr();
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index bdfaa4e70a..a79835bd14 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -64,6 +64,8 @@ int kvmppc_get_cap_safe_bounds_check(void);
>  int kvmppc_get_cap_safe_indirect_branch(void);
>  bool kvmppc_has_cap_nested_kvm_hv(void);
>  int kvmppc_set_cap_nested_kvm_hv(int enable);
> +int kvmppc_get_cap_large_decr(void);
> +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
>  int kvmppc_enable_hwrng(void);
>  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
>  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> @@ -332,6 +334,16 @@ static inline int kvmppc_set_cap_nested_kvm_hv(int enable)
>      return -1;
>  }
>  
> +static inline int kvmppc_get_cap_large_decr(void)
> +{
> +    return 0;
> +}
> +
> +static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
> +{
> +    return -1;
> +}
> +
>  static inline int kvmppc_enable_hwrng(void)
>  {
>      return -1;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9
  2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9 Suraj Jitindar Singh
@ 2019-02-26  3:59   ` David Gibson
  2019-02-26 23:50     ` Suraj Jitindar Singh
  0 siblings, 1 reply; 15+ messages in thread
From: David Gibson @ 2019-02-26  3:59 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1982 bytes --]

On Tue, Feb 26, 2019 at 02:05:31PM +1100, Suraj Jitindar Singh wrote:
> Enable the large decrementer by default on POWER9 cpu models. The
> default value applied is that provided in the cpu class.
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> ---
>  hw/ppc/spapr_caps.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index e07568fb94..f48aa367e3 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -566,11 +566,18 @@ sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>  static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr,
>                                                 const char *cputype)
>  {
> +    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(object_class_by_name(cputype));
>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>      sPAPRCapabilities caps;
>  
>      caps = smc->default_caps;
>  
> +    caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = pcc->hdecr_bits;

So, the rule with default_caps_with_cpu() is that it can reduce the
value from the machine-global default, but never increase it (because
that could change guest visible behaviour for existing machine
versions).

I think the line above will do that.

> +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> +                               0, spapr->max_compat_pvr)) {
> +        caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;

And this part I think is redundant, because hdecr_bits won't be large
for anything pre-POWER9.

> +    }
> +
>      if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_07,
>                                 0, spapr->max_compat_pvr)) {
>          caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER
  2019-02-26  3:39 ` [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER David Gibson
@ 2019-02-26  6:26   ` Suraj Jitindar Singh
  2019-02-26 22:49     ` David Gibson
  0 siblings, 1 reply; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26  6:26 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, clg, qemu-devel

On Tue, 2019-02-26 at 14:39 +1100, David Gibson wrote:
> On Tue, Feb 26, 2019 at 02:05:28PM +1100, Suraj Jitindar Singh wrote:
> > Add spapr_cap SPAPR_CAP_LARGE_DECREMENTER to be used to control the
> > availability and size of the large decrementer made available to
> > the
> > guest.
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > ---
> >  hw/ppc/spapr.c         |  2 ++
> >  hw/ppc/spapr_caps.c    | 45
> > +++++++++++++++++++++++++++++++++++++++++++++
> >  include/hw/ppc/spapr.h |  5 ++++-
> >  3 files changed, 51 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index b6a571b6f1..acf62a2b9f 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2077,6 +2077,7 @@ static const VMStateDescription vmstate_spapr
> > = {
> >          &vmstate_spapr_irq_map,
> >          &vmstate_spapr_cap_nested_kvm_hv,
> >          &vmstate_spapr_dtb,
> > +        &vmstate_spapr_cap_large_decr,
> >          NULL
> >      }
> >  };
> > @@ -4288,6 +4289,7 @@ static void
> > spapr_machine_class_init(ObjectClass *oc, void *data)
> >      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >      smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /*
> > 64kiB */
> >      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] =
> > SPAPR_CAP_OFF;
> > +    smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;
> 
> This looks basically fine, but the name kind of suggests it's a
> boolean, whereas it's actually a number of bits.  I wonder if just
> calling it "decrementer bits" would be clearer, with it defaulting to
> 32.

Yes, except there's a difference between a decrementer with 32 bits and
a large decrementer with 32 bits...

SPAPR_CAP_LARGE_DECR_NR_BITS?

> 
> >      spapr_caps_add_properties(smc, &error_abort);
> >      smc->irq = &spapr_irq_xics;
> >      smc->dr_phb_enabled = true;
> > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > index 64f98ae68d..1545a02729 100644
> > --- a/hw/ppc/spapr_caps.c
> > +++ b/hw/ppc/spapr_caps.c
> > @@ -182,6 +182,34 @@ static void spapr_cap_set_pagesize(Object
> > *obj, Visitor *v, const char *name,
> >      spapr->eff.caps[cap->index] = val;
> >  }
> >  
> > +static void spapr_cap_get_uint8(Object *obj, Visitor *v, const
> > char *name,
> > +                                void *opaque, Error **errp)
> > +{
> > +    sPAPRCapabilityInfo *cap = opaque;
> > +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > +    uint8_t val = spapr_get_cap(spapr, cap->index);
> > +
> > +    visit_type_uint8(v, name, &val, errp);
> > +}
> > +
> > +static void spapr_cap_set_uint8(Object *obj, Visitor *v, const
> > char *name,
> > +                                void *opaque, Error **errp)
> > +{
> > +    sPAPRCapabilityInfo *cap = opaque;
> > +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > +    Error *local_err = NULL;
> > +    uint8_t val;
> > +
> > +    visit_type_uint8(v, name, &val, &local_err);
> > +    if (local_err) {
> > +        error_propagate(errp, local_err);
> > +        return;
> > +    }
> > +
> > +    spapr->cmd_line_caps[cap->index] = true;
> > +    spapr->eff.caps[cap->index] = val;
> > +}
> > +
> >  static void cap_htm_apply(sPAPRMachineState *spapr, uint8_t val,
> > Error **errp)
> >  {
> >      if (!val) {
> > @@ -390,6 +418,13 @@ static void
> > cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
> >      }
> >  }
> >  
> > +static void cap_large_decr_apply(sPAPRMachineState *spapr,
> > +                                 uint8_t val, Error **errp)
> > +{
> > +    if (val)
> > +        error_setg(errp, "No large decrementer support, try cap-
> > large-decr=0");
> > +}
> > +
> >  sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >      [SPAPR_CAP_HTM] = {
> >          .name = "htm",
> > @@ -468,6 +503,15 @@ sPAPRCapabilityInfo
> > capability_table[SPAPR_CAP_NUM] = {
> >          .type = "bool",
> >          .apply = cap_nested_kvm_hv_apply,
> >      },
> > +    [SPAPR_CAP_LARGE_DECREMENTER] = {
> > +        .name = "large-decr",
> > +        .description = "Size of Large Decrementer for the Guest
> > (bits) 0=disabled",
> > +        .index = SPAPR_CAP_LARGE_DECREMENTER,
> > +        .get = spapr_cap_get_uint8,
> > +        .set = spapr_cap_set_uint8,
> > +        .type = "int",
> > +        .apply = cap_large_decr_apply,
> > +    },
> >  };
> >  
> >  static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState
> > *spapr,
> > @@ -596,6 +640,7 @@ SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
> >  SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
> >  SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
> >  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> > +SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> >  
> >  void spapr_caps_init(sPAPRMachineState *spapr)
> >  {
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 59073a7579..8efc5e0779 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -74,8 +74,10 @@ typedef enum {
> >  #define SPAPR_CAP_HPT_MAXPAGESIZE       0x06
> >  /* Nested KVM-HV */
> >  #define SPAPR_CAP_NESTED_KVM_HV         0x07
> > +/* Large Decrementer */
> > +#define SPAPR_CAP_LARGE_DECREMENTER     0x08
> >  /* Num Caps */
> > -#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_KVM_HV +
> > 1)
> > +#define
> > SPAPR_CAP_NUM                   (SPAPR_CAP_LARGE_DECREMENTER + 1)
> >  
> >  /*
> >   * Capability Values
> > @@ -828,6 +830,7 @@ extern const VMStateDescription
> > vmstate_spapr_cap_cfpc;
> >  extern const VMStateDescription vmstate_spapr_cap_sbbc;
> >  extern const VMStateDescription vmstate_spapr_cap_ibs;
> >  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> > +extern const VMStateDescription vmstate_spapr_cap_large_decr;
> >  
> >  static inline uint8_t spapr_get_cap(sPAPRMachineState *spapr, int
> > cap)
> >  {
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER
  2019-02-26  6:26   ` Suraj Jitindar Singh
@ 2019-02-26 22:49     ` David Gibson
  0 siblings, 0 replies; 15+ messages in thread
From: David Gibson @ 2019-02-26 22:49 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2201 bytes --]

On Tue, Feb 26, 2019 at 05:26:10PM +1100, Suraj Jitindar Singh wrote:
> On Tue, 2019-02-26 at 14:39 +1100, David Gibson wrote:
> > On Tue, Feb 26, 2019 at 02:05:28PM +1100, Suraj Jitindar Singh wrote:
> > > Add spapr_cap SPAPR_CAP_LARGE_DECREMENTER to be used to control the
> > > availability and size of the large decrementer made available to
> > > the
> > > guest.
> > > 
> > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > > ---
> > >  hw/ppc/spapr.c         |  2 ++
> > >  hw/ppc/spapr_caps.c    | 45
> > > +++++++++++++++++++++++++++++++++++++++++++++
> > >  include/hw/ppc/spapr.h |  5 ++++-
> > >  3 files changed, 51 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index b6a571b6f1..acf62a2b9f 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -2077,6 +2077,7 @@ static const VMStateDescription vmstate_spapr
> > > = {
> > >          &vmstate_spapr_irq_map,
> > >          &vmstate_spapr_cap_nested_kvm_hv,
> > >          &vmstate_spapr_dtb,
> > > +        &vmstate_spapr_cap_large_decr,
> > >          NULL
> > >      }
> > >  };
> > > @@ -4288,6 +4289,7 @@ static void
> > > spapr_machine_class_init(ObjectClass *oc, void *data)
> > >      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> > >      smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /*
> > > 64kiB */
> > >      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] =
> > > SPAPR_CAP_OFF;
> > > +    smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;
> > 
> > This looks basically fine, but the name kind of suggests it's a
> > boolean, whereas it's actually a number of bits.  I wonder if just
> > calling it "decrementer bits" would be clearer, with it defaulting to
> > 32.
> 
> Yes, except there's a difference between a decrementer with 32 bits and
> a large decrementer with 32 bits...

Ok, and that difference is..?

> SPAPR_CAP_LARGE_DECR_NR_BITS?

That could work.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG
  2019-02-26  3:53   ` David Gibson
@ 2019-02-26 23:28     ` Suraj Jitindar Singh
  2019-02-26 23:39       ` David Gibson
  0 siblings, 1 reply; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26 23:28 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, clg, qemu-devel

On Tue, 2019-02-26 at 14:53 +1100, David Gibson wrote:
> On Tue, Feb 26, 2019 at 02:05:29PM +1100, Suraj Jitindar Singh wrote:
> > Prior to POWER9 the decrementer was a 32-bit register which
> > decremented
> > with each tick of the timebase. From POWER9 onwards the decrementer
> > can
> > be set to operate in a mode called large decrementer where it acts
> > as a
> > n-bit decrementing register which is visible as a 64-bit register,
> > that
> > is the value of the decrementer is sign extended to 64 bits (where
> > n is
> > implementation dependant).
> > 
> > The mode in which the decrementer operates is controlled by the
> > LPCR_LD
> > bit in the logical paritition control register (LPCR).
> > 
> > > From POWER9 onwards the HDEC (hypervisor decrementer) was
> > > enlarged to
> > 
> > h-bits, also sign extended to 64 bits (where h is implementation
> > dependant). Note this isn't configurable and is always enabled.
> > 
> > For TCG we allow the user to configure a custom large decrementer
> > size,
> > so long as it's at least 32 and less than the size of the HDEC (the
> > restrictions imposed by the ISA).
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > ---
> >  hw/ppc/ppc.c                    | 78 ++++++++++++++++++++++++++++-
> > ------------
> >  hw/ppc/spapr.c                  |  8 +++++
> >  hw/ppc/spapr_caps.c             | 38 +++++++++++++++++++-
> >  target/ppc/cpu-qom.h            |  1 +
> >  target/ppc/cpu.h                | 11 +++---
> >  target/ppc/mmu-hash64.c         |  2 +-
> >  target/ppc/translate.c          |  2 +-
> >  target/ppc/translate_init.inc.c |  1 +
> >  8 files changed, 109 insertions(+), 32 deletions(-)
> > 
> > diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
> > index d1e3d4cd20..853afeed6a 100644
> > --- a/hw/ppc/ppc.c
> > +++ b/hw/ppc/ppc.c
> > @@ -744,10 +744,10 @@ bool ppc_decr_clear_on_delivery(CPUPPCState
> > *env)
> >      return ((tb_env->flags & flags) ==
> > PPC_DECR_UNDERFLOW_TRIGGERED);
> >  }
> >  
> > -static inline uint32_t _cpu_ppc_load_decr(CPUPPCState *env,
> > uint64_t next)
> > +static inline uint64_t _cpu_ppc_load_decr(CPUPPCState *env,
> > uint64_t next)
> 
> Hrm.  Given how we use this - and how muldiv64 works, wouldn't it
> make
> more sense to have it return int64_t (i.e. signed).

I mean, functionally it's the same. So sure

> 
> >  {
> >      ppc_tb_t *tb_env = env->tb_env;
> > -    uint32_t decr;
> > +    uint64_t decr;
> >      int64_t diff;
> >  
> >      diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> > @@ -758,27 +758,42 @@ static inline uint32_t
> > _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
> >      }  else {
> >          decr = -muldiv64(-diff, tb_env->decr_freq,
> > NANOSECONDS_PER_SECOND);
> >      }
> > -    LOG_TB("%s: %08" PRIx32 "\n", __func__, decr);
> > +    LOG_TB("%s: %016" PRIx64 "\n", __func__, decr);
> >  
> >      return decr;
> >  }
> >  
> > -uint32_t cpu_ppc_load_decr (CPUPPCState *env)
> > +target_ulong cpu_ppc_load_decr (CPUPPCState *env)
> >  {
> >      ppc_tb_t *tb_env = env->tb_env;
> > +    uint64_t decr;
> >  
> >      if (kvm_enabled()) {
> >          return env->spr[SPR_DECR];
> >      }
> >  
> > -    return _cpu_ppc_load_decr(env, tb_env->decr_next);
> > +    decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
> > +
> > +    /*
> > +     * If large decrementer is enabled then the decrementer is
> > signed extened
> > +     * to 64 bits, otherwise it is a 32 bit value.
> > +     */
> > +    if (env->spr[SPR_LPCR] & LPCR_LD)
> > +        return decr;
> > +    return (uint32_t) decr;
> >  }
> >  
> > -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env)
> > +target_ulong cpu_ppc_load_hdecr (CPUPPCState *env)
> >  {
> >      ppc_tb_t *tb_env = env->tb_env;
> > +    uint64_t decr;
> >  
> > -    return _cpu_ppc_load_decr(env, tb_env->hdecr_next);
> > +    decr =  _cpu_ppc_load_decr(env, tb_env->hdecr_next);
> > +
> > +    /* If POWER9 or later then hdecr is sign extended to 64 bits
> > otherwise 32 */
> > +    if (env->mmu_model & POWERPC_MMU_3_00)
> 
> You already have a pcc->hdecr_bits.  Wouldn't it make more sense to
> check against that than the cpu model directly?

The end result is the same since P9 is the only one with hdecr_bits
defined. So I'll change it if you prefer.

> 
> > +        return decr;
> > +    return (uint32_t) decr;
> >  }
> >  
> >  uint64_t cpu_ppc_load_purr (CPUPPCState *env)
> > @@ -832,13 +847,21 @@ static void __cpu_ppc_store_decr(PowerPCCPU
> > *cpu, uint64_t *nextp,
> >                                   QEMUTimer *timer,
> >                                   void (*raise_excp)(void *),
> >                                   void (*lower_excp)(PowerPCCPU *),
> > -                                 uint32_t decr, uint32_t value)
> > +                                 target_ulong decr, target_ulong
> > value,
> > +                                 int nr_bits)
> >  {
> >      CPUPPCState *env = &cpu->env;
> >      ppc_tb_t *tb_env = env->tb_env;
> >      uint64_t now, next;
> > +    bool negative;
> > +
> > +    /* Truncate value to decr_width and sign extend for simplicity
> > */
> > +    value &= ((1ULL << nr_bits) - 1);
> > +    negative = !!(value & (1ULL << (nr_bits - 1)));
> 
> Could you simplify this by just using
> 	negative = (int64_t)decr < 0;
> before you mask?  Or would that be wrong in some edge case or other?

Value might not be a 64 bit number. It could be 56 bits for example
where the 56th bit being set would indicate that it's negative. So we
need to explicitly check as far as I can tell, unless I've missed
something.

Casting it to a (int64_t) would only make it negative if the 64th bit
was set correct?

But on a 56 bit decrementer you could load -1 as:
0x00FFFFFFFFFFFFFF
Which when cast to a (int64_t) wouldn't be negative.

> 
> > +    if (negative)
> > +        value |= (0xFFFFFFFFULL << nr_bits);
> >  
> > -    LOG_TB("%s: %08" PRIx32 " => %08" PRIx32 "\n", __func__,
> > +    LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n",
> > __func__,
> >                  decr, value);
> >  
> >      if (kvm_enabled()) {
> > @@ -860,15 +883,15 @@ static void __cpu_ppc_store_decr(PowerPCCPU
> > *cpu, uint64_t *nextp,
> >       * an edge interrupt, so raise it here too.
> >       */
> >      if ((value < 3) ||
> > -        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && (value &
> > 0x80000000)) ||
> > -        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && (value
> > & 0x80000000)
> > -          && !(decr & 0x80000000))) {
> > +        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative)
> > ||
> > +        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) &&
> > negative
> > +          && !(decr & (1ULL << (nr_bits - 1))))) {
> >          (*raise_excp)(cpu);
> >          return;
> >      }
> >  
> >      /* On MSB level based systems a 0 for the MSB stops interrupt
> > delivery */
> > -    if (!(value & 0x80000000) && (tb_env->flags &
> > PPC_DECR_UNDERFLOW_LEVEL)) {
> > +    if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
> >          (*lower_excp)(cpu);
> >      }
> >  
> > @@ -881,21 +904,24 @@ static void __cpu_ppc_store_decr(PowerPCCPU
> > *cpu, uint64_t *nextp,
> >      timer_mod(timer, next);
> >  }
> >  
> > -static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, uint32_t
> > decr,
> > -                                       uint32_t value)
> > +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu,
> > target_ulong decr,
> > +                                       target_ulong value, int
> > nr_bits)
> >  {
> >      ppc_tb_t *tb_env = cpu->env.tb_env;
> >  
> >      __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env-
> > >decr_timer,
> >                           tb_env->decr_timer->cb,
> > &cpu_ppc_decr_lower, decr,
> > -                         value);
> > +                         value, nr_bits);
> >  }
> >  
> > -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value)
> > +void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value)
> >  {
> >      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > +    int nr_bits = 32;
> > +    if ((env->spr[SPR_LPCR] & LPCR_LD) && (env->large_decr_bits >
> > 32))
> > +        nr_bits = env->large_decr_bits;
> 
> This would be simpler if you initialized large_decr_bits to 32 on
> cpus that don't have large decr, wouldn't it?

Correct, will do.

> 
> >  
> > -    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value);
> > +    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value,
> > nr_bits);
> >  }
> >  
> >  static void cpu_ppc_decr_cb(void *opaque)
> > @@ -905,23 +931,25 @@ static void cpu_ppc_decr_cb(void *opaque)
> >      cpu_ppc_decr_excp(cpu);
> >  }
> >  
> > -static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, uint32_t
> > hdecr,
> > -                                        uint32_t value)
> > +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu,
> > target_ulong hdecr,
> > +                                        target_ulong value, int
> > nr_bits)
> >  {
> >      ppc_tb_t *tb_env = cpu->env.tb_env;
> >  
> >      if (tb_env->hdecr_timer != NULL) {
> >          __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env-
> > >hdecr_timer,
> >                               tb_env->hdecr_timer->cb,
> > &cpu_ppc_hdecr_lower,
> > -                             hdecr, value);
> > +                             hdecr, value, nr_bits);
> >      }
> >  }
> >  
> > -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value)
> > +void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value)
> >  {
> >      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > +    int nr_bits = (pcc->hdecr_bits > 32) ? pcc->hdecr_bits : 32;
> 
> Similarly with hdecr_bits.

Yep

> 
> > -    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value);
> > +    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value,
> > nr_bits);
> >  }
> >  
> >  static void cpu_ppc_hdecr_cb(void *opaque)
> > @@ -951,8 +979,8 @@ static void cpu_ppc_set_tb_clk (void *opaque,
> > uint32_t freq)
> >       * if a decrementer exception is pending when it enables
> > msr_ee at startup,
> >       * it's not ready to handle it...
> >       */
> > -    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
> > -    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
> > +    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
> > +    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
> >      cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
> >  }
> >  
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index acf62a2b9f..966bc74e68 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -557,6 +557,14 @@ static void spapr_populate_cpu_dt(CPUState
> > *cs, void *fdt, int offset,
> >                            pcc->radix_page_info->count *
> >                            sizeof(radix_AP_encodings[0]))));
> >      }
> > +
> > +    /*
> > +     * We set this property to let the guest know that it can use
> > the large
> > +     * decrementer and its width in bits.
> > +     */
> > +    if (env->large_decr_bits)
> > +        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
> > +                              env->large_decr_bits)));
> >  }
> >  
> >  static void spapr_populate_cpus_dt_node(void *fdt,
> > sPAPRMachineState *spapr)
> > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > index 1545a02729..44542fdbb2 100644
> > --- a/hw/ppc/spapr_caps.c
> > +++ b/hw/ppc/spapr_caps.c
> > @@ -421,8 +421,43 @@ static void
> > cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
> >  static void cap_large_decr_apply(sPAPRMachineState *spapr,
> >                                   uint8_t val, Error **errp)
> >  {
> > -    if (val)
> > +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> > +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > +
> > +    if (!val)
> > +        return; /* Disabled by default */
> > +
> > +    if (tcg_enabled()) {
> > +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> > +                              spapr->max_compat_pvr)) {
> 
> IIUC this is strictly redundant with the check against hdecr_bits,
> yes, but will  result in a more helpful error message.  Is that
> right?

The error message will be more useful.
Also if I go and set hdecr_bits to 32 as suggested above the P9 check
is required because otherwise you could set a 32 bit large decr on
P7/P8.

> 
> > +            error_setg(errp,
> > +                "Large decrementer only supported on POWER9, try
> > -cpu POWER9");
> > +            return;
> > +        }
> > +        if ((val < 32) || (val > pcc->hdecr_bits)) {
> > +            error_setg(errp,
> > +                "Large decrementer size unsupported, try -cap-
> > large-decr=%d",
> > +                pcc->hdecr_bits);
> > +            return;
> > +        }
> > +    } else {
> >          error_setg(errp, "No large decrementer support, try cap-
> > large-decr=0");
> > +    }
> > +}
> > +
> > +static void cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
> > +                                     PowerPCCPU *cpu,
> > +                                     uint8_t val, Error **errp)
> > +{
> > +    CPUPPCState *env = &cpu->env;
> > +    target_ulong lpcr = env->spr[SPR_LPCR];
> > +
> > +    if (val)
> > +        lpcr |= LPCR_LD;
> > +    else
> > +        lpcr &= ~LPCR_LD;
> > +    ppc_store_lpcr(cpu, lpcr);
> > +    env->large_decr_bits = val;
> >  }
> >  
> >  sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> > @@ -511,6 +546,7 @@ sPAPRCapabilityInfo
> > capability_table[SPAPR_CAP_NUM] = {
> >          .set = spapr_cap_set_uint8,
> >          .type = "int",
> >          .apply = cap_large_decr_apply,
> > +        .cpu_apply = cap_large_decr_cpu_apply,
> >      },
> >  };
> >  
> > diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
> > index ae51fe754e..cced705e30 100644
> > --- a/target/ppc/cpu-qom.h
> > +++ b/target/ppc/cpu-qom.h
> > @@ -190,6 +190,7 @@ typedef struct PowerPCCPUClass {
> >  #endif
> >      const PPCHash64Options *hash64_opts;
> >      struct ppc_radix_page_info *radix_page_info;
> > +    uint32_t hdecr_bits;
> >      void (*init_proc)(CPUPPCState *env);
> >      int  (*check_pow)(CPUPPCState *env);
> >      int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx,
> > int mmu_idx);
> > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > index 26604ddf98..8da333e9da 100644
> > --- a/target/ppc/cpu.h
> > +++ b/target/ppc/cpu.h
> > @@ -1171,6 +1171,9 @@ struct CPUPPCState {
> >      uint32_t tm_vscr;
> >      uint64_t tm_dscr;
> >      uint64_t tm_tar;
> > +
> > +    /* Large Decrementer */
> > +    int large_decr_bits;
> >  };
> >  
> >  #define SET_FIT_PERIOD(a_, b_, c_, d_)          \
> > @@ -1321,10 +1324,10 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState
> > *env);
> >  void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value);
> >  void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value);
> >  bool ppc_decr_clear_on_delivery(CPUPPCState *env);
> > -uint32_t cpu_ppc_load_decr (CPUPPCState *env);
> > -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value);
> > -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env);
> > -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value);
> > +target_ulong cpu_ppc_load_decr (CPUPPCState *env);
> > +void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value);
> > +target_ulong cpu_ppc_load_hdecr (CPUPPCState *env);
> > +void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value);
> >  uint64_t cpu_ppc_load_purr (CPUPPCState *env);
> >  uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env);
> >  uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env);
> > diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> > index c431303eff..a2b1ec5040 100644
> > --- a/target/ppc/mmu-hash64.c
> > +++ b/target/ppc/mmu-hash64.c
> > @@ -1109,7 +1109,7 @@ void ppc_store_lpcr(PowerPCCPU *cpu,
> > target_ulong val)
> >      case POWERPC_MMU_3_00: /* P9 */
> >          lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD
> > |
> >                        (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE |
> > LPCR_AIL |
> > -                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR
> > |
> > +                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR
> > | LPCR_LD |
> >                        (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE |
> > LPCR_EEE |
> >                        LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE
> > | LPCR_TC |
> >                        LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE |
> > LPCR_HDICE);
> > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > index 819221f246..b156be4d98 100644
> > --- a/target/ppc/translate.c
> > +++ b/target/ppc/translate.c
> > @@ -7417,7 +7417,7 @@ void ppc_cpu_dump_state(CPUState *cs, FILE
> > *f, fprintf_function cpu_fprintf,
> >  #if !defined(NO_TIMER_DUMP)
> >      cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
> >  #if !defined(CONFIG_USER_ONLY)
> > -                " DECR %08" PRIu32
> > +                " DECR " TARGET_FMT_lu
> >  #endif
> >                  "\n",
> >                  cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
> > diff --git a/target/ppc/translate_init.inc.c
> > b/target/ppc/translate_init.inc.c
> > index 58542c0fe0..4e0bf1f47a 100644
> > --- a/target/ppc/translate_init.inc.c
> > +++ b/target/ppc/translate_init.inc.c
> > @@ -8926,6 +8926,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void
> > *data)
> >      /* segment page size remain the same */
> >      pcc->hash64_opts = &ppc_hash64_opts_POWER7;
> >      pcc->radix_page_info = &POWER9_radix_page_info;
> > +    pcc->hdecr_bits = 56;
> >  #endif
> >      pcc->excp_model = POWERPC_EXCP_POWER9;
> >      pcc->bus_model = PPC_FLAGS_INPUT_POWER9;
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM
  2019-02-26  3:55   ` David Gibson
@ 2019-02-26 23:34     ` Suraj Jitindar Singh
  2019-02-26 23:40       ` David Gibson
  0 siblings, 1 reply; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26 23:34 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, clg, qemu-devel

On Tue, 2019-02-26 at 14:55 +1100, David Gibson wrote:
> On Tue, Feb 26, 2019 at 02:05:30PM +1100, Suraj Jitindar Singh wrote:
> > Implement support to allow KVM guests to take advantage of the
> > large
> > decrementer introduced on POWER9 cpus.
> > 
> > To determine if the host can support the requested large
> > decrementer
> > size, we check it matches that specified in the ibm,dec-bits
> > device-tree
> > property. We also need to enable it in KVM by setting the LPCR_LD
> > bit in
> > the LPCR. Note that to do this we need to try and set the bit, then
> > read
> > it back to check the host allowed us to set it, if so we can use it
> > but
> > if we were unable to set it the host cannot support it and we must
> > not
> > use the large decrementer.
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Although changes might be necessary to match it to things I've
> suggested in the earlier patches in the series.
> 
> Is the KVM side support for this already merged?  If so, as of when?

Yes, as of v4.13

> 
> > ---
> >  hw/ppc/spapr_caps.c  | 17 +++++++++++++++--
> >  target/ppc/kvm.c     | 39 +++++++++++++++++++++++++++++++++++++++
> >  target/ppc/kvm_ppc.h | 12 ++++++++++++
> >  3 files changed, 66 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > index 44542fdbb2..e07568fb94 100644
> > --- a/hw/ppc/spapr_caps.c
> > +++ b/hw/ppc/spapr_caps.c
> > @@ -440,8 +440,16 @@ static void
> > cap_large_decr_apply(sPAPRMachineState *spapr,
> >                  pcc->hdecr_bits);
> >              return;
> >          }
> > -    } else {
> > -        error_setg(errp, "No large decrementer support, try cap-
> > large-decr=0");
> > +    } else if (kvm_enabled()) {
> > +        int kvm_nr_bits = kvmppc_get_cap_large_decr();
> > +
> > +        if (!kvm_nr_bits) {
> > +            error_setg(errp, "No large decrementer support, try
> > cap-large-decr=0");
> > +        } else if (val != kvm_nr_bits) {
> > +            error_setg(errp,
> > +                "Large decrementer size unsupported, try -cap-
> > large-decr=%d",
> > +                kvm_nr_bits);
> > +        }
> >      }
> >  }
> >  
> > @@ -452,6 +460,11 @@ static void
> > cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
> >      CPUPPCState *env = &cpu->env;
> >      target_ulong lpcr = env->spr[SPR_LPCR];
> >  
> > +    if (kvm_enabled()) {
> > +        if (kvmppc_enable_cap_large_decr(cpu, !!val))
> > +            error_setg(errp, "No large decrementer support, try
> > cap-large-decr=0");
> > +    }
> > +
> >      if (val)
> >          lpcr |= LPCR_LD;
> >      else
> > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > index d01852fe31..3f650c8fc4 100644
> > --- a/target/ppc/kvm.c
> > +++ b/target/ppc/kvm.c
> > @@ -91,6 +91,7 @@ static int cap_ppc_safe_cache;
> >  static int cap_ppc_safe_bounds_check;
> >  static int cap_ppc_safe_indirect_branch;
> >  static int cap_ppc_nested_kvm_hv;
> > +static int cap_large_decr;
> >  
> >  static uint32_t debug_inst_opcode;
> >  
> > @@ -124,6 +125,7 @@ static bool kvmppc_is_pr(KVMState *ks)
> >  
> >  static int kvm_ppc_register_host_cpu_type(MachineState *ms);
> >  static void kvmppc_get_cpu_characteristics(KVMState *s);
> > +static int kvmppc_get_dec_bits(void);
> >  
> >  int kvm_arch_init(MachineState *ms, KVMState *s)
> >  {
> > @@ -151,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState
> > *s)
> >      cap_resize_hpt = kvm_vm_check_extension(s,
> > KVM_CAP_SPAPR_RESIZE_HPT);
> >      kvmppc_get_cpu_characteristics(s);
> >      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s,
> > KVM_CAP_PPC_NESTED_HV);
> > +    cap_large_decr = kvmppc_get_dec_bits();
> >      /*
> >       * Note: setting it to false because there is not such
> > capability
> >       * in KVM at this moment.
> > @@ -1927,6 +1930,15 @@ uint64_t kvmppc_get_clockfreq(void)
> >      return kvmppc_read_int_cpu_dt("clock-frequency");
> >  }
> >  
> > +static int kvmppc_get_dec_bits(void)
> > +{
> > +    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
> > +
> > +    if (nr_bits > 0)
> > +        return nr_bits;
> > +    return 0;
> > +}
> > +
> >  static int kvmppc_get_pvinfo(CPUPPCState *env, struct
> > kvm_ppc_pvinfo *pvinfo)
> >   {
> >       PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > @@ -2442,6 +2454,33 @@ bool kvmppc_has_cap_spapr_vfio(void)
> >      return cap_spapr_vfio;
> >  }
> >  
> > +int kvmppc_get_cap_large_decr(void)
> > +{
> > +    return cap_large_decr;
> > +}
> > +
> > +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
> > +{
> > +    CPUState *cs = CPU(cpu);
> > +    uint64_t lpcr;
> > +
> > +    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> > +    /* Do we need to modify the LPCR? */
> > +    if (!!(lpcr & LPCR_LD) != !!enable) {
> > +        if (enable)
> > +            lpcr |= LPCR_LD;
> > +        else
> > +            lpcr &= ~LPCR_LD;
> > +        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> > +        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> > +
> > +        if (!!(lpcr & LPCR_LD) != !!enable)
> > +            return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
> >  {
> >      uint32_t host_pvr = mfpvr();
> > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> > index bdfaa4e70a..a79835bd14 100644
> > --- a/target/ppc/kvm_ppc.h
> > +++ b/target/ppc/kvm_ppc.h
> > @@ -64,6 +64,8 @@ int kvmppc_get_cap_safe_bounds_check(void);
> >  int kvmppc_get_cap_safe_indirect_branch(void);
> >  bool kvmppc_has_cap_nested_kvm_hv(void);
> >  int kvmppc_set_cap_nested_kvm_hv(int enable);
> > +int kvmppc_get_cap_large_decr(void);
> > +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
> >  int kvmppc_enable_hwrng(void);
> >  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
> >  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> > @@ -332,6 +334,16 @@ static inline int
> > kvmppc_set_cap_nested_kvm_hv(int enable)
> >      return -1;
> >  }
> >  
> > +static inline int kvmppc_get_cap_large_decr(void)
> > +{
> > +    return 0;
> > +}
> > +
> > +static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu,
> > int enable)
> > +{
> > +    return -1;
> > +}
> > +
> >  static inline int kvmppc_enable_hwrng(void)
> >  {
> >      return -1;
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG
  2019-02-26 23:28     ` Suraj Jitindar Singh
@ 2019-02-26 23:39       ` David Gibson
  0 siblings, 0 replies; 15+ messages in thread
From: David Gibson @ 2019-02-26 23:39 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 19693 bytes --]

On Wed, Feb 27, 2019 at 10:28:05AM +1100, Suraj Jitindar Singh wrote:
> On Tue, 2019-02-26 at 14:53 +1100, David Gibson wrote:
> > On Tue, Feb 26, 2019 at 02:05:29PM +1100, Suraj Jitindar Singh wrote:
> > > Prior to POWER9 the decrementer was a 32-bit register which
> > > decremented
> > > with each tick of the timebase. From POWER9 onwards the decrementer
> > > can
> > > be set to operate in a mode called large decrementer where it acts
> > > as a
> > > n-bit decrementing register which is visible as a 64-bit register,
> > > that
> > > is the value of the decrementer is sign extended to 64 bits (where
> > > n is
> > > implementation dependant).
> > > 
> > > The mode in which the decrementer operates is controlled by the
> > > LPCR_LD
> > > bit in the logical paritition control register (LPCR).
> > > 
> > > > From POWER9 onwards the HDEC (hypervisor decrementer) was
> > > > enlarged to
> > > 
> > > h-bits, also sign extended to 64 bits (where h is implementation
> > > dependant). Note this isn't configurable and is always enabled.
> > > 
> > > For TCG we allow the user to configure a custom large decrementer
> > > size,
> > > so long as it's at least 32 and less than the size of the HDEC (the
> > > restrictions imposed by the ISA).
> > > 
> > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > > Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > > ---
> > >  hw/ppc/ppc.c                    | 78 ++++++++++++++++++++++++++++-
> > > ------------
> > >  hw/ppc/spapr.c                  |  8 +++++
> > >  hw/ppc/spapr_caps.c             | 38 +++++++++++++++++++-
> > >  target/ppc/cpu-qom.h            |  1 +
> > >  target/ppc/cpu.h                | 11 +++---
> > >  target/ppc/mmu-hash64.c         |  2 +-
> > >  target/ppc/translate.c          |  2 +-
> > >  target/ppc/translate_init.inc.c |  1 +
> > >  8 files changed, 109 insertions(+), 32 deletions(-)
> > > 
> > > diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
> > > index d1e3d4cd20..853afeed6a 100644
> > > --- a/hw/ppc/ppc.c
> > > +++ b/hw/ppc/ppc.c
> > > @@ -744,10 +744,10 @@ bool ppc_decr_clear_on_delivery(CPUPPCState
> > > *env)
> > >      return ((tb_env->flags & flags) ==
> > > PPC_DECR_UNDERFLOW_TRIGGERED);
> > >  }
> > >  
> > > -static inline uint32_t _cpu_ppc_load_decr(CPUPPCState *env,
> > > uint64_t next)
> > > +static inline uint64_t _cpu_ppc_load_decr(CPUPPCState *env,
> > > uint64_t next)
> > 
> > Hrm.  Given how we use this - and how muldiv64 works, wouldn't it
> > make
> > more sense to have it return int64_t (i.e. signed).
> 
> I mean, functionally it's the same. So sure
> 
> > 
> > >  {
> > >      ppc_tb_t *tb_env = env->tb_env;
> > > -    uint32_t decr;
> > > +    uint64_t decr;
> > >      int64_t diff;
> > >  
> > >      diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> > > @@ -758,27 +758,42 @@ static inline uint32_t
> > > _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
> > >      }  else {
> > >          decr = -muldiv64(-diff, tb_env->decr_freq,
> > > NANOSECONDS_PER_SECOND);
> > >      }
> > > -    LOG_TB("%s: %08" PRIx32 "\n", __func__, decr);
> > > +    LOG_TB("%s: %016" PRIx64 "\n", __func__, decr);
> > >  
> > >      return decr;
> > >  }
> > >  
> > > -uint32_t cpu_ppc_load_decr (CPUPPCState *env)
> > > +target_ulong cpu_ppc_load_decr (CPUPPCState *env)
> > >  {
> > >      ppc_tb_t *tb_env = env->tb_env;
> > > +    uint64_t decr;
> > >  
> > >      if (kvm_enabled()) {
> > >          return env->spr[SPR_DECR];
> > >      }
> > >  
> > > -    return _cpu_ppc_load_decr(env, tb_env->decr_next);
> > > +    decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
> > > +
> > > +    /*
> > > +     * If large decrementer is enabled then the decrementer is
> > > signed extened
> > > +     * to 64 bits, otherwise it is a 32 bit value.
> > > +     */
> > > +    if (env->spr[SPR_LPCR] & LPCR_LD)
> > > +        return decr;
> > > +    return (uint32_t) decr;
> > >  }
> > >  
> > > -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env)
> > > +target_ulong cpu_ppc_load_hdecr (CPUPPCState *env)
> > >  {
> > >      ppc_tb_t *tb_env = env->tb_env;
> > > +    uint64_t decr;
> > >  
> > > -    return _cpu_ppc_load_decr(env, tb_env->hdecr_next);
> > > +    decr =  _cpu_ppc_load_decr(env, tb_env->hdecr_next);
> > > +
> > > +    /* If POWER9 or later then hdecr is sign extended to 64 bits
> > > otherwise 32 */
> > > +    if (env->mmu_model & POWERPC_MMU_3_00)
> > 
> > You already have a pcc->hdecr_bits.  Wouldn't it make more sense to
> > check against that than the cpu model directly?
> 
> The end result is the same since P9 is the only one with hdecr_bits
> defined. So I'll change it if you prefer.

I prefer - this way it would need changing again when we get a second
cpu with large hdecr.

> 
> > 
> > > +        return decr;
> > > +    return (uint32_t) decr;
> > >  }
> > >  
> > >  uint64_t cpu_ppc_load_purr (CPUPPCState *env)
> > > @@ -832,13 +847,21 @@ static void __cpu_ppc_store_decr(PowerPCCPU
> > > *cpu, uint64_t *nextp,
> > >                                   QEMUTimer *timer,
> > >                                   void (*raise_excp)(void *),
> > >                                   void (*lower_excp)(PowerPCCPU *),
> > > -                                 uint32_t decr, uint32_t value)
> > > +                                 target_ulong decr, target_ulong
> > > value,
> > > +                                 int nr_bits)
> > >  {
> > >      CPUPPCState *env = &cpu->env;
> > >      ppc_tb_t *tb_env = env->tb_env;
> > >      uint64_t now, next;
> > > +    bool negative;
> > > +
> > > +    /* Truncate value to decr_width and sign extend for simplicity
> > > */
> > > +    value &= ((1ULL << nr_bits) - 1);
> > > +    negative = !!(value & (1ULL << (nr_bits - 1)));
> > 
> > Could you simplify this by just using
> > 	negative = (int64_t)decr < 0;
> > before you mask?  Or would that be wrong in some edge case or other?
> 
> Value might not be a 64 bit number. It could be 56 bits for example
> where the 56th bit being set would indicate that it's negative. So we
> need to explicitly check as far as I can tell, unless I've missed
> something.

Ah, I thought the incoming value would already be sign-extended to
64-bits, but maybe not.

> Casting it to a (int64_t) would only make it negative if the 64th bit
> was set correct?
> 
> But on a 56 bit decrementer you could load -1 as:
> 0x00FFFFFFFFFFFFFF
> Which when cast to a (int64_t) wouldn't be negative.

Ok.

> 
> > 
> > > +    if (negative)
> > > +        value |= (0xFFFFFFFFULL << nr_bits);
> > >  
> > > -    LOG_TB("%s: %08" PRIx32 " => %08" PRIx32 "\n", __func__,
> > > +    LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n",
> > > __func__,
> > >                  decr, value);
> > >  
> > >      if (kvm_enabled()) {
> > > @@ -860,15 +883,15 @@ static void __cpu_ppc_store_decr(PowerPCCPU
> > > *cpu, uint64_t *nextp,
> > >       * an edge interrupt, so raise it here too.
> > >       */
> > >      if ((value < 3) ||
> > > -        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && (value &
> > > 0x80000000)) ||
> > > -        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && (value
> > > & 0x80000000)
> > > -          && !(decr & 0x80000000))) {
> > > +        ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative)
> > > ||
> > > +        ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) &&
> > > negative
> > > +          && !(decr & (1ULL << (nr_bits - 1))))) {
> > >          (*raise_excp)(cpu);
> > >          return;
> > >      }
> > >  
> > >      /* On MSB level based systems a 0 for the MSB stops interrupt
> > > delivery */
> > > -    if (!(value & 0x80000000) && (tb_env->flags &
> > > PPC_DECR_UNDERFLOW_LEVEL)) {
> > > +    if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
> > >          (*lower_excp)(cpu);
> > >      }
> > >  
> > > @@ -881,21 +904,24 @@ static void __cpu_ppc_store_decr(PowerPCCPU
> > > *cpu, uint64_t *nextp,
> > >      timer_mod(timer, next);
> > >  }
> > >  
> > > -static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, uint32_t
> > > decr,
> > > -                                       uint32_t value)
> > > +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu,
> > > target_ulong decr,
> > > +                                       target_ulong value, int
> > > nr_bits)
> > >  {
> > >      ppc_tb_t *tb_env = cpu->env.tb_env;
> > >  
> > >      __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env-
> > > >decr_timer,
> > >                           tb_env->decr_timer->cb,
> > > &cpu_ppc_decr_lower, decr,
> > > -                         value);
> > > +                         value, nr_bits);
> > >  }
> > >  
> > > -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value)
> > > +void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value)
> > >  {
> > >      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > > +    int nr_bits = 32;
> > > +    if ((env->spr[SPR_LPCR] & LPCR_LD) && (env->large_decr_bits >
> > > 32))
> > > +        nr_bits = env->large_decr_bits;
> > 
> > This would be simpler if you initialized large_decr_bits to 32 on
> > cpus that don't have large decr, wouldn't it?
> 
> Correct, will do.
> 
> > 
> > >  
> > > -    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value);
> > > +    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value,
> > > nr_bits);
> > >  }
> > >  
> > >  static void cpu_ppc_decr_cb(void *opaque)
> > > @@ -905,23 +931,25 @@ static void cpu_ppc_decr_cb(void *opaque)
> > >      cpu_ppc_decr_excp(cpu);
> > >  }
> > >  
> > > -static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, uint32_t
> > > hdecr,
> > > -                                        uint32_t value)
> > > +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu,
> > > target_ulong hdecr,
> > > +                                        target_ulong value, int
> > > nr_bits)
> > >  {
> > >      ppc_tb_t *tb_env = cpu->env.tb_env;
> > >  
> > >      if (tb_env->hdecr_timer != NULL) {
> > >          __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env-
> > > >hdecr_timer,
> > >                               tb_env->hdecr_timer->cb,
> > > &cpu_ppc_hdecr_lower,
> > > -                             hdecr, value);
> > > +                             hdecr, value, nr_bits);
> > >      }
> > >  }
> > >  
> > > -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value)
> > > +void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value)
> > >  {
> > >      PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > > +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > +    int nr_bits = (pcc->hdecr_bits > 32) ? pcc->hdecr_bits : 32;
> > 
> > Similarly with hdecr_bits.
> 
> Yep
> 
> > 
> > > -    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value);
> > > +    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value,
> > > nr_bits);
> > >  }
> > >  
> > >  static void cpu_ppc_hdecr_cb(void *opaque)
> > > @@ -951,8 +979,8 @@ static void cpu_ppc_set_tb_clk (void *opaque,
> > > uint32_t freq)
> > >       * if a decrementer exception is pending when it enables
> > > msr_ee at startup,
> > >       * it's not ready to handle it...
> > >       */
> > > -    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
> > > -    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF);
> > > +    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
> > > +    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
> > >      cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
> > >  }
> > >  
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index acf62a2b9f..966bc74e68 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -557,6 +557,14 @@ static void spapr_populate_cpu_dt(CPUState
> > > *cs, void *fdt, int offset,
> > >                            pcc->radix_page_info->count *
> > >                            sizeof(radix_AP_encodings[0]))));
> > >      }
> > > +
> > > +    /*
> > > +     * We set this property to let the guest know that it can use
> > > the large
> > > +     * decrementer and its width in bits.
> > > +     */
> > > +    if (env->large_decr_bits)
> > > +        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
> > > +                              env->large_decr_bits)));
> > >  }
> > >  
> > >  static void spapr_populate_cpus_dt_node(void *fdt,
> > > sPAPRMachineState *spapr)
> > > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > > index 1545a02729..44542fdbb2 100644
> > > --- a/hw/ppc/spapr_caps.c
> > > +++ b/hw/ppc/spapr_caps.c
> > > @@ -421,8 +421,43 @@ static void
> > > cap_nested_kvm_hv_apply(sPAPRMachineState *spapr,
> > >  static void cap_large_decr_apply(sPAPRMachineState *spapr,
> > >                                   uint8_t val, Error **errp)
> > >  {
> > > -    if (val)
> > > +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> > > +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > +
> > > +    if (!val)
> > > +        return; /* Disabled by default */
> > > +
> > > +    if (tcg_enabled()) {
> > > +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> > > +                              spapr->max_compat_pvr)) {
> > 
> > IIUC this is strictly redundant with the check against hdecr_bits,
> > yes, but will  result in a more helpful error message.  Is that
> > right?
> 
> The error message will be more useful.
> Also if I go and set hdecr_bits to 32 as suggested above the P9 check
> is required because otherwise you could set a 32 bit large decr on
> P7/P8.
> 
> > 
> > > +            error_setg(errp,
> > > +                "Large decrementer only supported on POWER9, try
> > > -cpu POWER9");
> > > +            return;
> > > +        }
> > > +        if ((val < 32) || (val > pcc->hdecr_bits)) {
> > > +            error_setg(errp,
> > > +                "Large decrementer size unsupported, try -cap-
> > > large-decr=%d",
> > > +                pcc->hdecr_bits);
> > > +            return;
> > > +        }
> > > +    } else {
> > >          error_setg(errp, "No large decrementer support, try cap-
> > > large-decr=0");
> > > +    }
> > > +}
> > > +
> > > +static void cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
> > > +                                     PowerPCCPU *cpu,
> > > +                                     uint8_t val, Error **errp)
> > > +{
> > > +    CPUPPCState *env = &cpu->env;
> > > +    target_ulong lpcr = env->spr[SPR_LPCR];
> > > +
> > > +    if (val)
> > > +        lpcr |= LPCR_LD;
> > > +    else
> > > +        lpcr &= ~LPCR_LD;
> > > +    ppc_store_lpcr(cpu, lpcr);
> > > +    env->large_decr_bits = val;
> > >  }
> > >  
> > >  sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> > > @@ -511,6 +546,7 @@ sPAPRCapabilityInfo
> > > capability_table[SPAPR_CAP_NUM] = {
> > >          .set = spapr_cap_set_uint8,
> > >          .type = "int",
> > >          .apply = cap_large_decr_apply,
> > > +        .cpu_apply = cap_large_decr_cpu_apply,
> > >      },
> > >  };
> > >  
> > > diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
> > > index ae51fe754e..cced705e30 100644
> > > --- a/target/ppc/cpu-qom.h
> > > +++ b/target/ppc/cpu-qom.h
> > > @@ -190,6 +190,7 @@ typedef struct PowerPCCPUClass {
> > >  #endif
> > >      const PPCHash64Options *hash64_opts;
> > >      struct ppc_radix_page_info *radix_page_info;
> > > +    uint32_t hdecr_bits;
> > >      void (*init_proc)(CPUPPCState *env);
> > >      int  (*check_pow)(CPUPPCState *env);
> > >      int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx,
> > > int mmu_idx);
> > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > > index 26604ddf98..8da333e9da 100644
> > > --- a/target/ppc/cpu.h
> > > +++ b/target/ppc/cpu.h
> > > @@ -1171,6 +1171,9 @@ struct CPUPPCState {
> > >      uint32_t tm_vscr;
> > >      uint64_t tm_dscr;
> > >      uint64_t tm_tar;
> > > +
> > > +    /* Large Decrementer */
> > > +    int large_decr_bits;
> > >  };
> > >  
> > >  #define SET_FIT_PERIOD(a_, b_, c_, d_)          \
> > > @@ -1321,10 +1324,10 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState
> > > *env);
> > >  void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value);
> > >  void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value);
> > >  bool ppc_decr_clear_on_delivery(CPUPPCState *env);
> > > -uint32_t cpu_ppc_load_decr (CPUPPCState *env);
> > > -void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value);
> > > -uint32_t cpu_ppc_load_hdecr (CPUPPCState *env);
> > > -void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value);
> > > +target_ulong cpu_ppc_load_decr (CPUPPCState *env);
> > > +void cpu_ppc_store_decr (CPUPPCState *env, target_ulong value);
> > > +target_ulong cpu_ppc_load_hdecr (CPUPPCState *env);
> > > +void cpu_ppc_store_hdecr (CPUPPCState *env, target_ulong value);
> > >  uint64_t cpu_ppc_load_purr (CPUPPCState *env);
> > >  uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env);
> > >  uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env);
> > > diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> > > index c431303eff..a2b1ec5040 100644
> > > --- a/target/ppc/mmu-hash64.c
> > > +++ b/target/ppc/mmu-hash64.c
> > > @@ -1109,7 +1109,7 @@ void ppc_store_lpcr(PowerPCCPU *cpu,
> > > target_ulong val)
> > >      case POWERPC_MMU_3_00: /* P9 */
> > >          lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD
> > > |
> > >                        (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE |
> > > LPCR_AIL |
> > > -                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR
> > > |
> > > +                      LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR
> > > | LPCR_LD |
> > >                        (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE |
> > > LPCR_EEE |
> > >                        LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE
> > > | LPCR_TC |
> > >                        LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE |
> > > LPCR_HDICE);
> > > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > > index 819221f246..b156be4d98 100644
> > > --- a/target/ppc/translate.c
> > > +++ b/target/ppc/translate.c
> > > @@ -7417,7 +7417,7 @@ void ppc_cpu_dump_state(CPUState *cs, FILE
> > > *f, fprintf_function cpu_fprintf,
> > >  #if !defined(NO_TIMER_DUMP)
> > >      cpu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
> > >  #if !defined(CONFIG_USER_ONLY)
> > > -                " DECR %08" PRIu32
> > > +                " DECR " TARGET_FMT_lu
> > >  #endif
> > >                  "\n",
> > >                  cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
> > > diff --git a/target/ppc/translate_init.inc.c
> > > b/target/ppc/translate_init.inc.c
> > > index 58542c0fe0..4e0bf1f47a 100644
> > > --- a/target/ppc/translate_init.inc.c
> > > +++ b/target/ppc/translate_init.inc.c
> > > @@ -8926,6 +8926,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void
> > > *data)
> > >      /* segment page size remain the same */
> > >      pcc->hash64_opts = &ppc_hash64_opts_POWER7;
> > >      pcc->radix_page_info = &POWER9_radix_page_info;
> > > +    pcc->hdecr_bits = 56;
> > >  #endif
> > >      pcc->excp_model = POWERPC_EXCP_POWER9;
> > >      pcc->bus_model = PPC_FLAGS_INPUT_POWER9;
> > 
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM
  2019-02-26 23:34     ` Suraj Jitindar Singh
@ 2019-02-26 23:40       ` David Gibson
  0 siblings, 0 replies; 15+ messages in thread
From: David Gibson @ 2019-02-26 23:40 UTC (permalink / raw)
  To: Suraj Jitindar Singh; +Cc: qemu-ppc, clg, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 7325 bytes --]

On Wed, Feb 27, 2019 at 10:34:15AM +1100, Suraj Jitindar Singh wrote:
> On Tue, 2019-02-26 at 14:55 +1100, David Gibson wrote:
> > On Tue, Feb 26, 2019 at 02:05:30PM +1100, Suraj Jitindar Singh wrote:
> > > Implement support to allow KVM guests to take advantage of the
> > > large
> > > decrementer introduced on POWER9 cpus.
> > > 
> > > To determine if the host can support the requested large
> > > decrementer
> > > size, we check it matches that specified in the ibm,dec-bits
> > > device-tree
> > > property. We also need to enable it in KVM by setting the LPCR_LD
> > > bit in
> > > the LPCR. Note that to do this we need to try and set the bit, then
> > > read
> > > it back to check the host allowed us to set it, if so we can use it
> > > but
> > > if we were unable to set it the host cannot support it and we must
> > > not
> > > use the large decrementer.
> > > 
> > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > > Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Although changes might be necessary to match it to things I've
> > suggested in the earlier patches in the series.
> > 
> > Is the KVM side support for this already merged?  If so, as of when?
> 
> Yes, as of v4.13

Ah, so already in RHEL8.  Great!

> 
> > 
> > > ---
> > >  hw/ppc/spapr_caps.c  | 17 +++++++++++++++--
> > >  target/ppc/kvm.c     | 39 +++++++++++++++++++++++++++++++++++++++
> > >  target/ppc/kvm_ppc.h | 12 ++++++++++++
> > >  3 files changed, 66 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > > index 44542fdbb2..e07568fb94 100644
> > > --- a/hw/ppc/spapr_caps.c
> > > +++ b/hw/ppc/spapr_caps.c
> > > @@ -440,8 +440,16 @@ static void
> > > cap_large_decr_apply(sPAPRMachineState *spapr,
> > >                  pcc->hdecr_bits);
> > >              return;
> > >          }
> > > -    } else {
> > > -        error_setg(errp, "No large decrementer support, try cap-
> > > large-decr=0");
> > > +    } else if (kvm_enabled()) {
> > > +        int kvm_nr_bits = kvmppc_get_cap_large_decr();
> > > +
> > > +        if (!kvm_nr_bits) {
> > > +            error_setg(errp, "No large decrementer support, try
> > > cap-large-decr=0");
> > > +        } else if (val != kvm_nr_bits) {
> > > +            error_setg(errp,
> > > +                "Large decrementer size unsupported, try -cap-
> > > large-decr=%d",
> > > +                kvm_nr_bits);
> > > +        }
> > >      }
> > >  }
> > >  
> > > @@ -452,6 +460,11 @@ static void
> > > cap_large_decr_cpu_apply(sPAPRMachineState *spapr,
> > >      CPUPPCState *env = &cpu->env;
> > >      target_ulong lpcr = env->spr[SPR_LPCR];
> > >  
> > > +    if (kvm_enabled()) {
> > > +        if (kvmppc_enable_cap_large_decr(cpu, !!val))
> > > +            error_setg(errp, "No large decrementer support, try
> > > cap-large-decr=0");
> > > +    }
> > > +
> > >      if (val)
> > >          lpcr |= LPCR_LD;
> > >      else
> > > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > > index d01852fe31..3f650c8fc4 100644
> > > --- a/target/ppc/kvm.c
> > > +++ b/target/ppc/kvm.c
> > > @@ -91,6 +91,7 @@ static int cap_ppc_safe_cache;
> > >  static int cap_ppc_safe_bounds_check;
> > >  static int cap_ppc_safe_indirect_branch;
> > >  static int cap_ppc_nested_kvm_hv;
> > > +static int cap_large_decr;
> > >  
> > >  static uint32_t debug_inst_opcode;
> > >  
> > > @@ -124,6 +125,7 @@ static bool kvmppc_is_pr(KVMState *ks)
> > >  
> > >  static int kvm_ppc_register_host_cpu_type(MachineState *ms);
> > >  static void kvmppc_get_cpu_characteristics(KVMState *s);
> > > +static int kvmppc_get_dec_bits(void);
> > >  
> > >  int kvm_arch_init(MachineState *ms, KVMState *s)
> > >  {
> > > @@ -151,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState
> > > *s)
> > >      cap_resize_hpt = kvm_vm_check_extension(s,
> > > KVM_CAP_SPAPR_RESIZE_HPT);
> > >      kvmppc_get_cpu_characteristics(s);
> > >      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s,
> > > KVM_CAP_PPC_NESTED_HV);
> > > +    cap_large_decr = kvmppc_get_dec_bits();
> > >      /*
> > >       * Note: setting it to false because there is not such
> > > capability
> > >       * in KVM at this moment.
> > > @@ -1927,6 +1930,15 @@ uint64_t kvmppc_get_clockfreq(void)
> > >      return kvmppc_read_int_cpu_dt("clock-frequency");
> > >  }
> > >  
> > > +static int kvmppc_get_dec_bits(void)
> > > +{
> > > +    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
> > > +
> > > +    if (nr_bits > 0)
> > > +        return nr_bits;
> > > +    return 0;
> > > +}
> > > +
> > >  static int kvmppc_get_pvinfo(CPUPPCState *env, struct
> > > kvm_ppc_pvinfo *pvinfo)
> > >   {
> > >       PowerPCCPU *cpu = ppc_env_get_cpu(env);
> > > @@ -2442,6 +2454,33 @@ bool kvmppc_has_cap_spapr_vfio(void)
> > >      return cap_spapr_vfio;
> > >  }
> > >  
> > > +int kvmppc_get_cap_large_decr(void)
> > > +{
> > > +    return cap_large_decr;
> > > +}
> > > +
> > > +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
> > > +{
> > > +    CPUState *cs = CPU(cpu);
> > > +    uint64_t lpcr;
> > > +
> > > +    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> > > +    /* Do we need to modify the LPCR? */
> > > +    if (!!(lpcr & LPCR_LD) != !!enable) {
> > > +        if (enable)
> > > +            lpcr |= LPCR_LD;
> > > +        else
> > > +            lpcr &= ~LPCR_LD;
> > > +        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> > > +        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
> > > +
> > > +        if (!!(lpcr & LPCR_LD) != !!enable)
> > > +            return -1;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > >  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
> > >  {
> > >      uint32_t host_pvr = mfpvr();
> > > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> > > index bdfaa4e70a..a79835bd14 100644
> > > --- a/target/ppc/kvm_ppc.h
> > > +++ b/target/ppc/kvm_ppc.h
> > > @@ -64,6 +64,8 @@ int kvmppc_get_cap_safe_bounds_check(void);
> > >  int kvmppc_get_cap_safe_indirect_branch(void);
> > >  bool kvmppc_has_cap_nested_kvm_hv(void);
> > >  int kvmppc_set_cap_nested_kvm_hv(int enable);
> > > +int kvmppc_get_cap_large_decr(void);
> > > +int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
> > >  int kvmppc_enable_hwrng(void);
> > >  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
> > >  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> > > @@ -332,6 +334,16 @@ static inline int
> > > kvmppc_set_cap_nested_kvm_hv(int enable)
> > >      return -1;
> > >  }
> > >  
> > > +static inline int kvmppc_get_cap_large_decr(void)
> > > +{
> > > +    return 0;
> > > +}
> > > +
> > > +static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu,
> > > int enable)
> > > +{
> > > +    return -1;
> > > +}
> > > +
> > >  static inline int kvmppc_enable_hwrng(void)
> > >  {
> > >      return -1;
> > 
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9
  2019-02-26  3:59   ` David Gibson
@ 2019-02-26 23:50     ` Suraj Jitindar Singh
  0 siblings, 0 replies; 15+ messages in thread
From: Suraj Jitindar Singh @ 2019-02-26 23:50 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, clg, qemu-devel

On Tue, 2019-02-26 at 14:59 +1100, David Gibson wrote:
> On Tue, Feb 26, 2019 at 02:05:31PM +1100, Suraj Jitindar Singh wrote:
> > Enable the large decrementer by default on POWER9 cpu models. The
> > default value applied is that provided in the cpu class.
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > ---
> >  hw/ppc/spapr_caps.c | 7 +++++++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > index e07568fb94..f48aa367e3 100644
> > --- a/hw/ppc/spapr_caps.c
> > +++ b/hw/ppc/spapr_caps.c
> > @@ -566,11 +566,18 @@ sPAPRCapabilityInfo
> > capability_table[SPAPR_CAP_NUM] = {
> >  static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState
> > *spapr,
> >                                                 const char
> > *cputype)
> >  {
> > +    PowerPCCPUClass *pcc =
> > POWERPC_CPU_CLASS(object_class_by_name(cputype));
> >      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >      sPAPRCapabilities caps;
> >  
> >      caps = smc->default_caps;
> >  
> > +    caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = pcc->hdecr_bits;
> 
> So, the rule with default_caps_with_cpu() is that it can reduce the
> value from the machine-global default, but never increase it (because
> that could change guest visible behaviour for existing machine
> versions).
> 
> I think the line above will do that.

Yep

> 
> > +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> > +                               0, spapr->max_compat_pvr)) {
> > +        caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 0;
> 
> And this part I think is redundant, because hdecr_bits won't be large
> for anything pre-POWER9.
> 
> > +    }
> > +
> >      if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_07,
> >                                 0, spapr->max_compat_pvr)) {
> >          caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-02-26 23:51 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-26  3:05 [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER Suraj Jitindar Singh
2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 2/4] target/ppc: Implement large decrementer support for TCG Suraj Jitindar Singh
2019-02-26  3:53   ` David Gibson
2019-02-26 23:28     ` Suraj Jitindar Singh
2019-02-26 23:39       ` David Gibson
2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 3/4] target/ppc: Implement large decrementer support for KVM Suraj Jitindar Singh
2019-02-26  3:55   ` David Gibson
2019-02-26 23:34     ` Suraj Jitindar Singh
2019-02-26 23:40       ` David Gibson
2019-02-26  3:05 ` [Qemu-devel] [QEMU-PPC] [PATCH 4/4] target/ppc/spapr: Enable the large decrementer by default on POWER9 Suraj Jitindar Singh
2019-02-26  3:59   ` David Gibson
2019-02-26 23:50     ` Suraj Jitindar Singh
2019-02-26  3:39 ` [Qemu-devel] [QEMU-PPC] [PATCH 1/4] target/ppc/spapr: Add SPAPR_CAP_LARGE_DECREMENTER David Gibson
2019-02-26  6:26   ` Suraj Jitindar Singh
2019-02-26 22:49     ` David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.