On Fri, Nov 16, 2018 at 11:57:16AM +0100, Cédric Le Goater wrote:
> This extends the KVM XIVE models to handle the state synchronization
> with KVM, for the monitor usage and for the migration.
>
> The migration priority of the XIVE interrupt controller sPAPRXive is
> raised for KVM. It operates first and orchestrates the capture
> sequence of the states of all the XIVE models. The XIVE sources are
> masked to quiesce the interrupt flow and a XIVE sync is performed to
> stabilize the OS Event Queues. The state of the ENDs is then captured
> by the XIVE interrupt controller model, sPAPRXive, and the state of
> the thread contexts by the thread interrupt presenter model,
> XiveTCTX. When done, a rollback is performed to restore the sources to
> their initial state.
>
> The sPAPRXive 'post_load' method is called from the sPAPR machine,
> after all XIVE device states have been transferred and loaded. First,
> sPAPRXive restores the XIVE routing tables: ENDT and EAT. Next, the
> thread interrupt context registers and the source PQ bits are
> restored.
>
> The get/set operations rely on their KVM counterpart in the host
> kernel which acts as a proxy for OPAL, the host firmware.
>
> Signed-off-by: Cédric Le Goater
> ---
>
> WIP:
>
> If migration occurs when a VCPU is 'ceded', some of the OS event
> notification queues are mapped to the ZERO_PAGE on the receiving
> side, as if the HW had triggered a page fault before the dirty
> page was transferred from the source, or as if we were not using
> the correct page table.
>
>  include/hw/ppc/spapr_xive.h     |   5 +
>  include/hw/ppc/xive.h           |   3 +
>  include/migration/vmstate.h     |   1 +
>  linux-headers/asm-powerpc/kvm.h |  33 +++
>  hw/intc/spapr_xive.c            |  32 +++
>  hw/intc/spapr_xive_kvm.c        | 494 ++++++++++++++++++++++++++++++++
>  hw/intc/xive.c                  |  46 +++
>  hw/ppc/spapr_irq.c              |   2 +-
>  8 files changed, 615 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 9c817bb7ae74..d2517c040958 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -55,12 +55,17 @@ typedef struct sPAPRXiveClass {
>      XiveRouterClass parent_class;
>
>      DeviceRealize parent_realize;
> +
> +    void (*synchronize_state)(sPAPRXive *xive);
> +    int (*pre_save)(sPAPRXive *xsrc);
> +    int (*post_load)(sPAPRXive *xsrc, int version_id);

This should go away if the KVM and non-KVM versions are in the same
object.

>  } sPAPRXiveClass;
>
>  bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
>  bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id);
>
>  /*
>   * sPAPR NVT and END indexing helpers
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 7aaf5a182cb3..c8201462d698 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -309,6 +309,9 @@ typedef struct XiveTCTXClass {
>      DeviceClass parent_class;
>
>      DeviceRealize parent_realize;
> +
> +    void (*synchronize_state)(XiveTCTX *tctx);
> +    int (*post_load)(XiveTCTX *tctx, int version_id);

.. and this too.
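For illustration, a minimal sketch of what the merged, single-object
version could look like, assuming the KVM helpers added later in this
patch stay available and that kvm_irqchip_in_kernel() is the right
runtime test (a hypothetical shape, not a request for this exact
code):

    /* Sketch only: dispatch on a runtime KVM check instead of a
     * subclass hook. spapr_xive_kvm_pre_save() is the KVM helper
     * introduced further down in this patch. */
    static int vmstate_spapr_xive_pre_save(void *opaque)
    {
        sPAPRXive *xive = opaque;

        if (kvm_irqchip_in_kernel()) {
            return spapr_xive_kvm_pre_save(xive);
        }
        return 0;
    }

The same pattern would cover the synchronize_state and post_load
hooks, which is why all three class members could go away.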
>  } XiveTCTXClass;
>
>  /*
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 2b501d04669a..ee2e836cc1c1 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -154,6 +154,7 @@ typedef enum {
>      MIG_PRI_PCI_BUS,            /* Must happen before IOMMU */
>      MIG_PRI_GICV3_ITS,          /* Must happen before PCI devices */
>      MIG_PRI_GICV3,              /* Must happen before the ITS */
> +    MIG_PRI_XIVE_IC,            /* Must happen before all XIVE models */

Ugh.. explicit priority / order levels are a pretty bad code smell.
Usually migration ordering can be handled by getting the object
hierarchy right.

What exactly is the problem you're addressing with this?

>      MIG_PRI_MAX,
>  } MigrationPriority;
>
> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> index f34c971491dd..9d55ade23634 100644
> --- a/linux-headers/asm-powerpc/kvm.h
> +++ b/linux-headers/asm-powerpc/kvm.h

Again, linux-headers need to be split out.

> @@ -480,6 +480,8 @@ struct kvm_ppc_cpu_char {
>  #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
>  #define KVM_REG_PPC_ICP_PPRI_MASK 0xff
>
> +#define KVM_REG_PPC_NVT_STATE (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8d)
> +
>  /* Device control API: PPC-specific devices */
>  #define KVM_DEV_MPIC_GRP_MISC 1
>  #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
> @@ -681,10 +683,41 @@ struct kvm_ppc_cpu_char {
>  #define KVM_DEV_XIVE_GET_TIMA_FD 2
>  #define KVM_DEV_XIVE_VC_BASE 3
>  #define KVM_DEV_XIVE_GRP_SOURCES 2 /* 64-bit source attributes */
> +#define KVM_DEV_XIVE_GRP_SYNC 3 /* 64-bit source attributes */
> +#define KVM_DEV_XIVE_GRP_EAS 4 /* 64-bit eas attributes */
> +#define KVM_DEV_XIVE_GRP_EQ 5 /* 64-bit eq attributes */
>
>  /* Layout of 64-bit XIVE source attribute values */
>  #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
>  #define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1)
>
> +/* Layout of 64-bit eas attribute values */
> +#define KVM_XIVE_EAS_PRIORITY_SHIFT 0
> +#define KVM_XIVE_EAS_PRIORITY_MASK 0x7
> +#define KVM_XIVE_EAS_SERVER_SHIFT 3
> +#define KVM_XIVE_EAS_SERVER_MASK 0xfffffff8ULL
> +#define KVM_XIVE_EAS_MASK_SHIFT 32
> +#define KVM_XIVE_EAS_MASK_MASK 0x100000000ULL
> +#define KVM_XIVE_EAS_EISN_SHIFT 33
> +#define KVM_XIVE_EAS_EISN_MASK 0xfffffffe00000000ULL
> +
> +/* Layout of 64-bit eq attribute */
> +#define KVM_XIVE_EQ_PRIORITY_SHIFT 0
> +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
> +#define KVM_XIVE_EQ_SERVER_SHIFT 3
> +#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL
> +
> +/* Layout of 64-bit eq attribute values */
> +struct kvm_ppc_xive_eq {
> +	__u32 flags;
> +	__u32 qsize;
> +	__u64 qpage;
> +	__u32 qtoggle;
> +	__u32 qindex;
> +};
> +
> +#define KVM_XIVE_EQ_FLAG_ENABLED 0x00000001
> +#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY 0x00000002
> +#define KVM_XIVE_EQ_FLAG_ESCALATE 0x00000004
>
>  #endif /* __LINUX_KVM_POWERPC_H */
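A note for readers following the layout above: the 64-bit EAS
attribute packs priority, server, a mask bit and the EISN into one
word. A sketch of the decode, mirroring what
spapr_xive_kvm_get_eas_state() does further down (kvm_eas_decode is a
hypothetical helper, not part of the patch):

    /* Sketch: unpack a 64-bit KVM EAS attribute value. */
    static void kvm_eas_decode(uint64_t kvm_eas, uint8_t *priority,
                               uint32_t *server, bool *masked,
                               uint32_t *eisn)
    {
        *priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
            KVM_XIVE_EAS_PRIORITY_SHIFT;
        *server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
            KVM_XIVE_EAS_SERVER_SHIFT;
        *masked = kvm_eas & KVM_XIVE_EAS_MASK_MASK;
        *eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >>
            KVM_XIVE_EAS_EISN_SHIFT;
    }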
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index ec85f7e4f88d..c5c0e063dc33 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -27,9 +27,14 @@
>
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>  {
> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
>      int i;
>      uint32_t offset = 0;
>
> +    if (sxc->synchronize_state) {
> +        sxc->synchronize_state(xive);
> +    }
> +
>      monitor_printf(mon, "XIVE Source %08x .. %08x\n", offset,
>                     offset + xive->source.nr_irqs - 1);
>      xive_source_pic_print_info(&xive->source, offset, mon);
> @@ -354,10 +359,37 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
>      },
>  };
>
> +static int vmstate_spapr_xive_pre_save(void *opaque)
> +{
> +    sPAPRXive *xive = SPAPR_XIVE_BASE(opaque);
> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> +
> +    if (sxc->pre_save) {
> +        return sxc->pre_save(xive);
> +    }
> +
> +    return 0;
> +}
> +
> +/* handled at the machine level */
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id)
> +{
> +    sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(xive);
> +
> +    if (sxc->post_load) {
> +        return sxc->post_load(xive, version_id);
> +    }
> +
> +    return 0;
> +}
> +
>  static const VMStateDescription vmstate_spapr_xive_base = {
>      .name = TYPE_SPAPR_XIVE,
>      .version_id = 1,
>      .minimum_version_id = 1,
> +    .pre_save = vmstate_spapr_xive_pre_save,
> +    .post_load = NULL, /* handled at the machine level */
> +    .priority = MIG_PRI_XIVE_IC,
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 767f90826e43..176083c37d61 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -58,6 +58,58 @@ static void kvm_cpu_enable(CPUState *cs)
>  /*
>   * XIVE Thread Interrupt Management context (KVM)
>   */
> +static void xive_tctx_kvm_set_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4];
> +    int ret;
> +
> +    /* word0 and word1 of the OS ring. */
> +    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
> +
> +    /* VP identifier. Only for KVM pr_debug() */
> +    state[1] = *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]);
> +
> +    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno,
> +                         "Could not restore KVM XIVE CPU %ld state",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +    }
> +}
> +
> +static void xive_tctx_kvm_get_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4] = { 0 };
> +    int ret;
> +
> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno,
> +                         "Could not capture KVM XIVE CPU %ld state",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +        return;
> +    }
> +
> +    /* word0 and word1 of the OS ring. */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> +
> +    /*
> +     * KVM also returns word2 containing the VP CAM line value which
> +     * is interesting to print out the VP identifier in the QEMU
> +     * monitor. No need to restore it.
> +     */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> +}
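Nit, not blocking: the casts through (uint64_t *) into tctx->regs[]
are type-punning, which the compiler is allowed to mis-optimize under
strict aliasing. If that ever bites, a memcpy form is equivalent and
alias-safe, something like this sketch:

    /* Sketch: alias-safe copy of OS ring word0/word1 out of the
     * register file, equivalent to the pointer cast above. */
    uint64_t w01;

    memcpy(&w01, &tctx->regs[TM_QW1_OS], sizeof(w01));
    state[0] = w01;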
> +
> +static void xive_tctx_kvm_do_synchronize_state(CPUState *cpu,
> +                                               run_on_cpu_data arg)
> +{
> +    xive_tctx_kvm_get_state(arg.host_ptr, &error_fatal);
> +}
> +
> +static void xive_tctx_kvm_synchronize_state(XiveTCTX *tctx)
> +{
> +    run_on_cpu(tctx->cs, xive_tctx_kvm_do_synchronize_state,
> +               RUN_ON_CPU_HOST_PTR(tctx));
> +}
>
>  static void xive_tctx_kvm_init(XiveTCTX *tctx, Error **errp)
>  {
> @@ -112,6 +164,8 @@ static void xive_tctx_kvm_class_init(ObjectClass *klass, void *data)
>
>      device_class_set_parent_realize(dc, xive_tctx_kvm_realize,
>                                      &xtc->parent_realize);
> +
> +    xtc->synchronize_state = xive_tctx_kvm_synchronize_state;
>  }
>
>  static const TypeInfo xive_tctx_kvm_info = {
> @@ -166,6 +220,34 @@ static void xive_source_kvm_reset(DeviceState *dev)
>  {
>      xive_source_kvm_init(xsrc, &error_fatal);
>  }
>
> +/*
> + * This is used to perform the magic loads on the ESB pages, described
> + * in xive.h.
> + */
> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
> +
> +    /* Prevent the compiler from optimizing away the load */
> +    volatile uint64_t value = *((uint64_t *) addr);
> +
> +    return be64_to_cpu(value) & 0x3;
> +}
> +
> +static void xive_source_kvm_get_state(XiveSource *xsrc)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /* Perform a load without side effect to retrieve the PQ bits */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +
> +        /* and save PQ locally */
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +}
> +
>  static void xive_source_kvm_set_irq(void *opaque, int srcno, int val)
>  {
>      XiveSource *xsrc = opaque;
> @@ -295,6 +377,414 @@ static const TypeInfo xive_source_kvm_info = {
>  /*
>   * sPAPR XIVE Router (KVM)
>   */
> +static int spapr_xive_kvm_set_eq_state(sPAPRXive *xive, CPUState *cs,
> +                                       Error **errp)
> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> +        Error *local_err = NULL;
> +        XiveEND end;
> +        uint8_t end_blk;
> +        uint32_t end_idx;
> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +        uint64_t kvm_eq_idx;
> +
> +        if (!spapr_xive_priority_is_valid(i)) {
> +            continue;
> +        }
> +
> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> +        ret = xive_router_get_end(xrtr, end_blk, end_idx, &end);
> +        if (ret) {
> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> +                       vcpu_id, i);
> +            return ret;
> +        }
> +
> +        if (!(end.w0 & END_W0_VALID)) {
> +            continue;
> +        }
> +
> +        /* Build the KVM state from the local END structure */
> +        kvm_eq.flags = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
> +        kvm_eq.qsize = GETFIELD(END_W0_QSIZE, end.w0) + 12;
> +        kvm_eq.qpage = (((uint64_t)(end.w2 & 0x0fffffff)) << 32) | end.w3;
> +        kvm_eq.qtoggle = GETFIELD(END_W1_GENERATION, end.w1);
> +        kvm_eq.qindex = GETFIELD(END_W1_PAGE_OFF, end.w1);
> +
> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> +            KVM_XIVE_EQ_SERVER_MASK;
> +
> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> +                                &kvm_eq, true, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
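To make the (server, prio) encoding above concrete, a worked example
with made-up numbers (vcpu_id 2, priority 5):

    /* Sketch: priority lands in bits 0..2, server in bits 3..31. */
    uint64_t kvm_eq_idx;

    kvm_eq_idx = (5 << KVM_XIVE_EQ_PRIORITY_SHIFT) &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= (2ULL << KVM_XIVE_EQ_SERVER_SHIFT) &
        KVM_XIVE_EQ_SERVER_MASK;
    /* kvm_eq_idx == (2 << 3) | 5 == 0x15 */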
> +
> +static int spapr_xive_kvm_get_eq_state(sPAPRXive *xive, CPUState *cs,
> +                                       Error **errp)
> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> +        Error *local_err = NULL;
> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +        uint64_t kvm_eq_idx;
> +        XiveEND end = { 0 };
> +        uint8_t end_blk, nvt_blk;
> +        uint32_t end_idx, nvt_idx;
> +
> +        /* Skip priorities reserved for the hypervisor */
> +        if (!spapr_xive_priority_is_valid(i)) {
> +            continue;
> +        }
> +
> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> +            KVM_XIVE_EQ_SERVER_MASK;
> +
> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> +                                &kvm_eq, false, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return ret;
> +        }
> +
> +        if (!(kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED)) {
> +            continue;
> +        }
> +
> +        /* Update the local END structure with the KVM input */
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED) {
> +            end.w0 |= END_W0_VALID | END_W0_ENQUEUE;
> +        }
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY) {
> +            end.w0 |= END_W0_UCOND_NOTIFY;
> +        }
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ESCALATE) {
> +            end.w0 |= END_W0_ESCALATE_CTL;
> +        }
> +        end.w0 |= SETFIELD(END_W0_QSIZE, 0ul, kvm_eq.qsize - 12);
> +
> +        end.w1 = SETFIELD(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
> +            SETFIELD(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
> +        end.w2 = (kvm_eq.qpage >> 32) & 0x0fffffff;
> +        end.w3 = kvm_eq.qpage & 0xffffffff;
> +        end.w4 = 0;
> +        end.w5 = 0;
> +
> +        ret = spapr_xive_cpu_to_nvt(xive, POWERPC_CPU(cs), &nvt_blk, &nvt_idx);
> +        if (ret) {
> +            error_setg(errp, "XIVE: No NVT for CPU %ld", vcpu_id);
> +            return ret;
> +        }
> +
> +        end.w6 = SETFIELD(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
> +            SETFIELD(END_W6_NVT_INDEX, 0ul, nvt_idx);
> +        end.w7 = SETFIELD(END_W7_F0_PRIORITY, 0ul, i);
> +
> +        spapr_xive_cpu_to_end(xive, POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> +        ret = xive_router_set_end(xrtr, end_blk, end_idx, &end);
> +        if (ret) {
> +            error_setg(errp, "XIVE: No END for CPU %ld priority %d",
> +                       vcpu_id, i);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void spapr_xive_kvm_set_eas_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +        uint32_t end_idx;
> +        uint32_t end_blk;
> +        uint32_t eisn;
> +        uint8_t priority;
> +        uint32_t server;
> +        uint64_t kvm_eas;
> +        Error *local_err = NULL;
> +
> +        /* No need to set MASKED EAS, this is the default state after reset */
> +        if (!(eas->w & EAS_VALID) || eas->w & EAS_MASKED) {
> +            continue;
> +        }
> +
> +        end_idx = GETFIELD(EAS_END_INDEX, eas->w);
> +        end_blk = GETFIELD(EAS_END_BLOCK, eas->w);
> +        eisn = GETFIELD(EAS_END_DATA, eas->w);
> +
> +        spapr_xive_end_to_target(xive, end_blk, end_idx, &server, &priority);
> +
> +        kvm_eas = priority << KVM_XIVE_EAS_PRIORITY_SHIFT &
> +            KVM_XIVE_EAS_PRIORITY_MASK;
> +        kvm_eas |= server << KVM_XIVE_EAS_SERVER_SHIFT &
> +            KVM_XIVE_EAS_SERVER_MASK;
> +        kvm_eas |= ((uint64_t)eisn << KVM_XIVE_EAS_EISN_SHIFT) &
> +            KVM_XIVE_EAS_EISN_MASK;
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, true,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
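One detail worth spelling out in the EQ conversion above: the END
encodes the queue size as log2(bytes) - 12, while the KVM attribute
carries the plain log2, hence the +12 / -12 in the two directions. A
toy example with made-up values:

    /* Sketch: a 64KB event queue.
     * KVM side: qsize = 16 (log2 of 65536)
     * END side: W0 QSIZE field = 4 (16 - 12) */
    uint32_t kvm_qsize = 16;
    uint32_t end_qsize = kvm_qsize - 12;  /* value stored in END_W0_QSIZE */

    assert((1ul << (end_qsize + 12)) == 65536);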
> +
> +static void spapr_xive_kvm_get_eas_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +        XiveEAS new_eas;
> +        uint64_t kvm_eas;
> +        uint8_t priority;
> +        uint32_t server;
> +        uint32_t end_idx;
> +        uint8_t end_blk;
> +        uint32_t eisn;
> +        Error *local_err = NULL;
> +
> +        if (!(eas->w & EAS_VALID)) {
> +            continue;
> +        }
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, false,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +
> +        priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
> +            KVM_XIVE_EAS_PRIORITY_SHIFT;
> +        server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
> +            KVM_XIVE_EAS_SERVER_SHIFT;
> +        eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
> +
> +        if (spapr_xive_target_to_end(xive, server, priority, &end_blk,
> +                                     &end_idx)) {
> +            error_setg(errp, "XIVE: invalid tuple CPU %d priority %d", server,
> +                       priority);
> +            return;
> +        }
> +
> +        new_eas.w = EAS_VALID;
> +        if (kvm_eas & KVM_XIVE_EAS_MASK_MASK) {
> +            new_eas.w |= EAS_MASKED;
> +        }
> +
> +        new_eas.w = SETFIELD(EAS_END_INDEX, new_eas.w, end_idx);
> +        new_eas.w = SETFIELD(EAS_END_BLOCK, new_eas.w, end_blk);
> +        new_eas.w = SETFIELD(EAS_END_DATA, new_eas.w, eisn);
> +
> +        *eas = new_eas;
> +    }
> +}
> +
> +static void spapr_xive_kvm_sync_all(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    Error *local_err = NULL;
> +    int i;
> +
> +    /* Sync the KVM source. This reaches the XIVE HW through OPAL */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +
> +        if (!(eas->w & EAS_VALID)) {
> +            continue;
> +        }
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SYNC, i, NULL, true,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
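Also for readers following along: the pre_save/post_load code below
drives the PQ bits entirely through ESB "magic" loads. The offset
arithmetic XIVE_ESB_SET_PQ_00 + (pq << 8) works because, if I read
xive.h right, the four SET_PQ pages sit 256 bytes apart in the
management space, so the saved pq value selects the page that forces
that exact state back. A sketch of the round-trip, using the
xive_esb_read() helper from above:

    /* Sketch: mask source 'i' on the way out, restore it later. */
    uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_01); /* mask, old PQ */
    xive_source_esb_set(xsrc, i, pq);                        /* stash it */

    /* ... later, after the capture (or on the destination) ... */
    pq = xive_source_esb_get(xsrc, i);                       /* fetch it */
    xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));  /* write back */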
> +
> +/*
> + * The sPAPRXive KVM model migration priority is higher to make sure

Higher than what?

> + * its 'pre_save' method runs before all the other XIVE models. It

If the other XIVE components are children of sPAPRXive (which I think
they are or could be), then I believe the parent object's pre_save
will automatically be called first.

> + * orchestrates the capture sequence of the XIVE states in the
> + * following order:
> + *
> + * 1. mask all the sources by setting PQ=01, which returns the
> + *    previous value, and save it.
> + * 2. sync the sources in KVM to stabilize all the queues
> + *    sync the ENDs to make sure END -> VP is fully completed
> + * 3. dump the EAS table
> + * 4. dump the END table
> + * 5. dump the thread context (IPB)
> + *
> + * Rollback to restore the current configuration of the sources
> + */
> +static int spapr_xive_kvm_pre_save(sPAPRXive *xive)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    Error *local_err = NULL;
> +    CPUState *cs;
> +    int i;
> +    int ret = 0;
> +
> +    /* Quiesce the sources, to stop the flow of event notifications */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /*
> +         * Mask and save the ESB PQs locally in the XiveSource object.
> +         */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_01);
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +
> +    /* Sync the sources in KVM */
> +    spapr_xive_kvm_sync_all(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        goto out;
> +    }
> +
> +    /* Grab the EAT (could be done earlier ?) */
> +    spapr_xive_kvm_get_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        goto out;
> +    }
> +
> +    /*
> +     * Grab the ENDs. The EQ index and the toggle bit are what we want
> +     * to capture
> +     */
> +    CPU_FOREACH(cs) {
> +        spapr_xive_kvm_get_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            goto out;
> +        }
> +    }
> +
> +    /* Capture the thread interrupt contexts */
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +        /* TODO: Check if we need to run this under run_on_cpu() ? */
> +        xive_tctx_kvm_get_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            goto out;
> +        }
> +    }
> +
> +    /* All done. */
> +
> +out:
> +    /* Restore the sources to their initial state */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> +        if (xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)) != 0x1) {
> +            error_report("XIVE: IRQ %d has an invalid state", i);
> +        }
> +    }
> +
> +    /*
> +     * The XiveSource and the XiveTCTX states will be collected by
> +     * their respective vmstate handlers afterwards.
> +     */
> +    return ret;
> +}
> +
> +/*
> + * The sPAPRXive 'post_load' method is called by the sPAPR machine,
> + * after all XIVE device states have been transferred and loaded.
> + *
> + * All should be in place when the VCPUs resume execution.
> + */
> +static int spapr_xive_kvm_post_load(sPAPRXive *xive, int version_id)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    Error *local_err = NULL;
> +    CPUState *cs;
> +    int i;
> +
> +    /* Set the ENDs first. The targeting depends on it. */
> +    CPU_FOREACH(cs) {
> +        spapr_xive_kvm_set_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    /* Restore the targeting, if any */
> +    spapr_xive_kvm_set_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return -1;
> +    }
> +
> +    /* Restore the thread interrupt contexts */
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +        xive_tctx_kvm_set_state(XIVE_TCTX_KVM(cpu->intc), &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    /*
> +     * Get the saved state from the XiveSource model and restore the
> +     * PQ bits
> +     */
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> +        xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
> +    }
> +    return 0;
> +}
> +
> +static void spapr_xive_kvm_synchronize_state(sPAPRXive *xive)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    CPUState *cs;
> +
> +    xive_source_kvm_get_state(xsrc);
> +
> +    spapr_xive_kvm_get_eas_state(xive, &error_fatal);
> +
> +    CPU_FOREACH(cs) {
> +        spapr_xive_kvm_get_eq_state(xive, cs, &error_fatal);
> +    }
> +}
>
>  static void spapr_xive_kvm_instance_init(Object *obj)
>  {
> @@ -409,6 +899,10 @@ static void spapr_xive_kvm_class_init(ObjectClass *klass, void *data)
>
>      dc->desc = "sPAPR XIVE KVM Interrupt Controller";
>      dc->unrealize = spapr_xive_kvm_unrealize;
> +
> +    sxc->synchronize_state = spapr_xive_kvm_synchronize_state;
> +    sxc->pre_save = spapr_xive_kvm_pre_save;
> +    sxc->post_load = spapr_xive_kvm_post_load;
>  }
>
>  static const TypeInfo spapr_xive_kvm_info = {
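On the run_on_cpu() TODO in pre_save above: if it turns out the
register accesses do have to run on the target vCPU, the capture loop
could simply reuse the existing wrapper from this file, at the cost
of funnelling errors through error_fatal instead of local_err. A
sketch, not tested:

    /* Sketch: in spapr_xive_kvm_pre_save(), reuse the run_on_cpu()
     * wrapper already defined for the monitor path instead of the
     * direct xive_tctx_kvm_get_state() call. */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        XiveTCTX *tctx = XIVE_TCTX_KVM(cpu->intc);

        run_on_cpu(cs, xive_tctx_kvm_do_synchronize_state,
                   RUN_ON_CPU_HOST_PTR(tctx));
    }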
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 9bb37553c9ec..c9aedecc8216 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -438,9 +438,14 @@ static const struct {
>
>  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>  {
> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>      int i;
>
> +    if (xtc->synchronize_state) {
> +        xtc->synchronize_state(tctx);
> +    }
> +
>      monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
>                    " W2\n", cpu_index);
>
> @@ -552,10 +557,23 @@ static void xive_tctx_base_unrealize(DeviceState *dev, Error **errp)
>      qemu_unregister_reset(xive_tctx_base_reset, dev);
>  }
>
> +static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
> +{
> +    XiveTCTX *tctx = XIVE_TCTX_BASE(opaque);
> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(tctx);
> +
> +    if (xtc->post_load) {
> +        return xtc->post_load(tctx, version_id);
> +    }
> +
> +    return 0;
> +}
> +
>  static const VMStateDescription vmstate_xive_tctx_base = {
>      .name = TYPE_XIVE_TCTX,
>      .version_id = 1,
>      .minimum_version_id = 1,
> +    .post_load = vmstate_xive_tctx_post_load,
>      .fields = (VMStateField[]) {
>          VMSTATE_BUFFER(regs, XiveTCTX),
>          VMSTATE_END_OF_LIST()
> @@ -581,9 +599,37 @@ static const TypeInfo xive_tctx_base_info = {
>      .class_size = sizeof(XiveTCTXClass),
>  };
>
> +static int xive_tctx_post_load(XiveTCTX *tctx, int version_id)
> +{
> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
> +
> +    /*
> +     * When we collect the states from the KVM XIVE irqchip, we set word2
> +     * of the thread context to print out the OS CAM line under the
> +     * QEMU monitor.
> +     *
> +     * This breaks migration on a guest using TCG or not using a KVM
> +     * irqchip. Fix with an extra reset of the thread contexts.
> +     */
> +    if (xrc->reset_tctx) {
> +        xrc->reset_tctx(tctx->xrtr, tctx);
> +    }
> +    return 0;
> +}
> +
> +static void xive_tctx_class_init(ObjectClass *klass, void *data)
> +{
> +    XiveTCTXClass *xtc = XIVE_TCTX_BASE_CLASS(klass);
> +
> +    xtc->post_load = xive_tctx_post_load;
> +}
> +
>  static const TypeInfo xive_tctx_info = {
>      .name = TYPE_XIVE_TCTX,
>      .parent = TYPE_XIVE_TCTX_BASE,
> +    .instance_size = sizeof(XiveTCTX),
> +    .class_init = xive_tctx_class_init,
> +    .class_size = sizeof(XiveTCTXClass),
>  };
>
>  Object *xive_tctx_create(Object *cpu, const char *type, XiveRouter *xrtr,
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 92ef53743b64..6fac6ca70595 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -359,7 +359,7 @@ static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
>
>  static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
>  {
> -    return 0;
> +    return spapr_xive_post_load(spapr->xive, version_id);
>  }
>
>  /*

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson