From mboxrd@z Thu Jan 1 00:00:00 1970 From: =?UTF-8?Q?C=c3=a9dric_Le_Goater?= Subject: Re: [PATCH v3 03/17] KVM: PPC: Book3S HV: XIVE: introduce a new capability KVM_CAP_PPC_IRQ_XIVE Date: Mon, 18 Mar 2019 11:00:42 +0100 Message-ID: References: <20190315120609.25910-1-clg@kaod.org> <20190315120609.25910-4-clg@kaod.org> <20190318001955.GD6874@umbus.fritz.box> Mime-Version: 1.0 Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 8bit Cc: linuxppc-dev@lists.ozlabs.org, Paul Mackerras , kvm@vger.kernel.org, kvm-ppc@vger.kernel.org To: David Gibson Return-path: In-Reply-To: <20190318001955.GD6874@umbus.fritz.box> Content-Language: en-US List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: linuxppc-dev-bounces+glppe-linuxppc-embedded-2=m.gmane.org@lists.ozlabs.org Sender: "Linuxppc-dev" List-Id: kvm.vger.kernel.org On 3/18/19 1:19 AM, David Gibson wrote: > On Fri, Mar 15, 2019 at 01:05:55PM +0100, Cédric Le Goater wrote: >> The user interface exposes a new capability KVM_CAP_PPC_IRQ_XIVE to >> let QEMU connect the vCPU presenters to the XIVE KVM device if >> required. The capability is not advertised for now as the full support >> for the XIVE native exploitation mode is not yet available. When this >> is case, the capability will be advertised on PowerNV Hypervisors >> only. Nested guests (pseries KVM Hypervisor) are not supported. >> >> Internally, the interface to the new KVM device is protected with a >> new interrupt mode: KVMPPC_IRQ_XIVE. >> >> Signed-off-by: Cédric Le Goater > > Reviewed-by: David Gibson > > Though a couple of minor nits are noted below. 
> >> --- >> >> Changes since v2: >> >> - made use of the xive_vp() macro to compute VP identifiers >> - reworked locking in kvmppc_xive_native_connect_vcpu() to fix races >> - stop advertising KVM_CAP_PPC_IRQ_XIVE as support is not fully >> available yet >> >> arch/powerpc/include/asm/kvm_host.h | 1 + >> arch/powerpc/include/asm/kvm_ppc.h | 13 +++ >> arch/powerpc/kvm/book3s_xive.h | 11 ++ >> include/uapi/linux/kvm.h | 1 + >> arch/powerpc/kvm/book3s_xive.c | 88 ++++++++------- >> arch/powerpc/kvm/book3s_xive_native.c | 150 ++++++++++++++++++++++++++ >> arch/powerpc/kvm/powerpc.c | 36 +++++++ >> Documentation/virtual/kvm/api.txt | 9 ++ >> 8 files changed, 268 insertions(+), 41 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h >> index 9f75a75a07f2..eb8581be0ee8 100644 >> --- a/arch/powerpc/include/asm/kvm_host.h >> +++ b/arch/powerpc/include/asm/kvm_host.h >> @@ -448,6 +448,7 @@ struct kvmppc_passthru_irqmap { >> #define KVMPPC_IRQ_DEFAULT 0 >> #define KVMPPC_IRQ_MPIC 1 >> #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ >> +#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */ >> >> #define MMIO_HPTE_CACHE_SIZE 4 >> >> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h >> index 4b72ddde7dc1..1e61877fe147 100644 >> --- a/arch/powerpc/include/asm/kvm_ppc.h >> +++ b/arch/powerpc/include/asm/kvm_ppc.h >> @@ -594,6 +594,14 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, >> int level, bool line_status); >> extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); >> >> +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) >> +{ >> + return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE; >> +} >> + >> +extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, >> + struct kvm_vcpu *vcpu, u32 cpu); >> +extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); >> extern void kvmppc_xive_native_init_module(void); >> extern 
void kvmppc_xive_native_exit_module(void); >> >> @@ -621,6 +629,11 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir >> int level, bool line_status) { return -ENODEV; } >> static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } >> >> +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) >> + { return 0; } >> +static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, >> + struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } >> +static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } >> static inline void kvmppc_xive_native_init_module(void) { } >> static inline void kvmppc_xive_native_exit_module(void) { } >> >> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h >> index a08ae6fd4c51..d366df69b9cb 100644 >> --- a/arch/powerpc/kvm/book3s_xive.h >> +++ b/arch/powerpc/kvm/book3s_xive.h >> @@ -198,6 +198,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp >> return xive->src_blocks[bid]; >> } >> >> +static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) >> +{ >> + return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); >> +} >> + >> /* >> * Mapping between guest priorities and host priorities >> * is as follow. 
>> @@ -248,5 +253,11 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, >> extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); >> extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); >> >> +/* >> + * Common Xive routines for XICS-over-XIVE and XIVE native >> + */ >> +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); >> +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); >> + >> #endif /* CONFIG_KVM_XICS */ >> #endif /* _KVM_PPC_BOOK3S_XICS_H */ >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >> index e6368163d3a0..52bf74a1616e 100644 >> --- a/include/uapi/linux/kvm.h >> +++ b/include/uapi/linux/kvm.h >> @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { >> #define KVM_CAP_ARM_VM_IPA_SIZE 165 >> #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 >> #define KVM_CAP_HYPERV_CPUID 167 >> +#define KVM_CAP_PPC_IRQ_XIVE 168 >> >> #ifdef KVM_CAP_IRQ_ROUTING >> >> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c >> index f78d002f0fe0..e7f1ada1c3de 100644 >> --- a/arch/powerpc/kvm/book3s_xive.c >> +++ b/arch/powerpc/kvm/book3s_xive.c >> @@ -380,11 +380,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) >> return -EBUSY; >> } >> >> -static u32 xive_vp(struct kvmppc_xive *xive, u32 server) >> -{ >> - return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); >> -} >> - >> static u8 xive_lock_and_mask(struct kvmppc_xive *xive, >> struct kvmppc_xive_src_block *sb, >> struct kvmppc_xive_irq_state *state) >> @@ -430,8 +425,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, >> */ >> if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { >> xive_native_configure_irq(hw_num, >> - xive_vp(xive, state->act_server), >> - MASKED, state->number); >> + kvmppc_xive_vp(xive, state->act_server), >> + MASKED, state->number); >> /* set old_p so we can track if an H_EOI was done */ >> state->old_p = true; >> state->old_q 
= false; >> @@ -486,8 +481,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, >> */ >> if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { >> xive_native_configure_irq(hw_num, >> - xive_vp(xive, state->act_server), >> - state->act_priority, state->number); >> + kvmppc_xive_vp(xive, state->act_server), >> + state->act_priority, state->number); >> /* If an EOI is needed, do it here */ >> if (!state->old_p) >> xive_vm_source_eoi(hw_num, xd); >> @@ -563,7 +558,7 @@ static int xive_target_interrupt(struct kvm *kvm, >> kvmppc_xive_select_irq(state, &hw_num, NULL); >> >> return xive_native_configure_irq(hw_num, >> - xive_vp(xive, server), >> + kvmppc_xive_vp(xive, server), >> prio, state->number); >> } >> >> @@ -951,7 +946,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, >> * which is fine for a never started interrupt. >> */ >> xive_native_configure_irq(hw_irq, >> - xive_vp(xive, state->act_server), >> + kvmppc_xive_vp(xive, state->act_server), >> state->act_priority, state->number); >> >> /* >> @@ -1027,7 +1022,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, >> >> /* Reconfigure the IPI */ >> xive_native_configure_irq(state->ipi_number, >> - xive_vp(xive, state->act_server), >> + kvmppc_xive_vp(xive, state->act_server), >> state->act_priority, state->number); >> >> /* >> @@ -1049,7 +1044,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, >> } >> EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); >> >> -static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) >> +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) >> { >> struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> struct kvm *kvm = vcpu->kvm; >> @@ -1166,7 +1161,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, >> xc->xive = xive; >> xc->vcpu = vcpu; >> xc->server_num = cpu; >> - xc->vp_id = xive_vp(xive, cpu); >> + xc->vp_id = kvmppc_xive_vp(xive, cpu); >> xc->mfrr = 0xff; >> xc->valid = true; >> >> @@ 
-1883,6 +1878,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) >> return 0; >> } >> >> +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) >> +{ >> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> + unsigned int i; >> + >> + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { >> + struct xive_q *q = &xc->queues[i]; >> + u32 i0, i1, idx; >> + >> + if (!q->qpage && !xc->esc_virq[i]) >> + continue; >> + >> + seq_printf(m, " [q%d]: ", i); >> + >> + if (q->qpage) { >> + idx = q->idx; >> + i0 = be32_to_cpup(q->qpage + idx); >> + idx = (idx + 1) & q->msk; >> + i1 = be32_to_cpup(q->qpage + idx); >> + seq_printf(m, "T=%d %08x %08x...\n", q->toggle, >> + i0, i1); >> + } >> + if (xc->esc_virq[i]) { >> + struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); >> + struct xive_irq_data *xd = >> + irq_data_get_irq_handler_data(d); >> + u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); >> + >> + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", >> + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', >> + (pq & XIVE_ESB_VAL_Q) ? 
'Q' : 'q', >> + xc->esc_virq[i], pq, xd->eoi_page); >> + seq_puts(m, "\n"); >> + } >> + } >> + return 0; >> +} >> >> static int xive_debug_show(struct seq_file *m, void *private) >> { >> @@ -1908,7 +1940,6 @@ static int xive_debug_show(struct seq_file *m, void *private) >> >> kvm_for_each_vcpu(i, vcpu, kvm) { >> struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> - unsigned int i; >> >> if (!xc) >> continue; >> @@ -1918,33 +1949,8 @@ static int xive_debug_show(struct seq_file *m, void *private) >> xc->server_num, xc->cppr, xc->hw_cppr, >> xc->mfrr, xc->pending, >> xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); >> - for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { >> - struct xive_q *q = &xc->queues[i]; >> - u32 i0, i1, idx; >> >> - if (!q->qpage && !xc->esc_virq[i]) >> - continue; >> - >> - seq_printf(m, " [q%d]: ", i); >> - >> - if (q->qpage) { >> - idx = q->idx; >> - i0 = be32_to_cpup(q->qpage + idx); >> - idx = (idx + 1) & q->msk; >> - i1 = be32_to_cpup(q->qpage + idx); >> - seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); >> - } >> - if (xc->esc_virq[i]) { >> - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); >> - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); >> - u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); >> - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", >> - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', >> - (pq & XIVE_ESB_VAL_Q) ? 
'Q' : 'q', >> - xc->esc_virq[i], pq, xd->eoi_page); >> - seq_printf(m, "\n"); >> - } >> - } >> + kvmppc_xive_debug_show_queues(m, vcpu); >> >> t_rm_h_xirr += xc->stat_rm_h_xirr; >> t_rm_h_ipoll += xc->stat_rm_h_ipoll; >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c >> index 76d45bcc7060..a078f99bc156 100644 >> --- a/arch/powerpc/kvm/book3s_xive_native.c >> +++ b/arch/powerpc/kvm/book3s_xive_native.c >> @@ -31,6 +31,134 @@ >> >> #include "book3s_xive.h" >> >> +static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) >> +{ >> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> + struct xive_q *q = &xc->queues[prio]; >> + >> + xive_native_disable_queue(xc->vp_id, q, prio); >> + if (q->qpage) { >> + put_page(virt_to_page(q->qpage)); >> + q->qpage = NULL; >> + } >> +} >> + >> +void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) >> +{ >> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> + int i; >> + >> + if (!kvmppc_xive_enabled(vcpu)) >> + return; >> + >> + if (!xc) >> + return; >> + >> + pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); >> + >> + /* Ensure no interrupt is still routed to that VP */ >> + xc->valid = false; >> + kvmppc_xive_disable_vcpu_interrupts(vcpu); >> + >> + /* Disable the VP */ >> + xive_native_disable_vp(xc->vp_id); >> + >> + /* Free the queues & associated interrupts */ >> + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { >> + /* Free the escalation irq */ >> + if (xc->esc_virq[i]) { >> + free_irq(xc->esc_virq[i], vcpu); >> + irq_dispose_mapping(xc->esc_virq[i]); >> + kfree(xc->esc_virq_names[i]); >> + xc->esc_virq[i] = 0; > > Should that actually be NO_IRQ on the right (it evaluates to the same > thing). Fine to fix that as a later followup. yes. It would be better to clarify some tests in both KVM devices. 
( NO_IRQ is rarely used and its value is platform specific ) >> + } >> + >> + /* Free the queue */ >> + kvmppc_xive_native_cleanup_queue(vcpu, i); >> + } >> + >> + /* Free the VP */ >> + kfree(xc); >> + >> + /* Cleanup the vcpu */ >> + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; >> + vcpu->arch.xive_vcpu = NULL; >> +} >> + >> +int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, >> + struct kvm_vcpu *vcpu, u32 server_num) >> +{ >> + struct kvmppc_xive *xive = dev->private; >> + struct kvmppc_xive_vcpu *xc = NULL; >> + int rc; >> + >> + pr_devel("native_connect_vcpu(server=%d)\n", server_num); >> + >> + if (dev->ops != &kvm_xive_native_ops) { >> + pr_devel("Wrong ops !\n"); >> + return -EPERM; >> + } >> + if (xive->kvm != vcpu->kvm) >> + return -EPERM; >> + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) >> + return -EBUSY; >> + if (server_num >= KVM_MAX_VCPUS) { >> + pr_devel("Out of bounds !\n"); >> + return -EINVAL; >> + } >> + >> + mutex_lock(&vcpu->kvm->lock); >> + >> + if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { >> + pr_devel("Duplicate !\n"); >> + rc = -EEXIST; >> + goto bail; >> + } >> + >> + xc = kzalloc(sizeof(*xc), GFP_KERNEL); >> + if (!xc) { >> + rc = -ENOMEM; >> + goto bail; >> + } >> + >> + vcpu->arch.xive_vcpu = xc; >> + xc->xive = xive; >> + xc->vcpu = vcpu; >> + xc->server_num = server_num; >> + >> + xc->vp_id = kvmppc_xive_vp(xive, server_num); >> + xc->valid = true; > > This 'valid' field doesn't seem useful, since it's initialized to true > immediately after allocating xc and set to false moments before > free()ing it. ..and I can't see anything that tests it. It is used under the hood by kvmppc_xive_select_target() when configuring the target of a source. I agree it is not very useful as it is redundant with the xc pointer. It seems it is also the case for the XICS-on-XIVE KVM device. > Again, ok to deal with that as a later cleanup. Yes, I would rather do that for both devices at the same time. Thanks, C. 
> >> + vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; >> + >> + rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); >> + if (rc) { >> + pr_err("Failed to get VP info from OPAL: %d\n", rc); >> + goto bail; >> + } >> + >> + /* >> + * Enable the VP first as the single escalation mode will >> + * affect escalation interrupts numbering >> + */ >> + rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); >> + if (rc) { >> + pr_err("Failed to enable VP in OPAL: %d\n", rc); >> + goto bail; >> + } >> + >> + /* Configure VCPU fields for use by assembly push/pull */ >> + vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); >> + vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); >> + >> + /* TODO: reset all queues to a clean state ? */ >> +bail: >> + mutex_unlock(&vcpu->kvm->lock); >> + if (rc) >> + kvmppc_xive_native_cleanup_vcpu(vcpu); >> + >> + return rc; >> +} >> + >> static int kvmppc_xive_native_set_attr(struct kvm_device *dev, >> struct kvm_device_attr *attr) >> { >> @@ -119,10 +247,32 @@ static int xive_native_debug_show(struct seq_file *m, void *private) >> { >> struct kvmppc_xive *xive = m->private; >> struct kvm *kvm = xive->kvm; >> + struct kvm_vcpu *vcpu; >> + unsigned int i; >> >> if (!kvm) >> return 0; >> >> + seq_puts(m, "=========\nVCPU state\n=========\n"); >> + >> + kvm_for_each_vcpu(i, vcpu, kvm) { >> + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> + >> + if (!xc) >> + continue; >> + >> + seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", >> + xc->server_num, >> + vcpu->arch.xive_saved_state.nsr, >> + vcpu->arch.xive_saved_state.cppr, >> + vcpu->arch.xive_saved_state.ipb, >> + vcpu->arch.xive_saved_state.pipr, >> + vcpu->arch.xive_saved_state.w01, >> + (u32) vcpu->arch.xive_cam_word); >> + >> + kvmppc_xive_debug_show_queues(m, vcpu); >> + } >> + >> return 0; >> } >> >> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c >> index 
8c69af10f91d..bb51faf29162 100644 >> --- a/arch/powerpc/kvm/powerpc.c >> +++ b/arch/powerpc/kvm/powerpc.c >> @@ -570,6 +570,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) >> case KVM_CAP_PPC_GET_CPU_CHAR: >> r = 1; >> break; >> +#ifdef CONFIG_KVM_XIVE >> + case KVM_CAP_PPC_IRQ_XIVE: >> + /* >> + * Return false until all the XIVE infrastructure is >> + * in place including support for migration. >> + */ >> + r = 0; >> + break; >> +#endif >> >> case KVM_CAP_PPC_ALLOC_HTAB: >> r = hv_enabled; >> @@ -753,6 +762,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) >> else >> kvmppc_xics_free_icp(vcpu); >> break; >> + case KVMPPC_IRQ_XIVE: >> + kvmppc_xive_native_cleanup_vcpu(vcpu); >> + break; >> } >> >> kvmppc_core_vcpu_free(vcpu); >> @@ -1941,6 +1953,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, >> break; >> } >> #endif /* CONFIG_KVM_XICS */ >> +#ifdef CONFIG_KVM_XIVE >> + case KVM_CAP_PPC_IRQ_XIVE: { >> + struct fd f; >> + struct kvm_device *dev; >> + >> + r = -EBADF; >> + f = fdget(cap->args[0]); >> + if (!f.file) >> + break; >> + >> + r = -ENXIO; >> + if (!xive_enabled()) >> + break; >> + >> + r = -EPERM; >> + dev = kvm_device_from_filp(f.file); >> + if (dev) >> + r = kvmppc_xive_native_connect_vcpu(dev, vcpu, >> + cap->args[1]); >> + >> + fdput(f); >> + break; >> + } >> +#endif /* CONFIG_KVM_XIVE */ >> #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE >> case KVM_CAP_PPC_FWNMI: >> r = -EINVAL; >> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt >> index 356156f5c52d..1db1435769b4 100644 >> --- a/Documentation/virtual/kvm/api.txt >> +++ b/Documentation/virtual/kvm/api.txt >> @@ -4458,6 +4458,15 @@ struct kvm_sync_regs { >> struct kvm_vcpu_events events; >> }; >> >> +6.75 KVM_CAP_PPC_IRQ_XIVE >> + >> +Architectures: ppc >> +Target: vcpu >> +Parameters: args[0] is the XIVE device fd >> + args[1] is the XIVE CPU number (server ID) for this vcpu >> + >> +This capability connects the vcpu to an in-kernel XIVE 
device. >> + >> 7. Capabilities that can be enabled on VMs >> ------------------------------------------ >> > From mboxrd@z Thu Jan 1 00:00:00 1970 From: =?UTF-8?Q?C=c3=a9dric_Le_Goater?= Date: Mon, 18 Mar 2019 10:00:42 +0000 Subject: Re: [PATCH v3 03/17] KVM: PPC: Book3S HV: XIVE: introduce a new capability KVM_CAP_PPC_IRQ_XIVE Message-Id: List-Id: References: <20190315120609.25910-1-clg@kaod.org> <20190315120609.25910-4-clg@kaod.org> <20190318001955.GD6874@umbus.fritz.box> In-Reply-To: <20190318001955.GD6874@umbus.fritz.box> MIME-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable To: David Gibson Cc: linuxppc-dev@lists.ozlabs.org, Paul Mackerras , kvm@vger.kernel.org, kvm-ppc@vger.kernel.org On 3/18/19 1:19 AM, David Gibson wrote: > On Fri, Mar 15, 2019 at 01:05:55PM +0100, C=E9dric Le Goater wrote: >> The user interface exposes a new capability KVM_CAP_PPC_IRQ_XIVE to >> let QEMU connect the vCPU presenters to the XIVE KVM device if >> required. The capability is not advertised for now as the full support >> for the XIVE native exploitation mode is not yet available. When this >> is case, the capability will be advertised on PowerNV Hypervisors >> only. Nested guests (pseries KVM Hypervisor) are not supported. >> >> Internally, the interface to the new KVM device is protected with a >> new interrupt mode: KVMPPC_IRQ_XIVE. >> >> Signed-off-by: C=E9dric Le Goater >=20 > Reviewed-by: David Gibson >=20 > Though a couple of minor nits are noted below. 
>=20 >> --- >> >> Changes since v2: >> >> - made use of the xive_vp() macro to compute VP identifiers >> - reworked locking in kvmppc_xive_native_connect_vcpu() to fix races=20 >> - stop advertising KVM_CAP_PPC_IRQ_XIVE as support is not fully >> available yet=20 >> =20 >> arch/powerpc/include/asm/kvm_host.h | 1 + >> arch/powerpc/include/asm/kvm_ppc.h | 13 +++ >> arch/powerpc/kvm/book3s_xive.h | 11 ++ >> include/uapi/linux/kvm.h | 1 + >> arch/powerpc/kvm/book3s_xive.c | 88 ++++++++------- >> arch/powerpc/kvm/book3s_xive_native.c | 150 ++++++++++++++++++++++++++ >> arch/powerpc/kvm/powerpc.c | 36 +++++++ >> Documentation/virtual/kvm/api.txt | 9 ++ >> 8 files changed, 268 insertions(+), 41 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/= asm/kvm_host.h >> index 9f75a75a07f2..eb8581be0ee8 100644 >> --- a/arch/powerpc/include/asm/kvm_host.h >> +++ b/arch/powerpc/include/asm/kvm_host.h >> @@ -448,6 +448,7 @@ struct kvmppc_passthru_irqmap { >> #define KVMPPC_IRQ_DEFAULT 0 >> #define KVMPPC_IRQ_MPIC 1 >> #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ >> +#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */ >> =20 >> #define MMIO_HPTE_CACHE_SIZE 4 >> =20 >> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/a= sm/kvm_ppc.h >> index 4b72ddde7dc1..1e61877fe147 100644 >> --- a/arch/powerpc/include/asm/kvm_ppc.h >> +++ b/arch/powerpc/include/asm/kvm_ppc.h >> @@ -594,6 +594,14 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int= irq_source_id, u32 irq, >> int level, bool line_status); >> extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); >> =20 >> +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) >> +{ >> + return vcpu->arch.irq_type =3D=3D KVMPPC_IRQ_XIVE; >> +} >> + >> +extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, >> + struct kvm_vcpu *vcpu, u32 cpu); >> +extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); >> extern void 
kvmppc_xive_native_init_module(void); >> extern void kvmppc_xive_native_exit_module(void); >> =20 >> @@ -621,6 +629,11 @@ static inline int kvmppc_xive_set_irq(struct kvm *k= vm, int irq_source_id, u32 ir >> int level, bool line_status) { return -ENODEV; } >> static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } >> =20 >> +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) >> + { return 0; } >> +static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *de= v, >> + struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } >> +static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcp= u) { } >> static inline void kvmppc_xive_native_init_module(void) { } >> static inline void kvmppc_xive_native_exit_module(void) { } >> =20 >> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xi= ve.h >> index a08ae6fd4c51..d366df69b9cb 100644 >> --- a/arch/powerpc/kvm/book3s_xive.h >> +++ b/arch/powerpc/kvm/book3s_xive.h >> @@ -198,6 +198,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_= xive_find_source(struct kvmpp >> return xive->src_blocks[bid]; >> } >> =20 >> +static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) >> +{ >> + return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); >> +} >> + >> /* >> * Mapping between guest priorities and host priorities >> * is as follow. 
>> @@ -248,5 +253,11 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu= , unsigned long server, >> extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cpp= r); >> extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr= ); >> =20 >> +/* >> + * Common Xive routines for XICS-over-XIVE and XIVE native >> + */ >> +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); >> +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *= vcpu); >> + >> #endif /* CONFIG_KVM_XICS */ >> #endif /* _KVM_PPC_BOOK3S_XICS_H */ >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >> index e6368163d3a0..52bf74a1616e 100644 >> --- a/include/uapi/linux/kvm.h >> +++ b/include/uapi/linux/kvm.h >> @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { >> #define KVM_CAP_ARM_VM_IPA_SIZE 165 >> #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 >> #define KVM_CAP_HYPERV_CPUID 167 >> +#define KVM_CAP_PPC_IRQ_XIVE 168 >> =20 >> #ifdef KVM_CAP_IRQ_ROUTING >> =20 >> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xi= ve.c >> index f78d002f0fe0..e7f1ada1c3de 100644 >> --- a/arch/powerpc/kvm/book3s_xive.c >> +++ b/arch/powerpc/kvm/book3s_xive.c >> @@ -380,11 +380,6 @@ static int xive_select_target(struct kvm *kvm, u32 = *server, u8 prio) >> return -EBUSY; >> } >> =20 >> -static u32 xive_vp(struct kvmppc_xive *xive, u32 server) >> -{ >> - return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); >> -} >> - >> static u8 xive_lock_and_mask(struct kvmppc_xive *xive, >> struct kvmppc_xive_src_block *sb, >> struct kvmppc_xive_irq_state *state) >> @@ -430,8 +425,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xiv= e, >> */ >> if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { >> xive_native_configure_irq(hw_num, >> - xive_vp(xive, state->act_server), >> - MASKED, state->number); >> + kvmppc_xive_vp(xive, state->act_server), >> + MASKED, state->number); >> /* set old_p so we can track if an H_EOI was done */ >> 
state->old_p =3D true; >> state->old_q =3D false; >> @@ -486,8 +481,8 @@ static void xive_finish_unmask(struct kvmppc_xive *x= ive, >> */ >> if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { >> xive_native_configure_irq(hw_num, >> - xive_vp(xive, state->act_server), >> - state->act_priority, state->number); >> + kvmppc_xive_vp(xive, state->act_server), >> + state->act_priority, state->number); >> /* If an EOI is needed, do it here */ >> if (!state->old_p) >> xive_vm_source_eoi(hw_num, xd); >> @@ -563,7 +558,7 @@ static int xive_target_interrupt(struct kvm *kvm, >> kvmppc_xive_select_irq(state, &hw_num, NULL); >> =20 >> return xive_native_configure_irq(hw_num, >> - xive_vp(xive, server), >> + kvmppc_xive_vp(xive, server), >> prio, state->number); >> } >> =20 >> @@ -951,7 +946,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned= long guest_irq, >> * which is fine for a never started interrupt. >> */ >> xive_native_configure_irq(hw_irq, >> - xive_vp(xive, state->act_server), >> + kvmppc_xive_vp(xive, state->act_server), >> state->act_priority, state->number); >> =20 >> /* >> @@ -1027,7 +1022,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsign= ed long guest_irq, >> =20 >> /* Reconfigure the IPI */ >> xive_native_configure_irq(state->ipi_number, >> - xive_vp(xive, state->act_server), >> + kvmppc_xive_vp(xive, state->act_server), >> state->act_priority, state->number); >> =20 >> /* >> @@ -1049,7 +1044,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsign= ed long guest_irq, >> } >> EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); >> =20 >> -static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) >> +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) >> { >> struct kvmppc_xive_vcpu *xc =3D vcpu->arch.xive_vcpu; >> struct kvm *kvm =3D vcpu->kvm; >> @@ -1166,7 +1161,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *de= v, >> xc->xive =3D xive; >> xc->vcpu =3D vcpu; >> xc->server_num =3D cpu; >> - xc->vp_id =3D xive_vp(xive, cpu); >> + 
xc->vp_id =3D kvmppc_xive_vp(xive, cpu); >> xc->mfrr =3D 0xff; >> xc->valid =3D true; >> =20 >> @@ -1883,6 +1878,43 @@ static int kvmppc_xive_create(struct kvm_device *= dev, u32 type) >> return 0; >> } >> =20 >> +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *= vcpu) >> +{ >> + struct kvmppc_xive_vcpu *xc =3D vcpu->arch.xive_vcpu; >> + unsigned int i; >> + >> + for (i =3D 0; i < KVMPPC_XIVE_Q_COUNT; i++) { >> + struct xive_q *q =3D &xc->queues[i]; >> + u32 i0, i1, idx; >> + >> + if (!q->qpage && !xc->esc_virq[i]) >> + continue; >> + >> + seq_printf(m, " [q%d]: ", i); >> + >> + if (q->qpage) { >> + idx =3D q->idx; >> + i0 =3D be32_to_cpup(q->qpage + idx); >> + idx =3D (idx + 1) & q->msk; >> + i1 =3D be32_to_cpup(q->qpage + idx); >> + seq_printf(m, "T=3D%d %08x %08x...\n", q->toggle, >> + i0, i1); >> + } >> + if (xc->esc_virq[i]) { >> + struct irq_data *d =3D irq_get_irq_data(xc->esc_virq[i]); >> + struct xive_irq_data *xd =3D >> + irq_data_get_irq_handler_data(d); >> + u64 pq =3D xive_vm_esb_load(xd, XIVE_ESB_GET); >> + >> + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", >> + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', >> + (pq & XIVE_ESB_VAL_Q) ? 
'Q' : 'q', >> + xc->esc_virq[i], pq, xd->eoi_page); >> + seq_puts(m, "\n"); >> + } >> + } >> + return 0; >> +} >> =20 >> static int xive_debug_show(struct seq_file *m, void *private) >> { >> @@ -1908,7 +1940,6 @@ static int xive_debug_show(struct seq_file *m, voi= d *private) >> =20 >> kvm_for_each_vcpu(i, vcpu, kvm) { >> struct kvmppc_xive_vcpu *xc =3D vcpu->arch.xive_vcpu; >> - unsigned int i; >> =20 >> if (!xc) >> continue; >> @@ -1918,33 +1949,8 @@ static int xive_debug_show(struct seq_file *m, vo= id *private) >> xc->server_num, xc->cppr, xc->hw_cppr, >> xc->mfrr, xc->pending, >> xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); >> - for (i =3D 0; i < KVMPPC_XIVE_Q_COUNT; i++) { >> - struct xive_q *q =3D &xc->queues[i]; >> - u32 i0, i1, idx; >> =20 >> - if (!q->qpage && !xc->esc_virq[i]) >> - continue; >> - >> - seq_printf(m, " [q%d]: ", i); >> - >> - if (q->qpage) { >> - idx =3D q->idx; >> - i0 =3D be32_to_cpup(q->qpage + idx); >> - idx =3D (idx + 1) & q->msk; >> - i1 =3D be32_to_cpup(q->qpage + idx); >> - seq_printf(m, "T=3D%d %08x %08x... \n", q->toggle, i0, i1); >> - } >> - if (xc->esc_virq[i]) { >> - struct irq_data *d =3D irq_get_irq_data(xc->esc_virq[i]); >> - struct xive_irq_data *xd =3D irq_data_get_irq_handler_data(d); >> - u64 pq =3D xive_vm_esb_load(xd, XIVE_ESB_GET); >> - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", >> - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', >> - (pq & XIVE_ESB_VAL_Q) ? 
'Q' : 'q', >> - xc->esc_virq[i], pq, xd->eoi_page); >> - seq_printf(m, "\n"); >> - } >> - } >> + kvmppc_xive_debug_show_queues(m, vcpu); >> =20 >> t_rm_h_xirr +=3D xc->stat_rm_h_xirr; >> t_rm_h_ipoll +=3D xc->stat_rm_h_ipoll; >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/bo= ok3s_xive_native.c >> index 76d45bcc7060..a078f99bc156 100644 >> --- a/arch/powerpc/kvm/book3s_xive_native.c >> +++ b/arch/powerpc/kvm/book3s_xive_native.c >> @@ -31,6 +31,134 @@ >> =20 >> #include "book3s_xive.h" >> =20 >> +static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int= prio) >> +{ >> + struct kvmppc_xive_vcpu *xc =3D vcpu->arch.xive_vcpu; >> + struct xive_q *q =3D &xc->queues[prio]; >> + >> + xive_native_disable_queue(xc->vp_id, q, prio); >> + if (q->qpage) { >> + put_page(virt_to_page(q->qpage)); >> + q->qpage =3D NULL; >> + } >> +} >> + >> +void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) >> +{ >> + struct kvmppc_xive_vcpu *xc =3D vcpu->arch.xive_vcpu; >> + int i; >> + >> + if (!kvmppc_xive_enabled(vcpu)) >> + return; >> + >> + if (!xc) >> + return; >> + >> + pr_devel("native_cleanup_vcpu(cpu=3D%d)\n", xc->server_num); >> + >> + /* Ensure no interrupt is still routed to that VP */ >> + xc->valid =3D false; >> + kvmppc_xive_disable_vcpu_interrupts(vcpu); >> + >> + /* Disable the VP */ >> + xive_native_disable_vp(xc->vp_id); >> + >> + /* Free the queues & associated interrupts */ >> + for (i =3D 0; i < KVMPPC_XIVE_Q_COUNT; i++) { >> + /* Free the escalation irq */ >> + if (xc->esc_virq[i]) { >> + free_irq(xc->esc_virq[i], vcpu); >> + irq_dispose_mapping(xc->esc_virq[i]); >> + kfree(xc->esc_virq_names[i]); >> + xc->esc_virq[i] =3D 0; >=20 > Should that actually be NO_IRQ on the right (it evaluates to the same > thing). Fine to fix that as a later followup. yes. 
It would be better to clarify some tests in both KVM devices. (NO_IRQ is rarely used and its value is platform-specific.) >> + } >> + >> + /* Free the queue */ >> + kvmppc_xive_native_cleanup_queue(vcpu, i); >> + } >> + >> + /* Free the VP */ >> + kfree(xc); >> + >> + /* Cleanup the vcpu */ >> + vcpu->arch.irq_type =3D KVMPPC_IRQ_DEFAULT; >> + vcpu->arch.xive_vcpu =3D NULL; >> +} >> + >> +int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, >> + struct kvm_vcpu *vcpu, u32 server_num) >> +{ >> + struct kvmppc_xive *xive =3D dev->private; >> + struct kvmppc_xive_vcpu *xc =3D NULL; >> + int rc; >> + >> + pr_devel("native_connect_vcpu(server=3D%d)\n", server_num); >> + >> + if (dev->ops !=3D &kvm_xive_native_ops) { >> + pr_devel("Wrong ops !\n"); >> + return -EPERM; >> + } >> + if (xive->kvm !=3D vcpu->kvm) >> + return -EPERM; >> + if (vcpu->arch.irq_type !=3D KVMPPC_IRQ_DEFAULT) >> + return -EBUSY; >> + if (server_num >=3D KVM_MAX_VCPUS) { >> + pr_devel("Out of bounds !\n"); >> + return -EINVAL; >> + } >> + >> + mutex_lock(&vcpu->kvm->lock); >> + >> + if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { >> + pr_devel("Duplicate !\n"); >> + rc =3D -EEXIST; >> + goto bail; >> + } >> + >> + xc =3D kzalloc(sizeof(*xc), GFP_KERNEL); >> + if (!xc) { >> + rc =3D -ENOMEM; >> + goto bail; >> + } >> + >> + vcpu->arch.xive_vcpu =3D xc; >> + xc->xive =3D xive; >> + xc->vcpu =3D vcpu; >> + xc->server_num =3D server_num; >> + >> + xc->vp_id =3D kvmppc_xive_vp(xive, server_num); >> + xc->valid =3D true; >=20 > This 'valid' field doesn't seem useful, since it's initialized to true > immediately after allocating xc and set to false moments before > free()ing it. ..and I can't see anything that tests it. It is used under the hood by kvmppc_xive_select_target() when configuring the target of a source. I agree it is not very useful as it is redundant with the xc pointer. It seems it is also the case for the XICS-on-XIVE KVM device. 
> Again, ok to deal with that as a later cleanup. Yes, I would rather do that for both devices at the same time. Thanks, C. >=20 >> + vcpu->arch.irq_type =3D KVMPPC_IRQ_XIVE; >> + >> + rc =3D xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id= ); >> + if (rc) { >> + pr_err("Failed to get VP info from OPAL: %d\n", rc); >> + goto bail; >> + } >> + >> + /* >> + * Enable the VP first as the single escalation mode will >> + * affect escalation interrupts numbering >> + */ >> + rc =3D xive_native_enable_vp(xc->vp_id, xive->single_escalation); >> + if (rc) { >> + pr_err("Failed to enable VP in OPAL: %d\n", rc); >> + goto bail; >> + } >> + >> + /* Configure VCPU fields for use by assembly push/pull */ >> + vcpu->arch.xive_saved_state.w01 =3D cpu_to_be64(0xff000000); >> + vcpu->arch.xive_cam_word =3D cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); >> + >> + /* TODO: reset all queues to a clean state ? */ >> +bail: >> + mutex_unlock(&vcpu->kvm->lock); >> + if (rc) >> + kvmppc_xive_native_cleanup_vcpu(vcpu); >> + >> + return rc; >> +} >> + >> static int kvmppc_xive_native_set_attr(struct kvm_device *dev, >> struct kvm_device_attr *attr) >> { >> @@ -119,10 +247,32 @@ static int xive_native_debug_show(struct seq_file = *m, void *private) >> { >> struct kvmppc_xive *xive =3D m->private; >> struct kvm *kvm =3D xive->kvm; >> + struct kvm_vcpu *vcpu; >> + unsigned int i; >> =20 >> if (!kvm) >> return 0; >> =20 >> + seq_puts(m, "=3D=3D=3D=3D=3D\nVCPU state\n=3D=3D=3D=3D=3D\n"); >> + >> + kvm_for_each_vcpu(i, vcpu, kvm) { >> + struct kvmppc_xive_vcpu *xc =3D vcpu->arch.xive_vcpu; >> + >> + if (!xc) >> + continue; >> + >> + seq_printf(m, "cpu server %#x NSR=3D%02x CPPR=3D%02x IBP=3D%02x PIPR= =3D%02x w01=3D%016llx w2=3D%08x\n", >> + xc->server_num, >> + vcpu->arch.xive_saved_state.nsr, >> + vcpu->arch.xive_saved_state.cppr, >> + vcpu->arch.xive_saved_state.ipb, >> + vcpu->arch.xive_saved_state.pipr, >> + vcpu->arch.xive_saved_state.w01, >> + (u32) vcpu->arch.xive_cam_word); >> + >> + 
kvmppc_xive_debug_show_queues(m, vcpu); >> + } >> + >> return 0; >> } >> =20 >> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c >> index 8c69af10f91d..bb51faf29162 100644 >> --- a/arch/powerpc/kvm/powerpc.c >> +++ b/arch/powerpc/kvm/powerpc.c >> @@ -570,6 +570,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, l= ong ext) >> case KVM_CAP_PPC_GET_CPU_CHAR: >> r =3D 1; >> break; >> +#ifdef CONFIG_KVM_XIVE >> + case KVM_CAP_PPC_IRQ_XIVE: >> + /* >> + * Return false until all the XIVE infrastructure is >> + * in place including support for migration. >> + */ >> + r =3D 0; >> + break; >> +#endif >> =20 >> case KVM_CAP_PPC_ALLOC_HTAB: >> r =3D hv_enabled; >> @@ -753,6 +762,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) >> else >> kvmppc_xics_free_icp(vcpu); >> break; >> + case KVMPPC_IRQ_XIVE: >> + kvmppc_xive_native_cleanup_vcpu(vcpu); >> + break; >> } >> =20 >> kvmppc_core_vcpu_free(vcpu); >> @@ -1941,6 +1953,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_v= cpu *vcpu, >> break; >> } >> #endif /* CONFIG_KVM_XICS */ >> +#ifdef CONFIG_KVM_XIVE >> + case KVM_CAP_PPC_IRQ_XIVE: { >> + struct fd f; >> + struct kvm_device *dev; >> + >> + r =3D -EBADF; >> + f =3D fdget(cap->args[0]); >> + if (!f.file) >> + break; >> + >> + r =3D -ENXIO; >> + if (!xive_enabled()) >> + break; >> + >> + r =3D -EPERM; >> + dev =3D kvm_device_from_filp(f.file); >> + if (dev) >> + r =3D kvmppc_xive_native_connect_vcpu(dev, vcpu, >> + cap->args[1]); >> + >> + fdput(f); >> + break; >> + } >> +#endif /* CONFIG_KVM_XIVE */ >> #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE >> case KVM_CAP_PPC_FWNMI: >> r =3D -EINVAL; >> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/k= vm/api.txt >> index 356156f5c52d..1db1435769b4 100644 >> --- a/Documentation/virtual/kvm/api.txt >> +++ b/Documentation/virtual/kvm/api.txt >> @@ -4458,6 +4458,15 @@ struct kvm_sync_regs { >> struct kvm_vcpu_events events; >> }; >> =20 >> +6.75 KVM_CAP_PPC_IRQ_XIVE >> + >> +Architectures: 
ppc >> +Target: vcpu >> +Parameters: args[0] is the XIVE device fd >> + args[1] is the XIVE CPU number (server ID) for this vcpu >> + >> +This capability connects the vcpu to an in-kernel XIVE device. >> + >> 7. Capabilities that can be enabled on VMs >> ------------------------------------------ >> =20 >=20