On Mon, Jan 07, 2019 at 07:43:17PM +0100, Cédric Le Goater wrote: > This is the basic framework for the new KVM device supporting the XIVE > native exploitation mode. The user interface exposes a new capability > and a new KVM device to be used by QEMU. > > Internally, the interface to the new KVM device is protected with a > new interrupt mode: KVMPPC_IRQ_XIVE. > > Signed-off-by: Cédric Le Goater > --- > arch/powerpc/include/asm/kvm_host.h | 2 + > arch/powerpc/include/asm/kvm_ppc.h | 21 ++ > arch/powerpc/kvm/book3s_xive.h | 3 + > include/uapi/linux/kvm.h | 3 + > arch/powerpc/kvm/book3s.c | 7 +- > arch/powerpc/kvm/book3s_xive_native.c | 332 ++++++++++++++++++++++++++ > arch/powerpc/kvm/powerpc.c | 30 +++ > arch/powerpc/kvm/Makefile | 2 +- > 8 files changed, 398 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/kvm/book3s_xive_native.c > > diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h > index 0f98f00da2ea..c522e8274ad9 100644 > --- a/arch/powerpc/include/asm/kvm_host.h > +++ b/arch/powerpc/include/asm/kvm_host.h > @@ -220,6 +220,7 @@ extern struct kvm_device_ops kvm_xics_ops; > struct kvmppc_xive; > struct kvmppc_xive_vcpu; > extern struct kvm_device_ops kvm_xive_ops; > +extern struct kvm_device_ops kvm_xive_native_ops; > > struct kvmppc_passthru_irqmap; > > @@ -446,6 +447,7 @@ struct kvmppc_passthru_irqmap { > #define KVMPPC_IRQ_DEFAULT 0 > #define KVMPPC_IRQ_MPIC 1 > #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ > +#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */ > > #define MMIO_HPTE_CACHE_SIZE 4 > > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > index eb0d79f0ca45..1bb313f238fe 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -591,6 +591,18 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); > extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, > int level, bool line_status); > extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); > + > +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) > +{ > + return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE; > +} > + > +extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, > + struct kvm_vcpu *vcpu, u32 cpu); > +extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); > +extern void kvmppc_xive_native_init_module(void); > +extern void kvmppc_xive_native_exit_module(void); > + > #else > static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, > u32 priority) { return -1; } > @@ -614,6 +626,15 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur > static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, > int level, bool line_status) { return -ENODEV; } > static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } > + > +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) > + { return 0; } > +static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, > + struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } > +static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } > +static inline void kvmppc_xive_native_init_module(void) { } > +static inline void kvmppc_xive_native_exit_module(void) { } > + > #endif /* CONFIG_KVM_XIVE */ > > /* > diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h > index 10c4aa5cd010..5f22415520b4 100644 > --- a/arch/powerpc/kvm/book3s_xive.h > +++ b/arch/powerpc/kvm/book3s_xive.h > @@ -12,6 +12,9 @@ > #ifdef CONFIG_KVM_XICS > #include "book3s_xics.h" > > +#define KVMPPC_XIVE_FIRST_IRQ 0 > +#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS > + > /* > * State for one guest irq source. > * > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 6d4ea4b6c922..52bf74a1616e 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_ARM_VM_IPA_SIZE 165 > #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 > #define KVM_CAP_HYPERV_CPUID 167 > +#define KVM_CAP_PPC_IRQ_XIVE 168 > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -1211,6 +1212,8 @@ enum kvm_device_type { > #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 > KVM_DEV_TYPE_ARM_VGIC_ITS, > #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS > + KVM_DEV_TYPE_XIVE, > +#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE > KVM_DEV_TYPE_MAX, > }; > > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c > index bd1a677dd9e4..de7eed191107 100644 > --- a/arch/powerpc/kvm/book3s.c > +++ b/arch/powerpc/kvm/book3s.c > @@ -1039,7 +1039,10 @@ static int kvmppc_book3s_init(void) > #ifdef CONFIG_KVM_XIVE > if (xive_enabled()) { > kvmppc_xive_init_module(); > + kvmppc_xive_native_init_module(); > kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); > + kvm_register_device_ops(&kvm_xive_native_ops, > + KVM_DEV_TYPE_XIVE); > } else > #endif > kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); > @@ -1050,8 +1053,10 @@ static int kvmppc_book3s_init(void) > static void kvmppc_book3s_exit(void) > { > #ifdef CONFIG_KVM_XICS > - if (xive_enabled()) > + if (xive_enabled()) { > kvmppc_xive_exit_module(); > + kvmppc_xive_native_exit_module(); > + } > #endif > #ifdef CONFIG_KVM_BOOK3S_32_HANDLER > kvmppc_book3s_exit_pr(); > diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c > new file mode 100644 > index 000000000000..115143e76c45 > --- /dev/null > +++ b/arch/powerpc/kvm/book3s_xive_native.c > @@ -0,0 +1,332 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (c) 2017-2019, IBM Corporation. > + */ > + > +#define pr_fmt(fmt) "xive-kvm: " fmt > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include > +#include > + > +#include "book3s_xive.h" > + > +static void xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) > +{ > + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > + struct xive_q *q = &xc->queues[prio]; > + > + xive_native_disable_queue(xc->vp_id, q, prio); > + if (q->qpage) { > + put_page(virt_to_page(q->qpage)); > + q->qpage = NULL; > + } > +} > + > +void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) > +{ > + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > + int i; > + > + if (!kvmppc_xive_enabled(vcpu)) > + return; > + > + if (!xc) > + return; > + > + pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); > + > + /* Ensure no interrupt is still routed to that VP */ > + xc->valid = false; > + kvmppc_xive_disable_vcpu_interrupts(vcpu); > + > + /* Disable the VP */ > + xive_native_disable_vp(xc->vp_id); > + > + /* Free the queues & associated interrupts */ > + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { > + /* Free the escalation irq */ > + if (xc->esc_virq[i]) { > + free_irq(xc->esc_virq[i], vcpu); > + irq_dispose_mapping(xc->esc_virq[i]); > + kfree(xc->esc_virq_names[i]); > + xc->esc_virq[i] = 0; > + } > + > + /* Free the queue */ > + xive_native_cleanup_queue(vcpu, i); > + } > + > + /* Free the VP */ > + kfree(xc); > + > + /* Cleanup the vcpu */ > + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; > + vcpu->arch.xive_vcpu = NULL; > +} > + > +int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, > + struct kvm_vcpu *vcpu, u32 cpu) Why do we need both a *vcpu and a cpu number as an integer? > +{ > + struct kvmppc_xive *xive = dev->private; > + struct kvmppc_xive_vcpu *xc; > + int rc; > + > + pr_devel("native_connect_vcpu(cpu=%d)\n", cpu); > + > + if (dev->ops != &kvm_xive_native_ops) { > + pr_devel("Wrong ops !\n"); > + return -EPERM; > + } > + if (xive->kvm != vcpu->kvm) > + return -EPERM; > + if (vcpu->arch.irq_type) Please use an explicit == / != here so we don't have to remember which symbolic value corresponds to 0. > + return -EBUSY; > + if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { > + pr_devel("Duplicate !\n"); > + return -EEXIST; > + } > + if (cpu >= KVM_MAX_VCPUS) { > + pr_devel("Out of bounds !\n"); > + return -EINVAL; > + } > + xc = kzalloc(sizeof(*xc), GFP_KERNEL); > + if (!xc) > + return -ENOMEM; > + > + mutex_lock(&vcpu->kvm->lock); > + vcpu->arch.xive_vcpu = xc; > + xc->xive = xive; > + xc->vcpu = vcpu; > + xc->server_num = cpu; > + xc->vp_id = xive->vp_base + cpu; > + xc->valid = true; > + > + rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); > + if (rc) { > + pr_err("Failed to get VP info from OPAL: %d\n", rc); > + goto bail; > + } > + > + /* > + * Enable the VP first as the single escalation mode will > + * affect escalation interrupts numbering > + */ > + rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); > + if (rc) { > + pr_err("Failed to enable VP in OPAL: %d\n", rc); > + goto bail; > + } > + > + /* Configure VCPU fields for use by assembly push/pull */ > + vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); > + vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); > + > + /* TODO: initialize queues ? */ > + > +bail: > + vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; > + mutex_unlock(&vcpu->kvm->lock); > + if (rc) > + kvmppc_xive_native_cleanup_vcpu(vcpu); > + > + return rc; > +} > + > +static int kvmppc_xive_native_set_attr(struct kvm_device *dev, > + struct kvm_device_attr *attr) > +{ > + return -ENXIO; > +} > + > +static int kvmppc_xive_native_get_attr(struct kvm_device *dev, > + struct kvm_device_attr *attr) > +{ > + return -ENXIO; > +} > + > +static int kvmppc_xive_native_has_attr(struct kvm_device *dev, > + struct kvm_device_attr *attr) > +{ > + return -ENXIO; > +} > + > +static void kvmppc_xive_native_free(struct kvm_device *dev) > +{ > + struct kvmppc_xive *xive = dev->private; > + struct kvm *kvm = xive->kvm; > + int i; > + > + debugfs_remove(xive->dentry); > + > + pr_devel("Destroying xive native for partition\n"); > + > + if (kvm) > + kvm->arch.xive = NULL; > + > + /* Mask and free interrupts */ > + for (i = 0; i <= xive->max_sbid; i++) { > + if (xive->src_blocks[i]) > + kvmppc_xive_free_sources(xive->src_blocks[i]); > + kfree(xive->src_blocks[i]); > + xive->src_blocks[i] = NULL; > + } > + > + if (xive->vp_base != XIVE_INVALID_VP) > + xive_native_free_vp_block(xive->vp_base); > + > + kfree(xive); > + kfree(dev); > +} > + > +static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) > +{ > + struct kvmppc_xive *xive; > + struct kvm *kvm = dev->kvm; > + int ret = 0; > + > + pr_devel("Creating xive native for partition\n"); > + > + if (kvm->arch.xive) > + return -EEXIST; > + > + xive = kzalloc(sizeof(*xive), GFP_KERNEL); > + if (!xive) > + return -ENOMEM; > + > + dev->private = xive; > + xive->dev = dev; > + xive->kvm = kvm; > + kvm->arch.xive = xive; > + > + /* We use the default queue size set by the host */ > + xive->q_order = xive_native_default_eq_shift(); > + if (xive->q_order < PAGE_SHIFT) > + xive->q_page_order = 0; > + else > + xive->q_page_order = xive->q_order - PAGE_SHIFT; > + > + /* Allocate a bunch of VPs */ > + xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); > + pr_devel("VP_Base=%x\n", xive->vp_base); > + > + if (xive->vp_base == XIVE_INVALID_VP) > + ret = -ENOMEM; > + > + xive->single_escalation = xive_native_has_single_escalation(); > + > + if (ret) > + kfree(xive); > + > + return ret; > +} > + > +static int xive_native_debug_show(struct seq_file *m, void *private) > +{ > + struct kvmppc_xive *xive = m->private; > + struct kvm *kvm = xive->kvm; > + struct kvm_vcpu *vcpu; > + unsigned int i; > + > + if (!kvm) > + return 0; > + > + seq_puts(m, "=========\nVCPU state\n=========\n"); > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > + > + if (!xc) > + continue; > + > + seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", > + xc->server_num, > + vcpu->arch.xive_saved_state.nsr, > + vcpu->arch.xive_saved_state.cppr, > + vcpu->arch.xive_saved_state.ipb, > + vcpu->arch.xive_saved_state.pipr, > + vcpu->arch.xive_saved_state.w01, > + (u32) vcpu->arch.xive_cam_word); > + > + kvmppc_xive_debug_show_queues(m, vcpu); > + } > + > + return 0; > +} > + > +static int xive_native_debug_open(struct inode *inode, struct file *file) > +{ > + return single_open(file, xive_native_debug_show, inode->i_private); > +} > + > +static const struct file_operations xive_native_debug_fops = { > + .open = xive_native_debug_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = single_release, > +}; > + > +static void xive_native_debugfs_init(struct kvmppc_xive *xive) > +{ > + char *name; > + > + name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); > + if (!name) { > + pr_err("%s: no memory for name\n", __func__); > + return; > + } > + > + xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, > + xive, &xive_native_debug_fops); > + > + pr_debug("%s: created %s\n", __func__, name); > + kfree(name); > +} > + > +static void kvmppc_xive_native_init(struct kvm_device *dev) > +{ > + struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; > + > + /* Register some debug interfaces */ > + xive_native_debugfs_init(xive); > +} > + > +struct kvm_device_ops kvm_xive_native_ops = { > + .name = "kvm-xive-native", > + .create = kvmppc_xive_native_create, > + .init = kvmppc_xive_native_init, > + .destroy = kvmppc_xive_native_free, > + .set_attr = kvmppc_xive_native_set_attr, > + .get_attr = kvmppc_xive_native_get_attr, > + .has_attr = kvmppc_xive_native_has_attr, > +}; > + > +void kvmppc_xive_native_init_module(void) > +{ > + ; > +} > + > +void kvmppc_xive_native_exit_module(void) > +{ > + ; > +} > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index b90a7d154180..01d526e15e9d 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -566,6 +566,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_PPC_ENABLE_HCALL: > #ifdef CONFIG_KVM_XICS > case KVM_CAP_IRQ_XICS: > +#endif > +#ifdef CONFIG_KVM_XIVE > + case KVM_CAP_PPC_IRQ_XIVE: > #endif > case KVM_CAP_PPC_GET_CPU_CHAR: > r = 1; > @@ -753,6 +756,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) > else > kvmppc_xics_free_icp(vcpu); > break; > + case KVMPPC_IRQ_XIVE: > + kvmppc_xive_native_cleanup_vcpu(vcpu); > + break; > } > > kvmppc_core_vcpu_free(vcpu); > @@ -1941,6 +1947,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, > break; > } > #endif /* CONFIG_KVM_XICS */ > +#ifdef CONFIG_KVM_XIVE > + case KVM_CAP_PPC_IRQ_XIVE: { > + struct fd f; > + struct kvm_device *dev; > + > + r = -EBADF; > + f = fdget(cap->args[0]); > + if (!f.file) > + break; > + > + r = -ENXIO; > + if (!xive_enabled()) > + break; > + > + r = -EPERM; > + dev = kvm_device_from_filp(f.file); > + if (dev) > + r = kvmppc_xive_native_connect_vcpu(dev, vcpu, > + cap->args[1]); > + > + fdput(f); > + break; > + } > +#endif /* CONFIG_KVM_XIVE */ > #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE > case KVM_CAP_PPC_FWNMI: > r = -EINVAL; > diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile > index 64f1135e7732..806cbe488410 100644 > --- a/arch/powerpc/kvm/Makefile > +++ b/arch/powerpc/kvm/Makefile > @@ -99,7 +99,7 @@ endif > kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ > book3s_xics.o > > -kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o > +kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o > kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o > > kvm-book3s_64-module-objs := \ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson