On Fri, Feb 22, 2019 at 12:28:28PM +0100, Cédric Le Goater wrote: > The associated HW interrupt source is simply allocated at the OPAL/HW > level and then MASKED. KVM only needs to know about its type: LSI or > MSI. > > Signed-off-by: Cédric Le Goater > --- > arch/powerpc/include/uapi/asm/kvm.h | 5 + > arch/powerpc/kvm/book3s_xive.h | 10 ++ > arch/powerpc/kvm/book3s_xive.c | 8 +- > arch/powerpc/kvm/book3s_xive_native.c | 114 +++++++++++++++++++++ > Documentation/virtual/kvm/devices/xive.txt | 15 +++ > 5 files changed, 148 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index b002c0c67787..a9ad99f2a11b 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -677,5 +677,10 @@ struct kvm_ppc_cpu_char { > > /* POWER9 XIVE Native Interrupt Controller */ > #define KVM_DEV_XIVE_GRP_CTRL 1 > +#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source attributes */ > + > +/* Layout of 64-bit XIVE source attribute values */ > +#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) > +#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) > > #endif /* __LINUX_KVM_POWERPC_H */ > diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h > index bcb1bbcf0359..f22f2d46d0f0 100644 > --- a/arch/powerpc/kvm/book3s_xive.h > +++ b/arch/powerpc/kvm/book3s_xive.h > @@ -12,6 +12,13 @@ > #ifdef CONFIG_KVM_XICS > #include "book3s_xics.h" > > +/* > + * The XIVE IRQ number space is aligned with the XICS IRQ number > + * space, CPU IPIs being allocated in the first 4K. We do align these in qemu, but I don't see that the kernel part cares: as far as it's concerned only one of XICS or XIVE is active at a time, and the irq numbers are chosen by userspace. > + */ > +#define KVMPPC_XIVE_FIRST_IRQ 0 > +#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS > + > /* > * State for one guest irq source. > * > @@ -253,6 +260,9 @@ extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); > */ > void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); > int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); > +struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( > + struct kvmppc_xive *xive, int irq); > +void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); > > #endif /* CONFIG_KVM_XICS */ > #endif /* _KVM_PPC_BOOK3S_XICS_H */ > diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c > index d1cc18a5b1c4..6f950ecb3592 100644 > --- a/arch/powerpc/kvm/book3s_xive.c > +++ b/arch/powerpc/kvm/book3s_xive.c I wonder if we should rename this book3s_xics_on_xive.c or something at some point, I keep getting confused because I forget that this is only dealing with host xive, not guest xive. > @@ -1485,8 +1485,8 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) > return 0; > } > > -static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive, > - int irq) > +struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( > + struct kvmppc_xive *xive, int irq) > { > struct kvm *kvm = xive->kvm; > struct kvmppc_xive_src_block *sb; It's odd that this function, now used from the xive-on-xive path as well as the xics-on-xive path references KVMPPC_XICS_ICS_SHIFT a few lines down from this change. > @@ -1565,7 +1565,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) > sb = kvmppc_xive_find_source(xive, irq, &idx); > if (!sb) { > pr_devel("No source, creating source block...\n"); > - sb = xive_create_src_block(xive, irq); > + sb = kvmppc_xive_create_src_block(xive, irq); > if (!sb) { > pr_devel("Failed to create block...\n"); > return -ENOMEM; > @@ -1789,7 +1789,7 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) > xive_cleanup_irq_data(xd); > } > > -static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) > +void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) > { > int i; > > diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c > index 1f3da47a4a6a..a9b2d2d9af99 100644 > --- a/arch/powerpc/kvm/book3s_xive_native.c > +++ b/arch/powerpc/kvm/book3s_xive_native.c > @@ -31,6 +31,29 @@ > > #include "book3s_xive.h" > > +/* > + * TODO: introduce a common template file with the XIVE native layer > + * and the XICS-on-XIVE glue for the utility functions > + */ > +#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio)) > +#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio)) > +#define __x_readq __raw_readq > +#define __x_writeq __raw_writeq > + > +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) > +{ > + u64 val; > + > + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) > + offset |= offset << 4; > + > + val = __x_readq(__x_eoi_page(xd) + offset); > +#ifdef __LITTLE_ENDIAN__ > + val >>= 64-8; > +#endif > + return (u8)val; > +} > + > static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) > { > struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > @@ -153,12 +176,89 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, > return rc; > } > > +static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, > + u64 addr) > +{ > + struct kvmppc_xive_src_block *sb; > + struct kvmppc_xive_irq_state *state; > + u64 __user *ubufp = (u64 __user *) addr; > + u64 val; > + u16 idx; > + > + pr_devel("%s irq=0x%lx\n", __func__, irq); > + > + if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) > + return -E2BIG; > + > + sb = kvmppc_xive_find_source(xive, irq, &idx); > + if (!sb) { > + pr_debug("No source, creating source block...\n"); > + sb = kvmppc_xive_create_src_block(xive, irq); > + if (!sb) { > + pr_err("Failed to create block...\n"); > + return -ENOMEM; > + } > + } > + state = &sb->irq_state[idx]; > + > + if (get_user(val, ubufp)) { > + pr_err("fault getting user info !\n"); > + return -EFAULT; > + } You should validate the value loaded here to check it doesn't have any bits set we don't know about. > + > + /* > + * If the source doesn't already have an IPI, allocate > + * one and get the corresponding data > + */ > + if (!state->ipi_number) { > + state->ipi_number = xive_native_alloc_irq(); > + if (state->ipi_number == 0) { > + pr_err("Failed to allocate IRQ !\n"); > + return -ENXIO; > + } > + xive_native_populate_irq_data(state->ipi_number, > + &state->ipi_data); > + pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__, > + state->ipi_number, irq); > + } > + > + arch_spin_lock(&sb->lock); Why the direct call to arch_spin_lock() rather than just spin_lock()? > + > + /* Restore LSI state */ > + if (val & KVM_XIVE_LEVEL_SENSITIVE) { > + state->lsi = true; > + if (val & KVM_XIVE_LEVEL_ASSERTED) > + state->asserted = true; > + pr_devel(" LSI ! Asserted=%d\n", state->asserted); > + } > + > + /* Mask IRQ to start with */ > + state->act_server = 0; > + state->act_priority = MASKED; > + xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); > + xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); > + > + /* Increment the number of valid sources and mark this one valid */ > + if (!state->valid) > + xive->src_count++; > + state->valid = true; > + > + arch_spin_unlock(&sb->lock); > + > + return 0; > +} > + > static int kvmppc_xive_native_set_attr(struct kvm_device *dev, > struct kvm_device_attr *attr) > { > + struct kvmppc_xive *xive = dev->private; > + > switch (attr->group) { > case KVM_DEV_XIVE_GRP_CTRL: > break; > + case KVM_DEV_XIVE_GRP_SOURCE: > + return kvmppc_xive_native_set_source(xive, attr->attr, > + attr->addr); > } > return -ENXIO; > } > @@ -175,6 +275,11 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, > switch (attr->group) { > case KVM_DEV_XIVE_GRP_CTRL: > break; > + case KVM_DEV_XIVE_GRP_SOURCE: > + if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && > + attr->attr < KVMPPC_XIVE_NR_IRQS) > + return 0; > + break; > } > return -ENXIO; > } > @@ -183,6 +288,7 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) > { > struct kvmppc_xive *xive = dev->private; > struct kvm *kvm = xive->kvm; > + int i; > > debugfs_remove(xive->dentry); > > @@ -191,6 +297,14 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) > if (kvm) > kvm->arch.xive = NULL; > > + /* Mask and free interrupts */ > + for (i = 0; i <= xive->max_sbid; i++) { > + if (xive->src_blocks[i]) > + kvmppc_xive_free_sources(xive->src_blocks[i]); > + kfree(xive->src_blocks[i]); > + xive->src_blocks[i] = NULL; > + } > + > if (xive->vp_base != XIVE_INVALID_VP) > xive_native_free_vp_block(xive->vp_base); > > diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt > index fdbd2ff92a88..cd8bfc37b72e 100644 > --- a/Documentation/virtual/kvm/devices/xive.txt > +++ b/Documentation/virtual/kvm/devices/xive.txt > @@ -17,3 +17,18 @@ the legacy interrupt mode, referred as XICS (POWER7/8). > > 1. KVM_DEV_XIVE_GRP_CTRL > Provides global controls on the device > + > + 2. KVM_DEV_XIVE_GRP_SOURCE (write only) > + Initializes a new source in the XIVE device and mask it. > + Attributes: > + Interrupt source number (64-bit) > + The kvm_device_attr.addr points to a __u64 value: > + bits: | 63 .... 2 | 1 | 0 > + values: | unused | level | type > + - type: 0:MSI 1:LSI > + - level: assertion level in case of an LSI. > + Errors: > + -E2BIG: Interrupt source number is out of range > + -ENOMEM: Could not create a new source block > + -EFAULT: Invalid user pointer for attr->addr. > + -ENXIO: Could not allocate underlying HW interrupt -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson