From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754227AbbFKRPL (ORCPT ); Thu, 11 Jun 2015 13:15:11 -0400 Received: from mx1.redhat.com ([209.132.183.28]:51793 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752062AbbFKRPI (ORCPT ); Thu, 11 Jun 2015 13:15:08 -0400 Message-ID: <1434042907.4927.194.camel@redhat.com> Subject: Re: [v4 12/16] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts From: Alex Williamson To: Feng Wu Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org, pbonzini@redhat.com, mtosatti@redhat.com, eric.auger@linaro.org Date: Thu, 11 Jun 2015 11:15:07 -0600 In-Reply-To: <1434019912-15423-13-git-send-email-feng.wu@intel.com> References: <1434019912-15423-1-git-send-email-feng.wu@intel.com> <1434019912-15423-13-git-send-email-feng.wu@intel.com> Content-Type: text/plain; charset="UTF-8" Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Thu, 2015-06-11 at 18:51 +0800, Feng Wu wrote: > This patch adds the kvm-vfio interface for VT-d Posted-Interrupts. > When guests update MSI/MSI-x information for an assigned-device, > QEMU will use KVM_DEV_VFIO_DEVICE_POST_IRQ attribute to setup > IRTE for VT-d PI. Userspace program can also use > KVM_DEV_VFIO_DEVICE_UNPOST_IRQ to change back to irq remapping mode. > This patch implements these IRQ attributes. > > Signed-off-by: Feng Wu > --- > include/linux/kvm_host.h | 22 +++++++++ > virt/kvm/vfio.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 148 insertions(+) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index f591f7c..69f8711 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1073,6 +1073,28 @@ extern struct kvm_device_ops kvm_xics_ops; > extern struct kvm_device_ops kvm_arm_vgic_v2_ops; > extern struct kvm_device_ops kvm_arm_vgic_v3_ops; > > +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST > +/* > + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts > + * > + * @kvm: kvm > + * @host_irq: host irq of the interrupt > + * @guest_irq: gsi of the interrupt > + * @set: set or unset PI > + * returns 0 on success, < 0 on failure > + */ > +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq, > + uint32_t guest_irq, bool set); > +#else > +static inline int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, > + unsigned int host_irq, > + uint32_t guest_irq, > + bool set) > +{ > + return 0; > +} The code below can't get to this function without __KVM_HAVE_ARCH_KVM_VFIO_POST, but this seems like it should return an error if not implemented. > +#endif > + > #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT > > static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) > diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c > index 80a45e4..547fc51 100644 > --- a/virt/kvm/vfio.c > +++ b/virt/kvm/vfio.c > @@ -18,6 +18,7 @@ > #include > #include > #include > +#include This only exists on x86. Are we also getting lucky with some of the include chains that give us the PCI related defines? It looks like we're implicitly assuming CONFIG_PCI > #include "vfio.h" > > struct kvm_vfio_group { > @@ -276,12 +277,128 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) > return -ENXIO; > } > > +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type) > +{ > + if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { > + u8 pin; > + > + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); > + if (pin) > + return 1; > + } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { > + return pci_msi_vec_count(pdev); > + } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) { > + return pci_msix_vec_count(pdev); > + } > + > + return 0; > +} > + > +static int kvm_vfio_control_pi(struct kvm_device *kdev, > + int32_t __user *argp, bool set) > +{ > + struct kvm_vfio_dev_irq pi_info; > + uint32_t *gsi; > + unsigned long minsz; > + struct vfio_device *vdev; > + struct msi_desc *entry; > + struct device *dev; > + struct pci_dev *pdev; > + int i, max, ret; > + > + minsz = offsetofend(struct kvm_vfio_dev_irq, count); > + > + if (copy_from_user(&pi_info, (void __user *)argp, minsz)) > + return -EFAULT; > + > + if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS) > + return -EINVAL; Could we also abort on pi_info.count == 0? > + > + vdev = kvm_vfio_get_vfio_device(pi_info.fd); > + if (IS_ERR(vdev)) > + return PTR_ERR(vdev); > + > + dev = kvm_vfio_external_base_device(vdev); > + if (!dev || !dev_is_pci(dev)) { > + ret = -EFAULT; > + goto put_vfio_device; > + } > + > + pdev = to_pci_dev(dev); > + > + max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index); > + if (max <= 0) { > + ret = -EFAULT; > + goto put_vfio_device; > + } > + > + if (pi_info.argsz - minsz < pi_info.count * sizeof(u32) || > + pi_info.start >= max || pi_info.start + pi_info.count > max) { > + ret = -EINVAL; > + goto put_vfio_device; > + } > + > + gsi = memdup_user((void __user *)((unsigned long)argp + minsz), > + pi_info.count * sizeof(u32)); > + if (IS_ERR(gsi)) { > + ret = PTR_ERR(gsi); > + goto put_vfio_device; > + } > + > +#ifdef CONFIG_PCI_MSI > + for (i = 0; i < pi_info.count; i++) { > + list_for_each_entry(entry, &pdev->msi_list, list) { Should we be able to get here for INTx? > + if (entry->msi_attrib.entry_nr != pi_info.start+i) > + continue; > + > + ret = kvm_arch_vfio_update_pi_irte(kdev->kvm, > + entry->irq, > + gsi[i], > + set); > + if (ret) > + goto free_gsi; > + } > + } > +#endif > + > + ret = 0; So if we didn't do anything, return success? That seems strange. Should we also be doing some unwind on failure? Thanks, Alex > + > +free_gsi: > + kfree(gsi); > + > +put_vfio_device: > + kvm_vfio_put_vfio_device(vdev); > + return ret; > +} > + > +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) > +{ > + int32_t __user *argp = (int32_t __user *)(unsigned long)arg; > + int ret; > + > + switch (attr) { > +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST > + case KVM_DEV_VFIO_DEVICE_POST_IRQ: > + ret = kvm_vfio_control_pi(kdev, argp, 1); > + break; > + case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: > + ret = kvm_vfio_control_pi(kdev, argp, 0); > + break; > +#endif > + default: > + ret = -ENXIO; > + } > + return ret; > +} > + > static int kvm_vfio_set_attr(struct kvm_device *dev, > struct kvm_device_attr *attr) > { > switch (attr->group) { > case KVM_DEV_VFIO_GROUP: > return kvm_vfio_set_group(dev, attr->attr, attr->addr); > + case KVM_DEV_VFIO_DEVICE: > + return kvm_vfio_set_device(dev, attr->attr, attr->addr); > } > > return -ENXIO; > @@ -299,6 +416,15 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, > } > > break; > + case KVM_DEV_VFIO_DEVICE: > + switch (attr->attr) { > +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST > + case KVM_DEV_VFIO_DEVICE_POST_IRQ: > + case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: > + return irq_remapping_cap(IRQ_POSTING_CAP) ? 0 : -ENXIO; > +#endif > + } > + break; > } > > return -ENXIO;