From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753874AbaIKJg2 (ORCPT ); Thu, 11 Sep 2014 05:36:28 -0400 Received: from mail-we0-f170.google.com ([74.125.82.170]:64170 "EHLO mail-we0-f170.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752408AbaIKJgX (ORCPT ); Thu, 11 Sep 2014 05:36:23 -0400 Message-ID: <54116CFC.1050807@linaro.org> Date: Thu, 11 Sep 2014 11:35:56 +0200 From: Eric Auger User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.2.0 MIME-Version: 1.0 To: Christoffer Dall CC: eric.auger@st.com, marc.zyngier@arm.com, linux-arm-kernel@lists.infradead.org, kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org, alex.williamson@redhat.com, joel.schopp@amd.com, kim.phillips@freescale.com, paulus@samba.org, gleb@kernel.org, pbonzini@redhat.com, linux-kernel@vger.kernel.org, patches@linaro.org, will.deacon@arm.com, a.motakis@virtualopensystems.com, a.rigo@virtualopensystems.com, john.liuli@huawei.com Subject: Re: [RFC v2 8/9] KVM: KVM-VFIO: generic KVM_DEV_VFIO_DEVICE command and IRQ forwarding control References: <1409575968-5329-1-git-send-email-eric.auger@linaro.org> <1409575968-5329-9-git-send-email-eric.auger@linaro.org> <20140911031026.GI2784@lvm> In-Reply-To: <20140911031026.GI2784@lvm> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 09/11/2014 05:10 AM, Christoffer Dall wrote: > On Mon, Sep 01, 2014 at 02:52:47PM +0200, Eric Auger wrote: >> This patch introduces a new KVM_DEV_VFIO_DEVICE attribute. >> >> This is a new control channel which enables KVM to cooperate with >> viable VFIO devices. >> >> The kvm-vfio device now holds a list of devices (kvm_vfio_device) >> in addition to a list of groups (kvm_vfio_group). The new >> infrastructure enables to check the validity of the VFIO device >> file descriptor, get and hold a reference to it. >> >> The first concrete implemented command is IRQ forward control: >> KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. >> >> It consists in programing the VFIO driver and KVM in a consistent manner >> so that an optimized IRQ injection/completion is set up. Each >> kvm_vfio_device holds a list of forwarded IRQ. When putting a >> kvm_vfio_device, the implementation makes sure the forwarded IRQs >> are set again in the normal handling state (non forwarded). > > 'putting a kvm_vfio_device' sounds to like you're golf'ing :) > > When a kvm_vfio_device is released? sure > >> >> The forwarding programmming is architecture specific, embodied by the >> kvm_arch_set_fwd_state function. Its implementation is given in a >> separate patch file. > > I would drop the last sentence and instead indicate that this is handled > properly when the architecture does not support such a feature. ok > >> >> The forwarding control modality is enabled by the >> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD define. >> >> Signed-off-by: Eric Auger >> >> --- >> >> v1 -> v2: >> - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> - original patch file separated into 2 parts: generic part moved in vfio.c >> and ARM specific part(kvm_arch_set_fwd_state) >> --- >> include/linux/kvm_host.h | 27 +++ >> virt/kvm/vfio.c | 452 ++++++++++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 477 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h >> index a4c33b3..24350dc 100644 >> --- a/include/linux/kvm_host.h >> +++ b/include/linux/kvm_host.h >> @@ -1065,6 +1065,21 @@ struct kvm_device_ops { >> unsigned long arg); >> }; >> >> +enum kvm_fwd_irq_action { >> + KVM_VFIO_IRQ_SET_FORWARD, >> + KVM_VFIO_IRQ_SET_NORMAL, >> + KVM_VFIO_IRQ_CLEANUP, > > This is KVM internal API, so it would probably be good to document this. > Especially the CLEANUP bit worries me, see below. I will document it > >> +}; >> + >> +/* internal structure describing a forwarded IRQ */ >> +struct kvm_fwd_irq { >> + struct list_head link; > > this list entry is local to the kvm vfio device, right? that means you > probably want a struct with just the below fields, and then have a > containing struct in the generic device file, private to it's logic. I will introduce 2 separate structs > >> + __u32 index; /* platform device irq index */ >> + __u32 hwirq; /*physical IRQ */ >> + __u32 gsi; /* virtual IRQ */ >> + struct kvm_vcpu *vcpu; /* vcpu to inject into*/ >> +}; >> + >> void kvm_device_get(struct kvm_device *dev); >> void kvm_device_put(struct kvm_device *dev); >> struct kvm_device *kvm_device_from_filp(struct file *filp); >> @@ -1075,6 +1090,18 @@ extern struct kvm_device_ops kvm_vfio_ops; >> extern struct kvm_device_ops kvm_arm_vgic_v2_ops; >> extern struct kvm_device_ops kvm_flic_ops; >> >> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> +int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd, > > what's the 'p' in pfwd? will rename > >> + enum kvm_fwd_irq_action action); >> + >> +#else >> +static inline int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd, >> + enum kvm_fwd_irq_action action) >> +{ >> + return 0; >> +} >> +#endif >> + >> #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT >> >> static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) >> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c >> index 76dc7a1..e4a81c4 100644 >> --- a/virt/kvm/vfio.c >> +++ b/virt/kvm/vfio.c >> @@ -18,14 +18,24 @@ >> #include >> #include >> #include >> +#include >> >> struct kvm_vfio_group { >> struct list_head node; >> struct vfio_group *vfio_group; >> }; >> >> +struct kvm_vfio_device { >> + struct list_head node; >> + struct vfio_device *vfio_device; >> + /* list of forwarded IRQs for that VFIO device */ >> + struct list_head fwd_irq_list; >> + int fd; >> +}; >> + >> struct kvm_vfio { >> struct list_head group_list; >> + struct list_head device_list; >> struct mutex lock; >> bool noncoherent; >> }; >> @@ -246,12 +256,441 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) >> return -ENXIO; >> } >> >> +/** >> + * get_vfio_device - returns the vfio-device corresponding to this fd >> + * @fd:fd of the vfio platform device >> + * >> + * checks it is a vfio device >> + * increment its ref counter > > why the short lines? Just write this out in proper English. OK > >> + */ >> +static struct vfio_device *kvm_vfio_get_vfio_device(int fd) >> +{ >> + struct fd f; >> + struct vfio_device *vdev; >> + >> + f = fdget(fd); >> + if (!f.file) >> + return NULL; >> + vdev = kvm_vfio_device_get_external_user(f.file); >> + fdput(f); >> + return vdev; >> +} >> + >> +/** >> + * put_vfio_device: put the vfio platform device >> + * @vdev: vfio_device to put >> + * >> + * decrement the ref counter >> + */ >> +static void kvm_vfio_put_vfio_device(struct vfio_device *vdev) >> +{ >> + kvm_vfio_device_put_external_user(vdev); >> +} >> + >> +/** >> + * kvm_vfio_find_device - look for the device in the assigned >> + * device list >> + * @kv: the kvm-vfio device >> + * @vdev: the vfio_device to look for >> + * >> + * returns the associated kvm_vfio_device if the device is known, >> + * meaning at least 1 IRQ is forwarded for this device. >> + * in the device is not registered, returns NULL. >> + */ > > are these functions meant to be exported? Otherwise they should be > static, and the documentation on these simple list iteration wrappers > seems like overkill imho. could be static indeed > >> +struct kvm_vfio_device *kvm_vfio_find_device(struct kvm_vfio *kv, >> + struct vfio_device *vdev) >> +{ >> + struct kvm_vfio_device *kvm_vdev_iter; >> + >> + list_for_each_entry(kvm_vdev_iter, &kv->device_list, node) { >> + if (kvm_vdev_iter->vfio_device == vdev) >> + return kvm_vdev_iter; >> + } >> + return NULL; >> +} >> + >> +/** >> + * kvm_vfio_find_irq - look for a an irq in the device IRQ list >> + * @kvm_vdev: the kvm_vfio_device >> + * @irq_index: irq index >> + * >> + * returns the forwarded irq struct if it exists, NULL in the negative >> + */ >> +struct kvm_fwd_irq *kvm_vfio_find_irq(struct kvm_vfio_device *kvm_vdev, >> + int irq_index) >> +{ >> + struct kvm_fwd_irq *fwd_irq_iter; >> + >> + list_for_each_entry(fwd_irq_iter, &kvm_vdev->fwd_irq_list, link) { >> + if (fwd_irq_iter->index == irq_index) >> + return fwd_irq_iter; >> + } >> + return NULL; >> +} >> + >> +/** >> + * validate_forward - checks whether forwarding a given IRQ is meaningful >> + * @vdev: vfio_device the IRQ belongs to >> + * @fwd_irq: user struct containing the irq_index to forward >> + * @kvm_vdev: if a forwarded IRQ already exists for that VFIO device, >> + * kvm_vfio_device that holds it >> + * @hwirq: irq numberthe irq index corresponds to >> + * >> + * checks the vfio-device is a platform vfio device >> + * checks the irq_index corresponds to an actual hwirq and >> + * checks this hwirq is not already forwarded >> + * returns < 0 on following errors: >> + * not a platform device, bad irq index, already forwarded >> + */ >> +static int kvm_vfio_validate_forward(struct kvm_vfio *kv, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq, >> + struct kvm_vfio_device **kvm_vdev, >> + int *hwirq) >> +{ >> + struct device *dev = kvm_vfio_external_base_device(vdev); >> + struct platform_device *platdev; >> + >> + *hwirq = -1; >> + *kvm_vdev = NULL; >> + if (strcmp(dev->bus->name, "platform") == 0) { >> + platdev = to_platform_device(dev); >> + *hwirq = platform_get_irq(platdev, fwd_irq->index); >> + if (*hwirq < 0) { >> + kvm_err("%s incorrect index\n", __func__); >> + return -EINVAL; >> + } >> + } else { >> + kvm_err("%s not a platform device\n", __func__); >> + return -EINVAL; >> + } > > need some spaceing here, also, I would turn this around, first check if > the strcmp fails, and then error out, then do you next check etc., to > avoid so many nested statements. ok > >> + /* is a ref to this device already owned by the KVM-VFIO device? */ > > this comment is not particularly helpful in its current form, it would > be helpful if you specified that we're checking whether that particular > device/irq combo is already registered. > >> + *kvm_vdev = kvm_vfio_find_device(kv, vdev); >> + if (*kvm_vdev) { >> + if (kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index)) { >> + kvm_err("%s irq %d already forwarded\n", >> + __func__, *hwirq); > > don't flood the kernel log because of a user error, just allocate an > error code for this purpose and document it in the ABI, -EEXIST or > something. ok > >> + return -EINVAL; >> + } >> + } >> + return 0; >> +} >> + >> +/** >> + * validate_unforward: check a deassignment is meaningful >> + * @kv: the kvm_vfio device >> + * @vdev: the vfio_device whose irq to deassign belongs to >> + * @fwd_irq: the user struct that contains the fd and irq_index of the irq >> + * @kvm_vdev: the kvm_vfio_device the forwarded irq belongs to, if >> + * it exists >> + * >> + * returns 0 if the provided irq effectively is forwarded >> + * (a ref to this vfio_device is hold and this irq belongs to > held >> + * the forwarded irq of this device) >> + * returns -EINVAL in the negative > > ENOENT should be returned if you don't have an entry. > EINVAL could be used if you supply an fd that isn't a > VFIO device file descriptor, for example. Again, > consider documenting all this in the API. > >> + */ >> +static int kvm_vfio_validate_unforward(struct kvm_vfio *kv, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq, >> + struct kvm_vfio_device **kvm_vdev) >> +{ >> + struct kvm_fwd_irq *pfwd; >> + >> + *kvm_vdev = kvm_vfio_find_device(kv, vdev); >> + if (!kvm_vdev) { >> + kvm_err("%s no forwarded irq for this device\n", __func__); > > don't flood the kernel log ok > >> + return -EINVAL; >> + } >> + pfwd = kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index); >> + if (!pfwd) { >> + kvm_err("%s irq %d is not forwarded\n", __func__, fwd_irq->fd); > > >> + return -EINVAL; > > same here ok > >> + } >> + return 0; >> +} >> + >> +/** >> + * kvm_vfio_forward - set a forwarded IRQ >> + * @kdev: the kvm device >> + * @vdev: the vfio device the IRQ belongs to >> + * @fwd_irq: the user struct containing the irq_index and guest irq >> + * @must_put: tells the caller whether the vfio_device must be put after >> + * the call (ref must be released in case a ref onto this device was >> + * already hold or in case of new device and failure) >> + * >> + * validate the injection, activate forward and store the information > Validate >> + * about which irq and which device is concerned so that on deassign or >> + * kvm-vfio destruction everuthing can be cleaned up. > everything > > I'm not sure I understand this explanation. Do we have concerned > devices? > > I think you want to say something along the lines of: If userspace passed > a valid vfio device and irq handle and the architecture supports > forwarding this combination, register the vfio_device and irq > combination in the .... ok > >> + */ >> +static int kvm_vfio_forward(struct kvm_device *kdev, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq, >> + bool *must_put) >> +{ >> + int ret; >> + struct kvm_fwd_irq *pfwd = NULL; >> + struct kvm_vfio_device *kvm_vdev = NULL; >> + struct kvm_vfio *kv = kdev->private; >> + int hwirq; >> + >> + *must_put = true; >> + ret = kvm_vfio_validate_forward(kv, vdev, fwd_irq, >> + &kvm_vdev, &hwirq); >> + if (ret < 0) >> + return -EINVAL; >> + >> + pfwd = kzalloc(sizeof(*pfwd), GFP_KERNEL); > > seems a bit pointless to zero-out the memory if you're setting all > fields below. ok > >> + if (!pfwd) >> + return -ENOMEM; >> + pfwd->index = fwd_irq->index; >> + pfwd->gsi = fwd_irq->gsi; >> + pfwd->hwirq = hwirq; >> + pfwd->vcpu = kvm_get_vcpu(kdev->kvm, 0); >> + ret = kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_SET_FORWARD); >> + if (ret < 0) { >> + kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_CLEANUP); > > this whole thing feels incredibly broken to me. Setting a forward > should either work or not work, not something in between that leaves > something to be cleaned up. Why this two-stage thingy here? I wanted to exploit the return value of vgic_map_phys_irq which is likely to fail if the phys/virt mapping exists at VGIC level. I already validated the injection from a KVM_VFIO_DEVICE point of view (the device/irq is not known internally). But what if another external component - which does not exist yet - maps the IRQ at VGIC level? Maybe I need to replace the existing validation check by querying the VGIC at low level instead of checking KVM-VFIO local variables. > >> + kfree(pfwd); > > probably want to move your free-and-return-error to the end of the > function. ok > >> + return ret; >> + } >> + >> + if (!kvm_vdev) { >> + /* create & insert the new device and keep the ref */ >> + kvm_vdev = kzalloc(sizeof(*kvm_vdev), GFP_KERNEL); > > again, no need for zeroing out the memory. ok > >> + if (!kvm_vdev) { >> + kvm_arch_set_fwd_state(pfwd, false); >> + kfree(pfwd); >> + return -ENOMEM; >> + } >> + >> + kvm_vdev->vfio_device = vdev; >> + kvm_vdev->fd = fwd_irq->fd; >> + INIT_LIST_HEAD(&kvm_vdev->fwd_irq_list); >> + list_add(&kvm_vdev->node, &kv->device_list); >> + /* >> + * the only case where we keep the ref: >> + * new device and forward setting successful >> + */ >> + *must_put = false; >> + } >> + >> + list_add(&pfwd->link, &kvm_vdev->fwd_irq_list); >> + >> + kvm_debug("forwarding set for fd=%d, hwirq=%d, gsi=%d\n", >> + fwd_irq->fd, hwirq, fwd_irq->gsi); > > please indent this to align with the opening parenthesis. ok > >> + >> + return 0; >> +} >> + >> +/** >> + * remove_assigned_device - put a given device from the list > > this isn't a 'put', at least not *just* a put. correct, I will rephrase > >> + * @kv: the kvm-vfio device >> + * @vdev: the vfio-device to remove >> + * >> + * change the state of all forwarded IRQs, free the forwarded IRQ list, >> + * remove the corresponding kvm_vfio_device from the assigned device >> + * list. >> + * returns true if the device could be removed, false in the negative >> + */ >> +bool remove_assigned_device(struct kvm_vfio *kv, >> + struct vfio_device *vdev) >> +{ >> + struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev; >> + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; >> + bool removed = false; >> + int ret; >> + >> + list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev, >> + &kv->device_list, node) { >> + if (kvm_vdev_iter->vfio_device == vdev) { >> + /* loop on all its forwarded IRQ */ >> + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, >> + &kvm_vdev_iter->fwd_irq_list, >> + link) { > > hmm, seems this function is only called when you have no more forwarded > IRQs, so isn't all of this completely dead (and unnecessary) code? yep I can simplify all that cleanup > >> + ret = kvm_arch_set_fwd_state(fwd_irq_iter, >> + KVM_VFIO_IRQ_SET_NORMAL); >> + if (ret < 0) >> + return ret; > > you're returning an error code to a bool function, which means you'll > return true when there was an error. Is this your intention? ;) definitively not! > > if we have an error here, this would be a very very bad situation wouldn't it? sure. I will simplify this, transform kvm_arch_set_fwd_state into a void function > >> + list_del(&fwd_irq_iter->link); >> + kfree(fwd_irq_iter); >> + } >> + /* all IRQs could be deassigned */ >> + list_del(&kvm_vdev_iter->node); >> + kvm_vfio_device_put_external_user( >> + kvm_vdev_iter->vfio_device); >> + kfree(kvm_vdev_iter); >> + removed = true; >> + break; >> + } >> + } >> + return removed; >> +} >> + >> + >> +/** >> + * remove_fwd_irq - remove a forwarded irq >> + * >> + * @kv: kvm-vfio device >> + * kvm_vdev: the kvm_vfio_device the IRQ belongs to >> + * irq_index: the index of the IRQ >> + * >> + * change the forwarded state of the IRQ, remove the IRQ from >> + * the device forwarded IRQ list. In case it is the last one, >> + * put the device >> + */ >> +int remove_fwd_irq(struct kvm_vfio *kv, >> + struct kvm_vfio_device *kvm_vdev, >> + int irq_index) >> +{ >> + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; >> + int ret = -1; >> + >> + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, >> + &kvm_vdev->fwd_irq_list, link) { > > hmmm, you can only forward one irq for a specific device once, right? > And you already have a lookup function, so why not call that, and then > remove it? > > I'm confused. will fix that > >> + if (fwd_irq_iter->index == irq_index) { >> + ret = kvm_arch_set_fwd_state(fwd_irq_iter, >> + KVM_VFIO_IRQ_SET_NORMAL); >> + if (ret < 0) >> + break; >> + list_del(&fwd_irq_iter->link); >> + kfree(fwd_irq_iter); >> + ret = 0; >> + break; >> + } >> + } >> + if (list_empty(&kvm_vdev->fwd_irq_list)) >> + remove_assigned_device(kv, kvm_vdev->vfio_device); >> + >> + return ret; >> +} >> + >> +/** >> + * kvm_vfio_unforward - remove a forwarded IRQ >> + * @kdev: the kvm device >> + * @vdev: the vfio_device >> + * @fwd_irq: user struct >> + * after checking this IRQ effectively is forwarded, change its state, >> + * remove it from the corresponding kvm_vfio_device list >> + */ >> +static int kvm_vfio_unforward(struct kvm_device *kdev, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq) >> +{ >> + struct kvm_vfio *kv = kdev->private; >> + struct kvm_vfio_device *kvm_vdev; >> + int ret; >> + >> + ret = kvm_vfio_validate_unforward(kv, vdev, fwd_irq, &kvm_vdev); >> + if (ret < 0) >> + return -EINVAL; > > why do you override the return value? Propagate it. ok > >> + >> + ret = remove_fwd_irq(kv, kvm_vdev, fwd_irq->index); >> + if (ret < 0) >> + kvm_err("%s fail unforwarding (fd=%d, index=%d)\n", >> + __func__, fwd_irq->fd, fwd_irq->index); >> + else >> + kvm_debug("%s unforwarding IRQ (fd=%d, index=%d)\n", >> + __func__, fwd_irq->fd, fwd_irq->index); > > again with the kernel log here. ok > > > >> + return ret; >> +} >> + >> + >> + >> + >> +/** >> + * kvm_vfio_set_device - the top function for interracting with a vfio > > top? interacting > >> + * device >> + */ > > probably just skip this comment ok > >> +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) >> +{ >> + struct kvm_vfio *kv = kdev->private; >> + struct vfio_device *vdev; >> + struct kvm_arch_forwarded_irq fwd_irq; /* user struct */ >> + int32_t __user *argp = (int32_t __user *)(unsigned long)arg; >> + >> + switch (attr) { >> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:{ >> + bool must_put; >> + int ret; >> + >> + if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq))) >> + return -EFAULT; >> + vdev = kvm_vfio_get_vfio_device(fwd_irq.fd); >> + if (IS_ERR(vdev)) >> + return PTR_ERR(vdev); > > seems like this whole block of code is replicated below, needs > refactoring. ok > >> + mutex_lock(&kv->lock); >> + ret = kvm_vfio_forward(kdev, vdev, &fwd_irq, &must_put); >> + if (must_put) >> + kvm_vfio_put_vfio_device(vdev); > > this must_put looks plain weird. I think you want to balance your > get/put's always; can't you just get an extra reference in > kvm_vfio_forward() ? I will investigate that. Makes sense > >> + mutex_unlock(&kv->lock); >> + return ret; >> + } >> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: { >> + int ret; >> + >> + if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq))) >> + return -EFAULT; >> + vdev = kvm_vfio_get_vfio_device(fwd_irq.fd); >> + if (IS_ERR(vdev)) >> + return PTR_ERR(vdev); >> + >> + kvm_vfio_device_put_external_user(vdev); > > you're dropping the reference to the device but referencing it in your > unfoward call below? thanks for identifying that bug. > >> + mutex_lock(&kv->lock); >> + ret = kvm_vfio_unforward(kdev, vdev, &fwd_irq); >> + mutex_unlock(&kv->lock); >> + return ret; >> + } >> +#endif >> + default: >> + return -ENXIO; >> + } >> +} >> + >> +/** >> + * kvm_vfio_put_all_devices - cancel forwarded IRQs and put all devices >> + * @kv: kvm-vfio device >> + * >> + * loop on all got devices and their associated forwarded IRQs > > 'loop on all got' ? > > Restore the non-forwarded state for all registered devices and ... ok > >> + * restore the non forwarded state, remove IRQs and their devices from >> + * the respective list, put the vfio platform devices >> + * >> + * When this function is called, the vcpu already are destroyed. No > the VPUCs are already destroyed. >> + * vgic manipulation can happen hence the KVM_VFIO_IRQ_CLEANUP >> + * kvm_arch_set_fwd_state action > > this last bit didn't make any sense to me. Also, why are we referring > to the vgic in generic code? doesn't make sense anymore indeed. I wanted to emphasize the fact that VGIC KVM device is destroyed before the KVM VFIO device and this explains why I need a special CLEANUP cmd (besides the fact I need to call chip->irq_eoi(d) for the forwarded IRQs); > >> + */ >> +int kvm_vfio_put_all_devices(struct kvm_vfio *kv) >> +{ >> + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; >> + struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev; >> + >> + /* loop on all the assigned devices */ > > unnecessary comment ok > >> + list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev, >> + &kv->device_list, node) { >> + >> + /* loop on all its forwarded IRQ */ > > same ok Thanks for the detailed review Best Regards Eric > >> + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, >> + &kvm_vdev_iter->fwd_irq_list, link) { >> + kvm_arch_set_fwd_state(fwd_irq_iter, >> + KVM_VFIO_IRQ_CLEANUP); >> + list_del(&fwd_irq_iter->link); >> + kfree(fwd_irq_iter); >> + } >> + list_del(&kvm_vdev_iter->node); >> + kvm_vfio_device_put_external_user(kvm_vdev_iter->vfio_device); >> + kfree(kvm_vdev_iter); >> + } >> + return 0; >> +} >> + >> + >> static int kvm_vfio_set_attr(struct kvm_device *dev, >> struct kvm_device_attr *attr) >> { >> switch (attr->group) { >> case KVM_DEV_VFIO_GROUP: >> return kvm_vfio_set_group(dev, attr->attr, attr->addr); >> + case KVM_DEV_VFIO_DEVICE: >> + return kvm_vfio_set_device(dev, attr->attr, attr->addr); >> } >> >> return -ENXIO; >> @@ -267,10 +706,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, >> case KVM_DEV_VFIO_GROUP_DEL: >> return 0; >> } >> - >> break; >> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> + case KVM_DEV_VFIO_DEVICE: >> + switch (attr->attr) { >> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: >> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: >> + return 0; >> + } >> + break; >> +#endif >> } >> - >> return -ENXIO; >> } >> >> @@ -284,6 +730,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) >> list_del(&kvg->node); >> kfree(kvg); >> } >> + kvm_vfio_put_all_devices(kv); >> >> kvm_vfio_update_coherency(dev); >> >> @@ -306,6 +753,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) >> return -ENOMEM; >> >> INIT_LIST_HEAD(&kv->group_list); >> + INIT_LIST_HEAD(&kv->device_list); >> mutex_init(&kv->lock); >> >> dev->private = kv; >> -- >> 1.9.1 >> From mboxrd@z Thu Jan 1 00:00:00 1970 From: eric.auger@linaro.org (Eric Auger) Date: Thu, 11 Sep 2014 11:35:56 +0200 Subject: [RFC v2 8/9] KVM: KVM-VFIO: generic KVM_DEV_VFIO_DEVICE command and IRQ forwarding control In-Reply-To: <20140911031026.GI2784@lvm> References: <1409575968-5329-1-git-send-email-eric.auger@linaro.org> <1409575968-5329-9-git-send-email-eric.auger@linaro.org> <20140911031026.GI2784@lvm> Message-ID: <54116CFC.1050807@linaro.org> To: linux-arm-kernel@lists.infradead.org List-Id: linux-arm-kernel.lists.infradead.org On 09/11/2014 05:10 AM, Christoffer Dall wrote: > On Mon, Sep 01, 2014 at 02:52:47PM +0200, Eric Auger wrote: >> This patch introduces a new KVM_DEV_VFIO_DEVICE attribute. >> >> This is a new control channel which enables KVM to cooperate with >> viable VFIO devices. >> >> The kvm-vfio device now holds a list of devices (kvm_vfio_device) >> in addition to a list of groups (kvm_vfio_group). The new >> infrastructure enables to check the validity of the VFIO device >> file descriptor, get and hold a reference to it. >> >> The first concrete implemented command is IRQ forward control: >> KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. >> >> It consists in programing the VFIO driver and KVM in a consistent manner >> so that an optimized IRQ injection/completion is set up. Each >> kvm_vfio_device holds a list of forwarded IRQ. When putting a >> kvm_vfio_device, the implementation makes sure the forwarded IRQs >> are set again in the normal handling state (non forwarded). > > 'putting a kvm_vfio_device' sounds to like you're golf'ing :) > > When a kvm_vfio_device is released? sure > >> >> The forwarding programmming is architecture specific, embodied by the >> kvm_arch_set_fwd_state function. Its implementation is given in a >> separate patch file. > > I would drop the last sentence and instead indicate that this is handled > properly when the architecture does not support such a feature. ok > >> >> The forwarding control modality is enabled by the >> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD define. >> >> Signed-off-by: Eric Auger >> >> --- >> >> v1 -> v2: >> - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> - original patch file separated into 2 parts: generic part moved in vfio.c >> and ARM specific part(kvm_arch_set_fwd_state) >> --- >> include/linux/kvm_host.h | 27 +++ >> virt/kvm/vfio.c | 452 ++++++++++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 477 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h >> index a4c33b3..24350dc 100644 >> --- a/include/linux/kvm_host.h >> +++ b/include/linux/kvm_host.h >> @@ -1065,6 +1065,21 @@ struct kvm_device_ops { >> unsigned long arg); >> }; >> >> +enum kvm_fwd_irq_action { >> + KVM_VFIO_IRQ_SET_FORWARD, >> + KVM_VFIO_IRQ_SET_NORMAL, >> + KVM_VFIO_IRQ_CLEANUP, > > This is KVM internal API, so it would probably be good to document this. > Especially the CLEANUP bit worries me, see below. I will document it > >> +}; >> + >> +/* internal structure describing a forwarded IRQ */ >> +struct kvm_fwd_irq { >> + struct list_head link; > > this list entry is local to the kvm vfio device, right? that means you > probably want a struct with just the below fields, and then have a > containing struct in the generic device file, private to it's logic. I will introduce 2 separate structs > >> + __u32 index; /* platform device irq index */ >> + __u32 hwirq; /*physical IRQ */ >> + __u32 gsi; /* virtual IRQ */ >> + struct kvm_vcpu *vcpu; /* vcpu to inject into*/ >> +}; >> + >> void kvm_device_get(struct kvm_device *dev); >> void kvm_device_put(struct kvm_device *dev); >> struct kvm_device *kvm_device_from_filp(struct file *filp); >> @@ -1075,6 +1090,18 @@ extern struct kvm_device_ops kvm_vfio_ops; >> extern struct kvm_device_ops kvm_arm_vgic_v2_ops; >> extern struct kvm_device_ops kvm_flic_ops; >> >> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> +int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd, > > what's the 'p' in pfwd? will rename > >> + enum kvm_fwd_irq_action action); >> + >> +#else >> +static inline int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd, >> + enum kvm_fwd_irq_action action) >> +{ >> + return 0; >> +} >> +#endif >> + >> #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT >> >> static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) >> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c >> index 76dc7a1..e4a81c4 100644 >> --- a/virt/kvm/vfio.c >> +++ b/virt/kvm/vfio.c >> @@ -18,14 +18,24 @@ >> #include >> #include >> #include >> +#include >> >> struct kvm_vfio_group { >> struct list_head node; >> struct vfio_group *vfio_group; >> }; >> >> +struct kvm_vfio_device { >> + struct list_head node; >> + struct vfio_device *vfio_device; >> + /* list of forwarded IRQs for that VFIO device */ >> + struct list_head fwd_irq_list; >> + int fd; >> +}; >> + >> struct kvm_vfio { >> struct list_head group_list; >> + struct list_head device_list; >> struct mutex lock; >> bool noncoherent; >> }; >> @@ -246,12 +256,441 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) >> return -ENXIO; >> } >> >> +/** >> + * get_vfio_device - returns the vfio-device corresponding to this fd >> + * @fd:fd of the vfio platform device >> + * >> + * checks it is a vfio device >> + * increment its ref counter > > why the short lines? Just write this out in proper English. OK > >> + */ >> +static struct vfio_device *kvm_vfio_get_vfio_device(int fd) >> +{ >> + struct fd f; >> + struct vfio_device *vdev; >> + >> + f = fdget(fd); >> + if (!f.file) >> + return NULL; >> + vdev = kvm_vfio_device_get_external_user(f.file); >> + fdput(f); >> + return vdev; >> +} >> + >> +/** >> + * put_vfio_device: put the vfio platform device >> + * @vdev: vfio_device to put >> + * >> + * decrement the ref counter >> + */ >> +static void kvm_vfio_put_vfio_device(struct vfio_device *vdev) >> +{ >> + kvm_vfio_device_put_external_user(vdev); >> +} >> + >> +/** >> + * kvm_vfio_find_device - look for the device in the assigned >> + * device list >> + * @kv: the kvm-vfio device >> + * @vdev: the vfio_device to look for >> + * >> + * returns the associated kvm_vfio_device if the device is known, >> + * meaning at least 1 IRQ is forwarded for this device. >> + * in the device is not registered, returns NULL. >> + */ > > are these functions meant to be exported? Otherwise they should be > static, and the documentation on these simple list iteration wrappers > seems like overkill imho. could be static indeed > >> +struct kvm_vfio_device *kvm_vfio_find_device(struct kvm_vfio *kv, >> + struct vfio_device *vdev) >> +{ >> + struct kvm_vfio_device *kvm_vdev_iter; >> + >> + list_for_each_entry(kvm_vdev_iter, &kv->device_list, node) { >> + if (kvm_vdev_iter->vfio_device == vdev) >> + return kvm_vdev_iter; >> + } >> + return NULL; >> +} >> + >> +/** >> + * kvm_vfio_find_irq - look for a an irq in the device IRQ list >> + * @kvm_vdev: the kvm_vfio_device >> + * @irq_index: irq index >> + * >> + * returns the forwarded irq struct if it exists, NULL in the negative >> + */ >> +struct kvm_fwd_irq *kvm_vfio_find_irq(struct kvm_vfio_device *kvm_vdev, >> + int irq_index) >> +{ >> + struct kvm_fwd_irq *fwd_irq_iter; >> + >> + list_for_each_entry(fwd_irq_iter, &kvm_vdev->fwd_irq_list, link) { >> + if (fwd_irq_iter->index == irq_index) >> + return fwd_irq_iter; >> + } >> + return NULL; >> +} >> + >> +/** >> + * validate_forward - checks whether forwarding a given IRQ is meaningful >> + * @vdev: vfio_device the IRQ belongs to >> + * @fwd_irq: user struct containing the irq_index to forward >> + * @kvm_vdev: if a forwarded IRQ already exists for that VFIO device, >> + * kvm_vfio_device that holds it >> + * @hwirq: irq numberthe irq index corresponds to >> + * >> + * checks the vfio-device is a platform vfio device >> + * checks the irq_index corresponds to an actual hwirq and >> + * checks this hwirq is not already forwarded >> + * returns < 0 on following errors: >> + * not a platform device, bad irq index, already forwarded >> + */ >> +static int kvm_vfio_validate_forward(struct kvm_vfio *kv, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq, >> + struct kvm_vfio_device **kvm_vdev, >> + int *hwirq) >> +{ >> + struct device *dev = kvm_vfio_external_base_device(vdev); >> + struct platform_device *platdev; >> + >> + *hwirq = -1; >> + *kvm_vdev = NULL; >> + if (strcmp(dev->bus->name, "platform") == 0) { >> + platdev = to_platform_device(dev); >> + *hwirq = platform_get_irq(platdev, fwd_irq->index); >> + if (*hwirq < 0) { >> + kvm_err("%s incorrect index\n", __func__); >> + return -EINVAL; >> + } >> + } else { >> + kvm_err("%s not a platform device\n", __func__); >> + return -EINVAL; >> + } > > need some spaceing here, also, I would turn this around, first check if > the strcmp fails, and then error out, then do you next check etc., to > avoid so many nested statements. ok > >> + /* is a ref to this device already owned by the KVM-VFIO device? */ > > this comment is not particularly helpful in its current form, it would > be helpful if you specified that we're checking whether that particular > device/irq combo is already registered. > >> + *kvm_vdev = kvm_vfio_find_device(kv, vdev); >> + if (*kvm_vdev) { >> + if (kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index)) { >> + kvm_err("%s irq %d already forwarded\n", >> + __func__, *hwirq); > > don't flood the kernel log because of a user error, just allocate an > error code for this purpose and document it in the ABI, -EEXIST or > something. ok > >> + return -EINVAL; >> + } >> + } >> + return 0; >> +} >> + >> +/** >> + * validate_unforward: check a deassignment is meaningful >> + * @kv: the kvm_vfio device >> + * @vdev: the vfio_device whose irq to deassign belongs to >> + * @fwd_irq: the user struct that contains the fd and irq_index of the irq >> + * @kvm_vdev: the kvm_vfio_device the forwarded irq belongs to, if >> + * it exists >> + * >> + * returns 0 if the provided irq effectively is forwarded >> + * (a ref to this vfio_device is hold and this irq belongs to > held >> + * the forwarded irq of this device) >> + * returns -EINVAL in the negative > > ENOENT should be returned if you don't have an entry. > EINVAL could be used if you supply an fd that isn't a > VFIO device file descriptor, for example. Again, > consider documenting all this in the API. > >> + */ >> +static int kvm_vfio_validate_unforward(struct kvm_vfio *kv, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq, >> + struct kvm_vfio_device **kvm_vdev) >> +{ >> + struct kvm_fwd_irq *pfwd; >> + >> + *kvm_vdev = kvm_vfio_find_device(kv, vdev); >> + if (!kvm_vdev) { >> + kvm_err("%s no forwarded irq for this device\n", __func__); > > don't flood the kernel log ok > >> + return -EINVAL; >> + } >> + pfwd = kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index); >> + if (!pfwd) { >> + kvm_err("%s irq %d is not forwarded\n", __func__, fwd_irq->fd); > > >> + return -EINVAL; > > same here ok > >> + } >> + return 0; >> +} >> + >> +/** >> + * kvm_vfio_forward - set a forwarded IRQ >> + * @kdev: the kvm device >> + * @vdev: the vfio device the IRQ belongs to >> + * @fwd_irq: the user struct containing the irq_index and guest irq >> + * @must_put: tells the caller whether the vfio_device must be put after >> + * the call (ref must be released in case a ref onto this device was >> + * already hold or in case of new device and failure) >> + * >> + * validate the injection, activate forward and store the information > Validate >> + * about which irq and which device is concerned so that on deassign or >> + * kvm-vfio destruction everuthing can be cleaned up. > everything > > I'm not sure I understand this explanation. Do we have concerned > devices? > > I think you want to say something along the lines of: If userspace passed > a valid vfio device and irq handle and the architecture supports > forwarding this combination, register the vfio_device and irq > combination in the .... ok > >> + */ >> +static int kvm_vfio_forward(struct kvm_device *kdev, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq, >> + bool *must_put) >> +{ >> + int ret; >> + struct kvm_fwd_irq *pfwd = NULL; >> + struct kvm_vfio_device *kvm_vdev = NULL; >> + struct kvm_vfio *kv = kdev->private; >> + int hwirq; >> + >> + *must_put = true; >> + ret = kvm_vfio_validate_forward(kv, vdev, fwd_irq, >> + &kvm_vdev, &hwirq); >> + if (ret < 0) >> + return -EINVAL; >> + >> + pfwd = kzalloc(sizeof(*pfwd), GFP_KERNEL); > > seems a bit pointless to zero-out the memory if you're setting all > fields below. ok > >> + if (!pfwd) >> + return -ENOMEM; >> + pfwd->index = fwd_irq->index; >> + pfwd->gsi = fwd_irq->gsi; >> + pfwd->hwirq = hwirq; >> + pfwd->vcpu = kvm_get_vcpu(kdev->kvm, 0); >> + ret = kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_SET_FORWARD); >> + if (ret < 0) { >> + kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_CLEANUP); > > this whole thing feels incredibly broken to me. Setting a forward > should either work or not work, not something in between that leaves > something to be cleaned up. Why this two-stage thingy here? I wanted to exploit the return value of vgic_map_phys_irq which is likely to fail if the phys/virt mapping exists at VGIC level. I already validated the injection from a KVM_VFIO_DEVICE point of view (the device/irq is not known internally). But what if another external component - which does not exist yet - maps the IRQ at VGIC level? Maybe I need to replace the existing validation check by querying the VGIC at low level instead of checking KVM-VFIO local variables. > >> + kfree(pfwd); > > probably want to move your free-and-return-error to the end of the > function. ok > >> + return ret; >> + } >> + >> + if (!kvm_vdev) { >> + /* create & insert the new device and keep the ref */ >> + kvm_vdev = kzalloc(sizeof(*kvm_vdev), GFP_KERNEL); > > again, no need for zeroing out the memory. ok > >> + if (!kvm_vdev) { >> + kvm_arch_set_fwd_state(pfwd, false); >> + kfree(pfwd); >> + return -ENOMEM; >> + } >> + >> + kvm_vdev->vfio_device = vdev; >> + kvm_vdev->fd = fwd_irq->fd; >> + INIT_LIST_HEAD(&kvm_vdev->fwd_irq_list); >> + list_add(&kvm_vdev->node, &kv->device_list); >> + /* >> + * the only case where we keep the ref: >> + * new device and forward setting successful >> + */ >> + *must_put = false; >> + } >> + >> + list_add(&pfwd->link, &kvm_vdev->fwd_irq_list); >> + >> + kvm_debug("forwarding set for fd=%d, hwirq=%d, gsi=%d\n", >> + fwd_irq->fd, hwirq, fwd_irq->gsi); > > please indent this to align with the opening parenthesis. ok > >> + >> + return 0; >> +} >> + >> +/** >> + * remove_assigned_device - put a given device from the list > > this isn't a 'put', at least not *just* a put. correct, I will rephrase > >> + * @kv: the kvm-vfio device >> + * @vdev: the vfio-device to remove >> + * >> + * change the state of all forwarded IRQs, free the forwarded IRQ list, >> + * remove the corresponding kvm_vfio_device from the assigned device >> + * list. >> + * returns true if the device could be removed, false in the negative >> + */ >> +bool remove_assigned_device(struct kvm_vfio *kv, >> + struct vfio_device *vdev) >> +{ >> + struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev; >> + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; >> + bool removed = false; >> + int ret; >> + >> + list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev, >> + &kv->device_list, node) { >> + if (kvm_vdev_iter->vfio_device == vdev) { >> + /* loop on all its forwarded IRQ */ >> + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, >> + &kvm_vdev_iter->fwd_irq_list, >> + link) { > > hmm, seems this function is only called when you have no more forwarded > IRQs, so isn't all of this completely dead (and unnecessary) code? yep I can simplify all that cleanup > >> + ret = kvm_arch_set_fwd_state(fwd_irq_iter, >> + KVM_VFIO_IRQ_SET_NORMAL); >> + if (ret < 0) >> + return ret; > > you're returning an error code to a bool function, which means you'll > return true when there was an error. Is this your intention? ;) definitively not! > > if we have an error here, this would be a very very bad situation wouldn't it? sure. I will simplify this, transform kvm_arch_set_fwd_state into a void function > >> + list_del(&fwd_irq_iter->link); >> + kfree(fwd_irq_iter); >> + } >> + /* all IRQs could be deassigned */ >> + list_del(&kvm_vdev_iter->node); >> + kvm_vfio_device_put_external_user( >> + kvm_vdev_iter->vfio_device); >> + kfree(kvm_vdev_iter); >> + removed = true; >> + break; >> + } >> + } >> + return removed; >> +} >> + >> + >> +/** >> + * remove_fwd_irq - remove a forwarded irq >> + * >> + * @kv: kvm-vfio device >> + * kvm_vdev: the kvm_vfio_device the IRQ belongs to >> + * irq_index: the index of the IRQ >> + * >> + * change the forwarded state of the IRQ, remove the IRQ from >> + * the device forwarded IRQ list. In case it is the last one, >> + * put the device >> + */ >> +int remove_fwd_irq(struct kvm_vfio *kv, >> + struct kvm_vfio_device *kvm_vdev, >> + int irq_index) >> +{ >> + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; >> + int ret = -1; >> + >> + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, >> + &kvm_vdev->fwd_irq_list, link) { > > hmmm, you can only forward one irq for a specific device once, right? > And you already have a lookup function, so why not call that, and then > remove it? > > I'm confused. will fix that > >> + if (fwd_irq_iter->index == irq_index) { >> + ret = kvm_arch_set_fwd_state(fwd_irq_iter, >> + KVM_VFIO_IRQ_SET_NORMAL); >> + if (ret < 0) >> + break; >> + list_del(&fwd_irq_iter->link); >> + kfree(fwd_irq_iter); >> + ret = 0; >> + break; >> + } >> + } >> + if (list_empty(&kvm_vdev->fwd_irq_list)) >> + remove_assigned_device(kv, kvm_vdev->vfio_device); >> + >> + return ret; >> +} >> + >> +/** >> + * kvm_vfio_unforward - remove a forwarded IRQ >> + * @kdev: the kvm device >> + * @vdev: the vfio_device >> + * @fwd_irq: user struct >> + * after checking this IRQ effectively is forwarded, change its state, >> + * remove it from the corresponding kvm_vfio_device list >> + */ >> +static int kvm_vfio_unforward(struct kvm_device *kdev, >> + struct vfio_device *vdev, >> + struct kvm_arch_forwarded_irq *fwd_irq) >> +{ >> + struct kvm_vfio *kv = kdev->private; >> + struct kvm_vfio_device *kvm_vdev; >> + int ret; >> + >> + ret = kvm_vfio_validate_unforward(kv, vdev, fwd_irq, &kvm_vdev); >> + if (ret < 0) >> + return -EINVAL; > > why do you override the return value? Propagate it. ok > >> + >> + ret = remove_fwd_irq(kv, kvm_vdev, fwd_irq->index); >> + if (ret < 0) >> + kvm_err("%s fail unforwarding (fd=%d, index=%d)\n", >> + __func__, fwd_irq->fd, fwd_irq->index); >> + else >> + kvm_debug("%s unforwarding IRQ (fd=%d, index=%d)\n", >> + __func__, fwd_irq->fd, fwd_irq->index); > > again with the kernel log here. ok > > > >> + return ret; >> +} >> + >> + >> + >> + >> +/** >> + * kvm_vfio_set_device - the top function for interracting with a vfio > > top? interacting > >> + * device >> + */ > > probably just skip this comment ok > >> +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) >> +{ >> + struct kvm_vfio *kv = kdev->private; >> + struct vfio_device *vdev; >> + struct kvm_arch_forwarded_irq fwd_irq; /* user struct */ >> + int32_t __user *argp = (int32_t __user *)(unsigned long)arg; >> + >> + switch (attr) { >> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:{ >> + bool must_put; >> + int ret; >> + >> + if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq))) >> + return -EFAULT; >> + vdev = kvm_vfio_get_vfio_device(fwd_irq.fd); >> + if (IS_ERR(vdev)) >> + return PTR_ERR(vdev); > > seems like this whole block of code is replicated below, needs > refactoring. ok > >> + mutex_lock(&kv->lock); >> + ret = kvm_vfio_forward(kdev, vdev, &fwd_irq, &must_put); >> + if (must_put) >> + kvm_vfio_put_vfio_device(vdev); > > this must_put looks plain weird. I think you want to balance your > get/put's always; can't you just get an extra reference in > kvm_vfio_forward() ? I will investigate that. Makes sense > >> + mutex_unlock(&kv->lock); >> + return ret; >> + } >> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: { >> + int ret; >> + >> + if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq))) >> + return -EFAULT; >> + vdev = kvm_vfio_get_vfio_device(fwd_irq.fd); >> + if (IS_ERR(vdev)) >> + return PTR_ERR(vdev); >> + >> + kvm_vfio_device_put_external_user(vdev); > > you're dropping the reference to the device but referencing it in your > unfoward call below? thanks for identifying that bug. > >> + mutex_lock(&kv->lock); >> + ret = kvm_vfio_unforward(kdev, vdev, &fwd_irq); >> + mutex_unlock(&kv->lock); >> + return ret; >> + } >> +#endif >> + default: >> + return -ENXIO; >> + } >> +} >> + >> +/** >> + * kvm_vfio_put_all_devices - cancel forwarded IRQs and put all devices >> + * @kv: kvm-vfio device >> + * >> + * loop on all got devices and their associated forwarded IRQs > > 'loop on all got' ? > > Restore the non-forwarded state for all registered devices and ... ok > >> + * restore the non forwarded state, remove IRQs and their devices from >> + * the respective list, put the vfio platform devices >> + * >> + * When this function is called, the vcpu already are destroyed. No > the VPUCs are already destroyed. >> + * vgic manipulation can happen hence the KVM_VFIO_IRQ_CLEANUP >> + * kvm_arch_set_fwd_state action > > this last bit didn't make any sense to me. Also, why are we referring > to the vgic in generic code? doesn't make sense anymore indeed. I wanted to emphasize the fact that VGIC KVM device is destroyed before the KVM VFIO device and this explains why I need a special CLEANUP cmd (besides the fact I need to call chip->irq_eoi(d) for the forwarded IRQs); > >> + */ >> +int kvm_vfio_put_all_devices(struct kvm_vfio *kv) >> +{ >> + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; >> + struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev; >> + >> + /* loop on all the assigned devices */ > > unnecessary comment ok > >> + list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev, >> + &kv->device_list, node) { >> + >> + /* loop on all its forwarded IRQ */ > > same ok Thanks for the detailed review Best Regards Eric > >> + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, >> + &kvm_vdev_iter->fwd_irq_list, link) { >> + kvm_arch_set_fwd_state(fwd_irq_iter, >> + KVM_VFIO_IRQ_CLEANUP); >> + list_del(&fwd_irq_iter->link); >> + kfree(fwd_irq_iter); >> + } >> + list_del(&kvm_vdev_iter->node); >> + kvm_vfio_device_put_external_user(kvm_vdev_iter->vfio_device); >> + kfree(kvm_vdev_iter); >> + } >> + return 0; >> +} >> + >> + >> static int kvm_vfio_set_attr(struct kvm_device *dev, >> struct kvm_device_attr *attr) >> { >> switch (attr->group) { >> case KVM_DEV_VFIO_GROUP: >> return kvm_vfio_set_group(dev, attr->attr, attr->addr); >> + case KVM_DEV_VFIO_DEVICE: >> + return kvm_vfio_set_device(dev, attr->attr, attr->addr); >> } >> >> return -ENXIO; >> @@ -267,10 +706,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, >> case KVM_DEV_VFIO_GROUP_DEL: >> return 0; >> } >> - >> break; >> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD >> + case KVM_DEV_VFIO_DEVICE: >> + switch (attr->attr) { >> + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: >> + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: >> + return 0; >> + } >> + break; >> +#endif >> } >> - >> return -ENXIO; >> } >> >> @@ -284,6 +730,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) >> list_del(&kvg->node); >> kfree(kvg); >> } >> + kvm_vfio_put_all_devices(kv); >> >> kvm_vfio_update_coherency(dev); >> >> @@ -306,6 +753,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) >> return -ENOMEM; >> >> INIT_LIST_HEAD(&kv->group_list); >> + INIT_LIST_HEAD(&kv->device_list); >> mutex_init(&kv->lock); >> >> dev->private = kv; >> -- >> 1.9.1 >>