From mboxrd@z Thu Jan 1 00:00:00 1970 From: Julien Grall Subject: [PATCH v4 12/21] xen/passthrough: iommu: Split generic IOMMU code Date: Tue, 22 Apr 2014 14:14:26 +0100 Message-ID: <1398172475-27873-13-git-send-email-julien.grall@linaro.org> References: <1398172475-27873-1-git-send-email-julien.grall@linaro.org> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail6.bemta5.messagelabs.com ([195.245.231.135]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1WcaXT-00063x-VS for xen-devel@lists.xenproject.org; Tue, 22 Apr 2014 13:15:08 +0000 Received: by mail-ee0-f52.google.com with SMTP id e49so4646263eek.11 for ; Tue, 22 Apr 2014 06:15:04 -0700 (PDT) In-Reply-To: <1398172475-27873-1-git-send-email-julien.grall@linaro.org> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xenproject.org Cc: ian.campbell@citrix.com, Julien Grall , tim@xen.org, stefano.stabellini@citrix.com, Jan Beulich , Xiantao Zhang List-Id: xen-devel@lists.xenproject.org The generic IOMMU framework code (xen/drivers/passthrough/iommu.c) contains functions specific to x86 and PCI. Split the framework into 3 distinct files: - iommu.c: contains generic functions shared between x86 and ARM (once it is supported) - pci.c: contains specific functions for PCI passthrough - x86/iommu.c: contains specific functions for x86 io.c contains x86 HVM specific code. Only compile it for x86. This patch is mostly code movement into new files. Signed-off-by: Julien Grall Cc: Xiantao Zhang Cc: Jan Beulich --- Changes in v4: - Rebase on the latest xen (hwdom series was pushed) - Move iommu_populate_page_table to x86/iommu.c and rename it to arch_iommu_populate_page_table - Introduce arch_iommu_check_hwdom_reqs which will check if the IOMMU is enabled. 
On ARM the IOMMU is not required because most current hardware doesn't provide an IOMMU - iommu_set_hwdom_mapping has been moved to vtd/extern.h (made by another patch) Changes in v3: - share_p2m_table should stay in common code - update_ire_from_msi and read_msi_from_ire should go in pci code - remove switch case in iommu_do_domctl Changes in v2: - Update commit message - Removing spurious change in drivers/passthrough/vtd/iommu.c - Move iommu_x86.c to x86/iommu.c - Merge iommu_pci.c into pci.c - Introduce iommu_do_pci_domctl --- xen/drivers/passthrough/Makefile | 4 +- xen/drivers/passthrough/iommu.c | 486 ++-------------------------------- xen/drivers/passthrough/pci.c | 387 +++++++++++++++++++++++++++ xen/drivers/passthrough/x86/Makefile | 1 + xen/drivers/passthrough/x86/iommu.c | 120 +++++++++ xen/include/asm-x86/iommu.h | 41 +++ xen/include/xen/hvm/iommu.h | 1 + xen/include/xen/iommu.h | 51 ++-- 8 files changed, 596 insertions(+), 495 deletions(-) create mode 100644 xen/drivers/passthrough/x86/iommu.c create mode 100644 xen/include/asm-x86/iommu.h diff --git a/xen/drivers/passthrough/Makefile b/xen/drivers/passthrough/Makefile index 7c40fa5..6e08f89 100644 --- a/xen/drivers/passthrough/Makefile +++ b/xen/drivers/passthrough/Makefile @@ -3,5 +3,5 @@ subdir-$(x86) += amd subdir-$(x86_64) += x86 obj-y += iommu.o -obj-y += io.o -obj-y += pci.o +obj-$(x86) += io.o +obj-$(HAS_PCI) += pci.o diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c index ccb354d..921c67b 100644 --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -24,7 +24,6 @@ #include static void parse_iommu_param(char *s); -static int iommu_populate_page_table(struct domain *d); static void iommu_dump_p2m_table(unsigned char key); /* @@ -135,8 +134,7 @@ static void __hwdom_init check_hwdom_reqs(struct domain *d) if ( !paging_mode_translate(d) ) return; - if ( !iommu_enabled ) - panic("Presently, iommu must be enabled for pvh dom0\n"); + 
arch_iommu_check_hwdom_reqs(d); if ( iommu_passthrough ) panic("Dom0 uses paging translated mode, dom0-passthrough must not be " @@ -179,86 +177,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d) return hd->platform_ops->hwdom_init(d); } -int iommu_add_device(struct pci_dev *pdev) -{ - struct hvm_iommu *hd; - int rc; - u8 devfn; - - if ( !pdev->domain ) - return -EINVAL; - - ASSERT(spin_is_locked(&pcidevs_lock)); - - hd = domain_hvm_iommu(pdev->domain); - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - rc = hd->platform_ops->add_device(pdev->devfn, pdev); - if ( rc || !pdev->phantom_stride ) - return rc; - - for ( devfn = pdev->devfn ; ; ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - return 0; - rc = hd->platform_ops->add_device(devfn, pdev); - if ( rc ) - printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n", - pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); - } -} - -int iommu_enable_device(struct pci_dev *pdev) -{ - struct hvm_iommu *hd; - - if ( !pdev->domain ) - return -EINVAL; - - ASSERT(spin_is_locked(&pcidevs_lock)); - - hd = domain_hvm_iommu(pdev->domain); - if ( !iommu_enabled || !hd->platform_ops || - !hd->platform_ops->enable_device ) - return 0; - - return hd->platform_ops->enable_device(pdev); -} - -int iommu_remove_device(struct pci_dev *pdev) -{ - struct hvm_iommu *hd; - u8 devfn; - - if ( !pdev->domain ) - return -EINVAL; - - hd = domain_hvm_iommu(pdev->domain); - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - for ( devfn = pdev->devfn ; pdev->phantom_stride; ) - { - int rc; - - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - break; - rc = hd->platform_ops->remove_device(devfn, pdev); - if ( !rc ) - continue; - - printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n", - pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); - return rc; - } - - return hd->platform_ops->remove_device(pdev->devfn, 
pdev); -} - -static void iommu_teardown(struct domain *d) +void iommu_teardown(struct domain *d) { const struct hvm_iommu *hd = domain_hvm_iommu(d); @@ -267,151 +186,6 @@ static void iommu_teardown(struct domain *d) tasklet_schedule(&iommu_pt_cleanup_tasklet); } -/* - * If the device isn't owned by the hardware domain, it means it already - * has been assigned to other domain, or it doesn't exist. - */ -static int device_assigned(u16 seg, u8 bus, u8 devfn) -{ - struct pci_dev *pdev; - - spin_lock(&pcidevs_lock); - pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn); - spin_unlock(&pcidevs_lock); - - return pdev ? 0 : -EBUSY; -} - -static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct pci_dev *pdev; - int rc = 0; - - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - /* Prevent device assign if mem paging or mem sharing have been - * enabled for this domain */ - if ( unlikely(!need_iommu(d) && - (d->arch.hvm_domain.mem_sharing_enabled || - d->mem_event->paging.ring_page)) ) - return -EXDEV; - - if ( !spin_trylock(&pcidevs_lock) ) - return -ERESTART; - - if ( need_iommu(d) <= 0 ) - { - if ( !iommu_use_hap_pt(d) ) - { - rc = iommu_populate_page_table(d); - if ( rc ) - { - spin_unlock(&pcidevs_lock); - return rc; - } - } - d->need_iommu = 1; - } - - pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn); - if ( !pdev ) - { - rc = pci_get_pdev(seg, bus, devfn) ? 
-EBUSY : -ENODEV; - goto done; - } - - pdev->fault.count = 0; - - if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) ) - goto done; - - for ( ; pdev->phantom_stride; rc = 0 ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - break; - rc = hd->platform_ops->assign_device(d, devfn, pdev); - if ( rc ) - printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n", - d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - rc); - } - - done: - if ( !has_arch_pdevs(d) && need_iommu(d) ) - iommu_teardown(d); - spin_unlock(&pcidevs_lock); - - return rc; -} - -static int iommu_populate_page_table(struct domain *d) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct page_info *page; - int rc = 0, n = 0; - - d->need_iommu = -1; - - this_cpu(iommu_dont_flush_iotlb) = 1; - spin_lock(&d->page_alloc_lock); - - if ( unlikely(d->is_dying) ) - rc = -ESRCH; - - while ( !rc && (page = page_list_remove_head(&d->page_list)) ) - { - if ( is_hvm_domain(d) || - (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) - { - BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page)))); - rc = hd->platform_ops->map_page( - d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page), - IOMMUF_readable|IOMMUF_writable); - if ( rc ) - { - page_list_add(page, &d->page_list); - break; - } - } - page_list_add_tail(page, &d->arch.relmem_list); - if ( !(++n & 0xff) && !page_list_empty(&d->page_list) && - hypercall_preempt_check() ) - rc = -ERESTART; - } - - if ( !rc ) - { - /* - * The expectation here is that generally there are many normal pages - * on relmem_list (the ones we put there) and only few being in an - * offline/broken state. The latter ones are always at the head of the - * list. Hence we first move the whole list, and then move back the - * first few entries. 
- */ - page_list_move(&d->page_list, &d->arch.relmem_list); - while ( (page = page_list_first(&d->page_list)) != NULL && - (page->count_info & (PGC_state|PGC_broken)) ) - { - page_list_del(page, &d->page_list); - page_list_add_tail(page, &d->arch.relmem_list); - } - } - - spin_unlock(&d->page_alloc_lock); - this_cpu(iommu_dont_flush_iotlb) = 0; - - if ( !rc ) - iommu_iotlb_flush_all(d); - else if ( rc != -ERESTART ) - iommu_teardown(d); - - return rc; -} - - void iommu_domain_destroy(struct domain *d) { struct hvm_iommu *hd = domain_hvm_iommu(d); @@ -490,53 +264,6 @@ void iommu_iotlb_flush_all(struct domain *d) hd->platform_ops->iotlb_flush_all(d); } -/* caller should hold the pcidevs_lock */ -int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct pci_dev *pdev = NULL; - int ret = 0; - - if ( !iommu_enabled || !hd->platform_ops ) - return -EINVAL; - - ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev_by_domain(d, seg, bus, devfn); - if ( !pdev ) - return -ENODEV; - - while ( pdev->phantom_stride ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - break; - ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn, pdev); - if ( !ret ) - continue; - - printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n", - d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret); - return ret; - } - - devfn = pdev->devfn; - ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn, pdev); - if ( ret ) - { - dprintk(XENLOG_G_ERR, - "d%d: deassign device (%04x:%02x:%02x.%u) failed\n", - d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - return ret; - } - - pdev->fault.count = 0; - - if ( !has_arch_pdevs(d) && need_iommu(d) ) - iommu_teardown(d); - - return ret; -} - int __init iommu_setup(void) { int rc = -ENODEV; @@ -577,91 +304,27 @@ int __init iommu_setup(void) return rc; } -static int iommu_get_device_group( - struct 
domain *d, u16 seg, u8 bus, u8 devfn, - XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct pci_dev *pdev; - int group_id, sdev_id; - u32 bdf; - int i = 0; - const struct iommu_ops *ops = hd->platform_ops; - - if ( !iommu_enabled || !ops || !ops->get_device_group_id ) - return 0; - - group_id = ops->get_device_group_id(seg, bus, devfn); - - spin_lock(&pcidevs_lock); - for_each_pdev( d, pdev ) - { - if ( (pdev->seg != seg) || - ((pdev->bus == bus) && (pdev->devfn == devfn)) ) - continue; - - if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) ) - continue; - - sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn); - if ( (sdev_id == group_id) && (i < max_sdevs) ) - { - bdf = 0; - bdf |= (pdev->bus & 0xff) << 16; - bdf |= (pdev->devfn & 0xff) << 8; - - if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) - { - spin_unlock(&pcidevs_lock); - return -1; - } - i++; - } - } - spin_unlock(&pcidevs_lock); - - return i; -} - -void iommu_update_ire_from_apic( - unsigned int apic, unsigned int reg, unsigned int value) -{ - const struct iommu_ops *ops = iommu_get_ops(); - ops->update_ire_from_apic(apic, reg, value); -} - -int iommu_update_ire_from_msi( - struct msi_desc *msi_desc, struct msi_msg *msg) +void iommu_resume() { const struct iommu_ops *ops = iommu_get_ops(); - return iommu_intremap ? 
ops->update_ire_from_msi(msi_desc, msg) : 0; + if ( iommu_enabled ) + ops->resume(); } -void iommu_read_msi_from_ire( - struct msi_desc *msi_desc, struct msi_msg *msg) +int iommu_do_domctl( + struct xen_domctl *domctl, struct domain *d, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) { - const struct iommu_ops *ops = iommu_get_ops(); - if ( iommu_intremap ) - ops->read_msi_from_ire(msi_desc, msg); -} + int ret = -ENOSYS; -unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg) -{ - const struct iommu_ops *ops = iommu_get_ops(); - return ops->read_apic_from_ire(apic, reg); -} + if ( !iommu_enabled ) + return -ENOSYS; -int __init iommu_setup_hpet_msi(struct msi_desc *msi) -{ - const struct iommu_ops *ops = iommu_get_ops(); - return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV; -} +#ifdef HAS_PCI + ret = iommu_do_pci_domctl(domctl, d, u_domctl); +#endif -void iommu_resume() -{ - const struct iommu_ops *ops = iommu_get_ops(); - if ( iommu_enabled ) - ops->resume(); + return ret; } void iommu_suspend() @@ -687,125 +350,6 @@ void iommu_crash_shutdown(void) iommu_enabled = iommu_intremap = 0; } -int iommu_do_domctl( - struct xen_domctl *domctl, struct domain *d, - XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) -{ - u16 seg; - u8 bus, devfn; - int ret = 0; - - if ( !iommu_enabled ) - return -ENOSYS; - - switch ( domctl->cmd ) - { - case XEN_DOMCTL_get_device_group: - { - u32 max_sdevs; - XEN_GUEST_HANDLE_64(uint32) sdevs; - - ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.get_device_group.machine_sbdf >> 16; - bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.get_device_group.machine_sbdf & 0xff; - max_sdevs = domctl->u.get_device_group.max_sdevs; - sdevs = domctl->u.get_device_group.sdev_array; - - ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs); - if ( ret < 0 ) - { - dprintk(XENLOG_ERR, "iommu_get_device_group() 
failed!\n"); - ret = -EFAULT; - domctl->u.get_device_group.num_sdevs = 0; - } - else - { - domctl->u.get_device_group.num_sdevs = ret; - ret = 0; - } - if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) ) - ret = -EFAULT; - } - break; - - case XEN_DOMCTL_test_assign_device: - ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.assign_device.machine_sbdf >> 16; - bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.assign_device.machine_sbdf & 0xff; - - if ( device_assigned(seg, bus, devfn) ) - { - printk(XENLOG_G_INFO - "%04x:%02x:%02x.%u already assigned, or non-existent\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - ret = -EINVAL; - } - break; - - case XEN_DOMCTL_assign_device: - if ( unlikely(d->is_dying) ) - { - ret = -EINVAL; - break; - } - - ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.assign_device.machine_sbdf >> 16; - bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.assign_device.machine_sbdf & 0xff; - - ret = device_assigned(seg, bus, devfn) ?: - assign_device(d, seg, bus, devfn); - if ( ret == -ERESTART ) - ret = hypercall_create_continuation(__HYPERVISOR_domctl, - "h", u_domctl); - else if ( ret ) - printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: " - "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - d->domain_id, ret); - - break; - - case XEN_DOMCTL_deassign_device: - ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.assign_device.machine_sbdf >> 16; - bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.assign_device.machine_sbdf & 0xff; - - spin_lock(&pcidevs_lock); - ret = deassign_device(d, seg, bus, devfn); - spin_unlock(&pcidevs_lock); - if ( ret ) - printk(XENLOG_G_ERR - "deassign 
%04x:%02x:%02x.%u from dom%d failed (%d)\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - d->domain_id, ret); - - break; - - default: - ret = -ENOSYS; - break; - } - - return ret; -} - static void iommu_dump_p2m_table(unsigned char key) { struct domain *d; diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 0794eaf..3488542 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -995,6 +998,390 @@ static int __init setup_dump_pcidevs(void) } __initcall(setup_dump_pcidevs); +int iommu_update_ire_from_msi( + struct msi_desc *msi_desc, struct msi_msg *msg) +{ + const struct iommu_ops *ops = iommu_get_ops(); + return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0; +} + +void iommu_read_msi_from_ire( + struct msi_desc *msi_desc, struct msi_msg *msg) +{ + const struct iommu_ops *ops = iommu_get_ops(); + if ( iommu_intremap ) + ops->read_msi_from_ire(msi_desc, msg); +} + +int iommu_add_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + int rc; + u8 devfn; + + if ( !pdev->domain ) + return -EINVAL; + + ASSERT(spin_is_locked(&pcidevs_lock)); + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + rc = hd->platform_ops->add_device(pdev->devfn, pdev); + if ( rc || !pdev->phantom_stride ) + return rc; + + for ( devfn = pdev->devfn ; ; ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + return 0; + rc = hd->platform_ops->add_device(devfn, pdev); + if ( rc ) + printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); + } +} + +int iommu_enable_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + + if ( !pdev->domain ) + return -EINVAL; + + ASSERT(spin_is_locked(&pcidevs_lock)); + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled 
|| !hd->platform_ops || + !hd->platform_ops->enable_device ) + return 0; + + return hd->platform_ops->enable_device(pdev); +} + +int iommu_remove_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + u8 devfn; + + if ( !pdev->domain ) + return -EINVAL; + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + for ( devfn = pdev->devfn ; pdev->phantom_stride; ) + { + int rc; + + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + rc = hd->platform_ops->remove_device(devfn, pdev); + if ( !rc ) + continue; + + printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); + return rc; + } + + return hd->platform_ops->remove_device(pdev->devfn, pdev); +} + +/* + * If the device isn't owned by the hardware domain, it means it already + * has been assigned to other domain, or it doesn't exist. + */ +static int device_assigned(u16 seg, u8 bus, u8 devfn) +{ + struct pci_dev *pdev; + + spin_lock(&pcidevs_lock); + pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn); + spin_unlock(&pcidevs_lock); + + return pdev ? 
0 : -EBUSY; +} + +static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + int rc = 0; + + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + /* Prevent device assign if mem paging or mem sharing have been + * enabled for this domain */ + if ( unlikely(!need_iommu(d) && + (d->arch.hvm_domain.mem_sharing_enabled || + d->mem_event->paging.ring_page)) ) + return -EXDEV; + + if ( !spin_trylock(&pcidevs_lock) ) + return -ERESTART; + + if ( need_iommu(d) <= 0 ) + { + if ( !iommu_use_hap_pt(d) ) + { + rc = arch_iommu_populate_page_table(d); + if ( rc ) + { + spin_unlock(&pcidevs_lock); + return rc; + } + } + d->need_iommu = 1; + } + + pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn); + if ( !pdev ) + { + rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV; + goto done; + } + + pdev->fault.count = 0; + + if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) ) + goto done; + + for ( ; pdev->phantom_stride; rc = 0 ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + rc = hd->platform_ops->assign_device(d, devfn, pdev); + if ( rc ) + printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + rc); + } + + done: + if ( !has_arch_pdevs(d) && need_iommu(d) ) + iommu_teardown(d); + spin_unlock(&pcidevs_lock); + + return rc; +} + +/* caller should hold the pcidevs_lock */ +int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev = NULL; + int ret = 0; + + if ( !iommu_enabled || !hd->platform_ops ) + return -EINVAL; + + ASSERT(spin_is_locked(&pcidevs_lock)); + pdev = pci_get_pdev_by_domain(d, seg, bus, devfn); + if ( !pdev ) + return -ENODEV; + + while ( pdev->phantom_stride ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + 
break; + ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn, pdev); + if ( !ret ) + continue; + + printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret); + return ret; + } + + devfn = pdev->devfn; + ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn, pdev); + if ( ret ) + { + dprintk(XENLOG_G_ERR, + "d%d: deassign device (%04x:%02x:%02x.%u) failed\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return ret; + } + + pdev->fault.count = 0; + + if ( !has_arch_pdevs(d) && need_iommu(d) ) + iommu_teardown(d); + + return ret; +} + +static int iommu_get_device_group( + struct domain *d, u16 seg, u8 bus, u8 devfn, + XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + int group_id, sdev_id; + u32 bdf; + int i = 0; + const struct iommu_ops *ops = hd->platform_ops; + + if ( !iommu_enabled || !ops || !ops->get_device_group_id ) + return 0; + + group_id = ops->get_device_group_id(seg, bus, devfn); + + spin_lock(&pcidevs_lock); + for_each_pdev( d, pdev ) + { + if ( (pdev->seg != seg) || + ((pdev->bus == bus) && (pdev->devfn == devfn)) ) + continue; + + if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) ) + continue; + + sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn); + if ( (sdev_id == group_id) && (i < max_sdevs) ) + { + bdf = 0; + bdf |= (pdev->bus & 0xff) << 16; + bdf |= (pdev->devfn & 0xff) << 8; + + if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) + { + spin_unlock(&pcidevs_lock); + return -1; + } + i++; + } + } + + spin_unlock(&pcidevs_lock); + + return i; +} + +int iommu_do_pci_domctl( + struct xen_domctl *domctl, struct domain *d, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) +{ + u16 seg; + u8 bus, devfn; + int ret = 0; + + switch ( domctl->cmd ) + { + case XEN_DOMCTL_get_device_group: + { + u32 max_sdevs; + 
XEN_GUEST_HANDLE_64(uint32) sdevs; + + ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.get_device_group.machine_sbdf >> 16; + bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.get_device_group.machine_sbdf & 0xff; + max_sdevs = domctl->u.get_device_group.max_sdevs; + sdevs = domctl->u.get_device_group.sdev_array; + + ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs); + if ( ret < 0 ) + { + dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n"); + ret = -EFAULT; + domctl->u.get_device_group.num_sdevs = 0; + } + else + { + domctl->u.get_device_group.num_sdevs = ret; + ret = 0; + } + if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) ) + ret = -EFAULT; + } + break; + + case XEN_DOMCTL_test_assign_device: + ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.assign_device.machine_sbdf >> 16; + bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.assign_device.machine_sbdf & 0xff; + + if ( device_assigned(seg, bus, devfn) ) + { + printk(XENLOG_G_INFO + "%04x:%02x:%02x.%u already assigned, or non-existent\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + ret = -EINVAL; + } + break; + + case XEN_DOMCTL_assign_device: + if ( unlikely(d->is_dying) ) + { + ret = -EINVAL; + break; + } + + ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.assign_device.machine_sbdf >> 16; + bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.assign_device.machine_sbdf & 0xff; + + ret = device_assigned(seg, bus, devfn) ?: + assign_device(d, seg, bus, devfn); + if ( ret == -ERESTART ) + ret = hypercall_create_continuation(__HYPERVISOR_domctl, + "h", u_domctl); + else if ( ret ) + printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: " + "assign %04x:%02x:%02x.%u to dom%d failed 
(%d)\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + d->domain_id, ret); + + break; + + case XEN_DOMCTL_deassign_device: + ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.assign_device.machine_sbdf >> 16; + bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.assign_device.machine_sbdf & 0xff; + + spin_lock(&pcidevs_lock); + ret = deassign_device(d, seg, bus, devfn); + spin_unlock(&pcidevs_lock); + if ( ret ) + printk(XENLOG_G_ERR + "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + d->domain_id, ret); + + break; + + default: + ret = -ENOSYS; + break; + } + + return ret; +} + /* * Local variables: * mode: C diff --git a/xen/drivers/passthrough/x86/Makefile b/xen/drivers/passthrough/x86/Makefile index c124a51..a70cf94 100644 --- a/xen/drivers/passthrough/x86/Makefile +++ b/xen/drivers/passthrough/x86/Makefile @@ -1 +1,2 @@ obj-y += ats.o +obj-y += iommu.o diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c new file mode 100644 index 0000000..b97c58e --- /dev/null +++ b/xen/drivers/passthrough/x86/iommu.c @@ -0,0 +1,120 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +void iommu_update_ire_from_apic( + unsigned int apic, unsigned int reg, unsigned int value) +{ + const struct iommu_ops *ops = iommu_get_ops(); + ops->update_ire_from_apic(apic, reg, value); +} + +unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg) +{ + const struct iommu_ops *ops = iommu_get_ops(); + return ops->read_apic_from_ire(apic, reg); +} + +int __init iommu_setup_hpet_msi(struct msi_desc *msi) +{ + const struct iommu_ops *ops = iommu_get_ops(); + return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV; +} + +int arch_iommu_populate_page_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct page_info *page; + int rc = 0, n = 0; + + d->need_iommu = -1; + + this_cpu(iommu_dont_flush_iotlb) = 1; + spin_lock(&d->page_alloc_lock); + + if ( unlikely(d->is_dying) ) + rc = -ESRCH; + + while ( !rc && (page = page_list_remove_head(&d->page_list)) ) + { + if ( is_hvm_domain(d) || + (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) + { + BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page)))); + rc = hd->platform_ops->map_page( + d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page), + IOMMUF_readable|IOMMUF_writable); + if ( rc ) + { + page_list_add(page, &d->page_list); + break; + } + } + page_list_add_tail(page, &d->arch.relmem_list); + if ( !(++n & 0xff) && !page_list_empty(&d->page_list) && + hypercall_preempt_check() ) + rc = -ERESTART; + } + + if ( !rc ) + { + /* + * The expectation here is that generally there are many normal pages + * on relmem_list (the ones we put there) and only few being in an + * offline/broken state. The latter ones are always at the head of the + * list. Hence we first move the whole list, and then move back the + * first few entries. 
+ */ + page_list_move(&d->page_list, &d->arch.relmem_list); + while ( (page = page_list_first(&d->page_list)) != NULL && + (page->count_info & (PGC_state|PGC_broken)) ) + { + page_list_del(page, &d->page_list); + page_list_add_tail(page, &d->arch.relmem_list); + } + } + + spin_unlock(&d->page_alloc_lock); + this_cpu(iommu_dont_flush_iotlb) = 0; + + if ( !rc ) + iommu_iotlb_flush_all(d); + else if ( rc != -ERESTART ) + iommu_teardown(d); + + return rc; +} + +void __hwdom_init arch_iommu_check_hwdom_reqs(struct domain *d) +{ + if ( !iommu_enabled ) + panic("Presently, iommu must be enabled for pvh dom0\n"); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h new file mode 100644 index 0000000..10edfc2 --- /dev/null +++ b/xen/include/asm-x86/iommu.h @@ -0,0 +1,41 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. +*/ +#ifndef __ARCH_X86_IOMMU_H__ +#define __ARCH_X86_IOMMU_H__ + +#define MAX_IOMMUS 32 + +#include + +void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); +unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg); +int iommu_setup_hpet_msi(struct msi_desc *); + +/* While VT-d specific, this must get declared in a generic header. 
*/ +int adjust_vtd_irq_affinities(void); +void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); +int iommu_supports_eim(void); +int iommu_enable_x2apic_IR(void); +void iommu_disable_x2apic_IR(void); + +#endif /* !__ARCH_X86_IOMMU_H__ */ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/xen/hvm/iommu.h b/xen/include/xen/hvm/iommu.h index dc9c766..9b32fa6 100644 --- a/xen/include/xen/hvm/iommu.h +++ b/xen/include/xen/hvm/iommu.h @@ -21,6 +21,7 @@ #define __XEN_HVM_IOMMU_H__ #include +#include struct g2m_ioport { struct list_head list; diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index 1fd8fb9..6cbfad7 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -25,6 +25,7 @@ #include #include #include +#include extern bool_t iommu_enable, iommu_enabled; extern bool_t force_iommu, iommu_verbose; @@ -39,17 +40,12 @@ extern bool_t amd_iommu_perdev_intremap; #define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu) -#define MAX_IOMMUS 32 - #define PAGE_SHIFT_4K (12) #define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) #define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) #define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) int iommu_setup(void); -int iommu_supports_eim(void); -int iommu_enable_x2apic_IR(void); -void iommu_disable_x2apic_IR(void); int iommu_add_device(struct pci_dev *pdev); int iommu_enable_device(struct pci_dev *pdev); @@ -59,6 +55,12 @@ void iommu_hwdom_init(struct domain *d); void iommu_domain_destroy(struct domain *d); int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn); +int arch_iommu_populate_page_table(struct domain *d); +void arch_iommu_check_hwdom_reqs(struct domain *d); + +/* Function used internally, use iommu_domain_destroy */ +void iommu_teardown(struct domain *d); + /* iommu_map_page() takes flags to direct the mapping operation. 
*/ #define _IOMMUF_readable 0 #define IOMMUF_readable (1u<<_IOMMUF_readable) @@ -67,8 +69,8 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn); int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags); int iommu_unmap_page(struct domain *d, unsigned long gfn); -void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); +#ifdef HAS_PCI void pt_pci_init(void); struct pirq; @@ -82,32 +84,41 @@ struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *); void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci); bool_t pt_irq_need_timer(uint32_t flags); -#define PT_IRQ_TIME_OUT MILLISECS(8) - struct msi_desc; struct msi_msg; + +int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg); +void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg); + +#define PT_IRQ_TIME_OUT MILLISECS(8) +#endif /* HAS_PCI */ + struct page_info; struct iommu_ops { int (*init)(struct domain *d); void (*hwdom_init)(struct domain *d); +#ifdef HAS_PCI int (*add_device)(u8 devfn, struct pci_dev *); int (*enable_device)(struct pci_dev *pdev); int (*remove_device)(u8 devfn, struct pci_dev *); int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *); + int (*reassign_device)(struct domain *s, struct domain *t, + u8 devfn, struct pci_dev *); + int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn); + int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg); + void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg); +#endif /* HAS_PCI */ void (*teardown)(struct domain *d); int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags); int (*unmap_page)(struct domain *d, unsigned long gfn); void (*free_page_table)(struct page_info *); - int (*reassign_device)(struct domain *s, struct domain *t, - u8 devfn, struct pci_dev *); - int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn); +#ifdef CONFIG_X86 void 
(*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value); - int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg); - void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg); unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg); int (*setup_hpet_msi)(struct msi_desc *); +#endif /* CONFIG_X86 */ void (*suspend)(void); void (*resume)(void); void (*share_p2m)(struct domain *d); @@ -117,27 +128,23 @@ struct iommu_ops { void (*dump_p2m_table)(struct domain *d); }; -void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); -int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg); -void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg); -unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg); -int iommu_setup_hpet_msi(struct msi_desc *); - void iommu_suspend(void); void iommu_resume(void); void iommu_crash_shutdown(void); void iommu_share_p2m_table(struct domain *d); +#ifdef HAS_PCI +int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); +#endif + int iommu_do_domctl(struct xen_domctl *, struct domain *d, XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count); void iommu_iotlb_flush_all(struct domain *d); -/* While VT-d specific, this must get declared in a generic header. */ -int adjust_vtd_irq_affinities(void); - /* * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to * avoid unecessary iotlb_flush in the low level IOMMU code. -- 1.7.10.4