From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: Kevin Tian <kevin.tian@intel.com>, Feng Wu <feng.wu@intel.com>
Subject: [PATCH v15 3/3] VT-d: fix Device-TLB flush timeout issue
Date: Fri, 08 Jul 2016 00:46:15 -0600 [thread overview]
Message-ID: <577F685702000078000FC756@prv-mh.provo.novell.com> (raw)
In-Reply-To: <577F66BC02000078000FC743@prv-mh.provo.novell.com>
[-- Attachment #1: Type: text/plain, Size: 9129 bytes --]
From: Quan Xu <quan.xu@intel.com>
If Device-TLB flush timed out, we hide the target ATS device
immediately. By hiding the device, we make sure it can't be
assigned to any domain any longer (see device_assigned).
Signed-off-by: Quan Xu <quan.xu@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v15: Re-base on heavily changed patch 1. Call disable_ats_device() and
domain_crash() from iommu_dev_iotlb_flush_timeout() and move the
function to passthrough/pci.c to fix the ARM build. As a result of
calling disable_ats_device() also use list_for_each_entry_safe()
in dev_invalidate_iotlb().
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -32,6 +32,7 @@
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
+#include "ats.h"
struct pci_seg {
struct list_head alldevs_list;
@@ -1504,6 +1505,34 @@ static int iommu_get_device_group(
return i;
}
+void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
+{
+ pcidevs_lock();
+
+ disable_ats_device(pdev);
+
+ ASSERT(pdev->domain);
+ if ( d != pdev->domain )
+ {
+ pcidevs_unlock();
+ return;
+ }
+
+ list_del(&pdev->domain_list);
+ pdev->domain = NULL;
+ _pci_hide_device(pdev);
+
+ if ( !d->is_shutting_down && printk_ratelimit() )
+ printk(XENLOG_ERR
+ "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
+ d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+ if ( !is_hardware_domain(d) )
+ domain_crash(d);
+
+ pcidevs_unlock();
+}
+
int iommu_do_pci_domctl(
struct xen_domctl *domctl, struct domain *d,
XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -25,6 +25,7 @@
#define VTDPREFIX "[VT-D]"
+struct pci_ats_dev;
extern bool_t rwbf_quirk;
void print_iommu_regs(struct acpi_drhd_unit *drhd);
@@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *i
u64 addr, unsigned int size_order, u64 type);
int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
- u32 max_invs_pend,
- u16 sid, u16 size, u64 addr);
+ struct pci_dev *pdev,
+ u16 did, u16 size, u64 addr);
unsigned int get_cache_line_size(void);
void cacheline_flush(char *);
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,11 +27,11 @@
#include "dmar.h"
#include "vtd.h"
#include "extern.h"
+#include "../ats.h"
#define VTD_QI_TIMEOUT 1
-static int __must_check invalidate_sync(struct iommu *iommu,
- bool_t flush_dev_iotlb);
+static int __must_check invalidate_sync(struct iommu *iommu);
static void print_qi_regs(struct iommu *iommu)
{
@@ -103,7 +103,7 @@ static int __must_check queue_invalidate
unmap_vtd_domain_page(qinval_entries);
- return invalidate_sync(iommu, 0);
+ return invalidate_sync(iommu);
}
static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu,
@@ -140,7 +140,7 @@ static int __must_check queue_invalidate
qinval_update_qtail(iommu, index);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return invalidate_sync(iommu, 0);
+ return invalidate_sync(iommu);
}
static int __must_check queue_invalidate_wait(struct iommu *iommu,
@@ -199,25 +199,53 @@ static int __must_check queue_invalidate
return -EOPNOTSUPP;
}
-static int __must_check invalidate_sync(struct iommu *iommu,
- bool_t flush_dev_iotlb)
+static int __must_check invalidate_sync(struct iommu *iommu)
{
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
ASSERT(qi_ctrl->qinval_maddr);
- return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
+ return queue_invalidate_wait(iommu, 0, 1, 1, 0);
}
-int qinval_device_iotlb_sync(struct iommu *iommu,
- u32 max_invs_pend,
- u16 sid, u16 size, u64 addr)
+static int __must_check dev_invalidate_sync(struct iommu *iommu,
+ struct pci_dev *pdev, u16 did)
+{
+ struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ int rc;
+
+ ASSERT(qi_ctrl->qinval_maddr);
+ rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
+ if ( rc == -ETIMEDOUT )
+ {
+ struct domain *d = NULL;
+
+ if ( test_bit(did, iommu->domid_bitmap) )
+ d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+
+ /*
+ * In case the domain has been freed or the IOMMU domid bitmap is
+ * not valid, the device no longer belongs to this domain.
+ */
+ if ( d == NULL )
+ return rc;
+
+ iommu_dev_iotlb_flush_timeout(d, pdev);
+ rcu_unlock_domain(d);
+ }
+
+ return rc;
+}
+
+int qinval_device_iotlb_sync(struct iommu *iommu, struct pci_dev *pdev,
+ u16 did, u16 size, u64 addr)
{
unsigned long flags;
unsigned int index;
u64 entry_base;
struct qinval_entry *qinval_entry, *qinval_entries;
+ ASSERT(pdev);
spin_lock_irqsave(&iommu->register_lock, flags);
index = qinval_next_index(iommu);
entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
@@ -227,9 +255,9 @@ int qinval_device_iotlb_sync(struct iomm
qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
- qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+ qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = pdev->ats.queue_depth;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
- qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+ qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = PCI_BDF2(pdev->bus, pdev->devfn);
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
@@ -240,7 +268,7 @@ int qinval_device_iotlb_sync(struct iomm
qinval_update_qtail(iommu, index);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return invalidate_sync(iommu, 1);
+ return dev_invalidate_sync(iommu, pdev, did);
}
static int __must_check queue_invalidate_iec_sync(struct iommu *iommu,
@@ -271,7 +299,7 @@ static int __must_check queue_invalidate
qinval_update_qtail(iommu, index);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- ret = invalidate_sync(iommu, 0);
+ ret = invalidate_sync(iommu);
/*
* reading vt-d architecture register will ensure
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -109,15 +109,14 @@ out:
int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
u64 addr, unsigned int size_order, u64 type)
{
- const struct pci_dev *pdev;
+ struct pci_dev *pdev, *temp;
int ret = 0;
if ( !ecap_dev_iotlb(iommu->ecap) )
return ret;
- list_for_each_entry( pdev, &iommu->ats_devices, ats.list )
+ list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
{
- u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
bool_t sbit;
int rc = 0;
@@ -131,8 +130,7 @@ int dev_invalidate_iotlb(struct iommu *i
/* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
sbit = 1;
addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
- rc = qinval_device_iotlb_sync(iommu, pdev->ats.queue_depth,
- sid, sbit, addr);
+ rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
break;
case DMA_TLB_PSI_FLUSH:
if ( !device_in_domain(iommu, pdev, did) )
@@ -151,8 +149,7 @@ int dev_invalidate_iotlb(struct iommu *i
addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
}
- rc = qinval_device_iotlb_sync(iommu, pdev->ats.queue_depth,
- sid, sbit, addr);
+ rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
break;
default:
dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -207,6 +207,8 @@ int __must_check iommu_iotlb_flush(struc
unsigned int page_count);
int __must_check iommu_iotlb_flush_all(struct domain *d);
+void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev);
+
/*
* The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
* avoid unecessary iotlb_flush in the low level IOMMU code.
[-- Attachment #2: VT-d-fix-flush-timeout.patch --]
[-- Type: text/plain, Size: 9169 bytes --]
VT-d: fix Device-TLB flush timeout issue
From: Quan Xu <quan.xu@intel.com>
If Device-TLB flush timed out, we hide the target ATS device
immediately. By hiding the device, we make sure it can't be
assigned to any domain any longer (see device_assigned).
Signed-off-by: Quan Xu <quan.xu@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v15: Re-base on heavily changed patch 1. Call disable_ats_device() and
domain_crash() from iommu_dev_iotlb_flush_timeout() and move the
function to passthrough/pci.c to fix the ARM build. As a result of
calling disable_ats_device() also use list_for_each_entry_safe()
in dev_invalidate_iotlb().
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -32,6 +32,7 @@
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
+#include "ats.h"
struct pci_seg {
struct list_head alldevs_list;
@@ -1504,6 +1505,34 @@ static int iommu_get_device_group(
return i;
}
+void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
+{
+ pcidevs_lock();
+
+ disable_ats_device(pdev);
+
+ ASSERT(pdev->domain);
+ if ( d != pdev->domain )
+ {
+ pcidevs_unlock();
+ return;
+ }
+
+ list_del(&pdev->domain_list);
+ pdev->domain = NULL;
+ _pci_hide_device(pdev);
+
+ if ( !d->is_shutting_down && printk_ratelimit() )
+ printk(XENLOG_ERR
+ "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
+ d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+ if ( !is_hardware_domain(d) )
+ domain_crash(d);
+
+ pcidevs_unlock();
+}
+
int iommu_do_pci_domctl(
struct xen_domctl *domctl, struct domain *d,
XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -25,6 +25,7 @@
#define VTDPREFIX "[VT-D]"
+struct pci_ats_dev;
extern bool_t rwbf_quirk;
void print_iommu_regs(struct acpi_drhd_unit *drhd);
@@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *i
u64 addr, unsigned int size_order, u64 type);
int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
- u32 max_invs_pend,
- u16 sid, u16 size, u64 addr);
+ struct pci_dev *pdev,
+ u16 did, u16 size, u64 addr);
unsigned int get_cache_line_size(void);
void cacheline_flush(char *);
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,11 +27,11 @@
#include "dmar.h"
#include "vtd.h"
#include "extern.h"
+#include "../ats.h"
#define VTD_QI_TIMEOUT 1
-static int __must_check invalidate_sync(struct iommu *iommu,
- bool_t flush_dev_iotlb);
+static int __must_check invalidate_sync(struct iommu *iommu);
static void print_qi_regs(struct iommu *iommu)
{
@@ -103,7 +103,7 @@ static int __must_check queue_invalidate
unmap_vtd_domain_page(qinval_entries);
- return invalidate_sync(iommu, 0);
+ return invalidate_sync(iommu);
}
static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu,
@@ -140,7 +140,7 @@ static int __must_check queue_invalidate
qinval_update_qtail(iommu, index);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return invalidate_sync(iommu, 0);
+ return invalidate_sync(iommu);
}
static int __must_check queue_invalidate_wait(struct iommu *iommu,
@@ -199,25 +199,53 @@ static int __must_check queue_invalidate
return -EOPNOTSUPP;
}
-static int __must_check invalidate_sync(struct iommu *iommu,
- bool_t flush_dev_iotlb)
+static int __must_check invalidate_sync(struct iommu *iommu)
{
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
ASSERT(qi_ctrl->qinval_maddr);
- return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
+ return queue_invalidate_wait(iommu, 0, 1, 1, 0);
}
-int qinval_device_iotlb_sync(struct iommu *iommu,
- u32 max_invs_pend,
- u16 sid, u16 size, u64 addr)
+static int __must_check dev_invalidate_sync(struct iommu *iommu,
+ struct pci_dev *pdev, u16 did)
+{
+ struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ int rc;
+
+ ASSERT(qi_ctrl->qinval_maddr);
+ rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
+ if ( rc == -ETIMEDOUT )
+ {
+ struct domain *d = NULL;
+
+ if ( test_bit(did, iommu->domid_bitmap) )
+ d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+
+ /*
+ * In case the domain has been freed or the IOMMU domid bitmap is
+ * not valid, the device no longer belongs to this domain.
+ */
+ if ( d == NULL )
+ return rc;
+
+ iommu_dev_iotlb_flush_timeout(d, pdev);
+ rcu_unlock_domain(d);
+ }
+
+ return rc;
+}
+
+int qinval_device_iotlb_sync(struct iommu *iommu, struct pci_dev *pdev,
+ u16 did, u16 size, u64 addr)
{
unsigned long flags;
unsigned int index;
u64 entry_base;
struct qinval_entry *qinval_entry, *qinval_entries;
+ ASSERT(pdev);
spin_lock_irqsave(&iommu->register_lock, flags);
index = qinval_next_index(iommu);
entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
@@ -227,9 +255,9 @@ int qinval_device_iotlb_sync(struct iomm
qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
- qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+ qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = pdev->ats.queue_depth;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
- qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+ qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = PCI_BDF2(pdev->bus, pdev->devfn);
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
@@ -240,7 +268,7 @@ int qinval_device_iotlb_sync(struct iomm
qinval_update_qtail(iommu, index);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return invalidate_sync(iommu, 1);
+ return dev_invalidate_sync(iommu, pdev, did);
}
static int __must_check queue_invalidate_iec_sync(struct iommu *iommu,
@@ -271,7 +299,7 @@ static int __must_check queue_invalidate
qinval_update_qtail(iommu, index);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- ret = invalidate_sync(iommu, 0);
+ ret = invalidate_sync(iommu);
/*
* reading vt-d architecture register will ensure
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -109,15 +109,14 @@ out:
int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
u64 addr, unsigned int size_order, u64 type)
{
- const struct pci_dev *pdev;
+ struct pci_dev *pdev, *temp;
int ret = 0;
if ( !ecap_dev_iotlb(iommu->ecap) )
return ret;
- list_for_each_entry( pdev, &iommu->ats_devices, ats.list )
+ list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
{
- u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
bool_t sbit;
int rc = 0;
@@ -131,8 +130,7 @@ int dev_invalidate_iotlb(struct iommu *i
/* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
sbit = 1;
addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
- rc = qinval_device_iotlb_sync(iommu, pdev->ats.queue_depth,
- sid, sbit, addr);
+ rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
break;
case DMA_TLB_PSI_FLUSH:
if ( !device_in_domain(iommu, pdev, did) )
@@ -151,8 +149,7 @@ int dev_invalidate_iotlb(struct iommu *i
addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
}
- rc = qinval_device_iotlb_sync(iommu, pdev->ats.queue_depth,
- sid, sbit, addr);
+ rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
break;
default:
dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -207,6 +207,8 @@ int __must_check iommu_iotlb_flush(struc
unsigned int page_count);
int __must_check iommu_iotlb_flush_all(struct domain *d);
+void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev);
+
/*
* The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
* avoid unecessary iotlb_flush in the low level IOMMU code.
[-- Attachment #3: Type: text/plain, Size: 127 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
next prev parent reply other threads:[~2016-07-08 6:46 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-07-08 6:39 [PATCH v15 0/3] VT-d Device-TLB flush issue Jan Beulich
2016-07-08 6:44 ` [PATCH v15 1/3] IOMMU/ATS: use a struct pci_dev * instead of SBDF Jan Beulich
2016-07-11 3:07 ` Tian, Kevin
2016-07-08 6:45 ` [PATCH v15 2/3] IOMMU: add domain crash logic Jan Beulich
2016-07-08 6:46 ` Jan Beulich [this message]
2016-07-11 3:10 ` [PATCH v15 3/3] VT-d: fix Device-TLB flush timeout issue Tian, Kevin
2016-07-08 6:52 ` [PATCH v15 0/3] VT-d Device-TLB flush issue Jan Beulich
2016-07-13 3:05 ` Xu, Quan
2016-07-15 9:59 ` Andrew Cooper
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=577F685702000078000FC756@prv-mh.provo.novell.com \
--to=jbeulich@suse.com \
--cc=feng.wu@intel.com \
--cc=kevin.tian@intel.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).