* [PATCH v3 1/4] iommu/vt-d: Refactor device_to_iommu() helper
2020-07-09 7:05 [PATCH v3 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
@ 2020-07-09 7:05 ` Lu Baolu
2020-07-09 7:05 ` [PATCH v3 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
` (2 subsequent siblings)
3 siblings, 0 replies; 11+ messages in thread
From: Lu Baolu @ 2020-07-09 7:05 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
It is refactored in two ways:
- Make it global so that it could be used in other files.
- Make bus/devfn optional so that callers could ignore these two returned
values when they only want to get the corresponding iommu pointer.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
---
drivers/iommu/intel/iommu.c | 55 +++++++++++--------------------------
drivers/iommu/intel/svm.c | 8 +++---
include/linux/intel-iommu.h | 3 +-
3 files changed, 21 insertions(+), 45 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2ce490c2eab8..4a6b6960fc32 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -778,16 +778,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
return false;
}
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
+ struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
- struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
- if (iommu_dummy(dev))
+ if (!dev || iommu_dummy(dev))
return NULL;
if (dev_is_pci(dev)) {
@@ -818,8 +818,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && pdev->is_virtfn)
goto got_pdev;
- *bus = drhd->devices[i].bus;
- *devfn = drhd->devices[i].devfn;
+ if (bus && devfn) {
+ *bus = drhd->devices[i].bus;
+ *devfn = drhd->devices[i].devfn;
+ }
goto out;
}
@@ -829,8 +831,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && drhd->include_all) {
got_pdev:
- *bus = pdev->bus->number;
- *devfn = pdev->devfn;
+ if (bus && devfn) {
+ *bus = pdev->bus->number;
+ *devfn = pdev->devfn;
+ }
goto out;
}
}
@@ -5146,11 +5150,10 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
struct device *dev)
{
int ret;
- u8 bus, devfn;
unsigned long flags;
struct intel_iommu *iommu;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5236,9 +5239,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5658,9 +5660,8 @@ static bool intel_iommu_capable(enum iommu_cap cap)
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return ERR_PTR(-ENODEV);
@@ -5673,9 +5674,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return;
@@ -5825,37 +5825,14 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
- struct intel_iommu *iommu;
- u8 bus, devfn;
-
- if (iommu_dummy(dev)) {
- dev_warn(dev,
- "No IOMMU translation for device; cannot enable SVM\n");
- return NULL;
- }
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if ((!iommu)) {
- dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
- return NULL;
- }
-
- return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
static int intel_iommu_enable_auxd(struct device *dev)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
- u8 bus, devfn;
int ret;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu || dmar_disabled)
return -EINVAL;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 6c87c807a0ab..25dd74f27252 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -231,7 +231,7 @@ static LIST_HEAD(global_svm_list);
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
struct iommu_gpasid_bind_data *data)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct dmar_domain *dmar_domain;
struct intel_svm_dev *sdev;
struct intel_svm *svm;
@@ -373,7 +373,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
int intel_svm_unbind_gpasid(struct device *dev, int pasid)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev;
struct intel_svm *svm;
int ret = -EINVAL;
@@ -430,7 +430,7 @@ static int
intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
struct mm_struct *mm, struct intel_svm_dev **sd)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct device_domain_info *info;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
@@ -608,7 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
struct intel_svm *svm;
int ret = -EINVAL;
- iommu = intel_svm_device_to_iommu(dev);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
goto out;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3e8fa1c7a1e6..fc2cfc3db6e1 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -728,6 +728,7 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu);
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
struct dmar_domain *find_domain(struct device *dev);
struct device_domain_info *get_domain_info(struct device *dev);
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
#ifdef CONFIG_INTEL_IOMMU_SVM
extern void intel_svm_check(struct intel_iommu *iommu);
@@ -766,8 +767,6 @@ struct intel_svm {
struct list_head devs;
struct list_head list;
};
-
-extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
#endif
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid
2020-07-09 7:05 [PATCH v3 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
2020-07-09 7:05 ` [PATCH v3 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
@ 2020-07-09 7:05 ` Lu Baolu
2020-07-09 7:05 ` [PATCH v3 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
2020-07-09 7:05 ` [PATCH v3 4/4] iommu/vt-d: Add page response ops support Lu Baolu
3 siblings, 0 replies; 11+ messages in thread
From: Lu Baolu @ 2020-07-09 7:05 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
There are several places in the code that need to get the pointers of
svm and sdev according to a pasid and device. Add a helper to achieve
this for code consolidation and readability.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
---
drivers/iommu/intel/svm.c | 121 +++++++++++++++++++++-----------------
1 file changed, 68 insertions(+), 53 deletions(-)
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 25dd74f27252..c23167877b2b 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -228,6 +228,50 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
+static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
+ struct intel_svm **rsvm,
+ struct intel_svm_dev **rsdev)
+{
+ struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm *svm;
+
+ /* The caller should hold the pasid_mutex lock */
+ if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
+ return -EINVAL;
+
+ if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+ return -EINVAL;
+
+ svm = ioasid_find(NULL, pasid, NULL);
+ if (IS_ERR(svm))
+ return PTR_ERR(svm);
+
+ if (!svm)
+ goto out;
+
+ /*
+ * If we found svm for the PASID, there must be at least one device
+ * bond.
+ */
+ if (WARN_ON(list_empty(&svm->devs)))
+ return -EINVAL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(d, &svm->devs, list) {
+ if (d->dev == dev) {
+ sdev = d;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+out:
+ *rsvm = svm;
+ *rsdev = sdev;
+
+ return 0;
+}
+
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
struct iommu_gpasid_bind_data *data)
{
@@ -261,39 +305,27 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
dmar_domain = to_dmar_domain(domain);
mutex_lock(&pasid_mutex);
- svm = ioasid_find(NULL, data->hpasid, NULL);
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- if (svm) {
+ if (sdev) {
/*
- * If we found svm for the PASID, there must be at
- * least one device bond, otherwise svm should be freed.
+ * For devices with aux domains, we should allow
+ * multiple bind calls with the same PASID and pdev.
*/
- if (WARN_ON(list_empty(&svm->devs))) {
- ret = -EINVAL;
- goto out;
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) {
+ sdev->users++;
+ } else {
+ dev_warn_ratelimited(dev,
+ "Already bound with PASID %u\n",
+ svm->pasid);
+ ret = -EBUSY;
}
+ goto out;
+ }
- for_each_svm_dev(sdev, svm, dev) {
- /*
- * For devices with aux domains, we should allow
- * multiple bind calls with the same PASID and pdev.
- */
- if (iommu_dev_feature_enabled(dev,
- IOMMU_DEV_FEAT_AUX)) {
- sdev->users++;
- } else {
- dev_warn_ratelimited(dev,
- "Already bound with PASID %u\n",
- svm->pasid);
- ret = -EBUSY;
- }
- goto out;
- }
- } else {
+ if (!svm) {
/* We come here when PASID has never been bond to a device. */
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
@@ -376,25 +408,17 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev;
struct intel_svm *svm;
- int ret = -EINVAL;
+ int ret;
if (WARN_ON(!iommu))
return -EINVAL;
mutex_lock(&pasid_mutex);
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm) {
- ret = -EINVAL;
- goto out;
- }
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- for_each_svm_dev(sdev, svm, dev) {
- ret = 0;
+ if (sdev) {
if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
sdev->users--;
if (!sdev->users) {
@@ -418,7 +442,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
kfree(svm);
}
}
- break;
}
out:
mutex_unlock(&pasid_mutex);
@@ -596,7 +619,7 @@ intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
if (sd)
*sd = sdev;
ret = 0;
- out:
+out:
return ret;
}
@@ -612,17 +635,11 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
if (!iommu)
goto out;
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm)
- goto out;
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- for_each_svm_dev(sdev, svm, dev) {
- ret = 0;
+ if (sdev) {
sdev->users--;
if (!sdev->users) {
list_del_rcu(&sdev->list);
@@ -651,10 +668,8 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
kfree(svm);
}
}
- break;
}
- out:
-
+out:
return ret;
}
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-09 7:05 [PATCH v3 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
2020-07-09 7:05 ` [PATCH v3 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
2020-07-09 7:05 ` [PATCH v3 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
@ 2020-07-09 7:05 ` Lu Baolu
2020-07-10 2:24 ` Tian, Kevin
2020-07-09 7:05 ` [PATCH v3 4/4] iommu/vt-d: Add page response ops support Lu Baolu
3 siblings, 1 reply; 11+ messages in thread
From: Lu Baolu @ 2020-07-09 7:05 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
A pasid might be bound to a page table from a VM guest via the iommu
ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
on the physical IOMMU, we need to inject the page fault request into
the guest. After the guest completes handling the page fault, a page
response needs to be sent back via the iommu ops.page_response().
This adds support to report a page request fault. Any external module
which is interested in handling this fault should register a notifier
with iommu_register_device_fault_handler().
Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/svm.c | 103 +++++++++++++++++++++++++++++++-------
1 file changed, 85 insertions(+), 18 deletions(-)
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c23167877b2b..d24e71bac8db 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -815,8 +815,63 @@ static void intel_svm_drain_prq(struct device *dev, int pasid)
}
}
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+ int prot = 0;
+
+ if (req->rd_req)
+ prot |= IOMMU_FAULT_PERM_READ;
+ if (req->wr_req)
+ prot |= IOMMU_FAULT_PERM_WRITE;
+ if (req->exe_req)
+ prot |= IOMMU_FAULT_PERM_EXEC;
+ if (req->pm_req)
+ prot |= IOMMU_FAULT_PERM_PRIV;
+
+ return prot;
+}
+
+static int
+intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+{
+ struct iommu_fault_event event;
+
+ /* Fill in event data for device specific processing */
+ memset(&event, 0, sizeof(struct iommu_fault_event));
+ event.fault.type = IOMMU_FAULT_PAGE_REQ;
+ event.fault.prm.addr = desc->addr;
+ event.fault.prm.pasid = desc->pasid;
+ event.fault.prm.grpid = desc->prg_index;
+ event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ if (desc->lpig)
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ if (desc->pasid_present) {
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+ }
+ if (desc->priv_data_present) {
+ /*
+ * Set last page in group bit if private data is present,
+ * page response is required as it does for LPIG.
+ * iommu_report_device_fault() doesn't understand this vendor
+ * specific requirement thus we set last_page as a workaround.
+ */
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ memcpy(event.fault.prm.private_data, desc->priv_data,
+ sizeof(desc->priv_data));
+ }
+
+ return iommu_report_device_fault(dev, &event);
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
+ struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
int head, tail, handled = 0;
@@ -828,7 +883,6 @@ static irqreturn_t prq_event_thread(int irq, void *d)
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
while (head != tail) {
- struct intel_svm_dev *sdev;
struct vm_area_struct *vma;
struct page_req_dsc *req;
struct qi_desc resp;
@@ -864,6 +918,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
}
}
+ if (!sdev || sdev->sid != req->rid) {
+ struct intel_svm_dev *t;
+
+ sdev = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == req->rid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ }
+
result = QI_RESP_INVALID;
/* Since we're using init_mm.pgd directly, we should never take
* any faults on kernel addresses. */
@@ -874,6 +942,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (!is_canonical_address(address))
goto bad_req;
+ /*
+ * If prq is to be handled outside iommu driver via receiver of
+ * the fault notifiers, we skip the page response here.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ if (sdev && !intel_svm_prq_report(sdev->dev, req))
+ goto prq_advance;
+ else
+ goto bad_req;
+ }
+
/* If the mm is already defunct, don't handle faults. */
if (!mmget_not_zero(svm->mm))
goto bad_req;
@@ -892,24 +971,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
goto invalid;
result = QI_RESP_SUCCESS;
- invalid:
+invalid:
mmap_read_unlock(svm->mm);
mmput(svm->mm);
- bad_req:
- /* Accounting for major/minor faults? */
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- if (sdev->sid == req->rid)
- break;
- }
- /* Other devices can go away, but the drivers are not permitted
- * to unbind while any page faults might be in flight. So it's
- * OK to drop the 'lock' here now we have it. */
- rcu_read_unlock();
-
- if (WARN_ON(&sdev->list == &svm->devs))
- sdev = NULL;
-
+bad_req:
+ WARN_ON(!sdev);
if (sdev && sdev->ops && sdev->ops->fault_cb) {
int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
(req->exe_req << 1) | (req->pm_req);
@@ -920,7 +986,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
and these can be NULL. Do not use them below this point! */
sdev = NULL;
svm = NULL;
- no_pasid:
+no_pasid:
if (req->lpig || req->priv_data_present) {
/*
* Per VT-d spec. v3.0 ch7.7, system software must
@@ -945,6 +1011,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
resp.qw3 = 0;
qi_submit_sync(iommu, &resp, 1, 0);
}
+prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 11+ messages in thread
* RE: [PATCH v3 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-09 7:05 ` [PATCH v3 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
@ 2020-07-10 2:24 ` Tian, Kevin
2020-07-10 5:25 ` Lu Baolu
0 siblings, 1 reply; 11+ messages in thread
From: Tian, Kevin @ 2020-07-10 2:24 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Thursday, July 9, 2020 3:06 PM
>
> A pasid might be bound to a page table from a VM guest via the iommu
> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> on the physical IOMMU, we need to inject the page fault request into
> the guest. After the guest completes handling the page fault, a page
> response need to be sent back via the iommu ops.page_response().
>
> This adds support to report a page request fault. Any external module
> which is interested in handling this fault should regiester a notifier
> with iommu_register_device_fault_handler().
>
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/svm.c | 103 +++++++++++++++++++++++++++++++-------
> 1 file changed, 85 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index c23167877b2b..d24e71bac8db 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -815,8 +815,63 @@ static void intel_svm_drain_prq(struct device *dev,
> int pasid)
> }
> }
>
> +static int prq_to_iommu_prot(struct page_req_dsc *req)
> +{
> + int prot = 0;
> +
> + if (req->rd_req)
> + prot |= IOMMU_FAULT_PERM_READ;
> + if (req->wr_req)
> + prot |= IOMMU_FAULT_PERM_WRITE;
> + if (req->exe_req)
> + prot |= IOMMU_FAULT_PERM_EXEC;
> + if (req->pm_req)
> + prot |= IOMMU_FAULT_PERM_PRIV;
> +
> + return prot;
> +}
> +
> +static int
> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> +{
> + struct iommu_fault_event event;
> +
> + /* Fill in event data for device specific processing */
> + memset(&event, 0, sizeof(struct iommu_fault_event));
> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
> + event.fault.prm.addr = desc->addr;
> + event.fault.prm.pasid = desc->pasid;
> + event.fault.prm.grpid = desc->prg_index;
> + event.fault.prm.perm = prq_to_iommu_prot(desc);
> +
> + if (!dev || !dev_is_pci(dev))
> + return -ENODEV;
move the check before memset.
> +
> + if (desc->lpig)
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + if (desc->pasid_present) {
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
> + }
if pasid is not present, should we return error directly instead of
submitting the req and let iommu core to figure out? I don't have
a strong preference, thus:
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> + if (desc->priv_data_present) {
> + /*
> + * Set last page in group bit if private data is present,
> + * page response is required as it does for LPIG.
> + * iommu_report_device_fault() doesn't understand this
> vendor
> + * specific requirement thus we set last_page as a
> workaround.
> + */
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> + memcpy(event.fault.prm.private_data, desc->priv_data,
> + sizeof(desc->priv_data));
> + }
> +
> + return iommu_report_device_fault(dev, &event);
> +}
> +
> static irqreturn_t prq_event_thread(int irq, void *d)
> {
> + struct intel_svm_dev *sdev = NULL;
> struct intel_iommu *iommu = d;
> struct intel_svm *svm = NULL;
> int head, tail, handled = 0;
> @@ -828,7 +883,6 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
> PRQ_RING_MASK;
> head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
> PRQ_RING_MASK;
> while (head != tail) {
> - struct intel_svm_dev *sdev;
> struct vm_area_struct *vma;
> struct page_req_dsc *req;
> struct qi_desc resp;
> @@ -864,6 +918,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> }
> }
>
> + if (!sdev || sdev->sid != req->rid) {
> + struct intel_svm_dev *t;
> +
> + sdev = NULL;
> + rcu_read_lock();
> + list_for_each_entry_rcu(t, &svm->devs, list) {
> + if (t->sid == req->rid) {
> + sdev = t;
> + break;
> + }
> + }
> + rcu_read_unlock();
> + }
> +
> result = QI_RESP_INVALID;
> /* Since we're using init_mm.pgd directly, we should never
> take
> * any faults on kernel addresses. */
> @@ -874,6 +942,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> if (!is_canonical_address(address))
> goto bad_req;
>
> + /*
> + * If prq is to be handled outside iommu driver via receiver of
> + * the fault notifiers, we skip the page response here.
> + */
> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
> + if (sdev && !intel_svm_prq_report(sdev->dev, req))
> + goto prq_advance;
> + else
> + goto bad_req;
> + }
> +
> /* If the mm is already defunct, don't handle faults. */
> if (!mmget_not_zero(svm->mm))
> goto bad_req;
> @@ -892,24 +971,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> goto invalid;
>
> result = QI_RESP_SUCCESS;
> - invalid:
> +invalid:
> mmap_read_unlock(svm->mm);
> mmput(svm->mm);
> - bad_req:
> - /* Accounting for major/minor faults? */
> - rcu_read_lock();
> - list_for_each_entry_rcu(sdev, &svm->devs, list) {
> - if (sdev->sid == req->rid)
> - break;
> - }
> - /* Other devices can go away, but the drivers are not
> permitted
> - * to unbind while any page faults might be in flight. So it's
> - * OK to drop the 'lock' here now we have it. */
> - rcu_read_unlock();
> -
> - if (WARN_ON(&sdev->list == &svm->devs))
> - sdev = NULL;
> -
> +bad_req:
> + WARN_ON(!sdev);
> if (sdev && sdev->ops && sdev->ops->fault_cb) {
> int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
> (req->exe_req << 1) | (req->pm_req);
> @@ -920,7 +986,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> and these can be NULL. Do not use them below this point!
> */
> sdev = NULL;
> svm = NULL;
> - no_pasid:
> +no_pasid:
> if (req->lpig || req->priv_data_present) {
> /*
> * Per VT-d spec. v3.0 ch7.7, system software must
> @@ -945,6 +1011,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> resp.qw3 = 0;
> qi_submit_sync(iommu, &resp, 1, 0);
> }
> +prq_advance:
> head = (head + sizeof(*req)) & PRQ_RING_MASK;
> }
>
> --
> 2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-10 2:24 ` Tian, Kevin
@ 2020-07-10 5:25 ` Lu Baolu
0 siblings, 0 replies; 11+ messages in thread
From: Lu Baolu @ 2020-07-10 5:25 UTC (permalink / raw)
To: Tian, Kevin, iommu; +Cc: Raj, Ashok, linux-kernel
Hi Kevin,
On 2020/7/10 10:24, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Thursday, July 9, 2020 3:06 PM
>>
>> A pasid might be bound to a page table from a VM guest via the iommu
>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>> on the physical IOMMU, we need to inject the page fault request into
>> the guest. After the guest completes handling the page fault, a page
>> response need to be sent back via the iommu ops.page_response().
>>
>> This adds support to report a page request fault. Any external module
>> which is interested in handling this fault should regiester a notifier
>> with iommu_register_device_fault_handler().
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>> drivers/iommu/intel/svm.c | 103 +++++++++++++++++++++++++++++++-------
>> 1 file changed, 85 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index c23167877b2b..d24e71bac8db 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -815,8 +815,63 @@ static void intel_svm_drain_prq(struct device *dev,
>> int pasid)
>> }
>> }
>>
>> +static int prq_to_iommu_prot(struct page_req_dsc *req)
>> +{
>> + int prot = 0;
>> +
>> + if (req->rd_req)
>> + prot |= IOMMU_FAULT_PERM_READ;
>> + if (req->wr_req)
>> + prot |= IOMMU_FAULT_PERM_WRITE;
>> + if (req->exe_req)
>> + prot |= IOMMU_FAULT_PERM_EXEC;
>> + if (req->pm_req)
>> + prot |= IOMMU_FAULT_PERM_PRIV;
>> +
>> + return prot;
>> +}
>> +
>> +static int
>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>> +{
>> + struct iommu_fault_event event;
>> +
>> + /* Fill in event data for device specific processing */
>> + memset(&event, 0, sizeof(struct iommu_fault_event));
>> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
>> + event.fault.prm.addr = desc->addr;
>> + event.fault.prm.pasid = desc->pasid;
>> + event.fault.prm.grpid = desc->prg_index;
>> + event.fault.prm.perm = prq_to_iommu_prot(desc);
>> +
>> + if (!dev || !dev_is_pci(dev))
>> + return -ENODEV;
>
> move the check before memset.
Yes.
>
>> +
>> + if (desc->lpig)
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> + if (desc->pasid_present) {
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
>> + }
>
> if pasid is not present, should we return error directly instead of
> submitting the req and let iommu core to figure out? I don't have
This has been done in prq_event_thread(), so I don't need to check it
again here.
> a strong preference, thus:
>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Thanks a lot for your time.
Best regards,
baolu
>
>> + if (desc->priv_data_present) {
>> + /*
>> + * Set last page in group bit if private data is present,
>> + * page response is required as it does for LPIG.
>> + * iommu_report_device_fault() doesn't understand this
>> vendor
>> + * specific requirement thus we set last_page as a
>> workaround.
>> + */
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> + memcpy(event.fault.prm.private_data, desc->priv_data,
>> + sizeof(desc->priv_data));
>> + }
>> +
>> + return iommu_report_device_fault(dev, &event);
>> +}
>> +
>> static irqreturn_t prq_event_thread(int irq, void *d)
>> {
>> + struct intel_svm_dev *sdev = NULL;
>> struct intel_iommu *iommu = d;
>> struct intel_svm *svm = NULL;
>> int head, tail, handled = 0;
>> @@ -828,7 +883,6 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
>> PRQ_RING_MASK;
>> head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
>> PRQ_RING_MASK;
>> while (head != tail) {
>> - struct intel_svm_dev *sdev;
>> struct vm_area_struct *vma;
>> struct page_req_dsc *req;
>> struct qi_desc resp;
>> @@ -864,6 +918,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> }
>> }
>>
>> + if (!sdev || sdev->sid != req->rid) {
>> + struct intel_svm_dev *t;
>> +
>> + sdev = NULL;
>> + rcu_read_lock();
>> + list_for_each_entry_rcu(t, &svm->devs, list) {
>> + if (t->sid == req->rid) {
>> + sdev = t;
>> + break;
>> + }
>> + }
>> + rcu_read_unlock();
>> + }
>> +
>> result = QI_RESP_INVALID;
>> /* Since we're using init_mm.pgd directly, we should never
>> take
>> * any faults on kernel addresses. */
>> @@ -874,6 +942,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> if (!is_canonical_address(address))
>> goto bad_req;
>>
>> + /*
>> + * If prq is to be handled outside iommu driver via receiver of
>> + * the fault notifiers, we skip the page response here.
>> + */
>> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
>> + if (sdev && !intel_svm_prq_report(sdev->dev, req))
>> + goto prq_advance;
>> + else
>> + goto bad_req;
>> + }
>> +
>> /* If the mm is already defunct, don't handle faults. */
>> if (!mmget_not_zero(svm->mm))
>> goto bad_req;
>> @@ -892,24 +971,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> goto invalid;
>>
>> result = QI_RESP_SUCCESS;
>> - invalid:
>> +invalid:
>> mmap_read_unlock(svm->mm);
>> mmput(svm->mm);
>> - bad_req:
>> - /* Accounting for major/minor faults? */
>> - rcu_read_lock();
>> - list_for_each_entry_rcu(sdev, &svm->devs, list) {
>> - if (sdev->sid == req->rid)
>> - break;
>> - }
>> - /* Other devices can go away, but the drivers are not
>> permitted
>> - * to unbind while any page faults might be in flight. So it's
>> - * OK to drop the 'lock' here now we have it. */
>> - rcu_read_unlock();
>> -
>> - if (WARN_ON(&sdev->list == &svm->devs))
>> - sdev = NULL;
>> -
>> +bad_req:
>> + WARN_ON(!sdev);
>> if (sdev && sdev->ops && sdev->ops->fault_cb) {
>> int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
>> (req->exe_req << 1) | (req->pm_req);
>> @@ -920,7 +986,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> and these can be NULL. Do not use them below this point!
>> */
>> sdev = NULL;
>> svm = NULL;
>> - no_pasid:
>> +no_pasid:
>> if (req->lpig || req->priv_data_present) {
>> /*
>> * Per VT-d spec. v3.0 ch7.7, system software must
>> @@ -945,6 +1011,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> resp.qw3 = 0;
>> qi_submit_sync(iommu, &resp, 1, 0);
>> }
>> +prq_advance:
>> head = (head + sizeof(*req)) & PRQ_RING_MASK;
>> }
>>
>> --
>> 2.17.1
>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 4/4] iommu/vt-d: Add page response ops support
2020-07-09 7:05 [PATCH v3 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
` (2 preceding siblings ...)
2020-07-09 7:05 ` [PATCH v3 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
@ 2020-07-09 7:05 ` Lu Baolu
2020-07-10 2:42 ` Tian, Kevin
3 siblings, 1 reply; 11+ messages in thread
From: Lu Baolu @ 2020-07-09 7:05 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
After page requests are handled, software must respond to the device
which raised the page request with the result. This is done through
the iommu ops.page_response if the request was reported outside of the
vendor IOMMU driver through iommu_report_device_fault(). This adds the
VT-d implementation of the page_response ops.
Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/iommu.c | 1 +
drivers/iommu/intel/svm.c | 100 ++++++++++++++++++++++++++++++++++++
include/linux/intel-iommu.h | 3 ++
3 files changed, 104 insertions(+)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 4a6b6960fc32..98390a6d8113 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
.sva_bind = intel_svm_bind,
.sva_unbind = intel_svm_unbind,
.sva_get_pasid = intel_svm_get_pasid,
+ .page_response = intel_svm_page_response,
#endif
};
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index d24e71bac8db..839d2af377b6 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -1082,3 +1082,103 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
return pasid;
}
+
+int intel_svm_page_response(struct device *dev,
+ struct iommu_fault_event *evt,
+ struct iommu_page_response *msg)
+{
+ struct iommu_fault_page_request *prm;
+ struct intel_svm_dev *sdev = NULL;
+ struct intel_svm *svm = NULL;
+ struct intel_iommu *iommu;
+ bool private_present;
+ bool pasid_present;
+ bool last_page;
+ u8 bus, devfn;
+ int ret = 0;
+ u16 sid;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!msg || !evt)
+ return -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+
+ prm = &evt->fault.prm;
+ sid = PCI_DEVID(bus, devfn);
+ pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+ if (pasid_present) {
+ if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
+ if (ret || !sdev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * For responses from userspace, need to make sure that the
+ * pasid has been bound to its mm.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ struct mm_struct *mm;
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (mm != svm->mm) {
+ ret = -ENODEV;
+ mmput(mm);
+ goto out;
+ }
+
+ mmput(mm);
+ }
+ } else {
+ pr_err_ratelimited("Invalid page response: no pasid\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must respond
+ * with page group response if private data is present (PDP)
+ * or last page in group (LPIG) bit is set. This is an
+ * additional VT-d requirement beyond PCI ATS spec.
+ */
+ if (last_page || private_present) {
+ struct qi_desc desc;
+
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_PDP(private_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ if (private_present)
+ memcpy(&desc.qw2, prm->private_data,
+ sizeof(prm->private_data));
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+ }
+out:
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index fc2cfc3db6e1..bf6009a344f5 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
void *drvdata);
void intel_svm_unbind(struct iommu_sva *handle);
int intel_svm_get_pasid(struct iommu_sva *handle);
+int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+ struct iommu_page_response *msg);
+
struct svm_dev_ops;
struct intel_svm_dev {
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 11+ messages in thread
* RE: [PATCH v3 4/4] iommu/vt-d: Add page response ops support
2020-07-09 7:05 ` [PATCH v3 4/4] iommu/vt-d: Add page response ops support Lu Baolu
@ 2020-07-10 2:42 ` Tian, Kevin
2020-07-10 5:36 ` Lu Baolu
0 siblings, 1 reply; 11+ messages in thread
From: Tian, Kevin @ 2020-07-10 2:42 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Thursday, July 9, 2020 3:06 PM
>
> After page requests are handled, software must respond to the device
> which raised the page request with the result. This is done through
> the iommu ops.page_response if the request was reported to outside of
> vendor iommu driver through iommu_report_device_fault(). This adds the
> VT-d implementation of page_response ops.
>
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/iommu.c | 1 +
> drivers/iommu/intel/svm.c | 100
> ++++++++++++++++++++++++++++++++++++
> include/linux/intel-iommu.h | 3 ++
> 3 files changed, 104 insertions(+)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 4a6b6960fc32..98390a6d8113 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
> .sva_bind = intel_svm_bind,
> .sva_unbind = intel_svm_unbind,
> .sva_get_pasid = intel_svm_get_pasid,
> + .page_response = intel_svm_page_response,
> #endif
> };
>
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index d24e71bac8db..839d2af377b6 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -1082,3 +1082,103 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
>
> return pasid;
> }
> +
> +int intel_svm_page_response(struct device *dev,
> + struct iommu_fault_event *evt,
> + struct iommu_page_response *msg)
> +{
> + struct iommu_fault_page_request *prm;
> + struct intel_svm_dev *sdev = NULL;
> + struct intel_svm *svm = NULL;
> + struct intel_iommu *iommu;
> + bool private_present;
> + bool pasid_present;
> + bool last_page;
> + u8 bus, devfn;
> + int ret = 0;
> + u16 sid;
> +
> + if (!dev || !dev_is_pci(dev))
> + return -ENODEV;
> +
> + iommu = device_to_iommu(dev, &bus, &devfn);
> + if (!iommu)
> + return -ENODEV;
> +
> + if (!msg || !evt)
> + return -EINVAL;
> +
> + mutex_lock(&pasid_mutex);
> +
> + prm = &evt->fault.prm;
> + sid = PCI_DEVID(bus, devfn);
> + pasid_present = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> + private_present = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> + last_page = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +
> + if (pasid_present) {
> + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
> + if (ret || !sdev) {
> + ret = -ENODEV;
> + goto out;
> + }
> +
> + /*
> + * For responses from userspace, need to make sure that the
> + * pasid has been bound to its mm.
> + */
> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
> + struct mm_struct *mm;
> +
> + mm = get_task_mm(current);
> + if (!mm) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (mm != svm->mm) {
> + ret = -ENODEV;
> + mmput(mm);
> + goto out;
> + }
> +
> + mmput(mm);
> + }
> + } else {
> + pr_err_ratelimited("Invalid page response: no pasid\n");
> + ret = -EINVAL;
> + goto out;
check pasid=0 first, then no need to indent so many lines above.
> + }
> +
> + /*
> + * Per VT-d spec. v3.0 ch7.7, system software must respond
> + * with page group response if private data is present (PDP)
> + * or last page in group (LPIG) bit is set. This is an
> + * additional VT-d requirement beyond PCI ATS spec.
> + */
What is the behavior if system software doesn't follow the requirement?
en... maybe the question is really about whether the information in prm
comes from userspace or from internally-recorded info in iommu core.
The former cannot be trusted. The latter one is OK.
Thanks
Kevin
> + if (last_page || private_present) {
> + struct qi_desc desc;
> +
> + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
> |
> + QI_PGRP_PASID_P(pasid_present) |
> + QI_PGRP_PDP(private_present) |
> + QI_PGRP_RESP_CODE(msg->code) |
> + QI_PGRP_RESP_TYPE;
> + desc.qw1 = QI_PGRP_IDX(prm->grpid) |
> QI_PGRP_LPIG(last_page);
> + desc.qw2 = 0;
> + desc.qw3 = 0;
> + if (private_present)
> + memcpy(&desc.qw2, prm->private_data,
> + sizeof(prm->private_data));
> +
> + qi_submit_sync(iommu, &desc, 1, 0);
> + }
> +out:
> + mutex_unlock(&pasid_mutex);
> + return ret;
> +}
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index fc2cfc3db6e1..bf6009a344f5 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
> *dev, struct mm_struct *mm,
> void *drvdata);
> void intel_svm_unbind(struct iommu_sva *handle);
> int intel_svm_get_pasid(struct iommu_sva *handle);
> +int intel_svm_page_response(struct device *dev, struct iommu_fault_event
> *evt,
> + struct iommu_page_response *msg);
> +
> struct svm_dev_ops;
>
> struct intel_svm_dev {
> --
> 2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 4/4] iommu/vt-d: Add page response ops support
2020-07-10 2:42 ` Tian, Kevin
@ 2020-07-10 5:36 ` Lu Baolu
2020-07-10 5:49 ` Tian, Kevin
0 siblings, 1 reply; 11+ messages in thread
From: Lu Baolu @ 2020-07-10 5:36 UTC (permalink / raw)
To: Tian, Kevin, iommu; +Cc: Raj, Ashok, linux-kernel
Hi Kevin,
On 2020/7/10 10:42, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Thursday, July 9, 2020 3:06 PM
>>
>> After page requests are handled, software must respond to the device
>> which raised the page request with the result. This is done through
>> the iommu ops.page_response if the request was reported to outside of
>> vendor iommu driver through iommu_report_device_fault(). This adds the
>> VT-d implementation of page_response ops.
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>> drivers/iommu/intel/iommu.c | 1 +
>> drivers/iommu/intel/svm.c | 100
>> ++++++++++++++++++++++++++++++++++++
>> include/linux/intel-iommu.h | 3 ++
>> 3 files changed, 104 insertions(+)
>>
>> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
>> index 4a6b6960fc32..98390a6d8113 100644
>> --- a/drivers/iommu/intel/iommu.c
>> +++ b/drivers/iommu/intel/iommu.c
>> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
>> .sva_bind = intel_svm_bind,
>> .sva_unbind = intel_svm_unbind,
>> .sva_get_pasid = intel_svm_get_pasid,
>> + .page_response = intel_svm_page_response,
>> #endif
>> };
>>
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index d24e71bac8db..839d2af377b6 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -1082,3 +1082,103 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
>>
>> return pasid;
>> }
>> +
>> +int intel_svm_page_response(struct device *dev,
>> + struct iommu_fault_event *evt,
>> + struct iommu_page_response *msg)
>> +{
>> + struct iommu_fault_page_request *prm;
>> + struct intel_svm_dev *sdev = NULL;
>> + struct intel_svm *svm = NULL;
>> + struct intel_iommu *iommu;
>> + bool private_present;
>> + bool pasid_present;
>> + bool last_page;
>> + u8 bus, devfn;
>> + int ret = 0;
>> + u16 sid;
>> +
>> + if (!dev || !dev_is_pci(dev))
>> + return -ENODEV;
>> +
>> + iommu = device_to_iommu(dev, &bus, &devfn);
>> + if (!iommu)
>> + return -ENODEV;
>> +
>> + if (!msg || !evt)
>> + return -EINVAL;
>> +
>> + mutex_lock(&pasid_mutex);
>> +
>> + prm = &evt->fault.prm;
>> + sid = PCI_DEVID(bus, devfn);
>> + pasid_present = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> + private_present = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> + last_page = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +
>> + if (pasid_present) {
>> + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
>> + if (ret || !sdev) {
>> + ret = -ENODEV;
>> + goto out;
>> + }
>> +
>> + /*
>> + * For responses from userspace, need to make sure that the
>> + * pasid has been bound to its mm.
>> + */
>> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
>> + struct mm_struct *mm;
>> +
>> + mm = get_task_mm(current);
>> + if (!mm) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> + if (mm != svm->mm) {
>> + ret = -ENODEV;
>> + mmput(mm);
>> + goto out;
>> + }
>> +
>> + mmput(mm);
>> + }
>> + } else {
>> + pr_err_ratelimited("Invalid page response: no pasid\n");
>> + ret = -EINVAL;
>> + goto out;
>
> check pasid=0 first, then no need to indent so many lines above.
Yes.
>
>> + }
>> +
>> + /*
>> + * Per VT-d spec. v3.0 ch7.7, system software must respond
>> + * with page group response if private data is present (PDP)
>> + * or last page in group (LPIG) bit is set. This is an
>> + * additional VT-d requirement beyond PCI ATS spec.
>> + */
>
> What is the behavior if system software doesn't follow the requirement?
> en... maybe the question is really about whether the information in prm
> comes from userspace or from internally-recorded info in iommu core.
> The former cannot be trusted. The latter one is OK.
We require a page response when reporting such an event. The upper layer
(IOMMU core or VFIO) will be implemented with a timer; if userspace
doesn't respond in time, the timer will expire and a FAILURE
response will be sent to the device.
Best regards,
baolu
>
> Thanks
> Kevin
>
>> + if (last_page || private_present) {
>> + struct qi_desc desc;
>> +
>> + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
>> |
>> + QI_PGRP_PASID_P(pasid_present) |
>> + QI_PGRP_PDP(private_present) |
>> + QI_PGRP_RESP_CODE(msg->code) |
>> + QI_PGRP_RESP_TYPE;
>> + desc.qw1 = QI_PGRP_IDX(prm->grpid) |
>> QI_PGRP_LPIG(last_page);
>> + desc.qw2 = 0;
>> + desc.qw3 = 0;
>> + if (private_present)
>> + memcpy(&desc.qw2, prm->private_data,
>> + sizeof(prm->private_data));
>> +
>> + qi_submit_sync(iommu, &desc, 1, 0);
>> + }
>> +out:
>> + mutex_unlock(&pasid_mutex);
>> + return ret;
>> +}
>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index fc2cfc3db6e1..bf6009a344f5 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
>> *dev, struct mm_struct *mm,
>> void *drvdata);
>> void intel_svm_unbind(struct iommu_sva *handle);
>> int intel_svm_get_pasid(struct iommu_sva *handle);
>> +int intel_svm_page_response(struct device *dev, struct iommu_fault_event
>> *evt,
>> + struct iommu_page_response *msg);
>> +
>> struct svm_dev_ops;
>>
>> struct intel_svm_dev {
>> --
>> 2.17.1
>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH v3 4/4] iommu/vt-d: Add page response ops support
2020-07-10 5:36 ` Lu Baolu
@ 2020-07-10 5:49 ` Tian, Kevin
2020-07-10 8:17 ` Lu Baolu
0 siblings, 1 reply; 11+ messages in thread
From: Tian, Kevin @ 2020-07-10 5:49 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, July 10, 2020 1:37 PM
>
> Hi Kevin,
>
> On 2020/7/10 10:42, Tian, Kevin wrote:
> >> From: Lu Baolu <baolu.lu@linux.intel.com>
> >> Sent: Thursday, July 9, 2020 3:06 PM
> >>
> >> After page requests are handled, software must respond to the device
> >> which raised the page request with the result. This is done through
> >> the iommu ops.page_response if the request was reported to outside of
> >> vendor iommu driver through iommu_report_device_fault(). This adds
> the
> >> VT-d implementation of page_response ops.
> >>
> >> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> >> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> >> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> >> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> >> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> >> ---
> >> drivers/iommu/intel/iommu.c | 1 +
> >> drivers/iommu/intel/svm.c | 100
> >> ++++++++++++++++++++++++++++++++++++
> >> include/linux/intel-iommu.h | 3 ++
> >> 3 files changed, 104 insertions(+)
> >>
> >> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> >> index 4a6b6960fc32..98390a6d8113 100644
> >> --- a/drivers/iommu/intel/iommu.c
> >> +++ b/drivers/iommu/intel/iommu.c
> >> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
> >> .sva_bind = intel_svm_bind,
> >> .sva_unbind = intel_svm_unbind,
> >> .sva_get_pasid = intel_svm_get_pasid,
> >> + .page_response = intel_svm_page_response,
> >> #endif
> >> };
> >>
> >> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> >> index d24e71bac8db..839d2af377b6 100644
> >> --- a/drivers/iommu/intel/svm.c
> >> +++ b/drivers/iommu/intel/svm.c
> >> @@ -1082,3 +1082,103 @@ int intel_svm_get_pasid(struct iommu_sva
> *sva)
> >>
> >> return pasid;
> >> }
> >> +
> >> +int intel_svm_page_response(struct device *dev,
> >> + struct iommu_fault_event *evt,
> >> + struct iommu_page_response *msg)
> >> +{
> >> + struct iommu_fault_page_request *prm;
> >> + struct intel_svm_dev *sdev = NULL;
> >> + struct intel_svm *svm = NULL;
> >> + struct intel_iommu *iommu;
> >> + bool private_present;
> >> + bool pasid_present;
> >> + bool last_page;
> >> + u8 bus, devfn;
> >> + int ret = 0;
> >> + u16 sid;
> >> +
> >> + if (!dev || !dev_is_pci(dev))
> >> + return -ENODEV;
> >> +
> >> + iommu = device_to_iommu(dev, &bus, &devfn);
> >> + if (!iommu)
> >> + return -ENODEV;
> >> +
> >> + if (!msg || !evt)
> >> + return -EINVAL;
> >> +
> >> + mutex_lock(&pasid_mutex);
> >> +
> >> + prm = &evt->fault.prm;
> >> + sid = PCI_DEVID(bus, devfn);
> >> + pasid_present = prm->flags &
> >> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> >> + private_present = prm->flags &
> >> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> >> + last_page = prm->flags &
> >> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> >> +
> >> + if (pasid_present) {
> >> + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
> >> + ret = -EINVAL;
> >> + goto out;
> >> + }
> >> +
> >> + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
> >> + if (ret || !sdev) {
> >> + ret = -ENODEV;
> >> + goto out;
> >> + }
> >> +
> >> + /*
> >> + * For responses from userspace, need to make sure that the
> >> + * pasid has been bound to its mm.
> >> + */
> >> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
> >> + struct mm_struct *mm;
> >> +
> >> + mm = get_task_mm(current);
> >> + if (!mm) {
> >> + ret = -EINVAL;
> >> + goto out;
> >> + }
> >> +
> >> + if (mm != svm->mm) {
> >> + ret = -ENODEV;
> >> + mmput(mm);
> >> + goto out;
> >> + }
> >> +
> >> + mmput(mm);
> >> + }
> >> + } else {
> >> + pr_err_ratelimited("Invalid page response: no pasid\n");
> >> + ret = -EINVAL;
> >> + goto out;
> >
> > check pasid=0 first, then no need to indent so many lines above.
>
> Yes.
>
> >
> >> + }
> >> +
> >> + /*
> >> + * Per VT-d spec. v3.0 ch7.7, system software must respond
> >> + * with page group response if private data is present (PDP)
> >> + * or last page in group (LPIG) bit is set. This is an
> >> + * additional VT-d requirement beyond PCI ATS spec.
> >> + */
> >
> > What is the behavior if system software doesn't follow the requirement?
> > en... maybe the question is really about whether the information in prm
> > comes from userspace or from internally-recorded info in iommu core.
> > The former cannot be trusted. The latter one is OK.
>
> We require a page response when reporting such event. The upper layer
> (IOMMU core or VFIO) will be implemented with a timer, if userspace
> doesn't respond in time, the timer will get expired and a FAILURE
> response will be sent to device.
Yes, timer helps when userspace doesn't respond. Then I'm fine with
this patch.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
btw when you say IOMMU core or VFIO, does it mean the timer mechanism
is not implemented yet?
Thanks
Kevin
>
> Best regards,
> baolu
>
> >
> > Thanks
> > Kevin
> >
> >> + if (last_page || private_present) {
> >> + struct qi_desc desc;
> >> +
> >> + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
> >> |
> >> + QI_PGRP_PASID_P(pasid_present) |
> >> + QI_PGRP_PDP(private_present) |
> >> + QI_PGRP_RESP_CODE(msg->code) |
> >> + QI_PGRP_RESP_TYPE;
> >> + desc.qw1 = QI_PGRP_IDX(prm->grpid) |
> >> QI_PGRP_LPIG(last_page);
> >> + desc.qw2 = 0;
> >> + desc.qw3 = 0;
> >> + if (private_present)
> >> + memcpy(&desc.qw2, prm->private_data,
> >> + sizeof(prm->private_data));
> >> +
> >> + qi_submit_sync(iommu, &desc, 1, 0);
> >> + }
> >> +out:
> >> + mutex_unlock(&pasid_mutex);
> >> + return ret;
> >> +}
> >> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> >> index fc2cfc3db6e1..bf6009a344f5 100644
> >> --- a/include/linux/intel-iommu.h
> >> +++ b/include/linux/intel-iommu.h
> >> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
> >> *dev, struct mm_struct *mm,
> >> void *drvdata);
> >> void intel_svm_unbind(struct iommu_sva *handle);
> >> int intel_svm_get_pasid(struct iommu_sva *handle);
> >> +int intel_svm_page_response(struct device *dev, struct
> iommu_fault_event
> >> *evt,
> >> + struct iommu_page_response *msg);
> >> +
> >> struct svm_dev_ops;
> >>
> >> struct intel_svm_dev {
> >> --
> >> 2.17.1
> >
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3 4/4] iommu/vt-d: Add page response ops support
2020-07-10 5:49 ` Tian, Kevin
@ 2020-07-10 8:17 ` Lu Baolu
0 siblings, 0 replies; 11+ messages in thread
From: Lu Baolu @ 2020-07-10 8:17 UTC (permalink / raw)
To: Tian, Kevin, iommu; +Cc: Raj, Ashok, linux-kernel
Hi Kevin,
On 2020/7/10 13:49, Tian, Kevin wrote:
>> From: Lu Baolu<baolu.lu@linux.intel.com>
>> Sent: Friday, July 10, 2020 1:37 PM
>>
>> Hi Kevin,
>>
>> On 2020/7/10 10:42, Tian, Kevin wrote:
>>>> From: Lu Baolu<baolu.lu@linux.intel.com>
>>>> Sent: Thursday, July 9, 2020 3:06 PM
>>>>
>>>> After page requests are handled, software must respond to the device
>>>> which raised the page request with the result. This is done through
>>>> the iommu ops.page_response if the request was reported to outside of
>>>> vendor iommu driver through iommu_report_device_fault(). This adds
>> the
>>>> VT-d implementation of page_response ops.
>>>>
>>>> Co-developed-by: Jacob Pan<jacob.jun.pan@linux.intel.com>
>>>> Signed-off-by: Jacob Pan<jacob.jun.pan@linux.intel.com>
>>>> Co-developed-by: Liu Yi L<yi.l.liu@intel.com>
>>>> Signed-off-by: Liu Yi L<yi.l.liu@intel.com>
>>>> Signed-off-by: Lu Baolu<baolu.lu@linux.intel.com>
>>>> ---
>>>> drivers/iommu/intel/iommu.c | 1 +
>>>> drivers/iommu/intel/svm.c | 100
>>>> ++++++++++++++++++++++++++++++++++++
>>>> include/linux/intel-iommu.h | 3 ++
>>>> 3 files changed, 104 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
>>>> index 4a6b6960fc32..98390a6d8113 100644
>>>> --- a/drivers/iommu/intel/iommu.c
>>>> +++ b/drivers/iommu/intel/iommu.c
>>>> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
>>>> .sva_bind = intel_svm_bind,
>>>> .sva_unbind = intel_svm_unbind,
>>>> .sva_get_pasid = intel_svm_get_pasid,
>>>> + .page_response = intel_svm_page_response,
>>>> #endif
>>>> };
>>>>
>>>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>>>> index d24e71bac8db..839d2af377b6 100644
>>>> --- a/drivers/iommu/intel/svm.c
>>>> +++ b/drivers/iommu/intel/svm.c
>>>> @@ -1082,3 +1082,103 @@ int intel_svm_get_pasid(struct iommu_sva
>> *sva)
>>>> return pasid;
>>>> }
>>>> +
>>>> +int intel_svm_page_response(struct device *dev,
>>>> + struct iommu_fault_event *evt,
>>>> + struct iommu_page_response *msg)
>>>> +{
>>>> + struct iommu_fault_page_request *prm;
>>>> + struct intel_svm_dev *sdev = NULL;
>>>> + struct intel_svm *svm = NULL;
>>>> + struct intel_iommu *iommu;
>>>> + bool private_present;
>>>> + bool pasid_present;
>>>> + bool last_page;
>>>> + u8 bus, devfn;
>>>> + int ret = 0;
>>>> + u16 sid;
>>>> +
>>>> + if (!dev || !dev_is_pci(dev))
>>>> + return -ENODEV;
>>>> +
>>>> + iommu = device_to_iommu(dev, &bus, &devfn);
>>>> + if (!iommu)
>>>> + return -ENODEV;
>>>> +
>>>> + if (!msg || !evt)
>>>> + return -EINVAL;
>>>> +
>>>> + mutex_lock(&pasid_mutex);
>>>> +
>>>> + prm = &evt->fault.prm;
>>>> + sid = PCI_DEVID(bus, devfn);
>>>> + pasid_present = prm->flags &
>>>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>>>> + private_present = prm->flags &
>>>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>>>> + last_page = prm->flags &
>>>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>>>> +
>>>> + if (pasid_present) {
>>>> + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
>>>> + ret = -EINVAL;
>>>> + goto out;
>>>> + }
>>>> +
>>>> + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
>>>> + if (ret || !sdev) {
>>>> + ret = -ENODEV;
>>>> + goto out;
>>>> + }
>>>> +
>>>> + /*
>>>> + * For responses from userspace, need to make sure that the
>>>> + * pasid has been bound to its mm.
>>>> + */
>>>> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
>>>> + struct mm_struct *mm;
>>>> +
>>>> + mm = get_task_mm(current);
>>>> + if (!mm) {
>>>> + ret = -EINVAL;
>>>> + goto out;
>>>> + }
>>>> +
>>>> + if (mm != svm->mm) {
>>>> + ret = -ENODEV;
>>>> + mmput(mm);
>>>> + goto out;
>>>> + }
>>>> +
>>>> + mmput(mm);
>>>> + }
>>>> + } else {
>>>> + pr_err_ratelimited("Invalid page response: no pasid\n");
>>>> + ret = -EINVAL;
>>>> + goto out;
>>> check pasid=0 first, then no need to indent so many lines above.
>> Yes.
>>
>>>> + }
>>>> +
>>>> + /*
>>>> + * Per VT-d spec. v3.0 ch7.7, system software must respond
>>>> + * with page group response if private data is present (PDP)
>>>> + * or last page in group (LPIG) bit is set. This is an
>>>> + * additional VT-d requirement beyond PCI ATS spec.
>>>> + */
>>> What is the behavior if system software doesn't follow the requirement?
>>> en... maybe the question is really about whether the information in prm
>>> comes from userspace or from internally-recorded info in iommu core.
>>> The former cannot be trusted. The latter one is OK.
>> We require a page response when reporting such event. The upper layer
>> (IOMMU core or VFIO) will be implemented with a timer, if userspace
>> doesn't respond in time, the timer will get expired and a FAILURE
>> response will be sent to device.
> Yes, timer helps when userspace doesn't respond. Then I'm fine with
> this patch.
>
> Reviewed-by: Kevin Tian<kevin.tian@intel.com>
>
> btw when you say IOMMU core or VFIO, does it mean the timer mechanism
> is not implemented yet?
>
It's in a local tree, not upstream yet.
Best regards,
baolu
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 11+ messages in thread