linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/4] iommu/vt-d: Add prq report and response support
@ 2020-07-06  0:25 Lu Baolu
  2020-07-06  0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  0:25 UTC (permalink / raw)
  To: iommu
  Cc: Joerg Roedel, Jacob Pan, Kevin Tian, Ashok Raj, Liu Yi L,
	linux-kernel, Lu Baolu

Hi,

This series adds page request event reporting and response support to
the Intel IOMMU driver. This is necessary when the page requests must
be processed by any component other than the vendor IOMMU driver. For
example, when a guest page table was bound to a PASID through the
iommu_ops->sva_bind_gpasid() api, the page requests should be routed to
the guest, and after the page is served, the device should be responded
with the result.

Your review comments are very appreciated.

Best regards,
baolu

Change log:
v1->v2:
  - v1 posted at https://lkml.org/lkml/2020/6/27/387
  - Remove unnecessary pci_get_domain_bus_and_slot()
  - Return error when sdev == NULL in intel_svm_page_response()

Lu Baolu (4):
  iommu/vt-d: Refactor device_to_iommu() helper
  iommu/vt-d: Add a helper to get svm and sdev for pasid
  iommu/vt-d: Report page request faults for guest SVA
  iommu/vt-d: Add page response ops support

 drivers/iommu/intel/iommu.c |  56 ++-----
 drivers/iommu/intel/svm.c   | 302 +++++++++++++++++++++++++++---------
 include/linux/intel-iommu.h |   6 +-
 3 files changed, 248 insertions(+), 116 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper
  2020-07-06  0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
@ 2020-07-06  0:25 ` Lu Baolu
  2020-07-06  1:02   ` Tian, Kevin
  2020-07-06  0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  0:25 UTC (permalink / raw)
  To: iommu
  Cc: Joerg Roedel, Jacob Pan, Kevin Tian, Ashok Raj, Liu Yi L,
	linux-kernel, Lu Baolu

It is refactored in two ways:

- Make it global so that it could be used in other files.

- Make bus/devfn optional so that callers could ignore these two returned
values when they only want to get the coresponding iommu pointer.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.c | 55 +++++++++++--------------------------
 drivers/iommu/intel/svm.c   |  8 +++---
 include/linux/intel-iommu.h |  3 +-
 3 files changed, 21 insertions(+), 45 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..de17952ed133 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -778,16 +778,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
 	return false;
 }
 
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
 	struct dmar_drhd_unit *drhd = NULL;
+	struct pci_dev *pdev = NULL;
 	struct intel_iommu *iommu;
 	struct device *tmp;
-	struct pci_dev *pdev = NULL;
 	u16 segment = 0;
 	int i;
 
-	if (iommu_dummy(dev))
+	if (!dev || iommu_dummy(dev))
 		return NULL;
 
 	if (dev_is_pci(dev)) {
@@ -818,8 +818,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
 				if (pdev && pdev->is_virtfn)
 					goto got_pdev;
 
-				*bus = drhd->devices[i].bus;
-				*devfn = drhd->devices[i].devfn;
+				if (bus && devfn) {
+					*bus = drhd->devices[i].bus;
+					*devfn = drhd->devices[i].devfn;
+				}
 				goto out;
 			}
 
@@ -829,8 +831,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
 
 		if (pdev && drhd->include_all) {
 		got_pdev:
-			*bus = pdev->bus->number;
-			*devfn = pdev->devfn;
+			if (bus && devfn) {
+				*bus = pdev->bus->number;
+				*devfn = pdev->devfn;
+			}
 			goto out;
 		}
 	}
@@ -5146,11 +5150,10 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
 			      struct device *dev)
 {
 	int ret;
-	u8 bus, devfn;
 	unsigned long flags;
 	struct intel_iommu *iommu;
 
-	iommu = device_to_iommu(dev, &bus, &devfn);
+	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu)
 		return -ENODEV;
 
@@ -5236,9 +5239,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct intel_iommu *iommu;
 	int addr_width;
-	u8 bus, devfn;
 
-	iommu = device_to_iommu(dev, &bus, &devfn);
+	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu)
 		return -ENODEV;
 
@@ -5658,9 +5660,8 @@ static bool intel_iommu_capable(enum iommu_cap cap)
 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 {
 	struct intel_iommu *iommu;
-	u8 bus, devfn;
 
-	iommu = device_to_iommu(dev, &bus, &devfn);
+	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu)
 		return ERR_PTR(-ENODEV);
 
@@ -5673,9 +5674,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 static void intel_iommu_release_device(struct device *dev)
 {
 	struct intel_iommu *iommu;
-	u8 bus, devfn;
 
-	iommu = device_to_iommu(dev, &bus, &devfn);
+	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu)
 		return;
 
@@ -5825,37 +5825,14 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
 	return generic_device_group(dev);
 }
 
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
-	struct intel_iommu *iommu;
-	u8 bus, devfn;
-
-	if (iommu_dummy(dev)) {
-		dev_warn(dev,
-			 "No IOMMU translation for device; cannot enable SVM\n");
-		return NULL;
-	}
-
-	iommu = device_to_iommu(dev, &bus, &devfn);
-	if ((!iommu)) {
-		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
-		return NULL;
-	}
-
-	return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
 static int intel_iommu_enable_auxd(struct device *dev)
 {
 	struct device_domain_info *info;
 	struct intel_iommu *iommu;
 	unsigned long flags;
-	u8 bus, devfn;
 	int ret;
 
-	iommu = device_to_iommu(dev, &bus, &devfn);
+	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu || dmar_disabled)
 		return -EINVAL;
 
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 6c87c807a0ab..25dd74f27252 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -231,7 +231,7 @@ static LIST_HEAD(global_svm_list);
 int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 			  struct iommu_gpasid_bind_data *data)
 {
-	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
 	struct dmar_domain *dmar_domain;
 	struct intel_svm_dev *sdev;
 	struct intel_svm *svm;
@@ -373,7 +373,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 
 int intel_svm_unbind_gpasid(struct device *dev, int pasid)
 {
-	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
 	struct intel_svm_dev *sdev;
 	struct intel_svm *svm;
 	int ret = -EINVAL;
@@ -430,7 +430,7 @@ static int
 intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
 		  struct mm_struct *mm, struct intel_svm_dev **sd)
 {
-	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
 	struct device_domain_info *info;
 	struct intel_svm_dev *sdev;
 	struct intel_svm *svm = NULL;
@@ -608,7 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
 	struct intel_svm *svm;
 	int ret = -EINVAL;
 
-	iommu = intel_svm_device_to_iommu(dev);
+	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu)
 		goto out;
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3e8fa1c7a1e6..fc2cfc3db6e1 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -728,6 +728,7 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu);
 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
 struct dmar_domain *find_domain(struct device *dev);
 struct device_domain_info *get_domain_info(struct device *dev);
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern void intel_svm_check(struct intel_iommu *iommu);
@@ -766,8 +767,6 @@ struct intel_svm {
 	struct list_head devs;
 	struct list_head list;
 };
-
-extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
 #else
 static inline void intel_svm_check(struct intel_iommu *iommu) {}
 #endif
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid
  2020-07-06  0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
  2020-07-06  0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
@ 2020-07-06  0:25 ` Lu Baolu
  2020-07-06  1:10   ` Tian, Kevin
  2020-07-06  0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
  2020-07-06  0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
  3 siblings, 1 reply; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  0:25 UTC (permalink / raw)
  To: iommu
  Cc: Joerg Roedel, Jacob Pan, Kevin Tian, Ashok Raj, Liu Yi L,
	linux-kernel, Lu Baolu

There are several places in the code that need to get the pointers of
svm and sdev according to a pasid and device. Add a helper to achieve
this for code consolidation and readability.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/svm.c | 121 +++++++++++++++++++++-----------------
 1 file changed, 68 insertions(+), 53 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 25dd74f27252..c23167877b2b 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -228,6 +228,50 @@ static LIST_HEAD(global_svm_list);
 	list_for_each_entry((sdev), &(svm)->devs, list)	\
 		if ((d) != (sdev)->dev) {} else
 
+static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
+			     struct intel_svm **rsvm,
+			     struct intel_svm_dev **rsdev)
+{
+	struct intel_svm_dev *d, *sdev = NULL;
+	struct intel_svm *svm;
+
+	/* The caller should hold the pasid_mutex lock */
+	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
+		return -EINVAL;
+
+	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+		return -EINVAL;
+
+	svm = ioasid_find(NULL, pasid, NULL);
+	if (IS_ERR(svm))
+		return PTR_ERR(svm);
+
+	if (!svm)
+		goto out;
+
+	/*
+	 * If we found svm for the PASID, there must be at least one device
+	 * bond.
+	 */
+	if (WARN_ON(list_empty(&svm->devs)))
+		return -EINVAL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(d, &svm->devs, list) {
+		if (d->dev == dev) {
+			sdev = d;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+out:
+	*rsvm = svm;
+	*rsdev = sdev;
+
+	return 0;
+}
+
 int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 			  struct iommu_gpasid_bind_data *data)
 {
@@ -261,39 +305,27 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	dmar_domain = to_dmar_domain(domain);
 
 	mutex_lock(&pasid_mutex);
-	svm = ioasid_find(NULL, data->hpasid, NULL);
-	if (IS_ERR(svm)) {
-		ret = PTR_ERR(svm);
+	ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
+	if (ret)
 		goto out;
-	}
 
-	if (svm) {
+	if (sdev) {
 		/*
-		 * If we found svm for the PASID, there must be at
-		 * least one device bond, otherwise svm should be freed.
+		 * For devices with aux domains, we should allow
+		 * multiple bind calls with the same PASID and pdev.
 		 */
-		if (WARN_ON(list_empty(&svm->devs))) {
-			ret = -EINVAL;
-			goto out;
+		if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) {
+			sdev->users++;
+		} else {
+			dev_warn_ratelimited(dev,
+					     "Already bound with PASID %u\n",
+					     svm->pasid);
+			ret = -EBUSY;
 		}
+		goto out;
+	}
 
-		for_each_svm_dev(sdev, svm, dev) {
-			/*
-			 * For devices with aux domains, we should allow
-			 * multiple bind calls with the same PASID and pdev.
-			 */
-			if (iommu_dev_feature_enabled(dev,
-						      IOMMU_DEV_FEAT_AUX)) {
-				sdev->users++;
-			} else {
-				dev_warn_ratelimited(dev,
-						     "Already bound with PASID %u\n",
-						     svm->pasid);
-				ret = -EBUSY;
-			}
-			goto out;
-		}
-	} else {
+	if (!svm) {
 		/* We come here when PASID has never been bond to a device. */
 		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
 		if (!svm) {
@@ -376,25 +408,17 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
 	struct intel_svm_dev *sdev;
 	struct intel_svm *svm;
-	int ret = -EINVAL;
+	int ret;
 
 	if (WARN_ON(!iommu))
 		return -EINVAL;
 
 	mutex_lock(&pasid_mutex);
-	svm = ioasid_find(NULL, pasid, NULL);
-	if (!svm) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (IS_ERR(svm)) {
-		ret = PTR_ERR(svm);
+	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+	if (ret)
 		goto out;
-	}
 
-	for_each_svm_dev(sdev, svm, dev) {
-		ret = 0;
+	if (sdev) {
 		if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
 			sdev->users--;
 		if (!sdev->users) {
@@ -418,7 +442,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
 				kfree(svm);
 			}
 		}
-		break;
 	}
 out:
 	mutex_unlock(&pasid_mutex);
@@ -596,7 +619,7 @@ intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
 	if (sd)
 		*sd = sdev;
 	ret = 0;
- out:
+out:
 	return ret;
 }
 
@@ -612,17 +635,11 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
 	if (!iommu)
 		goto out;
 
-	svm = ioasid_find(NULL, pasid, NULL);
-	if (!svm)
-		goto out;
-
-	if (IS_ERR(svm)) {
-		ret = PTR_ERR(svm);
+	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+	if (ret)
 		goto out;
-	}
 
-	for_each_svm_dev(sdev, svm, dev) {
-		ret = 0;
+	if (sdev) {
 		sdev->users--;
 		if (!sdev->users) {
 			list_del_rcu(&sdev->list);
@@ -651,10 +668,8 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
 				kfree(svm);
 			}
 		}
-		break;
 	}
- out:
-
+out:
 	return ret;
 }
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-06  0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
  2020-07-06  0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
  2020-07-06  0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
@ 2020-07-06  0:25 ` Lu Baolu
  2020-07-06  1:29   ` Tian, Kevin
                     ` (2 more replies)
  2020-07-06  0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
  3 siblings, 3 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  0:25 UTC (permalink / raw)
  To: iommu
  Cc: Joerg Roedel, Jacob Pan, Kevin Tian, Ashok Raj, Liu Yi L,
	linux-kernel, Lu Baolu

A pasid might be bound to a page table from a VM guest via the iommu
ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
on the physical IOMMU, we need to inject the page fault request into
the guest. After the guest completes handling the page fault, a page
response need to be sent back via the iommu ops.page_response().

This adds support to report a page request fault. Any external module
which is interested in handling this fault should regiester a notifier
callback.

Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-------
 1 file changed, 81 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c23167877b2b..08c58c2b1a06 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev, int pasid)
 	}
 }
 
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+	int prot = 0;
+
+	if (req->rd_req)
+		prot |= IOMMU_FAULT_PERM_READ;
+	if (req->wr_req)
+		prot |= IOMMU_FAULT_PERM_WRITE;
+	if (req->exe_req)
+		prot |= IOMMU_FAULT_PERM_EXEC;
+	if (req->pm_req)
+		prot |= IOMMU_FAULT_PERM_PRIV;
+
+	return prot;
+}
+
+static int
+intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+{
+	struct iommu_fault_event event;
+	u8 bus, devfn;
+
+	memset(&event, 0, sizeof(struct iommu_fault_event));
+	bus = PCI_BUS_NUM(desc->rid);
+	devfn = desc->rid & 0xff;
+
+	/* Fill in event data for device specific processing */
+	event.fault.type = IOMMU_FAULT_PAGE_REQ;
+	event.fault.prm.addr = desc->addr;
+	event.fault.prm.pasid = desc->pasid;
+	event.fault.prm.grpid = desc->prg_index;
+	event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+	/*
+	 * Set last page in group bit if private data is present,
+	 * page response is required as it does for LPIG.
+	 */
+	if (desc->lpig)
+		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+	if (desc->pasid_present)
+		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+	if (desc->priv_data_present) {
+		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+		memcpy(event.fault.prm.private_data, desc->priv_data,
+		       sizeof(desc->priv_data));
+	}
+
+	return iommu_report_device_fault(dev, &event);
+}
+
 static irqreturn_t prq_event_thread(int irq, void *d)
 {
 	struct intel_iommu *iommu = d;
@@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
 	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
 	while (head != tail) {
-		struct intel_svm_dev *sdev;
+		struct intel_svm_dev *sdev = NULL;
 		struct vm_area_struct *vma;
 		struct page_req_dsc *req;
 		struct qi_desc resp;
@@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 			}
 		}
 
+		if (!sdev || sdev->sid != req->rid) {
+			struct intel_svm_dev *t;
+
+			sdev = NULL;
+			rcu_read_lock();
+			list_for_each_entry_rcu(t, &svm->devs, list) {
+				if (t->sid == req->rid) {
+					sdev = t;
+					break;
+				}
+			}
+			rcu_read_unlock();
+		}
+
 		result = QI_RESP_INVALID;
 		/* Since we're using init_mm.pgd directly, we should never take
 		 * any faults on kernel addresses. */
@@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		if (!is_canonical_address(address))
 			goto bad_req;
 
+		/*
+		 * If prq is to be handled outside iommu driver via receiver of
+		 * the fault notifiers, we skip the page response here.
+		 */
+		if (svm->flags & SVM_FLAG_GUEST_MODE) {
+			if (sdev && !intel_svm_prq_report(sdev->dev, req))
+				goto prq_advance;
+			else
+				goto bad_req;
+		}
+
 		/* If the mm is already defunct, don't handle faults. */
 		if (!mmget_not_zero(svm->mm))
 			goto bad_req;
@@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 			goto invalid;
 
 		result = QI_RESP_SUCCESS;
-	invalid:
+invalid:
 		mmap_read_unlock(svm->mm);
 		mmput(svm->mm);
-	bad_req:
-		/* Accounting for major/minor faults? */
-		rcu_read_lock();
-		list_for_each_entry_rcu(sdev, &svm->devs, list) {
-			if (sdev->sid == req->rid)
-				break;
-		}
-		/* Other devices can go away, but the drivers are not permitted
-		 * to unbind while any page faults might be in flight. So it's
-		 * OK to drop the 'lock' here now we have it. */
-		rcu_read_unlock();
-
-		if (WARN_ON(&sdev->list == &svm->devs))
-			sdev = NULL;
-
+bad_req:
 		if (sdev && sdev->ops && sdev->ops->fault_cb) {
 			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
 				(req->exe_req << 1) | (req->pm_req);
@@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		   and these can be NULL. Do not use them below this point! */
 		sdev = NULL;
 		svm = NULL;
-	no_pasid:
+no_pasid:
 		if (req->lpig || req->priv_data_present) {
 			/*
 			 * Per VT-d spec. v3.0 ch7.7, system software must
@@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 			resp.qw3 = 0;
 			qi_submit_sync(iommu, &resp, 1, 0);
 		}
+prq_advance:
 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
 	}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 4/4] iommu/vt-d: Add page response ops support
  2020-07-06  0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
                   ` (2 preceding siblings ...)
  2020-07-06  0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
@ 2020-07-06  0:25 ` Lu Baolu
  2020-07-06  1:47   ` Tian, Kevin
  3 siblings, 1 reply; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  0:25 UTC (permalink / raw)
  To: iommu
  Cc: Joerg Roedel, Jacob Pan, Kevin Tian, Ashok Raj, Liu Yi L,
	linux-kernel, Lu Baolu

After a page request is handled, software must response the device which
raised the page request with the handling result. This is done through
the iommu ops.page_response if the request was reported to outside of
vendor iommu driver through iommu_report_device_fault(). This adds the
VT-d implementation of page_response ops.

Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.c |  1 +
 drivers/iommu/intel/svm.c   | 74 +++++++++++++++++++++++++++++++++++++
 include/linux/intel-iommu.h |  3 ++
 3 files changed, 78 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index de17952ed133..7eb29167e8f9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.sva_bind		= intel_svm_bind,
 	.sva_unbind		= intel_svm_unbind,
 	.sva_get_pasid		= intel_svm_get_pasid,
+	.page_response		= intel_svm_page_response,
 #endif
 };
 
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 08c58c2b1a06..1c7d8a9ea124 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -1078,3 +1078,77 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
 
 	return pasid;
 }
+
+int intel_svm_page_response(struct device *dev,
+			    struct iommu_fault_event *evt,
+			    struct iommu_page_response *msg)
+{
+	struct iommu_fault_page_request *prm;
+	struct intel_svm_dev *sdev;
+	struct intel_iommu *iommu;
+	struct intel_svm *svm;
+	bool private_present;
+	bool pasid_present;
+	bool last_page;
+	u8 bus, devfn;
+	int ret = 0;
+	u16 sid;
+
+	if (!dev || !dev_is_pci(dev))
+		return -ENODEV;
+
+	iommu = device_to_iommu(dev, &bus, &devfn);
+	if (!iommu)
+		return -ENODEV;
+
+	if (!msg || !evt)
+		return -EINVAL;
+
+	mutex_lock(&pasid_mutex);
+
+	prm = &evt->fault.prm;
+	sid = PCI_DEVID(bus, devfn);
+	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+	if (pasid_present) {
+		if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
+		if (ret || !sdev) {
+			ret = -ENODEV;
+			goto out;
+		}
+	}
+
+	/*
+	 * Per VT-d spec. v3.0 ch7.7, system software must respond
+	 * with page group response if private data is present (PDP)
+	 * or last page in group (LPIG) bit is set. This is an
+	 * additional VT-d requirement beyond PCI ATS spec.
+	 */
+	if (last_page || private_present) {
+		struct qi_desc desc;
+
+		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+				QI_PGRP_PASID_P(pasid_present) |
+				QI_PGRP_PDP(private_present) |
+				QI_PGRP_RESP_CODE(msg->code) |
+				QI_PGRP_RESP_TYPE;
+		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+		desc.qw2 = 0;
+		desc.qw3 = 0;
+		if (private_present)
+			memcpy(&desc.qw2, prm->private_data,
+			       sizeof(prm->private_data));
+
+		qi_submit_sync(iommu, &desc, 1, 0);
+	}
+out:
+	mutex_unlock(&pasid_mutex);
+	return ret;
+}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index fc2cfc3db6e1..bf6009a344f5 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
 				 void *drvdata);
 void intel_svm_unbind(struct iommu_sva *handle);
 int intel_svm_get_pasid(struct iommu_sva *handle);
+int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+			    struct iommu_page_response *msg);
+
 struct svm_dev_ops;
 
 struct intel_svm_dev {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper
  2020-07-06  0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
@ 2020-07-06  1:02   ` Tian, Kevin
  0 siblings, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06  1:02 UTC (permalink / raw)
  To: Lu Baolu, iommu
  Cc: Joerg Roedel, Jacob Pan, Raj, Ashok, Liu, Yi L, linux-kernel

> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, July 6, 2020 8:26 AM
> 
> It is refactored in two ways:
> 
> - Make it global so that it could be used in other files.
> 
> - Make bus/devfn optional so that callers could ignore these two returned
> values when they only want to get the coresponding iommu pointer.
> 
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/intel/iommu.c | 55 +++++++++++--------------------------
>  drivers/iommu/intel/svm.c   |  8 +++---
>  include/linux/intel-iommu.h |  3 +-
>  3 files changed, 21 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index d759e7234e98..de17952ed133 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -778,16 +778,16 @@ is_downstream_to_pci_bridge(struct device *dev,
> struct device *bridge)
>  	return false;
>  }
> 
> -static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8
> *devfn)
> +struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8
> *devfn)
>  {
>  	struct dmar_drhd_unit *drhd = NULL;
> +	struct pci_dev *pdev = NULL;
>  	struct intel_iommu *iommu;
>  	struct device *tmp;
> -	struct pci_dev *pdev = NULL;
>  	u16 segment = 0;
>  	int i;
> 
> -	if (iommu_dummy(dev))
> +	if (!dev || iommu_dummy(dev))
>  		return NULL;
> 
>  	if (dev_is_pci(dev)) {
> @@ -818,8 +818,10 @@ static struct intel_iommu *device_to_iommu(struct
> device *dev, u8 *bus, u8 *devf
>  				if (pdev && pdev->is_virtfn)
>  					goto got_pdev;
> 
> -				*bus = drhd->devices[i].bus;
> -				*devfn = drhd->devices[i].devfn;
> +				if (bus && devfn) {
> +					*bus = drhd->devices[i].bus;
> +					*devfn = drhd->devices[i].devfn;
> +				}
>  				goto out;
>  			}
> 
> @@ -829,8 +831,10 @@ static struct intel_iommu *device_to_iommu(struct
> device *dev, u8 *bus, u8 *devf
> 
>  		if (pdev && drhd->include_all) {
>  		got_pdev:
> -			*bus = pdev->bus->number;
> -			*devfn = pdev->devfn;
> +			if (bus && devfn) {
> +				*bus = pdev->bus->number;
> +				*devfn = pdev->devfn;
> +			}
>  			goto out;
>  		}
>  	}
> @@ -5146,11 +5150,10 @@ static int aux_domain_add_dev(struct
> dmar_domain *domain,
>  			      struct device *dev)
>  {
>  	int ret;
> -	u8 bus, devfn;
>  	unsigned long flags;
>  	struct intel_iommu *iommu;
> 
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> +	iommu = device_to_iommu(dev, NULL, NULL);
>  	if (!iommu)
>  		return -ENODEV;
> 
> @@ -5236,9 +5239,8 @@ static int prepare_domain_attach_device(struct
> iommu_domain *domain,
>  	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>  	struct intel_iommu *iommu;
>  	int addr_width;
> -	u8 bus, devfn;
> 
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> +	iommu = device_to_iommu(dev, NULL, NULL);
>  	if (!iommu)
>  		return -ENODEV;
> 
> @@ -5658,9 +5660,8 @@ static bool intel_iommu_capable(enum
> iommu_cap cap)
>  static struct iommu_device *intel_iommu_probe_device(struct device *dev)
>  {
>  	struct intel_iommu *iommu;
> -	u8 bus, devfn;
> 
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> +	iommu = device_to_iommu(dev, NULL, NULL);
>  	if (!iommu)
>  		return ERR_PTR(-ENODEV);
> 
> @@ -5673,9 +5674,8 @@ static struct iommu_device
> *intel_iommu_probe_device(struct device *dev)
>  static void intel_iommu_release_device(struct device *dev)
>  {
>  	struct intel_iommu *iommu;
> -	u8 bus, devfn;
> 
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> +	iommu = device_to_iommu(dev, NULL, NULL);
>  	if (!iommu)
>  		return;
> 
> @@ -5825,37 +5825,14 @@ static struct iommu_group
> *intel_iommu_device_group(struct device *dev)
>  	return generic_device_group(dev);
>  }
> 
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> -struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
> -{
> -	struct intel_iommu *iommu;
> -	u8 bus, devfn;
> -
> -	if (iommu_dummy(dev)) {
> -		dev_warn(dev,
> -			 "No IOMMU translation for device; cannot enable
> SVM\n");
> -		return NULL;
> -	}
> -
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> -	if ((!iommu)) {
> -		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
> -		return NULL;
> -	}
> -
> -	return iommu;
> -}
> -#endif /* CONFIG_INTEL_IOMMU_SVM */
> -
>  static int intel_iommu_enable_auxd(struct device *dev)
>  {
>  	struct device_domain_info *info;
>  	struct intel_iommu *iommu;
>  	unsigned long flags;
> -	u8 bus, devfn;
>  	int ret;
> 
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> +	iommu = device_to_iommu(dev, NULL, NULL);
>  	if (!iommu || dmar_disabled)
>  		return -EINVAL;
> 
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 6c87c807a0ab..25dd74f27252 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -231,7 +231,7 @@ static LIST_HEAD(global_svm_list);
>  int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device
> *dev,
>  			  struct iommu_gpasid_bind_data *data)
>  {
> -	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> +	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>  	struct dmar_domain *dmar_domain;
>  	struct intel_svm_dev *sdev;
>  	struct intel_svm *svm;
> @@ -373,7 +373,7 @@ int intel_svm_bind_gpasid(struct iommu_domain
> *domain, struct device *dev,
> 
>  int intel_svm_unbind_gpasid(struct device *dev, int pasid)
>  {
> -	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> +	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>  	struct intel_svm_dev *sdev;
>  	struct intel_svm *svm;
>  	int ret = -EINVAL;
> @@ -430,7 +430,7 @@ static int
>  intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
>  		  struct mm_struct *mm, struct intel_svm_dev **sd)
>  {
> -	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> +	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>  	struct device_domain_info *info;
>  	struct intel_svm_dev *sdev;
>  	struct intel_svm *svm = NULL;
> @@ -608,7 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev,
> int pasid)
>  	struct intel_svm *svm;
>  	int ret = -EINVAL;
> 
> -	iommu = intel_svm_device_to_iommu(dev);
> +	iommu = device_to_iommu(dev, NULL, NULL);
>  	if (!iommu)
>  		goto out;
> 
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 3e8fa1c7a1e6..fc2cfc3db6e1 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -728,6 +728,7 @@ void iommu_flush_write_buffer(struct intel_iommu
> *iommu);
>  int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device
> *dev);
>  struct dmar_domain *find_domain(struct device *dev);
>  struct device_domain_info *get_domain_info(struct device *dev);
> +struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8
> *devfn);
> 
>  #ifdef CONFIG_INTEL_IOMMU_SVM
>  extern void intel_svm_check(struct intel_iommu *iommu);
> @@ -766,8 +767,6 @@ struct intel_svm {
>  	struct list_head devs;
>  	struct list_head list;
>  };
> -
> -extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
>  #else
>  static inline void intel_svm_check(struct intel_iommu *iommu) {}
>  #endif
> --
> 2.17.1

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid
  2020-07-06  0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
@ 2020-07-06  1:10   ` Tian, Kevin
  0 siblings, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06  1:10 UTC (permalink / raw)
  To: Lu Baolu, iommu
  Cc: Joerg Roedel, Jacob Pan, Raj, Ashok, Liu, Yi L, linux-kernel

> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, July 6, 2020 8:26 AM
> 
> There are several places in the code that need to get the pointers of
> svm and sdev according to a pasid and device. Add a helper to achieve
> this for code consolidation and readability.
> 
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/intel/svm.c | 121 +++++++++++++++++++++-----------------
>  1 file changed, 68 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 25dd74f27252..c23167877b2b 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -228,6 +228,50 @@ static LIST_HEAD(global_svm_list);
>  	list_for_each_entry((sdev), &(svm)->devs, list)	\
>  		if ((d) != (sdev)->dev) {} else
> 
> +static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
> +			     struct intel_svm **rsvm,
> +			     struct intel_svm_dev **rsdev)
> +{
> +	struct intel_svm_dev *d, *sdev = NULL;
> +	struct intel_svm *svm;
> +
> +	/* The caller should hold the pasid_mutex lock */
> +	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
> +		return -EINVAL;
> +
> +	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
> +		return -EINVAL;
> +
> +	svm = ioasid_find(NULL, pasid, NULL);
> +	if (IS_ERR(svm))
> +		return PTR_ERR(svm);
> +
> +	if (!svm)
> +		goto out;
> +
> +	/*
> +	 * If we found svm for the PASID, there must be at least one device
> +	 * bond.
> +	 */
> +	if (WARN_ON(list_empty(&svm->devs)))
> +		return -EINVAL;
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(d, &svm->devs, list) {
> +		if (d->dev == dev) {
> +			sdev = d;
> +			break;
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +out:
> +	*rsvm = svm;
> +	*rsdev = sdev;
> +
> +	return 0;
> +}
> +
>  int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device
> *dev,
>  			  struct iommu_gpasid_bind_data *data)
>  {
> @@ -261,39 +305,27 @@ int intel_svm_bind_gpasid(struct iommu_domain
> *domain, struct device *dev,
>  	dmar_domain = to_dmar_domain(domain);
> 
>  	mutex_lock(&pasid_mutex);
> -	svm = ioasid_find(NULL, data->hpasid, NULL);
> -	if (IS_ERR(svm)) {
> -		ret = PTR_ERR(svm);
> +	ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
> +	if (ret)
>  		goto out;
> -	}
> 
> -	if (svm) {
> +	if (sdev) {
>  		/*
> -		 * If we found svm for the PASID, there must be at
> -		 * least one device bond, otherwise svm should be freed.
> +		 * For devices with aux domains, we should allow
> +		 * multiple bind calls with the same PASID and pdev.
>  		 */
> -		if (WARN_ON(list_empty(&svm->devs))) {
> -			ret = -EINVAL;
> -			goto out;
> +		if (iommu_dev_feature_enabled(dev,
> IOMMU_DEV_FEAT_AUX)) {
> +			sdev->users++;
> +		} else {
> +			dev_warn_ratelimited(dev,
> +					     "Already bound with PASID %u\n",
> +					     svm->pasid);
> +			ret = -EBUSY;
>  		}
> +		goto out;
> +	}
> 
> -		for_each_svm_dev(sdev, svm, dev) {
> -			/*
> -			 * For devices with aux domains, we should allow
> -			 * multiple bind calls with the same PASID and pdev.
> -			 */
> -			if (iommu_dev_feature_enabled(dev,
> -						      IOMMU_DEV_FEAT_AUX))
> {
> -				sdev->users++;
> -			} else {
> -				dev_warn_ratelimited(dev,
> -						     "Already bound with
> PASID %u\n",
> -						     svm->pasid);
> -				ret = -EBUSY;
> -			}
> -			goto out;
> -		}
> -	} else {
> +	if (!svm) {
>  		/* We come here when PASID has never been bond to a
> device. */
>  		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
>  		if (!svm) {
> @@ -376,25 +408,17 @@ int intel_svm_unbind_gpasid(struct device *dev,
> int pasid)
>  	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>  	struct intel_svm_dev *sdev;
>  	struct intel_svm *svm;
> -	int ret = -EINVAL;
> +	int ret;
> 
>  	if (WARN_ON(!iommu))
>  		return -EINVAL;
> 
>  	mutex_lock(&pasid_mutex);
> -	svm = ioasid_find(NULL, pasid, NULL);
> -	if (!svm) {
> -		ret = -EINVAL;
> -		goto out;
> -	}
> -
> -	if (IS_ERR(svm)) {
> -		ret = PTR_ERR(svm);
> +	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
> +	if (ret)
>  		goto out;
> -	}
> 
> -	for_each_svm_dev(sdev, svm, dev) {
> -		ret = 0;
> +	if (sdev) {
>  		if (iommu_dev_feature_enabled(dev,
> IOMMU_DEV_FEAT_AUX))
>  			sdev->users--;
>  		if (!sdev->users) {
> @@ -418,7 +442,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int
> pasid)
>  				kfree(svm);
>  			}
>  		}
> -		break;
>  	}
>  out:
>  	mutex_unlock(&pasid_mutex);
> @@ -596,7 +619,7 @@ intel_svm_bind_mm(struct device *dev, int flags,
> struct svm_dev_ops *ops,
>  	if (sd)
>  		*sd = sdev;
>  	ret = 0;
> - out:
> +out:
>  	return ret;
>  }
> 
> @@ -612,17 +635,11 @@ static int intel_svm_unbind_mm(struct device *dev,
> int pasid)
>  	if (!iommu)
>  		goto out;
> 
> -	svm = ioasid_find(NULL, pasid, NULL);
> -	if (!svm)
> -		goto out;
> -
> -	if (IS_ERR(svm)) {
> -		ret = PTR_ERR(svm);
> +	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
> +	if (ret)
>  		goto out;
> -	}
> 
> -	for_each_svm_dev(sdev, svm, dev) {
> -		ret = 0;
> +	if (sdev) {
>  		sdev->users--;
>  		if (!sdev->users) {
>  			list_del_rcu(&sdev->list);
> @@ -651,10 +668,8 @@ static int intel_svm_unbind_mm(struct device *dev,
> int pasid)
>  				kfree(svm);
>  			}
>  		}
> -		break;
>  	}
> - out:
> -
> +out:
>  	return ret;
>  }
> 
> --
> 2.17.1

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-06  0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
@ 2020-07-06  1:29   ` Tian, Kevin
  2020-07-06  7:37     ` Lu Baolu
  2020-07-06  1:36   ` Tian, Kevin
  2020-07-07 11:23   ` Jean-Philippe Brucker
  2 siblings, 1 reply; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06  1:29 UTC (permalink / raw)
  To: Lu Baolu, iommu
  Cc: Joerg Roedel, Jacob Pan, Raj, Ashok, Liu, Yi L, linux-kernel

> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, July 6, 2020 8:26 AM
> 
> A pasid might be bound to a page table from a VM guest via the iommu
> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> on the physical IOMMU, we need to inject the page fault request into
> the guest. After the guest completes handling the page fault, a page
> response need to be sent back via the iommu ops.page_response().
> 
> This adds support to report a page request fault. Any external module
> which is interested in handling this fault should regiester a notifier
> callback.

be specific on which notifier to be registered...

> 
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-------
>  1 file changed, 81 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index c23167877b2b..08c58c2b1a06 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
> int pasid)
>  	}
>  }
> 
> +static int prq_to_iommu_prot(struct page_req_dsc *req)
> +{
> +	int prot = 0;
> +
> +	if (req->rd_req)
> +		prot |= IOMMU_FAULT_PERM_READ;
> +	if (req->wr_req)
> +		prot |= IOMMU_FAULT_PERM_WRITE;
> +	if (req->exe_req)
> +		prot |= IOMMU_FAULT_PERM_EXEC;
> +	if (req->pm_req)
> +		prot |= IOMMU_FAULT_PERM_PRIV;
> +
> +	return prot;
> +}
> +
> +static int
> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> +{
> +	struct iommu_fault_event event;
> +	u8 bus, devfn;
> +
> +	memset(&event, 0, sizeof(struct iommu_fault_event));
> +	bus = PCI_BUS_NUM(desc->rid);
> +	devfn = desc->rid & 0xff;

not required.

> +
> +	/* Fill in event data for device specific processing */
> +	event.fault.type = IOMMU_FAULT_PAGE_REQ;
> +	event.fault.prm.addr = desc->addr;
> +	event.fault.prm.pasid = desc->pasid;
> +	event.fault.prm.grpid = desc->prg_index;
> +	event.fault.prm.perm = prq_to_iommu_prot(desc);
> +
> +	/*
> +	 * Set last page in group bit if private data is present,
> +	 * page response is required as it does for LPIG.
> +	 */

move to priv_data_present check?

> +	if (desc->lpig)
> +		event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +	if (desc->pasid_present)
> +		event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> +	if (desc->priv_data_present) {
> +		event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +		event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> +		memcpy(event.fault.prm.private_data, desc->priv_data,
> +		       sizeof(desc->priv_data));
> +	}
> +
> +	return iommu_report_device_fault(dev, &event);
> +}
> +
>  static irqreturn_t prq_event_thread(int irq, void *d)
>  {
>  	struct intel_iommu *iommu = d;
> @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>  	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
> PRQ_RING_MASK;
>  	head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
> PRQ_RING_MASK;
>  	while (head != tail) {
> -		struct intel_svm_dev *sdev;
> +		struct intel_svm_dev *sdev = NULL;

move to outside of the loop, otherwise later check always hit "if (!sdev)"

>  		struct vm_area_struct *vma;
>  		struct page_req_dsc *req;
>  		struct qi_desc resp;
> @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>  			}
>  		}
> 
> +		if (!sdev || sdev->sid != req->rid) {
> +			struct intel_svm_dev *t;
> +
> +			sdev = NULL;
> +			rcu_read_lock();
> +			list_for_each_entry_rcu(t, &svm->devs, list) {
> +				if (t->sid == req->rid) {
> +					sdev = t;
> +					break;
> +				}
> +			}
> +			rcu_read_unlock();
> +		}
> +
>  		result = QI_RESP_INVALID;
>  		/* Since we're using init_mm.pgd directly, we should never
> take
>  		 * any faults on kernel addresses. */
> @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>  		if (!is_canonical_address(address))
>  			goto bad_req;
> 
> +		/*
> +		 * If prq is to be handled outside iommu driver via receiver of
> +		 * the fault notifiers, we skip the page response here.
> +		 */
> +		if (svm->flags & SVM_FLAG_GUEST_MODE) {
> +			if (sdev && !intel_svm_prq_report(sdev->dev, req))
> +				goto prq_advance;
> +			else
> +				goto bad_req;
> +		}
> +
>  		/* If the mm is already defunct, don't handle faults. */
>  		if (!mmget_not_zero(svm->mm))
>  			goto bad_req;
> @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>  			goto invalid;
> 
>  		result = QI_RESP_SUCCESS;
> -	invalid:
> +invalid:
>  		mmap_read_unlock(svm->mm);
>  		mmput(svm->mm);
> -	bad_req:
> -		/* Accounting for major/minor faults? */
> -		rcu_read_lock();
> -		list_for_each_entry_rcu(sdev, &svm->devs, list) {
> -			if (sdev->sid == req->rid)
> -				break;
> -		}
> -		/* Other devices can go away, but the drivers are not
> permitted
> -		 * to unbind while any page faults might be in flight. So it's
> -		 * OK to drop the 'lock' here now we have it. */

should we keep and move this comment to earlier sdev lookup? and
regarding to guest unbind, ae we preventing the fault owner (outside
of iommu driver) to unbind against in-flight fault request?

> -		rcu_read_unlock();
> -
> -		if (WARN_ON(&sdev->list == &svm->devs))
> -			sdev = NULL;

similarly should we keep the WARN_ON check here?

> -
> +bad_req:
>  		if (sdev && sdev->ops && sdev->ops->fault_cb) {
>  			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
>  				(req->exe_req << 1) | (req->pm_req);
> @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>  		   and these can be NULL. Do not use them below this point!
> */
>  		sdev = NULL;
>  		svm = NULL;
> -	no_pasid:
> +no_pasid:
>  		if (req->lpig || req->priv_data_present) {
>  			/*
>  			 * Per VT-d spec. v3.0 ch7.7, system software must
> @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>  			resp.qw3 = 0;
>  			qi_submit_sync(iommu, &resp, 1, 0);
>  		}
> +prq_advance:
>  		head = (head + sizeof(*req)) & PRQ_RING_MASK;
>  	}
> 
> --
> 2.17.1

Thanks
Kevin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-06  0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
  2020-07-06  1:29   ` Tian, Kevin
@ 2020-07-06  1:36   ` Tian, Kevin
  2020-07-06  7:47     ` Lu Baolu
  2020-07-07 11:23   ` Jean-Philippe Brucker
  2 siblings, 1 reply; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06  1:36 UTC (permalink / raw)
  To: Lu Baolu, iommu
  Cc: Joerg Roedel, Jacob Pan, Raj, Ashok, Liu, Yi L, linux-kernel

> From: Tian, Kevin
> Sent: Monday, July 6, 2020 9:30 AM
> 
> > From: Lu Baolu <baolu.lu@linux.intel.com>
> > Sent: Monday, July 6, 2020 8:26 AM
> >
> > A pasid might be bound to a page table from a VM guest via the iommu
> > ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> > on the physical IOMMU, we need to inject the page fault request into
> > the guest. After the guest completes handling the page fault, a page
> > response need to be sent back via the iommu ops.page_response().
> >
> > This adds support to report a page request fault. Any external module
> > which is interested in handling this fault should regiester a notifier
> > callback.
> 
> be specific on which notifier to be registered...
> 
> >
> > Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > ---
> >  drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-----
> --
> >  1 file changed, 81 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> > index c23167877b2b..08c58c2b1a06 100644
> > --- a/drivers/iommu/intel/svm.c
> > +++ b/drivers/iommu/intel/svm.c
> > @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
> > int pasid)
> >  	}
> >  }
> >
> > +static int prq_to_iommu_prot(struct page_req_dsc *req)
> > +{
> > +	int prot = 0;
> > +
> > +	if (req->rd_req)
> > +		prot |= IOMMU_FAULT_PERM_READ;
> > +	if (req->wr_req)
> > +		prot |= IOMMU_FAULT_PERM_WRITE;
> > +	if (req->exe_req)
> > +		prot |= IOMMU_FAULT_PERM_EXEC;
> > +	if (req->pm_req)
> > +		prot |= IOMMU_FAULT_PERM_PRIV;
> > +
> > +	return prot;
> > +}
> > +
> > +static int
> > +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> > +{
> > +	struct iommu_fault_event event;
> > +	u8 bus, devfn;
> > +
> > +	memset(&event, 0, sizeof(struct iommu_fault_event));
> > +	bus = PCI_BUS_NUM(desc->rid);
> > +	devfn = desc->rid & 0xff;
> 
> not required.
> 
> > +
> > +	/* Fill in event data for device specific processing */
> > +	event.fault.type = IOMMU_FAULT_PAGE_REQ;
> > +	event.fault.prm.addr = desc->addr;
> > +	event.fault.prm.pasid = desc->pasid;
> > +	event.fault.prm.grpid = desc->prg_index;
> > +	event.fault.prm.perm = prq_to_iommu_prot(desc);
> > +
> > +	/*
> > +	 * Set last page in group bit if private data is present,
> > +	 * page response is required as it does for LPIG.
> > +	 */
> 
> move to priv_data_present check?
> 
> > +	if (desc->lpig)
> > +		event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> > +	if (desc->pasid_present)
> > +		event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> > +	if (desc->priv_data_present) {
> > +		event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

btw earlier comment is more about the behavior of the fault
handler (e.g. the guest), but not about why we need convert
to last_page prm flag. Let's make it clear that doing so is 
because iommu_report_device_fault doesn't understand this
vt-d specific requirement thus we set last_page as a workaround.

Thanks
Kevin

> > +		event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> > +		memcpy(event.fault.prm.private_data, desc->priv_data,
> > +		       sizeof(desc->priv_data));
> > +	}
> > +
> > +	return iommu_report_device_fault(dev, &event);
> > +}
> > +
> >  static irqreturn_t prq_event_thread(int irq, void *d)
> >  {
> >  	struct intel_iommu *iommu = d;
> > @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> >  	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
> > PRQ_RING_MASK;
> >  	head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
> > PRQ_RING_MASK;
> >  	while (head != tail) {
> > -		struct intel_svm_dev *sdev;
> > +		struct intel_svm_dev *sdev = NULL;
> 
> move to outside of the loop, otherwise later check always hit "if (!sdev)"
> 
> >  		struct vm_area_struct *vma;
> >  		struct page_req_dsc *req;
> >  		struct qi_desc resp;
> > @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> >  			}
> >  		}
> >
> > +		if (!sdev || sdev->sid != req->rid) {
> > +			struct intel_svm_dev *t;
> > +
> > +			sdev = NULL;
> > +			rcu_read_lock();
> > +			list_for_each_entry_rcu(t, &svm->devs, list) {
> > +				if (t->sid == req->rid) {
> > +					sdev = t;
> > +					break;
> > +				}
> > +			}
> > +			rcu_read_unlock();
> > +		}
> > +
> >  		result = QI_RESP_INVALID;
> >  		/* Since we're using init_mm.pgd directly, we should never
> > take
> >  		 * any faults on kernel addresses. */
> > @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> >  		if (!is_canonical_address(address))
> >  			goto bad_req;
> >
> > +		/*
> > +		 * If prq is to be handled outside iommu driver via receiver of
> > +		 * the fault notifiers, we skip the page response here.
> > +		 */
> > +		if (svm->flags & SVM_FLAG_GUEST_MODE) {
> > +			if (sdev && !intel_svm_prq_report(sdev->dev, req))
> > +				goto prq_advance;
> > +			else
> > +				goto bad_req;
> > +		}
> > +
> >  		/* If the mm is already defunct, don't handle faults. */
> >  		if (!mmget_not_zero(svm->mm))
> >  			goto bad_req;
> > @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> >  			goto invalid;
> >
> >  		result = QI_RESP_SUCCESS;
> > -	invalid:
> > +invalid:
> >  		mmap_read_unlock(svm->mm);
> >  		mmput(svm->mm);
> > -	bad_req:
> > -		/* Accounting for major/minor faults? */
> > -		rcu_read_lock();
> > -		list_for_each_entry_rcu(sdev, &svm->devs, list) {
> > -			if (sdev->sid == req->rid)
> > -				break;
> > -		}
> > -		/* Other devices can go away, but the drivers are not
> > permitted
> > -		 * to unbind while any page faults might be in flight. So it's
> > -		 * OK to drop the 'lock' here now we have it. */
> 
> should we keep and move this comment to earlier sdev lookup? and
> regarding to guest unbind, ae we preventing the fault owner (outside
> of iommu driver) to unbind against in-flight fault request?
> 
> > -		rcu_read_unlock();
> > -
> > -		if (WARN_ON(&sdev->list == &svm->devs))
> > -			sdev = NULL;
> 
> similarly should we keep the WARN_ON check here?
> 
> > -
> > +bad_req:
> >  		if (sdev && sdev->ops && sdev->ops->fault_cb) {
> >  			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
> >  				(req->exe_req << 1) | (req->pm_req);
> > @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> >  		   and these can be NULL. Do not use them below this point!
> > */
> >  		sdev = NULL;
> >  		svm = NULL;
> > -	no_pasid:
> > +no_pasid:
> >  		if (req->lpig || req->priv_data_present) {
> >  			/*
> >  			 * Per VT-d spec. v3.0 ch7.7, system software must
> > @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> >  			resp.qw3 = 0;
> >  			qi_submit_sync(iommu, &resp, 1, 0);
> >  		}
> > +prq_advance:
> >  		head = (head + sizeof(*req)) & PRQ_RING_MASK;
> >  	}
> >
> > --
> > 2.17.1
> 
> Thanks
> Kevin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 4/4] iommu/vt-d: Add page response ops support
  2020-07-06  0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
@ 2020-07-06  1:47   ` Tian, Kevin
  2020-07-09  0:32     ` Lu Baolu
  0 siblings, 1 reply; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06  1:47 UTC (permalink / raw)
  To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel

> From: Lu Baolu
> Sent: Monday, July 6, 2020 8:26 AM
> 
> After a page request is handled, software must response the device which
> raised the page request with the handling result. This is done through

'response' is a noun. 

> the iommu ops.page_response if the request was reported to outside of
> vendor iommu driver through iommu_report_device_fault(). This adds the
> VT-d implementation of page_response ops.
> 
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/intel/iommu.c |  1 +
>  drivers/iommu/intel/svm.c   | 74
> +++++++++++++++++++++++++++++++++++++
>  include/linux/intel-iommu.h |  3 ++
>  3 files changed, 78 insertions(+)
> 
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index de17952ed133..7eb29167e8f9 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
>  	.sva_bind		= intel_svm_bind,
>  	.sva_unbind		= intel_svm_unbind,
>  	.sva_get_pasid		= intel_svm_get_pasid,
> +	.page_response		= intel_svm_page_response,
>  #endif
>  };
> 
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 08c58c2b1a06..1c7d8a9ea124 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -1078,3 +1078,77 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
> 
>  	return pasid;
>  }
> +
> +int intel_svm_page_response(struct device *dev,
> +			    struct iommu_fault_event *evt,
> +			    struct iommu_page_response *msg)
> +{
> +	struct iommu_fault_page_request *prm;
> +	struct intel_svm_dev *sdev;
> +	struct intel_iommu *iommu;
> +	struct intel_svm *svm;
> +	bool private_present;
> +	bool pasid_present;
> +	bool last_page;
> +	u8 bus, devfn;
> +	int ret = 0;
> +	u16 sid;
> +
> +	if (!dev || !dev_is_pci(dev))
> +		return -ENODEV;

but we didn't do same check when reporting fault?

> +
> +	iommu = device_to_iommu(dev, &bus, &devfn);
> +	if (!iommu)
> +		return -ENODEV;
> +
> +	if (!msg || !evt)
> +		return -EINVAL;
> +
> +	mutex_lock(&pasid_mutex);
> +
> +	prm = &evt->fault.prm;
> +	sid = PCI_DEVID(bus, devfn);
> +	pasid_present = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> +	private_present = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> +	last_page = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +
> +	if (pasid_present) {
> +		if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
> +		if (ret || !sdev) {
> +			ret = -ENODEV;
> +			goto out;
> +		}
> +	}

what about pasid_present==0? Do we support recoverable fault now
with this patch?

and who guarantees that the external fault handler (e.g. guest)
cannot do bad thing with this interface, e.g. by specifying a PASID
belonging to other guests (when Scalable IOV is enabled)?

Thanks
Kevin

> +
> +	/*
> +	 * Per VT-d spec. v3.0 ch7.7, system software must respond
> +	 * with page group response if private data is present (PDP)
> +	 * or last page in group (LPIG) bit is set. This is an
> +	 * additional VT-d requirement beyond PCI ATS spec.
> +	 */
> +	if (last_page || private_present) {
> +		struct qi_desc desc;
> +
> +		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
> |
> +				QI_PGRP_PASID_P(pasid_present) |
> +				QI_PGRP_PDP(private_present) |
> +				QI_PGRP_RESP_CODE(msg->code) |
> +				QI_PGRP_RESP_TYPE;
> +		desc.qw1 = QI_PGRP_IDX(prm->grpid) |
> QI_PGRP_LPIG(last_page);
> +		desc.qw2 = 0;
> +		desc.qw3 = 0;
> +		if (private_present)
> +			memcpy(&desc.qw2, prm->private_data,
> +			       sizeof(prm->private_data));
> +
> +		qi_submit_sync(iommu, &desc, 1, 0);
> +	}
> +out:
> +	mutex_unlock(&pasid_mutex);
> +	return ret;
> +}
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index fc2cfc3db6e1..bf6009a344f5 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
> *dev, struct mm_struct *mm,
>  				 void *drvdata);
>  void intel_svm_unbind(struct iommu_sva *handle);
>  int intel_svm_get_pasid(struct iommu_sva *handle);
> +int intel_svm_page_response(struct device *dev, struct iommu_fault_event
> *evt,
> +			    struct iommu_page_response *msg);
> +
>  struct svm_dev_ops;
> 
>  struct intel_svm_dev {
> --
> 2.17.1
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-06  1:29   ` Tian, Kevin
@ 2020-07-06  7:37     ` Lu Baolu
  0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  7:37 UTC (permalink / raw)
  To: Tian, Kevin, iommu
  Cc: baolu.lu, Joerg Roedel, Jacob Pan, Raj, Ashok, Liu, Yi L, linux-kernel

Hi Kevin,

On 2020/7/6 9:29, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Monday, July 6, 2020 8:26 AM
>>
>> A pasid might be bound to a page table from a VM guest via the iommu
>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>> on the physical IOMMU, we need to inject the page fault request into
>> the guest. After the guest completes handling the page fault, a page
>> response need to be sent back via the iommu ops.page_response().
>>
>> This adds support to report a page request fault. Any external module
>> which is interested in handling this fault should regiester a notifier
>> callback.
> 
> be specific on which notifier to be registered...

Sure.

> 
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>>   drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-------
>>   1 file changed, 81 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index c23167877b2b..08c58c2b1a06 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
>> int pasid)
>>   	}
>>   }
>>
>> +static int prq_to_iommu_prot(struct page_req_dsc *req)
>> +{
>> +	int prot = 0;
>> +
>> +	if (req->rd_req)
>> +		prot |= IOMMU_FAULT_PERM_READ;
>> +	if (req->wr_req)
>> +		prot |= IOMMU_FAULT_PERM_WRITE;
>> +	if (req->exe_req)
>> +		prot |= IOMMU_FAULT_PERM_EXEC;
>> +	if (req->pm_req)
>> +		prot |= IOMMU_FAULT_PERM_PRIV;
>> +
>> +	return prot;
>> +}
>> +
>> +static int
>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>> +{
>> +	struct iommu_fault_event event;
>> +	u8 bus, devfn;
>> +
>> +	memset(&event, 0, sizeof(struct iommu_fault_event));
>> +	bus = PCI_BUS_NUM(desc->rid);
>> +	devfn = desc->rid & 0xff;
> 
> not required.

Yes. Will remove them.

> 
>> +
>> +	/* Fill in event data for device specific processing */
>> +	event.fault.type = IOMMU_FAULT_PAGE_REQ;
>> +	event.fault.prm.addr = desc->addr;
>> +	event.fault.prm.pasid = desc->pasid;
>> +	event.fault.prm.grpid = desc->prg_index;
>> +	event.fault.prm.perm = prq_to_iommu_prot(desc);
>> +
>> +	/*
>> +	 * Set last page in group bit if private data is present,
>> +	 * page response is required as it does for LPIG.
>> +	 */
> 
> move to priv_data_present check?

Yes.

> 
>> +	if (desc->lpig)
>> +		event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +	if (desc->pasid_present)
>> +		event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> +	if (desc->priv_data_present) {
>> +		event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +		event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> +		memcpy(event.fault.prm.private_data, desc->priv_data,
>> +		       sizeof(desc->priv_data));
>> +	}
>> +
>> +	return iommu_report_device_fault(dev, &event);
>> +}
>> +
>>   static irqreturn_t prq_event_thread(int irq, void *d)
>>   {
>>   	struct intel_iommu *iommu = d;
>> @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>   	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
>> PRQ_RING_MASK;
>>   	head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
>> PRQ_RING_MASK;
>>   	while (head != tail) {
>> -		struct intel_svm_dev *sdev;
>> +		struct intel_svm_dev *sdev = NULL;
> 
> move to outside of the loop, otherwise later check always hit "if (!sdev)"

Yes, good catch!

> 
>>   		struct vm_area_struct *vma;
>>   		struct page_req_dsc *req;
>>   		struct qi_desc resp;
>> @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>   			}
>>   		}
>>
>> +		if (!sdev || sdev->sid != req->rid) {
>> +			struct intel_svm_dev *t;
>> +
>> +			sdev = NULL;
>> +			rcu_read_lock();
>> +			list_for_each_entry_rcu(t, &svm->devs, list) {
>> +				if (t->sid == req->rid) {
>> +					sdev = t;
>> +					break;
>> +				}
>> +			}
>> +			rcu_read_unlock();
>> +		}
>> +
>>   		result = QI_RESP_INVALID;
>>   		/* Since we're using init_mm.pgd directly, we should never
>> take
>>   		 * any faults on kernel addresses. */
>> @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>   		if (!is_canonical_address(address))
>>   			goto bad_req;
>>
>> +		/*
>> +		 * If prq is to be handled outside iommu driver via receiver of
>> +		 * the fault notifiers, we skip the page response here.
>> +		 */
>> +		if (svm->flags & SVM_FLAG_GUEST_MODE) {
>> +			if (sdev && !intel_svm_prq_report(sdev->dev, req))
>> +				goto prq_advance;
>> +			else
>> +				goto bad_req;
>> +		}
>> +
>>   		/* If the mm is already defunct, don't handle faults. */
>>   		if (!mmget_not_zero(svm->mm))
>>   			goto bad_req;
>> @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>   			goto invalid;
>>
>>   		result = QI_RESP_SUCCESS;
>> -	invalid:
>> +invalid:
>>   		mmap_read_unlock(svm->mm);
>>   		mmput(svm->mm);
>> -	bad_req:
>> -		/* Accounting for major/minor faults? */
>> -		rcu_read_lock();
>> -		list_for_each_entry_rcu(sdev, &svm->devs, list) {
>> -			if (sdev->sid == req->rid)
>> -				break;
>> -		}
>> -		/* Other devices can go away, but the drivers are not
>> permitted
>> -		 * to unbind while any page faults might be in flight. So it's
>> -		 * OK to drop the 'lock' here now we have it. */
> 
> should we keep and move this comment to earlier sdev lookup? and

I thought this comment explained why rcu_read_unlock() before the next
checking. In the new lookup code, we don't need to check, hence I
removed the comments.

> regarding to guest unbind, ae we preventing the fault owner (outside
> of iommu driver) to unbind against in-flight fault request?

Yes. We always wait until all prq with the same pasid completes in
gpasid_unbind().

> 
>> -		rcu_read_unlock();
>> -
>> -		if (WARN_ON(&sdev->list == &svm->devs))
>> -			sdev = NULL;
> 
> similarly should we keep the WARN_ON check here?

Yes, agreed. We can keep a WARN_ON() here.

> 
>> -
>> +bad_req:
>>   		if (sdev && sdev->ops && sdev->ops->fault_cb) {
>>   			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
>>   				(req->exe_req << 1) | (req->pm_req);
>> @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>   		   and these can be NULL. Do not use them below this point!
>> */
>>   		sdev = NULL;
>>   		svm = NULL;
>> -	no_pasid:
>> +no_pasid:
>>   		if (req->lpig || req->priv_data_present) {
>>   			/*
>>   			 * Per VT-d spec. v3.0 ch7.7, system software must
>> @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>   			resp.qw3 = 0;
>>   			qi_submit_sync(iommu, &resp, 1, 0);
>>   		}
>> +prq_advance:
>>   		head = (head + sizeof(*req)) & PRQ_RING_MASK;
>>   	}
>>
>> --
>> 2.17.1
> 
> Thanks
> Kevin
> 

Best regards,
baolu

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-06  1:36   ` Tian, Kevin
@ 2020-07-06  7:47     ` Lu Baolu
  0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06  7:47 UTC (permalink / raw)
  To: Tian, Kevin, iommu
  Cc: baolu.lu, Joerg Roedel, Jacob Pan, Raj, Ashok, Liu, Yi L, linux-kernel

On 2020/7/6 9:36, Tian, Kevin wrote:
>> From: Tian, Kevin
>> Sent: Monday, July 6, 2020 9:30 AM
>>
>>> From: Lu Baolu <baolu.lu@linux.intel.com>
>>> Sent: Monday, July 6, 2020 8:26 AM
>>>
>>> A pasid might be bound to a page table from a VM guest via the iommu
>>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>>> on the physical IOMMU, we need to inject the page fault request into
>>> the guest. After the guest completes handling the page fault, a page
>>> response need to be sent back via the iommu ops.page_response().
>>>
>>> This adds support to report a page request fault. Any external module
>>> which is interested in handling this fault should regiester a notifier
>>> callback.
>>
>> be specific on which notifier to be registered...
>>
>>>
>>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>>> ---
>>>   drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-----
>> --
>>>   1 file changed, 81 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>>> index c23167877b2b..08c58c2b1a06 100644
>>> --- a/drivers/iommu/intel/svm.c
>>> +++ b/drivers/iommu/intel/svm.c
>>> @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
>>> int pasid)
>>>   	}
>>>   }
>>>
>>> +static int prq_to_iommu_prot(struct page_req_dsc *req)
>>> +{
>>> +	int prot = 0;
>>> +
>>> +	if (req->rd_req)
>>> +		prot |= IOMMU_FAULT_PERM_READ;
>>> +	if (req->wr_req)
>>> +		prot |= IOMMU_FAULT_PERM_WRITE;
>>> +	if (req->exe_req)
>>> +		prot |= IOMMU_FAULT_PERM_EXEC;
>>> +	if (req->pm_req)
>>> +		prot |= IOMMU_FAULT_PERM_PRIV;
>>> +
>>> +	return prot;
>>> +}
>>> +
>>> +static int
>>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>>> +{
>>> +	struct iommu_fault_event event;
>>> +	u8 bus, devfn;
>>> +
>>> +	memset(&event, 0, sizeof(struct iommu_fault_event));
>>> +	bus = PCI_BUS_NUM(desc->rid);
>>> +	devfn = desc->rid & 0xff;
>>
>> not required.
>>
>>> +
>>> +	/* Fill in event data for device specific processing */
>>> +	event.fault.type = IOMMU_FAULT_PAGE_REQ;
>>> +	event.fault.prm.addr = desc->addr;
>>> +	event.fault.prm.pasid = desc->pasid;
>>> +	event.fault.prm.grpid = desc->prg_index;
>>> +	event.fault.prm.perm = prq_to_iommu_prot(desc);
>>> +
>>> +	/*
>>> +	 * Set last page in group bit if private data is present,
>>> +	 * page response is required as it does for LPIG.
>>> +	 */
>>
>> move to priv_data_present check?
>>
>>> +	if (desc->lpig)
>>> +		event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>>> +	if (desc->pasid_present)
>>> +		event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>>> +	if (desc->priv_data_present) {
>>> +		event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> 
> btw earlier comment is more about the behavior of the fault
> handler (e.g. the guest), but not about why we need convert
> to last_page prm flag. Let's make it clear that doing so is
> because iommu_report_device_fault doesn't understand this
> vt-d specific requirement thus we set last_page as a workaround.

Yes. I will add this in the comment.

Best regards,
baolu

> 
> Thanks
> Kevin
> 
>>> +		event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>>> +		memcpy(event.fault.prm.private_data, desc->priv_data,
>>> +		       sizeof(desc->priv_data));
>>> +	}
>>> +
>>> +	return iommu_report_device_fault(dev, &event);
>>> +}
>>> +
>>>   static irqreturn_t prq_event_thread(int irq, void *d)
>>>   {
>>>   	struct intel_iommu *iommu = d;
>>> @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>>   	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
>>> PRQ_RING_MASK;
>>>   	head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
>>> PRQ_RING_MASK;
>>>   	while (head != tail) {
>>> -		struct intel_svm_dev *sdev;
>>> +		struct intel_svm_dev *sdev = NULL;
>>
>> move to outside of the loop, otherwise later check always hit "if (!sdev)"
>>
>>>   		struct vm_area_struct *vma;
>>>   		struct page_req_dsc *req;
>>>   		struct qi_desc resp;
>>> @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>>   			}
>>>   		}
>>>
>>> +		if (!sdev || sdev->sid != req->rid) {
>>> +			struct intel_svm_dev *t;
>>> +
>>> +			sdev = NULL;
>>> +			rcu_read_lock();
>>> +			list_for_each_entry_rcu(t, &svm->devs, list) {
>>> +				if (t->sid == req->rid) {
>>> +					sdev = t;
>>> +					break;
>>> +				}
>>> +			}
>>> +			rcu_read_unlock();
>>> +		}
>>> +
>>>   		result = QI_RESP_INVALID;
>>>   		/* Since we're using init_mm.pgd directly, we should never
>>> take
>>>   		 * any faults on kernel addresses. */
>>> @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>>   		if (!is_canonical_address(address))
>>>   			goto bad_req;
>>>
>>> +		/*
>>> +		 * If prq is to be handled outside iommu driver via receiver of
>>> +		 * the fault notifiers, we skip the page response here.
>>> +		 */
>>> +		if (svm->flags & SVM_FLAG_GUEST_MODE) {
>>> +			if (sdev && !intel_svm_prq_report(sdev->dev, req))
>>> +				goto prq_advance;
>>> +			else
>>> +				goto bad_req;
>>> +		}
>>> +
>>>   		/* If the mm is already defunct, don't handle faults. */
>>>   		if (!mmget_not_zero(svm->mm))
>>>   			goto bad_req;
>>> @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>>   			goto invalid;
>>>
>>>   		result = QI_RESP_SUCCESS;
>>> -	invalid:
>>> +invalid:
>>>   		mmap_read_unlock(svm->mm);
>>>   		mmput(svm->mm);
>>> -	bad_req:
>>> -		/* Accounting for major/minor faults? */
>>> -		rcu_read_lock();
>>> -		list_for_each_entry_rcu(sdev, &svm->devs, list) {
>>> -			if (sdev->sid == req->rid)
>>> -				break;
>>> -		}
>>> -		/* Other devices can go away, but the drivers are not
>>> permitted
>>> -		 * to unbind while any page faults might be in flight. So it's
>>> -		 * OK to drop the 'lock' here now we have it. */
>>
>> should we keep and move this comment to earlier sdev lookup? and
>> regarding to guest unbind, ae we preventing the fault owner (outside
>> of iommu driver) to unbind against in-flight fault request?
>>
>>> -		rcu_read_unlock();
>>> -
>>> -		if (WARN_ON(&sdev->list == &svm->devs))
>>> -			sdev = NULL;
>>
>> similarly should we keep the WARN_ON check here?
>>
>>> -
>>> +bad_req:
>>>   		if (sdev && sdev->ops && sdev->ops->fault_cb) {
>>>   			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
>>>   				(req->exe_req << 1) | (req->pm_req);
>>> @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>>   		   and these can be NULL. Do not use them below this point!
>>> */
>>>   		sdev = NULL;
>>>   		svm = NULL;
>>> -	no_pasid:
>>> +no_pasid:
>>>   		if (req->lpig || req->priv_data_present) {
>>>   			/*
>>>   			 * Per VT-d spec. v3.0 ch7.7, system software must
>>> @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>>   			resp.qw3 = 0;
>>>   			qi_submit_sync(iommu, &resp, 1, 0);
>>>   		}
>>> +prq_advance:
>>>   		head = (head + sizeof(*req)) & PRQ_RING_MASK;
>>>   	}
>>>
>>> --
>>> 2.17.1
>>
>> Thanks
>> Kevin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-06  0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
  2020-07-06  1:29   ` Tian, Kevin
  2020-07-06  1:36   ` Tian, Kevin
@ 2020-07-07 11:23   ` Jean-Philippe Brucker
  2020-07-08  2:13     ` Lu Baolu
  2 siblings, 1 reply; 15+ messages in thread
From: Jean-Philippe Brucker @ 2020-07-07 11:23 UTC (permalink / raw)
  To: Lu Baolu; +Cc: iommu, Kevin Tian, Ashok Raj, linux-kernel

On Mon, Jul 06, 2020 at 08:25:34AM +0800, Lu Baolu wrote:
> A pasid might be bound to a page table from a VM guest via the iommu
> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> on the physical IOMMU, we need to inject the page fault request into
> the guest. After the guest completes handling the page fault, a page
> response need to be sent back via the iommu ops.page_response().
> 
> This adds support to report a page request fault. Any external module
> which is interested in handling this fault should regiester a notifier
> callback.
> 
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
[...]
> +static int
> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> +{
> +	struct iommu_fault_event event;
> +	u8 bus, devfn;
> +
> +	memset(&event, 0, sizeof(struct iommu_fault_event));
> +	bus = PCI_BUS_NUM(desc->rid);
> +	devfn = desc->rid & 0xff;
> +
> +	/* Fill in event data for device specific processing */
> +	event.fault.type = IOMMU_FAULT_PAGE_REQ;
> +	event.fault.prm.addr = desc->addr;
> +	event.fault.prm.pasid = desc->pasid;
> +	event.fault.prm.grpid = desc->prg_index;
> +	event.fault.prm.perm = prq_to_iommu_prot(desc);
> +
> +	/*
> +	 * Set last page in group bit if private data is present,
> +	 * page response is required as it does for LPIG.
> +	 */
> +	if (desc->lpig)
> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +	if (desc->pasid_present)
> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;

Do you also need to set IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID?  I added
the flag to deal with devices that do not want a PASID value in their PRI
response (bit 15 in the PCIe Page Request Status Register):
https://lore.kernel.org/linux-iommu/20200616144712.748818-1-jean-philippe@linaro.org/
(applied by Joerg for v5.9)

Grepping for pci_prg_resp_pasid_required() in intel/iommu.c it seems to
currently reject devices that do not want a PASID in a PRI response, so I
think you can set this flag unconditionally for now.

Thanks,
Jean

> +	if (desc->priv_data_present) {
> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> +		memcpy(event.fault.prm.private_data, desc->priv_data,
> +		       sizeof(desc->priv_data));
> +	}
> +
> +	return iommu_report_device_fault(dev, &event);
> +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
  2020-07-07 11:23   ` Jean-Philippe Brucker
@ 2020-07-08  2:13     ` Lu Baolu
  0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-08  2:13 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: baolu.lu, iommu, Kevin Tian, Ashok Raj, linux-kernel

Hi Jean,

On 7/7/20 7:23 PM, Jean-Philippe Brucker wrote:
> On Mon, Jul 06, 2020 at 08:25:34AM +0800, Lu Baolu wrote:
>> A pasid might be bound to a page table from a VM guest via the iommu
>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>> on the physical IOMMU, we need to inject the page fault request into
>> the guest. After the guest completes handling the page fault, a page
>> response need to be sent back via the iommu ops.page_response().
>>
>> This adds support to report a page request fault. Any external module
>> which is interested in handling this fault should regiester a notifier
>> callback.
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
> [...]
>> +static int
>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>> +{
>> +	struct iommu_fault_event event;
>> +	u8 bus, devfn;
>> +
>> +	memset(&event, 0, sizeof(struct iommu_fault_event));
>> +	bus = PCI_BUS_NUM(desc->rid);
>> +	devfn = desc->rid & 0xff;
>> +
>> +	/* Fill in event data for device specific processing */
>> +	event.fault.type = IOMMU_FAULT_PAGE_REQ;
>> +	event.fault.prm.addr = desc->addr;
>> +	event.fault.prm.pasid = desc->pasid;
>> +	event.fault.prm.grpid = desc->prg_index;
>> +	event.fault.prm.perm = prq_to_iommu_prot(desc);
>> +
>> +	/*
>> +	 * Set last page in group bit if private data is present,
>> +	 * page response is required as it does for LPIG.
>> +	 */
>> +	if (desc->lpig)
>> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +	if (desc->pasid_present)
>> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> 
> Do you also need to set IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID?  I added
> the flag to deal with devices that do not want a PASID value in their PRI
> response (bit 15 in the PCIe Page Request Status Register):
> https://lore.kernel.org/linux-iommu/20200616144712.748818-1-jean-philippe@linaro.org/
> (applied by Joerg for v5.9)
> 
> Grepping for pci_prg_resp_pasid_required() in intel/iommu.c it seems to
> currently reject devices that do not want a PASID in a PRI response, so I
> think you can set this flag unconditionally for now.

Yes. You are right. I will set this flag in the next version.

Best regards,
baolu

> 
> Thanks,
> Jean
> 
>> +	if (desc->priv_data_present) {
>> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> +		memcpy(event.fault.prm.private_data, desc->priv_data,
>> +		       sizeof(desc->priv_data));
>> +	}
>> +
>> +	return iommu_report_device_fault(dev, &event);
>> +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] iommu/vt-d: Add page response ops support
  2020-07-06  1:47   ` Tian, Kevin
@ 2020-07-09  0:32     ` Lu Baolu
  0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-09  0:32 UTC (permalink / raw)
  To: Tian, Kevin, iommu; +Cc: baolu.lu, Raj, Ashok, linux-kernel

Hi Kevin,

On 7/6/20 9:47 AM, Tian, Kevin wrote:
>> From: Lu Baolu
>> Sent: Monday, July 6, 2020 8:26 AM
>>
>> After a page request is handled, software must response the device which
>> raised the page request with the handling result. This is done through
> 
> 'response' is a noun.

Yes.

> 
>> the iommu ops.page_response if the request was reported to outside of
>> vendor iommu driver through iommu_report_device_fault(). This adds the
>> VT-d implementation of page_response ops.
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>>   drivers/iommu/intel/iommu.c |  1 +
>>   drivers/iommu/intel/svm.c   | 74
>> +++++++++++++++++++++++++++++++++++++
>>   include/linux/intel-iommu.h |  3 ++
>>   3 files changed, 78 insertions(+)
>>
>> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
>> index de17952ed133..7eb29167e8f9 100644
>> --- a/drivers/iommu/intel/iommu.c
>> +++ b/drivers/iommu/intel/iommu.c
>> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
>>   	.sva_bind		= intel_svm_bind,
>>   	.sva_unbind		= intel_svm_unbind,
>>   	.sva_get_pasid		= intel_svm_get_pasid,
>> +	.page_response		= intel_svm_page_response,
>>   #endif
>>   };
>>
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index 08c58c2b1a06..1c7d8a9ea124 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -1078,3 +1078,77 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
>>
>>   	return pasid;
>>   }
>> +
>> +int intel_svm_page_response(struct device *dev,
>> +			    struct iommu_fault_event *evt,
>> +			    struct iommu_page_response *msg)
>> +{
>> +	struct iommu_fault_page_request *prm;
>> +	struct intel_svm_dev *sdev;
>> +	struct intel_iommu *iommu;
>> +	struct intel_svm *svm;
>> +	bool private_present;
>> +	bool pasid_present;
>> +	bool last_page;
>> +	u8 bus, devfn;
>> +	int ret = 0;
>> +	u16 sid;
>> +
>> +	if (!dev || !dev_is_pci(dev))
>> +		return -ENODEV;
> 
> but we didn't do same check when reporting fault?

For now, we only support PCI devices, so I will add this check in report
as well.

> 
>> +
>> +	iommu = device_to_iommu(dev, &bus, &devfn);
>> +	if (!iommu)
>> +		return -ENODEV;
>> +
>> +	if (!msg || !evt)
>> +		return -EINVAL;
>> +
>> +	mutex_lock(&pasid_mutex);
>> +
>> +	prm = &evt->fault.prm;
>> +	sid = PCI_DEVID(bus, devfn);
>> +	pasid_present = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> +	private_present = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> +	last_page = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +
>> +	if (pasid_present) {
>> +		if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
>> +			ret = -EINVAL;
>> +			goto out;
>> +		}
>> +
>> +		ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
>> +		if (ret || !sdev) {
>> +			ret = -ENODEV;
>> +			goto out;
>> +		}
>> +	}
> 
> what about pasid_present==0? Do we support recoverable fault now
> with this patch?

For now, we don't support reporting a prq without pasid to outside.
prq_event_thread() handles such requests explicitly. I will add a
check in response ops.

> 
> and who guarantees that the external fault handler (e.g. guest)
> cannot do bad thing with this interface, e.g. by specifying a PASID
> belonging to other guests (when Scalable IOV is enabled)?

I will check below if the response is from user space.

(svm->mm ==  get_task_mm(current))

> 
> Thanks
> Kevin

Best regards,
baolu

>> +
>> +	/*
>> +	 * Per VT-d spec. v3.0 ch7.7, system software must respond
>> +	 * with page group response if private data is present (PDP)
>> +	 * or last page in group (LPIG) bit is set. This is an
>> +	 * additional VT-d requirement beyond PCI ATS spec.
>> +	 */
>> +	if (last_page || private_present) {
>> +		struct qi_desc desc;
>> +
>> +		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
>> |
>> +				QI_PGRP_PASID_P(pasid_present) |
>> +				QI_PGRP_PDP(private_present) |
>> +				QI_PGRP_RESP_CODE(msg->code) |
>> +				QI_PGRP_RESP_TYPE;
>> +		desc.qw1 = QI_PGRP_IDX(prm->grpid) |
>> QI_PGRP_LPIG(last_page);
>> +		desc.qw2 = 0;
>> +		desc.qw3 = 0;
>> +		if (private_present)
>> +			memcpy(&desc.qw2, prm->private_data,
>> +			       sizeof(prm->private_data));
>> +
>> +		qi_submit_sync(iommu, &desc, 1, 0);
>> +	}
>> +out:
>> +	mutex_unlock(&pasid_mutex);
>> +	return ret;
>> +}
>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index fc2cfc3db6e1..bf6009a344f5 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
>> *dev, struct mm_struct *mm,
>>   				 void *drvdata);
>>   void intel_svm_unbind(struct iommu_sva *handle);
>>   int intel_svm_get_pasid(struct iommu_sva *handle);
>> +int intel_svm_page_response(struct device *dev, struct iommu_fault_event
>> *evt,
>> +			    struct iommu_page_response *msg);
>> +
>>   struct svm_dev_ops;
>>
>>   struct intel_svm_dev {
>> --
>> 2.17.1
>>
>> _______________________________________________
>> iommu mailing list
>> iommu@lists.linux-foundation.org
>> https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-07-09  0:37 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-06  0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
2020-07-06  0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
2020-07-06  1:02   ` Tian, Kevin
2020-07-06  0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
2020-07-06  1:10   ` Tian, Kevin
2020-07-06  0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
2020-07-06  1:29   ` Tian, Kevin
2020-07-06  7:37     ` Lu Baolu
2020-07-06  1:36   ` Tian, Kevin
2020-07-06  7:47     ` Lu Baolu
2020-07-07 11:23   ` Jean-Philippe Brucker
2020-07-08  2:13     ` Lu Baolu
2020-07-06  0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
2020-07-06  1:47   ` Tian, Kevin
2020-07-09  0:32     ` Lu Baolu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).