* [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper
2020-07-06 0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
@ 2020-07-06 0:25 ` Lu Baolu
2020-07-06 1:02 ` Tian, Kevin
2020-07-06 0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
` (2 subsequent siblings)
3 siblings, 1 reply; 15+ messages in thread
From: Lu Baolu @ 2020-07-06 0:25 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
It is refactored in two ways:
- Make it global so that it could be used in other files.
- Make bus/devfn optional so that callers could ignore these two returned
values when they only want to get the corresponding iommu pointer.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/iommu.c | 55 +++++++++++--------------------------
drivers/iommu/intel/svm.c | 8 +++---
include/linux/intel-iommu.h | 3 +-
3 files changed, 21 insertions(+), 45 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d759e7234e98..de17952ed133 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -778,16 +778,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
return false;
}
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
+ struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
- struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
- if (iommu_dummy(dev))
+ if (!dev || iommu_dummy(dev))
return NULL;
if (dev_is_pci(dev)) {
@@ -818,8 +818,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && pdev->is_virtfn)
goto got_pdev;
- *bus = drhd->devices[i].bus;
- *devfn = drhd->devices[i].devfn;
+ if (bus && devfn) {
+ *bus = drhd->devices[i].bus;
+ *devfn = drhd->devices[i].devfn;
+ }
goto out;
}
@@ -829,8 +831,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && drhd->include_all) {
got_pdev:
- *bus = pdev->bus->number;
- *devfn = pdev->devfn;
+ if (bus && devfn) {
+ *bus = pdev->bus->number;
+ *devfn = pdev->devfn;
+ }
goto out;
}
}
@@ -5146,11 +5150,10 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
struct device *dev)
{
int ret;
- u8 bus, devfn;
unsigned long flags;
struct intel_iommu *iommu;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5236,9 +5239,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5658,9 +5660,8 @@ static bool intel_iommu_capable(enum iommu_cap cap)
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return ERR_PTR(-ENODEV);
@@ -5673,9 +5674,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return;
@@ -5825,37 +5825,14 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
- struct intel_iommu *iommu;
- u8 bus, devfn;
-
- if (iommu_dummy(dev)) {
- dev_warn(dev,
- "No IOMMU translation for device; cannot enable SVM\n");
- return NULL;
- }
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if ((!iommu)) {
- dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
- return NULL;
- }
-
- return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
static int intel_iommu_enable_auxd(struct device *dev)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
- u8 bus, devfn;
int ret;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu || dmar_disabled)
return -EINVAL;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 6c87c807a0ab..25dd74f27252 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -231,7 +231,7 @@ static LIST_HEAD(global_svm_list);
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
struct iommu_gpasid_bind_data *data)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct dmar_domain *dmar_domain;
struct intel_svm_dev *sdev;
struct intel_svm *svm;
@@ -373,7 +373,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
int intel_svm_unbind_gpasid(struct device *dev, int pasid)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev;
struct intel_svm *svm;
int ret = -EINVAL;
@@ -430,7 +430,7 @@ static int
intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
struct mm_struct *mm, struct intel_svm_dev **sd)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct device_domain_info *info;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
@@ -608,7 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
struct intel_svm *svm;
int ret = -EINVAL;
- iommu = intel_svm_device_to_iommu(dev);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
goto out;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3e8fa1c7a1e6..fc2cfc3db6e1 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -728,6 +728,7 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu);
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
struct dmar_domain *find_domain(struct device *dev);
struct device_domain_info *get_domain_info(struct device *dev);
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
#ifdef CONFIG_INTEL_IOMMU_SVM
extern void intel_svm_check(struct intel_iommu *iommu);
@@ -766,8 +767,6 @@ struct intel_svm {
struct list_head devs;
struct list_head list;
};
-
-extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
#endif
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper
2020-07-06 0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
@ 2020-07-06 1:02 ` Tian, Kevin
0 siblings, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06 1:02 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, July 6, 2020 8:26 AM
>
> It is refactored in two ways:
>
> - Make it global so that it could be used in other files.
>
> - Make bus/devfn optional so that callers could ignore these two returned
> values when they only want to get the coresponding iommu pointer.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/iommu.c | 55 +++++++++++--------------------------
> drivers/iommu/intel/svm.c | 8 +++---
> include/linux/intel-iommu.h | 3 +-
> 3 files changed, 21 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index d759e7234e98..de17952ed133 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -778,16 +778,16 @@ is_downstream_to_pci_bridge(struct device *dev,
> struct device *bridge)
> return false;
> }
>
> -static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8
> *devfn)
> +struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8
> *devfn)
> {
> struct dmar_drhd_unit *drhd = NULL;
> + struct pci_dev *pdev = NULL;
> struct intel_iommu *iommu;
> struct device *tmp;
> - struct pci_dev *pdev = NULL;
> u16 segment = 0;
> int i;
>
> - if (iommu_dummy(dev))
> + if (!dev || iommu_dummy(dev))
> return NULL;
>
> if (dev_is_pci(dev)) {
> @@ -818,8 +818,10 @@ static struct intel_iommu *device_to_iommu(struct
> device *dev, u8 *bus, u8 *devf
> if (pdev && pdev->is_virtfn)
> goto got_pdev;
>
> - *bus = drhd->devices[i].bus;
> - *devfn = drhd->devices[i].devfn;
> + if (bus && devfn) {
> + *bus = drhd->devices[i].bus;
> + *devfn = drhd->devices[i].devfn;
> + }
> goto out;
> }
>
> @@ -829,8 +831,10 @@ static struct intel_iommu *device_to_iommu(struct
> device *dev, u8 *bus, u8 *devf
>
> if (pdev && drhd->include_all) {
> got_pdev:
> - *bus = pdev->bus->number;
> - *devfn = pdev->devfn;
> + if (bus && devfn) {
> + *bus = pdev->bus->number;
> + *devfn = pdev->devfn;
> + }
> goto out;
> }
> }
> @@ -5146,11 +5150,10 @@ static int aux_domain_add_dev(struct
> dmar_domain *domain,
> struct device *dev)
> {
> int ret;
> - u8 bus, devfn;
> unsigned long flags;
> struct intel_iommu *iommu;
>
> - iommu = device_to_iommu(dev, &bus, &devfn);
> + iommu = device_to_iommu(dev, NULL, NULL);
> if (!iommu)
> return -ENODEV;
>
> @@ -5236,9 +5239,8 @@ static int prepare_domain_attach_device(struct
> iommu_domain *domain,
> struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> struct intel_iommu *iommu;
> int addr_width;
> - u8 bus, devfn;
>
> - iommu = device_to_iommu(dev, &bus, &devfn);
> + iommu = device_to_iommu(dev, NULL, NULL);
> if (!iommu)
> return -ENODEV;
>
> @@ -5658,9 +5660,8 @@ static bool intel_iommu_capable(enum
> iommu_cap cap)
> static struct iommu_device *intel_iommu_probe_device(struct device *dev)
> {
> struct intel_iommu *iommu;
> - u8 bus, devfn;
>
> - iommu = device_to_iommu(dev, &bus, &devfn);
> + iommu = device_to_iommu(dev, NULL, NULL);
> if (!iommu)
> return ERR_PTR(-ENODEV);
>
> @@ -5673,9 +5674,8 @@ static struct iommu_device
> *intel_iommu_probe_device(struct device *dev)
> static void intel_iommu_release_device(struct device *dev)
> {
> struct intel_iommu *iommu;
> - u8 bus, devfn;
>
> - iommu = device_to_iommu(dev, &bus, &devfn);
> + iommu = device_to_iommu(dev, NULL, NULL);
> if (!iommu)
> return;
>
> @@ -5825,37 +5825,14 @@ static struct iommu_group
> *intel_iommu_device_group(struct device *dev)
> return generic_device_group(dev);
> }
>
> -#ifdef CONFIG_INTEL_IOMMU_SVM
> -struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
> -{
> - struct intel_iommu *iommu;
> - u8 bus, devfn;
> -
> - if (iommu_dummy(dev)) {
> - dev_warn(dev,
> - "No IOMMU translation for device; cannot enable
> SVM\n");
> - return NULL;
> - }
> -
> - iommu = device_to_iommu(dev, &bus, &devfn);
> - if ((!iommu)) {
> - dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
> - return NULL;
> - }
> -
> - return iommu;
> -}
> -#endif /* CONFIG_INTEL_IOMMU_SVM */
> -
> static int intel_iommu_enable_auxd(struct device *dev)
> {
> struct device_domain_info *info;
> struct intel_iommu *iommu;
> unsigned long flags;
> - u8 bus, devfn;
> int ret;
>
> - iommu = device_to_iommu(dev, &bus, &devfn);
> + iommu = device_to_iommu(dev, NULL, NULL);
> if (!iommu || dmar_disabled)
> return -EINVAL;
>
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 6c87c807a0ab..25dd74f27252 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -231,7 +231,7 @@ static LIST_HEAD(global_svm_list);
> int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device
> *dev,
> struct iommu_gpasid_bind_data *data)
> {
> - struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> struct dmar_domain *dmar_domain;
> struct intel_svm_dev *sdev;
> struct intel_svm *svm;
> @@ -373,7 +373,7 @@ int intel_svm_bind_gpasid(struct iommu_domain
> *domain, struct device *dev,
>
> int intel_svm_unbind_gpasid(struct device *dev, int pasid)
> {
> - struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> struct intel_svm_dev *sdev;
> struct intel_svm *svm;
> int ret = -EINVAL;
> @@ -430,7 +430,7 @@ static int
> intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
> struct mm_struct *mm, struct intel_svm_dev **sd)
> {
> - struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> struct device_domain_info *info;
> struct intel_svm_dev *sdev;
> struct intel_svm *svm = NULL;
> @@ -608,7 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev,
> int pasid)
> struct intel_svm *svm;
> int ret = -EINVAL;
>
> - iommu = intel_svm_device_to_iommu(dev);
> + iommu = device_to_iommu(dev, NULL, NULL);
> if (!iommu)
> goto out;
>
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 3e8fa1c7a1e6..fc2cfc3db6e1 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -728,6 +728,7 @@ void iommu_flush_write_buffer(struct intel_iommu
> *iommu);
> int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device
> *dev);
> struct dmar_domain *find_domain(struct device *dev);
> struct device_domain_info *get_domain_info(struct device *dev);
> +struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8
> *devfn);
>
> #ifdef CONFIG_INTEL_IOMMU_SVM
> extern void intel_svm_check(struct intel_iommu *iommu);
> @@ -766,8 +767,6 @@ struct intel_svm {
> struct list_head devs;
> struct list_head list;
> };
> -
> -extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
> #else
> static inline void intel_svm_check(struct intel_iommu *iommu) {}
> #endif
> --
> 2.17.1
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid
2020-07-06 0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
2020-07-06 0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
@ 2020-07-06 0:25 ` Lu Baolu
2020-07-06 1:10 ` Tian, Kevin
2020-07-06 0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
2020-07-06 0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
3 siblings, 1 reply; 15+ messages in thread
From: Lu Baolu @ 2020-07-06 0:25 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
There are several places in the code that need to get the pointers of
svm and sdev according to a pasid and device. Add a helper to achieve
this for code consolidation and readability.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/svm.c | 121 +++++++++++++++++++++-----------------
1 file changed, 68 insertions(+), 53 deletions(-)
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 25dd74f27252..c23167877b2b 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -228,6 +228,50 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
+static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
+ struct intel_svm **rsvm,
+ struct intel_svm_dev **rsdev)
+{
+ struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm *svm;
+
+ /* The caller should hold the pasid_mutex lock */
+ if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
+ return -EINVAL;
+
+ if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+ return -EINVAL;
+
+ svm = ioasid_find(NULL, pasid, NULL);
+ if (IS_ERR(svm))
+ return PTR_ERR(svm);
+
+ if (!svm)
+ goto out;
+
+ /*
+ * If we found svm for the PASID, there must be at least one device
+ * bond.
+ */
+ if (WARN_ON(list_empty(&svm->devs)))
+ return -EINVAL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(d, &svm->devs, list) {
+ if (d->dev == dev) {
+ sdev = d;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+out:
+ *rsvm = svm;
+ *rsdev = sdev;
+
+ return 0;
+}
+
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
struct iommu_gpasid_bind_data *data)
{
@@ -261,39 +305,27 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
dmar_domain = to_dmar_domain(domain);
mutex_lock(&pasid_mutex);
- svm = ioasid_find(NULL, data->hpasid, NULL);
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- if (svm) {
+ if (sdev) {
/*
- * If we found svm for the PASID, there must be at
- * least one device bond, otherwise svm should be freed.
+ * For devices with aux domains, we should allow
+ * multiple bind calls with the same PASID and pdev.
*/
- if (WARN_ON(list_empty(&svm->devs))) {
- ret = -EINVAL;
- goto out;
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) {
+ sdev->users++;
+ } else {
+ dev_warn_ratelimited(dev,
+ "Already bound with PASID %u\n",
+ svm->pasid);
+ ret = -EBUSY;
}
+ goto out;
+ }
- for_each_svm_dev(sdev, svm, dev) {
- /*
- * For devices with aux domains, we should allow
- * multiple bind calls with the same PASID and pdev.
- */
- if (iommu_dev_feature_enabled(dev,
- IOMMU_DEV_FEAT_AUX)) {
- sdev->users++;
- } else {
- dev_warn_ratelimited(dev,
- "Already bound with PASID %u\n",
- svm->pasid);
- ret = -EBUSY;
- }
- goto out;
- }
- } else {
+ if (!svm) {
/* We come here when PASID has never been bond to a device. */
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
@@ -376,25 +408,17 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev;
struct intel_svm *svm;
- int ret = -EINVAL;
+ int ret;
if (WARN_ON(!iommu))
return -EINVAL;
mutex_lock(&pasid_mutex);
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm) {
- ret = -EINVAL;
- goto out;
- }
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- for_each_svm_dev(sdev, svm, dev) {
- ret = 0;
+ if (sdev) {
if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
sdev->users--;
if (!sdev->users) {
@@ -418,7 +442,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
kfree(svm);
}
}
- break;
}
out:
mutex_unlock(&pasid_mutex);
@@ -596,7 +619,7 @@ intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
if (sd)
*sd = sdev;
ret = 0;
- out:
+out:
return ret;
}
@@ -612,17 +635,11 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
if (!iommu)
goto out;
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm)
- goto out;
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- for_each_svm_dev(sdev, svm, dev) {
- ret = 0;
+ if (sdev) {
sdev->users--;
if (!sdev->users) {
list_del_rcu(&sdev->list);
@@ -651,10 +668,8 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
kfree(svm);
}
}
- break;
}
- out:
-
+out:
return ret;
}
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid
2020-07-06 0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
@ 2020-07-06 1:10 ` Tian, Kevin
0 siblings, 0 replies; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06 1:10 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, July 6, 2020 8:26 AM
>
> There are several places in the code that need to get the pointers of
> svm and sdev according to a pasid and device. Add a helper to achieve
> this for code consolidation and readability.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/svm.c | 121 +++++++++++++++++++++-----------------
> 1 file changed, 68 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 25dd74f27252..c23167877b2b 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -228,6 +228,50 @@ static LIST_HEAD(global_svm_list);
> list_for_each_entry((sdev), &(svm)->devs, list) \
> if ((d) != (sdev)->dev) {} else
>
> +static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
> + struct intel_svm **rsvm,
> + struct intel_svm_dev **rsdev)
> +{
> + struct intel_svm_dev *d, *sdev = NULL;
> + struct intel_svm *svm;
> +
> + /* The caller should hold the pasid_mutex lock */
> + if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
> + return -EINVAL;
> +
> + if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
> + return -EINVAL;
> +
> + svm = ioasid_find(NULL, pasid, NULL);
> + if (IS_ERR(svm))
> + return PTR_ERR(svm);
> +
> + if (!svm)
> + goto out;
> +
> + /*
> + * If we found svm for the PASID, there must be at least one device
> + * bond.
> + */
> + if (WARN_ON(list_empty(&svm->devs)))
> + return -EINVAL;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(d, &svm->devs, list) {
> + if (d->dev == dev) {
> + sdev = d;
> + break;
> + }
> + }
> + rcu_read_unlock();
> +
> +out:
> + *rsvm = svm;
> + *rsdev = sdev;
> +
> + return 0;
> +}
> +
> int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device
> *dev,
> struct iommu_gpasid_bind_data *data)
> {
> @@ -261,39 +305,27 @@ int intel_svm_bind_gpasid(struct iommu_domain
> *domain, struct device *dev,
> dmar_domain = to_dmar_domain(domain);
>
> mutex_lock(&pasid_mutex);
> - svm = ioasid_find(NULL, data->hpasid, NULL);
> - if (IS_ERR(svm)) {
> - ret = PTR_ERR(svm);
> + ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
> + if (ret)
> goto out;
> - }
>
> - if (svm) {
> + if (sdev) {
> /*
> - * If we found svm for the PASID, there must be at
> - * least one device bond, otherwise svm should be freed.
> + * For devices with aux domains, we should allow
> + * multiple bind calls with the same PASID and pdev.
> */
> - if (WARN_ON(list_empty(&svm->devs))) {
> - ret = -EINVAL;
> - goto out;
> + if (iommu_dev_feature_enabled(dev,
> IOMMU_DEV_FEAT_AUX)) {
> + sdev->users++;
> + } else {
> + dev_warn_ratelimited(dev,
> + "Already bound with PASID %u\n",
> + svm->pasid);
> + ret = -EBUSY;
> }
> + goto out;
> + }
>
> - for_each_svm_dev(sdev, svm, dev) {
> - /*
> - * For devices with aux domains, we should allow
> - * multiple bind calls with the same PASID and pdev.
> - */
> - if (iommu_dev_feature_enabled(dev,
> - IOMMU_DEV_FEAT_AUX))
> {
> - sdev->users++;
> - } else {
> - dev_warn_ratelimited(dev,
> - "Already bound with
> PASID %u\n",
> - svm->pasid);
> - ret = -EBUSY;
> - }
> - goto out;
> - }
> - } else {
> + if (!svm) {
> /* We come here when PASID has never been bond to a
> device. */
> svm = kzalloc(sizeof(*svm), GFP_KERNEL);
> if (!svm) {
> @@ -376,25 +408,17 @@ int intel_svm_unbind_gpasid(struct device *dev,
> int pasid)
> struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> struct intel_svm_dev *sdev;
> struct intel_svm *svm;
> - int ret = -EINVAL;
> + int ret;
>
> if (WARN_ON(!iommu))
> return -EINVAL;
>
> mutex_lock(&pasid_mutex);
> - svm = ioasid_find(NULL, pasid, NULL);
> - if (!svm) {
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - if (IS_ERR(svm)) {
> - ret = PTR_ERR(svm);
> + ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
> + if (ret)
> goto out;
> - }
>
> - for_each_svm_dev(sdev, svm, dev) {
> - ret = 0;
> + if (sdev) {
> if (iommu_dev_feature_enabled(dev,
> IOMMU_DEV_FEAT_AUX))
> sdev->users--;
> if (!sdev->users) {
> @@ -418,7 +442,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int
> pasid)
> kfree(svm);
> }
> }
> - break;
> }
> out:
> mutex_unlock(&pasid_mutex);
> @@ -596,7 +619,7 @@ intel_svm_bind_mm(struct device *dev, int flags,
> struct svm_dev_ops *ops,
> if (sd)
> *sd = sdev;
> ret = 0;
> - out:
> +out:
> return ret;
> }
>
> @@ -612,17 +635,11 @@ static int intel_svm_unbind_mm(struct device *dev,
> int pasid)
> if (!iommu)
> goto out;
>
> - svm = ioasid_find(NULL, pasid, NULL);
> - if (!svm)
> - goto out;
> -
> - if (IS_ERR(svm)) {
> - ret = PTR_ERR(svm);
> + ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
> + if (ret)
> goto out;
> - }
>
> - for_each_svm_dev(sdev, svm, dev) {
> - ret = 0;
> + if (sdev) {
> sdev->users--;
> if (!sdev->users) {
> list_del_rcu(&sdev->list);
> @@ -651,10 +668,8 @@ static int intel_svm_unbind_mm(struct device *dev,
> int pasid)
> kfree(svm);
> }
> }
> - break;
> }
> - out:
> -
> +out:
> return ret;
> }
>
> --
> 2.17.1
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-06 0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
2020-07-06 0:25 ` [PATCH v2 1/4] iommu/vt-d: Refactor device_to_iommu() helper Lu Baolu
2020-07-06 0:25 ` [PATCH v2 2/4] iommu/vt-d: Add a helper to get svm and sdev for pasid Lu Baolu
@ 2020-07-06 0:25 ` Lu Baolu
2020-07-06 1:29 ` Tian, Kevin
` (2 more replies)
2020-07-06 0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
3 siblings, 3 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06 0:25 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
A pasid might be bound to a page table from a VM guest via the iommu
ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
on the physical IOMMU, we need to inject the page fault request into
the guest. After the guest completes handling the page fault, a page
response needs to be sent back via the iommu ops.page_response().
This adds support to report a page request fault. Any external module
which is interested in handling this fault should register a notifier
callback.
Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-------
1 file changed, 81 insertions(+), 18 deletions(-)
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c23167877b2b..08c58c2b1a06 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev, int pasid)
}
}
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+ int prot = 0;
+
+ if (req->rd_req)
+ prot |= IOMMU_FAULT_PERM_READ;
+ if (req->wr_req)
+ prot |= IOMMU_FAULT_PERM_WRITE;
+ if (req->exe_req)
+ prot |= IOMMU_FAULT_PERM_EXEC;
+ if (req->pm_req)
+ prot |= IOMMU_FAULT_PERM_PRIV;
+
+ return prot;
+}
+
+static int
+intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+{
+ struct iommu_fault_event event;
+ u8 bus, devfn;
+
+ memset(&event, 0, sizeof(struct iommu_fault_event));
+ bus = PCI_BUS_NUM(desc->rid);
+ devfn = desc->rid & 0xff;
+
+ /* Fill in event data for device specific processing */
+ event.fault.type = IOMMU_FAULT_PAGE_REQ;
+ event.fault.prm.addr = desc->addr;
+ event.fault.prm.pasid = desc->pasid;
+ event.fault.prm.grpid = desc->prg_index;
+ event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+ /*
+ * Set last page in group bit if private data is present,
+ * page response is required as it does for LPIG.
+ */
+ if (desc->lpig)
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ if (desc->pasid_present)
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ if (desc->priv_data_present) {
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ memcpy(event.fault.prm.private_data, desc->priv_data,
+ sizeof(desc->priv_data));
+ }
+
+ return iommu_report_device_fault(dev, &event);
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_iommu *iommu = d;
@@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
while (head != tail) {
- struct intel_svm_dev *sdev;
+ struct intel_svm_dev *sdev = NULL;
struct vm_area_struct *vma;
struct page_req_dsc *req;
struct qi_desc resp;
@@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
}
}
+ if (!sdev || sdev->sid != req->rid) {
+ struct intel_svm_dev *t;
+
+ sdev = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == req->rid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ }
+
result = QI_RESP_INVALID;
/* Since we're using init_mm.pgd directly, we should never take
* any faults on kernel addresses. */
@@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (!is_canonical_address(address))
goto bad_req;
+ /*
+ * If prq is to be handled outside iommu driver via receiver of
+ * the fault notifiers, we skip the page response here.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ if (sdev && !intel_svm_prq_report(sdev->dev, req))
+ goto prq_advance;
+ else
+ goto bad_req;
+ }
+
/* If the mm is already defunct, don't handle faults. */
if (!mmget_not_zero(svm->mm))
goto bad_req;
@@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
goto invalid;
result = QI_RESP_SUCCESS;
- invalid:
+invalid:
mmap_read_unlock(svm->mm);
mmput(svm->mm);
- bad_req:
- /* Accounting for major/minor faults? */
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- if (sdev->sid == req->rid)
- break;
- }
- /* Other devices can go away, but the drivers are not permitted
- * to unbind while any page faults might be in flight. So it's
- * OK to drop the 'lock' here now we have it. */
- rcu_read_unlock();
-
- if (WARN_ON(&sdev->list == &svm->devs))
- sdev = NULL;
-
+bad_req:
if (sdev && sdev->ops && sdev->ops->fault_cb) {
int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
(req->exe_req << 1) | (req->pm_req);
@@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
and these can be NULL. Do not use them below this point! */
sdev = NULL;
svm = NULL;
- no_pasid:
+no_pasid:
if (req->lpig || req->priv_data_present) {
/*
* Per VT-d spec. v3.0 ch7.7, system software must
@@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
resp.qw3 = 0;
qi_submit_sync(iommu, &resp, 1, 0);
}
+prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-06 0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
@ 2020-07-06 1:29 ` Tian, Kevin
2020-07-06 7:37 ` Lu Baolu
2020-07-06 1:36 ` Tian, Kevin
2020-07-07 11:23 ` Jean-Philippe Brucker
2 siblings, 1 reply; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06 1:29 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, July 6, 2020 8:26 AM
>
> A pasid might be bound to a page table from a VM guest via the iommu
> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> on the physical IOMMU, we need to inject the page fault request into
> the guest. After the guest completes handling the page fault, a page
> response need to be sent back via the iommu ops.page_response().
>
> This adds support to report a page request fault. Any external module
> which is interested in handling this fault should register a notifier
> callback.
be specific on which notifier to be registered...
>
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-------
> 1 file changed, 81 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index c23167877b2b..08c58c2b1a06 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
> int pasid)
> }
> }
>
> +static int prq_to_iommu_prot(struct page_req_dsc *req)
> +{
> + int prot = 0;
> +
> + if (req->rd_req)
> + prot |= IOMMU_FAULT_PERM_READ;
> + if (req->wr_req)
> + prot |= IOMMU_FAULT_PERM_WRITE;
> + if (req->exe_req)
> + prot |= IOMMU_FAULT_PERM_EXEC;
> + if (req->pm_req)
> + prot |= IOMMU_FAULT_PERM_PRIV;
> +
> + return prot;
> +}
> +
> +static int
> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> +{
> + struct iommu_fault_event event;
> + u8 bus, devfn;
> +
> + memset(&event, 0, sizeof(struct iommu_fault_event));
> + bus = PCI_BUS_NUM(desc->rid);
> + devfn = desc->rid & 0xff;
not required.
> +
> + /* Fill in event data for device specific processing */
> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
> + event.fault.prm.addr = desc->addr;
> + event.fault.prm.pasid = desc->pasid;
> + event.fault.prm.grpid = desc->prg_index;
> + event.fault.prm.perm = prq_to_iommu_prot(desc);
> +
> + /*
> + * Set last page in group bit if private data is present,
> + * page response is required as it does for LPIG.
> + */
move to priv_data_present check?
> + if (desc->lpig)
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + if (desc->pasid_present)
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> + if (desc->priv_data_present) {
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + event.fault.prm.flags |=
> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> + memcpy(event.fault.prm.private_data, desc->priv_data,
> + sizeof(desc->priv_data));
> + }
> +
> + return iommu_report_device_fault(dev, &event);
> +}
> +
> static irqreturn_t prq_event_thread(int irq, void *d)
> {
> struct intel_iommu *iommu = d;
> @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
> PRQ_RING_MASK;
> head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
> PRQ_RING_MASK;
> while (head != tail) {
> - struct intel_svm_dev *sdev;
> + struct intel_svm_dev *sdev = NULL;
move to outside of the loop, otherwise the later check always hits "if (!sdev)"
> struct vm_area_struct *vma;
> struct page_req_dsc *req;
> struct qi_desc resp;
> @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> }
> }
>
> + if (!sdev || sdev->sid != req->rid) {
> + struct intel_svm_dev *t;
> +
> + sdev = NULL;
> + rcu_read_lock();
> + list_for_each_entry_rcu(t, &svm->devs, list) {
> + if (t->sid == req->rid) {
> + sdev = t;
> + break;
> + }
> + }
> + rcu_read_unlock();
> + }
> +
> result = QI_RESP_INVALID;
> /* Since we're using init_mm.pgd directly, we should never
> take
> * any faults on kernel addresses. */
> @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> if (!is_canonical_address(address))
> goto bad_req;
>
> + /*
> + * If prq is to be handled outside iommu driver via receiver of
> + * the fault notifiers, we skip the page response here.
> + */
> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
> + if (sdev && !intel_svm_prq_report(sdev->dev, req))
> + goto prq_advance;
> + else
> + goto bad_req;
> + }
> +
> /* If the mm is already defunct, don't handle faults. */
> if (!mmget_not_zero(svm->mm))
> goto bad_req;
> @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> goto invalid;
>
> result = QI_RESP_SUCCESS;
> - invalid:
> +invalid:
> mmap_read_unlock(svm->mm);
> mmput(svm->mm);
> - bad_req:
> - /* Accounting for major/minor faults? */
> - rcu_read_lock();
> - list_for_each_entry_rcu(sdev, &svm->devs, list) {
> - if (sdev->sid == req->rid)
> - break;
> - }
> - /* Other devices can go away, but the drivers are not
> permitted
> - * to unbind while any page faults might be in flight. So it's
> - * OK to drop the 'lock' here now we have it. */
should we keep and move this comment to earlier sdev lookup? and
regarding guest unbind, are we preventing the fault owner (outside
of the iommu driver) from unbinding against an in-flight fault request?
> - rcu_read_unlock();
> -
> - if (WARN_ON(&sdev->list == &svm->devs))
> - sdev = NULL;
similarly should we keep the WARN_ON check here?
> -
> +bad_req:
> if (sdev && sdev->ops && sdev->ops->fault_cb) {
> int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
> (req->exe_req << 1) | (req->pm_req);
> @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> and these can be NULL. Do not use them below this point!
> */
> sdev = NULL;
> svm = NULL;
> - no_pasid:
> +no_pasid:
> if (req->lpig || req->priv_data_present) {
> /*
> * Per VT-d spec. v3.0 ch7.7, system software must
> @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> resp.qw3 = 0;
> qi_submit_sync(iommu, &resp, 1, 0);
> }
> +prq_advance:
> head = (head + sizeof(*req)) & PRQ_RING_MASK;
> }
>
> --
> 2.17.1
Thanks
Kevin
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-06 1:29 ` Tian, Kevin
@ 2020-07-06 7:37 ` Lu Baolu
0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06 7:37 UTC (permalink / raw)
To: Tian, Kevin, iommu; +Cc: Raj, Ashok, linux-kernel
Hi Kevin,
On 2020/7/6 9:29, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Monday, July 6, 2020 8:26 AM
>>
>> A pasid might be bound to a page table from a VM guest via the iommu
>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>> on the physical IOMMU, we need to inject the page fault request into
>> the guest. After the guest completes handling the page fault, a page
>> response need to be sent back via the iommu ops.page_response().
>>
>> This adds support to report a page request fault. Any external module
>> which is interested in handling this fault should register a notifier
>> callback.
>
> be specific on which notifier to be registered...
Sure.
>
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>> drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-------
>> 1 file changed, 81 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index c23167877b2b..08c58c2b1a06 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
>> int pasid)
>> }
>> }
>>
>> +static int prq_to_iommu_prot(struct page_req_dsc *req)
>> +{
>> + int prot = 0;
>> +
>> + if (req->rd_req)
>> + prot |= IOMMU_FAULT_PERM_READ;
>> + if (req->wr_req)
>> + prot |= IOMMU_FAULT_PERM_WRITE;
>> + if (req->exe_req)
>> + prot |= IOMMU_FAULT_PERM_EXEC;
>> + if (req->pm_req)
>> + prot |= IOMMU_FAULT_PERM_PRIV;
>> +
>> + return prot;
>> +}
>> +
>> +static int
>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>> +{
>> + struct iommu_fault_event event;
>> + u8 bus, devfn;
>> +
>> + memset(&event, 0, sizeof(struct iommu_fault_event));
>> + bus = PCI_BUS_NUM(desc->rid);
>> + devfn = desc->rid & 0xff;
>
> not required.
Yes. Will remove them.
>
>> +
>> + /* Fill in event data for device specific processing */
>> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
>> + event.fault.prm.addr = desc->addr;
>> + event.fault.prm.pasid = desc->pasid;
>> + event.fault.prm.grpid = desc->prg_index;
>> + event.fault.prm.perm = prq_to_iommu_prot(desc);
>> +
>> + /*
>> + * Set last page in group bit if private data is present,
>> + * page response is required as it does for LPIG.
>> + */
>
> move to priv_data_present check?
Yes.
>
>> + if (desc->lpig)
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> + if (desc->pasid_present)
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> + if (desc->priv_data_present) {
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> + event.fault.prm.flags |=
>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> + memcpy(event.fault.prm.private_data, desc->priv_data,
>> + sizeof(desc->priv_data));
>> + }
>> +
>> + return iommu_report_device_fault(dev, &event);
>> +}
>> +
>> static irqreturn_t prq_event_thread(int irq, void *d)
>> {
>> struct intel_iommu *iommu = d;
>> @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
>> PRQ_RING_MASK;
>> head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
>> PRQ_RING_MASK;
>> while (head != tail) {
>> - struct intel_svm_dev *sdev;
>> + struct intel_svm_dev *sdev = NULL;
>
> move to outside of the loop, otherwise later check always hit "if (!sdev)"
Yes, good catch!
>
>> struct vm_area_struct *vma;
>> struct page_req_dsc *req;
>> struct qi_desc resp;
>> @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> }
>> }
>>
>> + if (!sdev || sdev->sid != req->rid) {
>> + struct intel_svm_dev *t;
>> +
>> + sdev = NULL;
>> + rcu_read_lock();
>> + list_for_each_entry_rcu(t, &svm->devs, list) {
>> + if (t->sid == req->rid) {
>> + sdev = t;
>> + break;
>> + }
>> + }
>> + rcu_read_unlock();
>> + }
>> +
>> result = QI_RESP_INVALID;
>> /* Since we're using init_mm.pgd directly, we should never
>> take
>> * any faults on kernel addresses. */
>> @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> if (!is_canonical_address(address))
>> goto bad_req;
>>
>> + /*
>> + * If prq is to be handled outside iommu driver via receiver of
>> + * the fault notifiers, we skip the page response here.
>> + */
>> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
>> + if (sdev && !intel_svm_prq_report(sdev->dev, req))
>> + goto prq_advance;
>> + else
>> + goto bad_req;
>> + }
>> +
>> /* If the mm is already defunct, don't handle faults. */
>> if (!mmget_not_zero(svm->mm))
>> goto bad_req;
>> @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> goto invalid;
>>
>> result = QI_RESP_SUCCESS;
>> - invalid:
>> +invalid:
>> mmap_read_unlock(svm->mm);
>> mmput(svm->mm);
>> - bad_req:
>> - /* Accounting for major/minor faults? */
>> - rcu_read_lock();
>> - list_for_each_entry_rcu(sdev, &svm->devs, list) {
>> - if (sdev->sid == req->rid)
>> - break;
>> - }
>> - /* Other devices can go away, but the drivers are not
>> permitted
>> - * to unbind while any page faults might be in flight. So it's
>> - * OK to drop the 'lock' here now we have it. */
>
> should we keep and move this comment to earlier sdev lookup? and
I thought this comment explained why rcu_read_unlock() before the next
checking. In the new lookup code, we don't need to check, hence I
removed the comments.
> regarding guest unbind, are we preventing the fault owner (outside
> of the iommu driver) from unbinding against an in-flight fault request?
Yes. We always wait until all prq with the same pasid completes in
gpasid_unbind().
>
>> - rcu_read_unlock();
>> -
>> - if (WARN_ON(&sdev->list == &svm->devs))
>> - sdev = NULL;
>
> similarly should we keep the WARN_ON check here?
Yes, agreed. We can keep a WARN_ON() here.
>
>> -
>> +bad_req:
>> if (sdev && sdev->ops && sdev->ops->fault_cb) {
>> int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
>> (req->exe_req << 1) | (req->pm_req);
>> @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> and these can be NULL. Do not use them below this point!
>> */
>> sdev = NULL;
>> svm = NULL;
>> - no_pasid:
>> +no_pasid:
>> if (req->lpig || req->priv_data_present) {
>> /*
>> * Per VT-d spec. v3.0 ch7.7, system software must
>> @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>> resp.qw3 = 0;
>> qi_submit_sync(iommu, &resp, 1, 0);
>> }
>> +prq_advance:
>> head = (head + sizeof(*req)) & PRQ_RING_MASK;
>> }
>>
>> --
>> 2.17.1
>
> Thanks
> Kevin
>
Best regards,
baolu
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* RE: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-06 0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
2020-07-06 1:29 ` Tian, Kevin
@ 2020-07-06 1:36 ` Tian, Kevin
2020-07-06 7:47 ` Lu Baolu
2020-07-07 11:23 ` Jean-Philippe Brucker
2 siblings, 1 reply; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06 1:36 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Tian, Kevin
> Sent: Monday, July 6, 2020 9:30 AM
>
> > From: Lu Baolu <baolu.lu@linux.intel.com>
> > Sent: Monday, July 6, 2020 8:26 AM
> >
> > A pasid might be bound to a page table from a VM guest via the iommu
> > ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> > on the physical IOMMU, we need to inject the page fault request into
> > the guest. After the guest completes handling the page fault, a page
> > response need to be sent back via the iommu ops.page_response().
> >
> > This adds support to report a page request fault. Any external module
> > which is interested in handling this fault should register a notifier
> > callback.
>
> be specific on which notifier to be registered...
>
> >
> > Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > ---
> > drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-----
> --
> > 1 file changed, 81 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> > index c23167877b2b..08c58c2b1a06 100644
> > --- a/drivers/iommu/intel/svm.c
> > +++ b/drivers/iommu/intel/svm.c
> > @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
> > int pasid)
> > }
> > }
> >
> > +static int prq_to_iommu_prot(struct page_req_dsc *req)
> > +{
> > + int prot = 0;
> > +
> > + if (req->rd_req)
> > + prot |= IOMMU_FAULT_PERM_READ;
> > + if (req->wr_req)
> > + prot |= IOMMU_FAULT_PERM_WRITE;
> > + if (req->exe_req)
> > + prot |= IOMMU_FAULT_PERM_EXEC;
> > + if (req->pm_req)
> > + prot |= IOMMU_FAULT_PERM_PRIV;
> > +
> > + return prot;
> > +}
> > +
> > +static int
> > +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> > +{
> > + struct iommu_fault_event event;
> > + u8 bus, devfn;
> > +
> > + memset(&event, 0, sizeof(struct iommu_fault_event));
> > + bus = PCI_BUS_NUM(desc->rid);
> > + devfn = desc->rid & 0xff;
>
> not required.
>
> > +
> > + /* Fill in event data for device specific processing */
> > + event.fault.type = IOMMU_FAULT_PAGE_REQ;
> > + event.fault.prm.addr = desc->addr;
> > + event.fault.prm.pasid = desc->pasid;
> > + event.fault.prm.grpid = desc->prg_index;
> > + event.fault.prm.perm = prq_to_iommu_prot(desc);
> > +
> > + /*
> > + * Set last page in group bit if private data is present,
> > + * page response is required as it does for LPIG.
> > + */
>
> move to priv_data_present check?
>
> > + if (desc->lpig)
> > + event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> > + if (desc->pasid_present)
> > + event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> > + if (desc->priv_data_present) {
> > + event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
btw earlier comment is more about the behavior of the fault
handler (e.g. the guest), but not about why we need convert
to last_page prm flag. Let's make it clear that doing so is
because iommu_report_device_fault doesn't understand this
vt-d specific requirement thus we set last_page as a workaround.
Thanks
Kevin
> > + event.fault.prm.flags |=
> > IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> > + memcpy(event.fault.prm.private_data, desc->priv_data,
> > + sizeof(desc->priv_data));
> > + }
> > +
> > + return iommu_report_device_fault(dev, &event);
> > +}
> > +
> > static irqreturn_t prq_event_thread(int irq, void *d)
> > {
> > struct intel_iommu *iommu = d;
> > @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> > tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
> > PRQ_RING_MASK;
> > head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
> > PRQ_RING_MASK;
> > while (head != tail) {
> > - struct intel_svm_dev *sdev;
> > + struct intel_svm_dev *sdev = NULL;
>
> move to outside of the loop, otherwise later check always hit "if (!sdev)"
>
> > struct vm_area_struct *vma;
> > struct page_req_dsc *req;
> > struct qi_desc resp;
> > @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> > }
> > }
> >
> > + if (!sdev || sdev->sid != req->rid) {
> > + struct intel_svm_dev *t;
> > +
> > + sdev = NULL;
> > + rcu_read_lock();
> > + list_for_each_entry_rcu(t, &svm->devs, list) {
> > + if (t->sid == req->rid) {
> > + sdev = t;
> > + break;
> > + }
> > + }
> > + rcu_read_unlock();
> > + }
> > +
> > result = QI_RESP_INVALID;
> > /* Since we're using init_mm.pgd directly, we should never
> > take
> > * any faults on kernel addresses. */
> > @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> > if (!is_canonical_address(address))
> > goto bad_req;
> >
> > + /*
> > + * If prq is to be handled outside iommu driver via receiver of
> > + * the fault notifiers, we skip the page response here.
> > + */
> > + if (svm->flags & SVM_FLAG_GUEST_MODE) {
> > + if (sdev && !intel_svm_prq_report(sdev->dev, req))
> > + goto prq_advance;
> > + else
> > + goto bad_req;
> > + }
> > +
> > /* If the mm is already defunct, don't handle faults. */
> > if (!mmget_not_zero(svm->mm))
> > goto bad_req;
> > @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> > goto invalid;
> >
> > result = QI_RESP_SUCCESS;
> > - invalid:
> > +invalid:
> > mmap_read_unlock(svm->mm);
> > mmput(svm->mm);
> > - bad_req:
> > - /* Accounting for major/minor faults? */
> > - rcu_read_lock();
> > - list_for_each_entry_rcu(sdev, &svm->devs, list) {
> > - if (sdev->sid == req->rid)
> > - break;
> > - }
> > - /* Other devices can go away, but the drivers are not
> > permitted
> > - * to unbind while any page faults might be in flight. So it's
> > - * OK to drop the 'lock' here now we have it. */
>
> should we keep and move this comment to earlier sdev lookup? and
> regarding guest unbind, are we preventing the fault owner (outside
> of the iommu driver) from unbinding against an in-flight fault request?
>
> > - rcu_read_unlock();
> > -
> > - if (WARN_ON(&sdev->list == &svm->devs))
> > - sdev = NULL;
>
> similarly should we keep the WARN_ON check here?
>
> > -
> > +bad_req:
> > if (sdev && sdev->ops && sdev->ops->fault_cb) {
> > int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
> > (req->exe_req << 1) | (req->pm_req);
> > @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> > and these can be NULL. Do not use them below this point!
> > */
> > sdev = NULL;
> > svm = NULL;
> > - no_pasid:
> > +no_pasid:
> > if (req->lpig || req->priv_data_present) {
> > /*
> > * Per VT-d spec. v3.0 ch7.7, system software must
> > @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void
> *d)
> > resp.qw3 = 0;
> > qi_submit_sync(iommu, &resp, 1, 0);
> > }
> > +prq_advance:
> > head = (head + sizeof(*req)) & PRQ_RING_MASK;
> > }
> >
> > --
> > 2.17.1
>
> Thanks
> Kevin
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-06 1:36 ` Tian, Kevin
@ 2020-07-06 7:47 ` Lu Baolu
0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-06 7:47 UTC (permalink / raw)
To: Tian, Kevin, iommu; +Cc: Raj, Ashok, linux-kernel
On 2020/7/6 9:36, Tian, Kevin wrote:
>> From: Tian, Kevin
>> Sent: Monday, July 6, 2020 9:30 AM
>>
>>> From: Lu Baolu <baolu.lu@linux.intel.com>
>>> Sent: Monday, July 6, 2020 8:26 AM
>>>
>>> A pasid might be bound to a page table from a VM guest via the iommu
>>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>>> on the physical IOMMU, we need to inject the page fault request into
>>> the guest. After the guest completes handling the page fault, a page
>>> response need to be sent back via the iommu ops.page_response().
>>>
>>> This adds support to report a page request fault. Any external module
>>> which is interested in handling this fault should register a notifier
>>> callback.
>>
>> be specific on which notifier to be registered...
>>
>>>
>>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>>> ---
>>> drivers/iommu/intel/svm.c | 99 ++++++++++++++++++++++++++++++++-----
>> --
>>> 1 file changed, 81 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>>> index c23167877b2b..08c58c2b1a06 100644
>>> --- a/drivers/iommu/intel/svm.c
>>> +++ b/drivers/iommu/intel/svm.c
>>> @@ -815,6 +815,57 @@ static void intel_svm_drain_prq(struct device *dev,
>>> int pasid)
>>> }
>>> }
>>>
>>> +static int prq_to_iommu_prot(struct page_req_dsc *req)
>>> +{
>>> + int prot = 0;
>>> +
>>> + if (req->rd_req)
>>> + prot |= IOMMU_FAULT_PERM_READ;
>>> + if (req->wr_req)
>>> + prot |= IOMMU_FAULT_PERM_WRITE;
>>> + if (req->exe_req)
>>> + prot |= IOMMU_FAULT_PERM_EXEC;
>>> + if (req->pm_req)
>>> + prot |= IOMMU_FAULT_PERM_PRIV;
>>> +
>>> + return prot;
>>> +}
>>> +
>>> +static int
>>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>>> +{
>>> + struct iommu_fault_event event;
>>> + u8 bus, devfn;
>>> +
>>> + memset(&event, 0, sizeof(struct iommu_fault_event));
>>> + bus = PCI_BUS_NUM(desc->rid);
>>> + devfn = desc->rid & 0xff;
>>
>> not required.
>>
>>> +
>>> + /* Fill in event data for device specific processing */
>>> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
>>> + event.fault.prm.addr = desc->addr;
>>> + event.fault.prm.pasid = desc->pasid;
>>> + event.fault.prm.grpid = desc->prg_index;
>>> + event.fault.prm.perm = prq_to_iommu_prot(desc);
>>> +
>>> + /*
>>> + * Set last page in group bit if private data is present,
>>> + * page response is required as it does for LPIG.
>>> + */
>>
>> move to priv_data_present check?
>>
>>> + if (desc->lpig)
>>> + event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>>> + if (desc->pasid_present)
>>> + event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>>> + if (desc->priv_data_present) {
>>> + event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>
> btw earlier comment is more about the behavior of the fault
> handler (e.g. the guest), but not about why we need convert
> to last_page prm flag. Let's make it clear that doing so is
> because iommu_report_device_fault doesn't understand this
> vt-d specific requirement thus we set last_page as a workaround.
Yes. I will add this in the comment.
Best regards,
baolu
>
> Thanks
> Kevin
>
>>> + event.fault.prm.flags |=
>>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>>> + memcpy(event.fault.prm.private_data, desc->priv_data,
>>> + sizeof(desc->priv_data));
>>> + }
>>> +
>>> + return iommu_report_device_fault(dev, &event);
>>> +}
>>> +
>>> static irqreturn_t prq_event_thread(int irq, void *d)
>>> {
>>> struct intel_iommu *iommu = d;
>>> @@ -828,7 +879,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>> tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
>>> PRQ_RING_MASK;
>>> head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
>>> PRQ_RING_MASK;
>>> while (head != tail) {
>>> - struct intel_svm_dev *sdev;
>>> + struct intel_svm_dev *sdev = NULL;
>>
>> move to outside of the loop, otherwise later check always hit "if (!sdev)"
>>
>>> struct vm_area_struct *vma;
>>> struct page_req_dsc *req;
>>> struct qi_desc resp;
>>> @@ -864,6 +915,20 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>> }
>>> }
>>>
>>> + if (!sdev || sdev->sid != req->rid) {
>>> + struct intel_svm_dev *t;
>>> +
>>> + sdev = NULL;
>>> + rcu_read_lock();
>>> + list_for_each_entry_rcu(t, &svm->devs, list) {
>>> + if (t->sid == req->rid) {
>>> + sdev = t;
>>> + break;
>>> + }
>>> + }
>>> + rcu_read_unlock();
>>> + }
>>> +
>>> result = QI_RESP_INVALID;
>>> /* Since we're using init_mm.pgd directly, we should never
>>> take
>>> * any faults on kernel addresses. */
>>> @@ -874,6 +939,17 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>> if (!is_canonical_address(address))
>>> goto bad_req;
>>>
>>> + /*
>>> + * If prq is to be handled outside iommu driver via receiver of
>>> + * the fault notifiers, we skip the page response here.
>>> + */
>>> + if (svm->flags & SVM_FLAG_GUEST_MODE) {
>>> + if (sdev && !intel_svm_prq_report(sdev->dev, req))
>>> + goto prq_advance;
>>> + else
>>> + goto bad_req;
>>> + }
>>> +
>>> /* If the mm is already defunct, don't handle faults. */
>>> if (!mmget_not_zero(svm->mm))
>>> goto bad_req;
>>> @@ -892,24 +968,10 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>> goto invalid;
>>>
>>> result = QI_RESP_SUCCESS;
>>> - invalid:
>>> +invalid:
>>> mmap_read_unlock(svm->mm);
>>> mmput(svm->mm);
>>> - bad_req:
>>> - /* Accounting for major/minor faults? */
>>> - rcu_read_lock();
>>> - list_for_each_entry_rcu(sdev, &svm->devs, list) {
>>> - if (sdev->sid == req->rid)
>>> - break;
>>> - }
>>> - /* Other devices can go away, but the drivers are not
>>> permitted
>>> - * to unbind while any page faults might be in flight. So it's
>>> - * OK to drop the 'lock' here now we have it. */
>>
>> should we keep and move this comment to earlier sdev lookup? and
>> regarding guest unbind, are we preventing the fault owner (outside
>> of the iommu driver) from unbinding against an in-flight fault request?
>>
>>> - rcu_read_unlock();
>>> -
>>> - if (WARN_ON(&sdev->list == &svm->devs))
>>> - sdev = NULL;
>>
>> similarly should we keep the WARN_ON check here?
>>
>>> -
>>> +bad_req:
>>> if (sdev && sdev->ops && sdev->ops->fault_cb) {
>>> int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
>>> (req->exe_req << 1) | (req->pm_req);
>>> @@ -920,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>>> and these can be NULL. Do not use them below this point!
>>> */
>>> sdev = NULL;
>>> svm = NULL;
>>> - no_pasid:
>>> +no_pasid:
>>> if (req->lpig || req->priv_data_present) {
>>> /*
>>> * Per VT-d spec. v3.0 ch7.7, system software must
>>> @@ -945,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void
>> *d)
>>> resp.qw3 = 0;
>>> qi_submit_sync(iommu, &resp, 1, 0);
>>> }
>>> +prq_advance:
>>> head = (head + sizeof(*req)) & PRQ_RING_MASK;
>>> }
>>>
>>> --
>>> 2.17.1
>>
>> Thanks
>> Kevin
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-06 0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
2020-07-06 1:29 ` Tian, Kevin
2020-07-06 1:36 ` Tian, Kevin
@ 2020-07-07 11:23 ` Jean-Philippe Brucker
2020-07-08 2:13 ` Lu Baolu
2 siblings, 1 reply; 15+ messages in thread
From: Jean-Philippe Brucker @ 2020-07-07 11:23 UTC (permalink / raw)
To: Lu Baolu; +Cc: linux-kernel, iommu, Kevin Tian, Ashok Raj
On Mon, Jul 06, 2020 at 08:25:34AM +0800, Lu Baolu wrote:
> A pasid might be bound to a page table from a VM guest via the iommu
> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
> on the physical IOMMU, we need to inject the page fault request into
> the guest. After the guest completes handling the page fault, a page
> response need to be sent back via the iommu ops.page_response().
>
> This adds support to report a page request fault. Any external module
> which is interested in handling this fault should register a notifier
> callback.
>
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
[...]
> +static int
> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
> +{
> + struct iommu_fault_event event;
> + u8 bus, devfn;
> +
> + memset(&event, 0, sizeof(struct iommu_fault_event));
> + bus = PCI_BUS_NUM(desc->rid);
> + devfn = desc->rid & 0xff;
> +
> + /* Fill in event data for device specific processing */
> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
> + event.fault.prm.addr = desc->addr;
> + event.fault.prm.pasid = desc->pasid;
> + event.fault.prm.grpid = desc->prg_index;
> + event.fault.prm.perm = prq_to_iommu_prot(desc);
> +
> + /*
> + * Set last page in group bit if private data is present,
> + * page response is required as it does for LPIG.
> + */
> + if (desc->lpig)
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + if (desc->pasid_present)
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
Do you also need to set IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID? I added
the flag to deal with devices that do not want a PASID value in their PRI
response (bit 15 in the PCIe Page Request Status Register):
https://lore.kernel.org/linux-iommu/20200616144712.748818-1-jean-philippe@linaro.org/
(applied by Joerg for v5.9)
Grepping for pci_prg_resp_pasid_required() in intel/iommu.c it seems to
currently reject devices that do not want a PASID in a PRI response, so I
think you can set this flag unconditionally for now.
Thanks,
Jean
> + if (desc->priv_data_present) {
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> + memcpy(event.fault.prm.private_data, desc->priv_data,
> + sizeof(desc->priv_data));
> + }
> +
> + return iommu_report_device_fault(dev, &event);
> +}
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA
2020-07-07 11:23 ` Jean-Philippe Brucker
@ 2020-07-08 2:13 ` Lu Baolu
0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-08 2:13 UTC (permalink / raw)
To: Jean-Philippe Brucker; +Cc: Ashok Raj, linux-kernel, iommu, Kevin Tian
Hi Jean,
On 7/7/20 7:23 PM, Jean-Philippe Brucker wrote:
> On Mon, Jul 06, 2020 at 08:25:34AM +0800, Lu Baolu wrote:
>> A pasid might be bound to a page table from a VM guest via the iommu
>> ops.sva_bind_gpasid. In this case, when a DMA page fault is detected
>> on the physical IOMMU, we need to inject the page fault request into
>> the guest. After the guest completes handling the page fault, a page
>> response need to be sent back via the iommu ops.page_response().
>>
>> This adds support to report a page request fault. Any external module
>> which is interested in handling this fault should regiester a notifier
>> callback.
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
> [...]
>> +static int
>> +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
>> +{
>> + struct iommu_fault_event event;
>> + u8 bus, devfn;
>> +
>> + memset(&event, 0, sizeof(struct iommu_fault_event));
>> + bus = PCI_BUS_NUM(desc->rid);
>> + devfn = desc->rid & 0xff;
>> +
>> + /* Fill in event data for device specific processing */
>> + event.fault.type = IOMMU_FAULT_PAGE_REQ;
>> + event.fault.prm.addr = desc->addr;
>> + event.fault.prm.pasid = desc->pasid;
>> + event.fault.prm.grpid = desc->prg_index;
>> + event.fault.prm.perm = prq_to_iommu_prot(desc);
>> +
>> + /*
>> + * Set last page in group bit if private data is present,
>> + * page response is required as it does for LPIG.
>> + */
>> + if (desc->lpig)
>> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> + if (desc->pasid_present)
>> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>
> Do you also need to set IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID? I added
> the flag to deal with devices that do not want a PASID value in their PRI
> response (bit 15 in the PCIe Page Request Status Register):
> https://lore.kernel.org/linux-iommu/20200616144712.748818-1-jean-philippe@linaro.org/
> (applied by Joerg for v5.9)
>
> Grepping for pci_prg_resp_pasid_required() in intel/iommu.c it seems to
> currently reject devices that do not want a PASID in a PRI response, so I
> think you can set this flag unconditionally for now.
Yes. You are right. I will set this flag in the next version.
Best regards,
baolu
>
> Thanks,
> Jean
>
>> + if (desc->priv_data_present) {
>> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> + memcpy(event.fault.prm.private_data, desc->priv_data,
>> + sizeof(desc->priv_data));
>> + }
>> +
>> + return iommu_report_device_fault(dev, &event);
>> +}
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 4/4] iommu/vt-d: Add page response ops support
2020-07-06 0:25 [PATCH v2 0/4] iommu/vt-d: Add prq report and response support Lu Baolu
` (2 preceding siblings ...)
2020-07-06 0:25 ` [PATCH v2 3/4] iommu/vt-d: Report page request faults for guest SVA Lu Baolu
@ 2020-07-06 0:25 ` Lu Baolu
2020-07-06 1:47 ` Tian, Kevin
3 siblings, 1 reply; 15+ messages in thread
From: Lu Baolu @ 2020-07-06 0:25 UTC (permalink / raw)
To: iommu; +Cc: Kevin Tian, Ashok Raj, linux-kernel
After a page request is handled, software must response the device which
raised the page request with the handling result. This is done through
the iommu ops.page_response if the request was reported to outside of
vendor iommu driver through iommu_report_device_fault(). This adds the
VT-d implementation of page_response ops.
Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/iommu.c | 1 +
drivers/iommu/intel/svm.c | 74 +++++++++++++++++++++++++++++++++++++
include/linux/intel-iommu.h | 3 ++
3 files changed, 78 insertions(+)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index de17952ed133..7eb29167e8f9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
.sva_bind = intel_svm_bind,
.sva_unbind = intel_svm_unbind,
.sva_get_pasid = intel_svm_get_pasid,
+ .page_response = intel_svm_page_response,
#endif
};
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 08c58c2b1a06..1c7d8a9ea124 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -1078,3 +1078,77 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
return pasid;
}
+
+int intel_svm_page_response(struct device *dev,
+ struct iommu_fault_event *evt,
+ struct iommu_page_response *msg)
+{
+ struct iommu_fault_page_request *prm;
+ struct intel_svm_dev *sdev;
+ struct intel_iommu *iommu;
+ struct intel_svm *svm;
+ bool private_present;
+ bool pasid_present;
+ bool last_page;
+ u8 bus, devfn;
+ int ret = 0;
+ u16 sid;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!msg || !evt)
+ return -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+
+ prm = &evt->fault.prm;
+ sid = PCI_DEVID(bus, devfn);
+ pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+ if (pasid_present) {
+ if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
+ if (ret || !sdev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must respond
+ * with page group response if private data is present (PDP)
+ * or last page in group (LPIG) bit is set. This is an
+ * additional VT-d requirement beyond PCI ATS spec.
+ */
+ if (last_page || private_present) {
+ struct qi_desc desc;
+
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_PDP(private_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ if (private_present)
+ memcpy(&desc.qw2, prm->private_data,
+ sizeof(prm->private_data));
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+ }
+out:
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index fc2cfc3db6e1..bf6009a344f5 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
void *drvdata);
void intel_svm_unbind(struct iommu_sva *handle);
int intel_svm_get_pasid(struct iommu_sva *handle);
+int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+ struct iommu_page_response *msg);
+
struct svm_dev_ops;
struct intel_svm_dev {
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH v2 4/4] iommu/vt-d: Add page response ops support
2020-07-06 0:25 ` [PATCH v2 4/4] iommu/vt-d: Add page response ops support Lu Baolu
@ 2020-07-06 1:47 ` Tian, Kevin
2020-07-09 0:32 ` Lu Baolu
0 siblings, 1 reply; 15+ messages in thread
From: Tian, Kevin @ 2020-07-06 1:47 UTC (permalink / raw)
To: Lu Baolu, iommu; +Cc: Raj, Ashok, linux-kernel
> From: Lu Baolu
> Sent: Monday, July 6, 2020 8:26 AM
>
> After a page request is handled, software must response the device which
> raised the page request with the handling result. This is done through
'response' is a noun.
> the iommu ops.page_response if the request was reported to outside of
> vendor iommu driver through iommu_report_device_fault(). This adds the
> VT-d implementation of page_response ops.
>
> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/iommu.c | 1 +
> drivers/iommu/intel/svm.c | 74
> +++++++++++++++++++++++++++++++++++++
> include/linux/intel-iommu.h | 3 ++
> 3 files changed, 78 insertions(+)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index de17952ed133..7eb29167e8f9 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
> .sva_bind = intel_svm_bind,
> .sva_unbind = intel_svm_unbind,
> .sva_get_pasid = intel_svm_get_pasid,
> + .page_response = intel_svm_page_response,
> #endif
> };
>
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 08c58c2b1a06..1c7d8a9ea124 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -1078,3 +1078,77 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
>
> return pasid;
> }
> +
> +int intel_svm_page_response(struct device *dev,
> + struct iommu_fault_event *evt,
> + struct iommu_page_response *msg)
> +{
> + struct iommu_fault_page_request *prm;
> + struct intel_svm_dev *sdev;
> + struct intel_iommu *iommu;
> + struct intel_svm *svm;
> + bool private_present;
> + bool pasid_present;
> + bool last_page;
> + u8 bus, devfn;
> + int ret = 0;
> + u16 sid;
> +
> + if (!dev || !dev_is_pci(dev))
> + return -ENODEV;
but we didn't do same check when reporting fault?
> +
> + iommu = device_to_iommu(dev, &bus, &devfn);
> + if (!iommu)
> + return -ENODEV;
> +
> + if (!msg || !evt)
> + return -EINVAL;
> +
> + mutex_lock(&pasid_mutex);
> +
> + prm = &evt->fault.prm;
> + sid = PCI_DEVID(bus, devfn);
> + pasid_present = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> + private_present = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
> + last_page = prm->flags &
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
> +
> + if (pasid_present) {
> + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
> + if (ret || !sdev) {
> + ret = -ENODEV;
> + goto out;
> + }
> + }
what about pasid_present==0? Do we support recoverable fault now
with this patch?
and who guarantees that the external fault handler (e.g. guest)
cannot do bad thing with this interface, e.g. by specifying a PASID
belonging to other guests (when Scalable IOV is enabled)?
Thanks
Kevin
> +
> + /*
> + * Per VT-d spec. v3.0 ch7.7, system software must respond
> + * with page group response if private data is present (PDP)
> + * or last page in group (LPIG) bit is set. This is an
> + * additional VT-d requirement beyond PCI ATS spec.
> + */
> + if (last_page || private_present) {
> + struct qi_desc desc;
> +
> + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
> |
> + QI_PGRP_PASID_P(pasid_present) |
> + QI_PGRP_PDP(private_present) |
> + QI_PGRP_RESP_CODE(msg->code) |
> + QI_PGRP_RESP_TYPE;
> + desc.qw1 = QI_PGRP_IDX(prm->grpid) |
> QI_PGRP_LPIG(last_page);
> + desc.qw2 = 0;
> + desc.qw3 = 0;
> + if (private_present)
> + memcpy(&desc.qw2, prm->private_data,
> + sizeof(prm->private_data));
> +
> + qi_submit_sync(iommu, &desc, 1, 0);
> + }
> +out:
> + mutex_unlock(&pasid_mutex);
> + return ret;
> +}
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index fc2cfc3db6e1..bf6009a344f5 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
> *dev, struct mm_struct *mm,
> void *drvdata);
> void intel_svm_unbind(struct iommu_sva *handle);
> int intel_svm_get_pasid(struct iommu_sva *handle);
> +int intel_svm_page_response(struct device *dev, struct iommu_fault_event
> *evt,
> + struct iommu_page_response *msg);
> +
> struct svm_dev_ops;
>
> struct intel_svm_dev {
> --
> 2.17.1
>
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 4/4] iommu/vt-d: Add page response ops support
2020-07-06 1:47 ` Tian, Kevin
@ 2020-07-09 0:32 ` Lu Baolu
0 siblings, 0 replies; 15+ messages in thread
From: Lu Baolu @ 2020-07-09 0:32 UTC (permalink / raw)
To: Tian, Kevin, iommu; +Cc: linux-kernel, Raj, Ashok
Hi Kevin,
On 7/6/20 9:47 AM, Tian, Kevin wrote:
>> From: Lu Baolu
>> Sent: Monday, July 6, 2020 8:26 AM
>>
>> After a page request is handled, software must response the device which
>> raised the page request with the handling result. This is done through
>
> 'response' is a noun.
Yes.
>
>> the iommu ops.page_response if the request was reported to outside of
>> vendor iommu driver through iommu_report_device_fault(). This adds the
>> VT-d implementation of page_response ops.
>>
>> Co-developed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Co-developed-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>> drivers/iommu/intel/iommu.c | 1 +
>> drivers/iommu/intel/svm.c | 74
>> +++++++++++++++++++++++++++++++++++++
>> include/linux/intel-iommu.h | 3 ++
>> 3 files changed, 78 insertions(+)
>>
>> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
>> index de17952ed133..7eb29167e8f9 100644
>> --- a/drivers/iommu/intel/iommu.c
>> +++ b/drivers/iommu/intel/iommu.c
>> @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = {
>> .sva_bind = intel_svm_bind,
>> .sva_unbind = intel_svm_unbind,
>> .sva_get_pasid = intel_svm_get_pasid,
>> + .page_response = intel_svm_page_response,
>> #endif
>> };
>>
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index 08c58c2b1a06..1c7d8a9ea124 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -1078,3 +1078,77 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
>>
>> return pasid;
>> }
>> +
>> +int intel_svm_page_response(struct device *dev,
>> + struct iommu_fault_event *evt,
>> + struct iommu_page_response *msg)
>> +{
>> + struct iommu_fault_page_request *prm;
>> + struct intel_svm_dev *sdev;
>> + struct intel_iommu *iommu;
>> + struct intel_svm *svm;
>> + bool private_present;
>> + bool pasid_present;
>> + bool last_page;
>> + u8 bus, devfn;
>> + int ret = 0;
>> + u16 sid;
>> +
>> + if (!dev || !dev_is_pci(dev))
>> + return -ENODEV;
>
> but we didn't do same check when reporting fault?
For now, we only support PCI devices, so I will add this check in report
as well.
>
>> +
>> + iommu = device_to_iommu(dev, &bus, &devfn);
>> + if (!iommu)
>> + return -ENODEV;
>> +
>> + if (!msg || !evt)
>> + return -EINVAL;
>> +
>> + mutex_lock(&pasid_mutex);
>> +
>> + prm = &evt->fault.prm;
>> + sid = PCI_DEVID(bus, devfn);
>> + pasid_present = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>> + private_present = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
>> + last_page = prm->flags &
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
>> +
>> + if (pasid_present) {
>> + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
>> + if (ret || !sdev) {
>> + ret = -ENODEV;
>> + goto out;
>> + }
>> + }
>
> what about pasid_present==0? Do we support recoverable fault now
> with this patch?
For now, we don't support reporting a prq without pasid to outside.
prq_event_thread() handles such requests explicitly. I will add a
check in response ops.
>
> and who guarantees that the external fault handler (e.g. guest)
> cannot do bad thing with this interface, e.g. by specifying a PASID
> belonging to other guests (when Scalable IOV is enabled)?
I will check below if the response is from user space.
(svm->mm == get_task_mm(current))
>
> Thanks
> Kevin
Best regards,
baolu
>> +
>> + /*
>> + * Per VT-d spec. v3.0 ch7.7, system software must respond
>> + * with page group response if private data is present (PDP)
>> + * or last page in group (LPIG) bit is set. This is an
>> + * additional VT-d requirement beyond PCI ATS spec.
>> + */
>> + if (last_page || private_present) {
>> + struct qi_desc desc;
>> +
>> + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid)
>> |
>> + QI_PGRP_PASID_P(pasid_present) |
>> + QI_PGRP_PDP(private_present) |
>> + QI_PGRP_RESP_CODE(msg->code) |
>> + QI_PGRP_RESP_TYPE;
>> + desc.qw1 = QI_PGRP_IDX(prm->grpid) |
>> QI_PGRP_LPIG(last_page);
>> + desc.qw2 = 0;
>> + desc.qw3 = 0;
>> + if (private_present)
>> + memcpy(&desc.qw2, prm->private_data,
>> + sizeof(prm->private_data));
>> +
>> + qi_submit_sync(iommu, &desc, 1, 0);
>> + }
>> +out:
>> + mutex_unlock(&pasid_mutex);
>> + return ret;
>> +}
>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index fc2cfc3db6e1..bf6009a344f5 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -741,6 +741,9 @@ struct iommu_sva *intel_svm_bind(struct device
>> *dev, struct mm_struct *mm,
>> void *drvdata);
>> void intel_svm_unbind(struct iommu_sva *handle);
>> int intel_svm_get_pasid(struct iommu_sva *handle);
>> +int intel_svm_page_response(struct device *dev, struct iommu_fault_event
>> *evt,
>> + struct iommu_page_response *msg);
>> +
>> struct svm_dev_ops;
>>
>> struct intel_svm_dev {
>> --
>> 2.17.1
>>
>> _______________________________________________
>> iommu mailing list
>> iommu@lists.linux-foundation.org
>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 15+ messages in thread