linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3
@ 2019-12-19 16:30 Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation Jean-Philippe Brucker
                   ` (14 more replies)
  0 siblings, 15 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
[1], I added review and tested tags where appropriate and applied the
suggested changes, shown in the diff below. Thanks all!

I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
Zhangfei's uacce module [2]. The full SVA support, which I'll send out
early next year, is available on my branch sva/zip-devel at
https://jpbrucker.net/git/linux/

[1] https://lore.kernel.org/linux-iommu/20191209180514.272727-1-jean-philippe@linaro.org/
[2] https://lore.kernel.org/linux-iommu/1576465697-27946-1-git-send-email-zhangfei.gao@linaro.org/

Jean-Philippe Brucker (13):
  iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
  dt-bindings: document PASID property for IOMMU masters
  iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices
  ACPI/IORT: Parse SSID property of named component node
  iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
  iommu/arm-smmu-v3: Add context descriptor tables allocators
  iommu/arm-smmu-v3: Add support for Substream IDs
  iommu/arm-smmu-v3: Propagate ssid_bits
  iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc()
    failure
  iommu/arm-smmu-v3: Add second level of context descriptor table
  iommu/arm-smmu-v3: Improve add_device() error handling
  PCI/ATS: Add PASID stubs
  iommu/arm-smmu-v3: Add support for PCI PASID

 .../devicetree/bindings/iommu/iommu.txt       |   6 +
 drivers/acpi/arm64/iort.c                     |  18 +
 drivers/iommu/arm-smmu-v3.c                   | 467 +++++++++++++++---
 drivers/iommu/of_iommu.c                      |   6 +-
 include/linux/iommu.h                         |   2 +
 include/linux/pci-ats.h                       |   3 +
 6 files changed, 442 insertions(+), 60 deletions(-)

-- 
Diff since v3:
#diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index cde7af39681c..8e95ecad4c9a 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -589,8 +589,10 @@ struct arm_smmu_cd_table {
 };

 struct arm_smmu_s1_cfg {
+	/* Leaf tables or linear table */
 	struct arm_smmu_cd_table	*tables;
 	size_t				num_tables;
+	/* First level tables, when two levels are used */
 	__le64				*l1ptr;
 	dma_addr_t			l1ptr_dma;
 	struct arm_smmu_ctx_desc	cd;
@@ -1561,27 +1563,22 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

-	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
-		table = &cfg->tables[0];
-		idx = ssid;
-	} else {
-		idx = ssid >> CTXDESC_SPLIT;
-		if (idx >= cfg->num_tables)
-			return NULL;
+	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;

-		table = &cfg->tables[idx];
-		if (!table->ptr) {
-			if (arm_smmu_alloc_cd_leaf_table(smmu, table,
-							 CTXDESC_L2_ENTRIES))
-				return NULL;
+	idx = ssid >> CTXDESC_SPLIT;
+	table = &cfg->tables[idx];
+	if (!table->ptr) {
+		if (arm_smmu_alloc_cd_leaf_table(smmu, table,
+						 CTXDESC_L2_ENTRIES))
+			return NULL;

-			l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
-			arm_smmu_write_cd_l1_desc(l1ptr, table);
-			/* An invalid L1CD can be cached */
-			arm_smmu_sync_cd(smmu_domain, ssid, false);
-		}
-		idx = ssid & (CTXDESC_L2_ENTRIES - 1);
+		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
+		arm_smmu_write_cd_l1_desc(l1ptr, table);
+		/* An invalid L1CD can be cached */
+		arm_smmu_sync_cd(smmu_domain, ssid, false);
 	}
+	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
 	return table->ptr + idx * CTXDESC_CD_DWORDS;
 }

@@ -1617,8 +1614,12 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 	u64 val;
 	bool cd_live;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	__le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+	__le64 *cdptr;

+	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+		return -E2BIG;
+
+	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
 	if (!cdptr)
 		return -ENOMEM;

@@ -1640,9 +1641,9 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 		cdptr[3] = cpu_to_le64(cd->mair);

 		/*
-		 * STE is live, and the SMMU might fetch this CD at any
-		 * time. Ensure that it observes the rest of the CD before we
-		 * enable it.
+		 * STE is live, and the SMMU might read dwords of this CD in any
+		 * order. Ensure that it observes valid values before reading
+		 * V=1.
 		 */
 		arm_smmu_sync_cd(smmu_domain, ssid, true);

@@ -1706,7 +1707,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
 	 * Only allocate a leaf table for linear case. With two levels, leaf
 	 * tables are allocated lazily.
 	 */
-	if (!cfg->l1ptr) {
+	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
 		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],
 						   max_contexts);
 		if (ret)
@@ -2657,17 +2658,21 @@ static int arm_smmu_enable_pasid(struct arm_smmu_master *master)

 	features = pci_pasid_features(pdev);
 	if (features < 0)
-		return -ENODEV;
+		return features;

 	num_pasids = pci_max_pasids(pdev);
 	if (num_pasids <= 0)
-		return -ENODEV;
+		return num_pasids;

 	ret = pci_enable_pasid(pdev, features);
-	if (!ret)
-		master->ssid_bits = min_t(u8, ilog2(num_pasids),
-					  master->smmu->ssid_bits);
-	return ret;
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to enable PASID\n");
+		return ret;
+	}
+
+	master->ssid_bits = min_t(u8, ilog2(num_pasids),
+				  master->smmu->ssid_bits);
+	return 0;
 }

 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)



^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 02/13] dt-bindings: document PASID property for IOMMU masters Jean-Philippe Brucker
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Since commit 518a2f1925c3 ("dma-mapping: zero memory returned from
dma_alloc_*"), dma_alloc_* always initializes memory to zero, so there
is no need to use dma_zalloc_* or pass the __GFP_ZERO flag anymore.

The flag was introduced by commit 04fa26c71be5 ("iommu/arm-smmu: Convert
DMA buffer allocations to the managed API"), since the managed API
didn't provide a dmam_zalloc_coherent() function.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index effe72eb89e7..d4e8b7f8d9f4 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1675,7 +1675,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 
 	desc->span = STRTAB_SPLIT + 1;
 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
-					  GFP_KERNEL | __GFP_ZERO);
+					  GFP_KERNEL);
 	if (!desc->l2ptr) {
 		dev_err(smmu->dev,
 			"failed to allocate l2 stream table for SID %u\n",
@@ -2161,8 +2161,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 		return asid;
 
 	cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
-					 &cfg->cdptr_dma,
-					 GFP_KERNEL | __GFP_ZERO);
+					 &cfg->cdptr_dma, GFP_KERNEL);
 	if (!cfg->cdptr) {
 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
 		ret = -ENOMEM;
@@ -2883,7 +2882,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 
 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
-				     GFP_KERNEL | __GFP_ZERO);
+				     GFP_KERNEL);
 	if (!strtab) {
 		dev_err(smmu->dev,
 			"failed to allocate l1 stream table (%u bytes)\n",
@@ -2910,7 +2909,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
 
 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
-				     GFP_KERNEL | __GFP_ZERO);
+				     GFP_KERNEL);
 	if (!strtab) {
 		dev_err(smmu->dev,
 			"failed to allocate linear stream table (%u bytes)\n",
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 02/13] dt-bindings: document PASID property for IOMMU masters
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 03/13] iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices Jean-Philippe Brucker
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

On Arm systems, some platform devices behind an SMMU may support the PASID
feature, which offers multiple address space. Let the firmware tell us
when a device supports PASID.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 Documentation/devicetree/bindings/iommu/iommu.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt
index 5a8b4624defc..3c36334e4f94 100644
--- a/Documentation/devicetree/bindings/iommu/iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/iommu.txt
@@ -86,6 +86,12 @@ have a means to turn off translation. But it is invalid in such cases to
 disable the IOMMU's device tree node in the first place because it would
 prevent any driver from properly setting up the translations.
 
+Optional properties:
+--------------------
+- pasid-num-bits: Some masters support multiple address spaces for DMA, by
+  tagging DMA transactions with an address space identifier. By default,
+  this is 0, which means that the device only has one address space.
+
 
 Notes:
 ======
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 03/13] iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 02/13] dt-bindings: document PASID property for IOMMU masters Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 04/13] ACPI/IORT: Parse SSID property of named component node Jean-Philippe Brucker
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

For platform devices that support SubstreamID (SSID), firmware provides
the number of supported SSID bits. Restrict it to what the SMMU supports
and cache it into master->ssid_bits, which will also be used for PCI
PASID.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 13 +++++++++++++
 drivers/iommu/of_iommu.c    |  6 +++++-
 include/linux/iommu.h       |  2 ++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d4e8b7f8d9f4..837b4283b4dc 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -292,6 +292,12 @@
 
 #define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
 
+/*
+ * When the SMMU only supports linear context descriptor tables, pick a
+ * reasonable size limit (64kB).
+ */
+#define CTXDESC_LINEAR_CDMAX		ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
+
 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
 #define ARM_SMMU_TCR2CD(tcr, fld)	FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
 					FIELD_GET(ARM64_TCR_##fld, tcr))
@@ -638,6 +644,7 @@ struct arm_smmu_master {
 	u32				*sids;
 	unsigned int			num_sids;
 	bool				ats_enabled;
+	unsigned int			ssid_bits;
 };
 
 /* SMMU private data for an IOMMU domain */
@@ -2571,6 +2578,12 @@ static int arm_smmu_add_device(struct device *dev)
 		}
 	}
 
+	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
+
+	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
+		master->ssid_bits = min_t(u8, master->ssid_bits,
+					  CTXDESC_LINEAR_CDMAX);
+
 	group = iommu_group_get_for_dev(dev);
 	if (!IS_ERR(group)) {
 		iommu_group_put(group);
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 026ad2b29dcd..b3ccb2f7f1c7 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -196,8 +196,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 			if (err)
 				break;
 		}
-	}
 
+		fwspec = dev_iommu_fwspec_get(dev);
+		if (!err && fwspec)
+			of_property_read_u32(master_np, "pasid-num-bits",
+					     &fwspec->num_pasid_bits);
+	}
 
 	/*
 	 * Two success conditions can be represented by non-negative err here:
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f2223cbb5fd5..956031eab3ef 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -570,6 +570,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev);
  * @ops: ops for this device's IOMMU
  * @iommu_fwnode: firmware handle for this device's IOMMU
  * @iommu_priv: IOMMU driver private data for this device
+ * @num_pasid_bits: number of PASID bits supported by this device
  * @num_ids: number of associated device IDs
  * @ids: IDs which this device may present to the IOMMU
  */
@@ -578,6 +579,7 @@ struct iommu_fwspec {
 	struct fwnode_handle	*iommu_fwnode;
 	void			*iommu_priv;
 	u32			flags;
+	u32			num_pasid_bits;
 	unsigned int		num_ids;
 	u32			ids[1];
 };
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 04/13] ACPI/IORT: Parse SSID property of named component node
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (2 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 03/13] iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 05/13] iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support Jean-Philippe Brucker
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Named component nodes in the IORT tables describe the number of
Substream ID bits (aka. PASID) supported by the device. Propagate this
value to the fwspec structure in order to enable PASID for platform
devices.

Acked-by: Hanjun Guo <guohanjun@huawei.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/acpi/arm64/iort.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 33f71983e001..39f389214ecf 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt)	"ACPI: IORT: " fmt
 
 #include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -924,6 +925,20 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 	return iort_iommu_xlate(info->dev, parent, streamid);
 }
 
+static void iort_named_component_init(struct device *dev,
+				      struct acpi_iort_node *node)
+{
+	struct acpi_iort_named_component *nc;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+	if (!fwspec)
+		return;
+
+	nc = (struct acpi_iort_named_component *)node->node_data;
+	fwspec->num_pasid_bits = FIELD_GET(ACPI_IORT_NC_PASID_BITS,
+					   nc->node_flags);
+}
+
 /**
  * iort_iommu_configure - Set-up IOMMU configuration for a device.
  *
@@ -978,6 +993,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 			if (parent)
 				err = iort_iommu_xlate(dev, parent, streamid);
 		} while (parent && !err);
+
+		if (!err)
+			iort_named_component_init(dev, node);
 	}
 
 	/*
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 05/13] iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (3 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 04/13] ACPI/IORT: Parse SSID property of named component node Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators Jean-Philippe Brucker
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

When adding SSID support to the SMMUv3 driver, we'll need to manipulate
leaf pasid tables and context descriptors. Extract the context
descriptor structure and introduce a new table structure.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 44 +++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 837b4283b4dc..b287e303b1d7 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -553,16 +553,21 @@ struct arm_smmu_strtab_l1_desc {
 	dma_addr_t			l2ptr_dma;
 };
 
+struct arm_smmu_ctx_desc {
+	u16				asid;
+	u64				ttbr;
+	u64				tcr;
+	u64				mair;
+};
+
+struct arm_smmu_cd_table {
+	__le64				*ptr;
+	dma_addr_t			ptr_dma;
+};
+
 struct arm_smmu_s1_cfg {
-	__le64				*cdptr;
-	dma_addr_t			cdptr_dma;
-
-	struct arm_smmu_ctx_desc {
-		u16	asid;
-		u64	ttbr;
-		u64	tcr;
-		u64	mair;
-	}				cd;
+	struct arm_smmu_cd_table	table;
+	struct arm_smmu_ctx_desc	cd;
 };
 
 struct arm_smmu_s2_cfg {
@@ -1471,6 +1476,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 				    struct arm_smmu_s1_cfg *cfg)
 {
 	u64 val;
+	__le64 *cdptr = cfg->table.ptr;
 
 	/*
 	 * We don't need to issue any invalidation here, as we'll invalidate
@@ -1488,12 +1494,12 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
 		val |= CTXDESC_CD_0_S;
 
-	cfg->cdptr[0] = cpu_to_le64(val);
+	cdptr[0] = cpu_to_le64(val);
 
 	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
-	cfg->cdptr[1] = cpu_to_le64(val);
+	cdptr[1] = cpu_to_le64(val);
 
-	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
+	cdptr[3] = cpu_to_le64(cfg->cd.mair);
 }
 
 /* Stream table manipulation functions */
@@ -1624,7 +1630,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
-		val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+		val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
 	}
 
@@ -2138,11 +2144,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
-		if (cfg->cdptr) {
+		if (cfg->table.ptr) {
 			dmam_free_coherent(smmu_domain->smmu->dev,
 					   CTXDESC_CD_DWORDS << 3,
-					   cfg->cdptr,
-					   cfg->cdptr_dma);
+					   cfg->table.ptr,
+					   cfg->table.ptr_dma);
 
 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
 		}
@@ -2167,9 +2173,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	if (asid < 0)
 		return asid;
 
-	cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
-					 &cfg->cdptr_dma, GFP_KERNEL);
-	if (!cfg->cdptr) {
+	cfg->table.ptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
+					     &cfg->table.ptr_dma, GFP_KERNEL);
+	if (!cfg->table.ptr) {
 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
 		ret = -ENOMEM;
 		goto out_free_asid;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (4 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 05/13] iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2020-01-14 11:06   ` Will Deacon
  2019-12-19 16:30 ` [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs Jean-Philippe Brucker
                   ` (8 subsequent siblings)
  14 siblings, 1 reply; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Support for SSID will require allocating context descriptor tables. Move
the context descriptor allocation to separate functions.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 57 ++++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index b287e303b1d7..43d6a7ded6e4 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -568,6 +568,7 @@ struct arm_smmu_cd_table {
 struct arm_smmu_s1_cfg {
 	struct arm_smmu_cd_table	table;
 	struct arm_smmu_ctx_desc	cd;
+	u8				s1cdmax;
 };
 
 struct arm_smmu_s2_cfg {
@@ -1455,6 +1456,31 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 }
 
 /* Context descriptor manipulation functions */
+static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
+					struct arm_smmu_cd_table *table,
+					size_t num_entries)
+{
+	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
+
+	table->ptr = dmam_alloc_coherent(smmu->dev, size, &table->ptr_dma,
+					 GFP_KERNEL);
+	if (!table->ptr) {
+		dev_warn(smmu->dev,
+			 "failed to allocate context descriptor table\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
+					struct arm_smmu_cd_table *table,
+					size_t num_entries)
+{
+	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
+
+	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
+}
+
 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 {
 	u64 val = 0;
@@ -1502,6 +1528,23 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 	cdptr[3] = cpu_to_le64(cfg->cd.mair);
 }
 
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
+{
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+
+	return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table,
+					    1 << cfg->s1cdmax);
+}
+
+static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+{
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+
+	arm_smmu_free_cd_leaf_table(smmu, &cfg->table, 1 << cfg->s1cdmax);
+}
+
 /* Stream table manipulation functions */
 static void
 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
@@ -2145,11 +2188,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
 		if (cfg->table.ptr) {
-			dmam_free_coherent(smmu_domain->smmu->dev,
-					   CTXDESC_CD_DWORDS << 3,
-					   cfg->table.ptr,
-					   cfg->table.ptr_dma);
-
+			arm_smmu_free_cd_tables(smmu_domain);
 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
 		}
 	} else {
@@ -2173,13 +2212,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	if (asid < 0)
 		return asid;
 
-	cfg->table.ptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
-					     &cfg->table.ptr_dma, GFP_KERNEL);
-	if (!cfg->table.ptr) {
-		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
-		ret = -ENOMEM;
+	ret = arm_smmu_alloc_cd_tables(smmu_domain);
+	if (ret)
 		goto out_free_asid;
-	}
 
 	cfg->cd.asid	= (u16)asid;
 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (5 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2020-01-14 12:38   ` Will Deacon
  2019-12-19 16:30 ` [PATCH v4 08/13] iommu/arm-smmu-v3: Propagate ssid_bits Jean-Philippe Brucker
                   ` (7 subsequent siblings)
  14 siblings, 1 reply; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
page directory per device. However SMMUv3 allows more than one address
space for some devices, by providing multiple stage-1 page directories. In
addition to the Stream ID (SID), that identifies a device, we can now have
Substream IDs (SSID) identifying an address space. In PCIe, SID is called
Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
A complete stage-1 walk goes through the context descriptor table:

      Stream tables       Ctx. Desc. tables       Page tables
        +--------+   ,------->+-------+   ,------->+-------+
        :        :   |        :       :   |        :       :
        +--------+   |        +-------+   |        +-------+
   SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
        +--------+            +-------+            +-------+
        :        :            :       :            :       :
        +--------+            +-------+            +-------+

Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table
entries. To keep things simple we only implement one level of context
descriptor tables here, but as with stream and page tables, an SSID can
be split to index multiple levels of tables.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 125 +++++++++++++++++++++++++++++-------
 1 file changed, 102 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 43d6a7ded6e4..e1bec7e552b9 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -227,6 +227,11 @@
 #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
 #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
 
+#define STRTAB_STE_1_S1DSS		GENMASK_ULL(1, 0)
+#define STRTAB_STE_1_S1DSS_TERMINATE	0x0
+#define STRTAB_STE_1_S1DSS_BYPASS	0x1
+#define STRTAB_STE_1_S1DSS_SSID0	0x2
+
 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
@@ -329,6 +334,7 @@
 #define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
 #define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
 
+#define CMDQ_CFGI_0_SSID		GENMASK_ULL(31, 12)
 #define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
 #define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
@@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent {
 
 		#define CMDQ_OP_CFGI_STE	0x3
 		#define CMDQ_OP_CFGI_ALL	0x4
+		#define CMDQ_OP_CFGI_CD		0x5
+		#define CMDQ_OP_CFGI_CD_ALL	0x6
 		struct {
 			u32			sid;
+			u32			ssid;
 			union {
 				bool		leaf;
 				u8		span;
@@ -568,6 +577,7 @@ struct arm_smmu_cd_table {
 struct arm_smmu_s1_cfg {
 	struct arm_smmu_cd_table	table;
 	struct arm_smmu_ctx_desc	cd;
+	u8				s1fmt;
 	u8				s1cdmax;
 };
 
@@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
 		break;
+	case CMDQ_OP_CFGI_CD:
+		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
+		/* Fallthrough */
 	case CMDQ_OP_CFGI_STE:
 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 		break;
+	case CMDQ_OP_CFGI_CD_ALL:
+		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+		break;
 	case CMDQ_OP_CFGI_ALL:
 		/* Cover the entire SID range */
 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
@@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 }
 
 /* Context descriptor manipulation functions */
+static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+			     int ssid, bool leaf)
+{
+	size_t i;
+	unsigned long flags;
+	struct arm_smmu_master *master;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cmdq_ent cmd = {
+		.opcode	= CMDQ_OP_CFGI_CD,
+		.cfgi	= {
+			.ssid	= ssid,
+			.leaf	= leaf,
+		},
+	};
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+		for (i = 0; i < master->num_sids; i++) {
+			cmd.cfgi.sid = master->sids[i];
+			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+		}
+	}
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+	arm_smmu_cmdq_issue_sync(smmu);
+}
+
 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
 					struct arm_smmu_cd_table *table,
 					size_t num_entries)
@@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 	return val;
 }
 
-static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
-				    struct arm_smmu_s1_cfg *cfg)
+static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
+				   int ssid, struct arm_smmu_ctx_desc *cd)
 {
-	u64 val;
-	__le64 *cdptr = cfg->table.ptr;
-
 	/*
-	 * We don't need to issue any invalidation here, as we'll invalidate
-	 * the STE when installing the new entry anyway.
+	 * This function handles the following cases:
+	 *
+	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
+	 * (2) Install a secondary CD, for SID+SSID traffic.
+	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
+	 *     CD, then invalidate the old entry and mappings.
+	 * (4) Remove a secondary CD.
 	 */
-	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
-#ifdef __BIG_ENDIAN
-	      CTXDESC_CD_0_ENDI |
-#endif
-	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
-	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
-	      CTXDESC_CD_0_V;
+	u64 val;
+	bool cd_live;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
+			CTXDESC_CD_DWORDS;
 
-	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
-	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
-		val |= CTXDESC_CD_0_S;
+	val = le64_to_cpu(cdptr[0]);
+	cd_live = !!(val & CTXDESC_CD_0_V);
 
-	cdptr[0] = cpu_to_le64(val);
+	if (!cd) { /* (4) */
+		val = 0;
+	} else if (cd_live) { /* (3) */
+		val &= ~CTXDESC_CD_0_ASID;
+		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
+		/*
+		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
+		 * this substream's traffic
+		 */
+	} else { /* (1) and (2) */
+		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
+		cdptr[2] = 0;
+		cdptr[3] = cpu_to_le64(cd->mair);
+
+		/*
+		 * STE is live, and the SMMU might read dwords of this CD in any
+		 * order. Ensure that it observes valid values before reading
+		 * V=1.
+		 */
+		arm_smmu_sync_cd(smmu_domain, ssid, true);
 
-	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
-	cdptr[1] = cpu_to_le64(val);
+		val = arm_smmu_cpu_tcr_to_cd(cd->tcr) |
+#ifdef __BIG_ENDIAN
+			CTXDESC_CD_0_ENDI |
+#endif
+			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
+			CTXDESC_CD_0_AA64 |
+			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
+			CTXDESC_CD_0_V;
 
-	cdptr[3] = cpu_to_le64(cfg->cd.mair);
+		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
+		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+			val |= CTXDESC_CD_0_S;
+	}
+
+	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
+	arm_smmu_sync_cd(smmu_domain, ssid, true);
+	return 0;
 }
 
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
@@ -1533,6 +1607,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
+	cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
 	return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table,
 					    1 << cfg->s1cdmax);
 }
@@ -1664,6 +1739,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	if (s1_cfg) {
 		BUG_ON(ste_live);
 		dst[1] = cpu_to_le64(
+			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
@@ -1674,7 +1750,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
 		val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
-			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
+			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
+			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
 	}
 
 	if (s2_cfg) {
@@ -2479,7 +2557,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		master->ats_enabled = arm_smmu_ats_supported(master);
 
 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
-		arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
+		arm_smmu_write_ctx_desc(smmu_domain, 0,
+					&smmu_domain->s1_cfg.cd);
 
 	arm_smmu_install_ste_for_dev(master);
 
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 08/13] iommu/arm-smmu-v3: Propagate ssid_bits
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (6 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure Jean-Philippe Brucker
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Now that we support substream IDs, initialize s1cdmax with the number of
SSID bits supported by a master and the SMMU.

Context descriptor tables are allocated once for the first master
attached to a domain. Therefore attaching multiple devices with
different SSID sizes is tricky, and we currently don't support it.

As a future improvement it would be nice to at least support attaching a
SSID-capable device to a domain that isn't using SSID, by reallocating
the SSID table. This would allow supporting a SSID-capable device that
is in the same IOMMU group as a bridge, for example. Varying SSID size
is less of a concern, since the PCIe specification "highly recommends"
that devices supporting PASID implement all 20 bits of it.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index e1bec7e552b9..e147087198ef 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2279,6 +2279,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 }
 
 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
+				       struct arm_smmu_master *master,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
 	int ret;
@@ -2290,6 +2291,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	if (asid < 0)
 		return asid;
 
+	cfg->s1cdmax = master->ssid_bits;
+
 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
 	if (ret)
 		goto out_free_asid;
@@ -2306,6 +2309,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 }
 
 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
+				       struct arm_smmu_master *master,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
 	int vmid;
@@ -2322,7 +2326,8 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
 	return 0;
 }
 
-static int arm_smmu_domain_finalise(struct iommu_domain *domain)
+static int arm_smmu_domain_finalise(struct iommu_domain *domain,
+				    struct arm_smmu_master *master)
 {
 	int ret;
 	unsigned long ias, oas;
@@ -2330,6 +2335,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	struct io_pgtable_cfg pgtbl_cfg;
 	struct io_pgtable_ops *pgtbl_ops;
 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
+				 struct arm_smmu_master *,
 				 struct io_pgtable_cfg *);
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -2384,7 +2390,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
 	domain->geometry.force_aperture = true;
 
-	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
+	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
 	if (ret < 0) {
 		free_io_pgtable_ops(pgtbl_ops);
 		return ret;
@@ -2537,7 +2543,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 
 	if (!smmu_domain->smmu) {
 		smmu_domain->smmu = smmu;
-		ret = arm_smmu_domain_finalise(domain);
+		ret = arm_smmu_domain_finalise(domain, master);
 		if (ret) {
 			smmu_domain->smmu = NULL;
 			goto out_unlock;
@@ -2549,6 +2555,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 			dev_name(smmu->dev));
 		ret = -ENXIO;
 		goto out_unlock;
+	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
+		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+		dev_err(dev,
+			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
+			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
+		ret = -EINVAL;
+		goto out_unlock;
 	}
 
 	master->domain = smmu_domain;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (7 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 08/13] iommu/arm-smmu-v3: Propagate ssid_bits Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2020-01-14 12:42   ` Will Deacon
  2019-12-19 16:30 ` [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
                   ` (5 subsequent siblings)
  14 siblings, 1 reply; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Second-level context descriptor tables will be allocated lazily in
arm_smmu_write_ctx_desc(). Help with handling allocation failure by
moving the CD write into arm_smmu_domain_finalise_s1().

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index e147087198ef..b825a5639afc 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2301,8 +2301,15 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
 	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
+
+	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
+	if (ret)
+		goto out_free_tables;
+
 	return 0;
 
+out_free_tables:
+	arm_smmu_free_cd_tables(smmu_domain);
 out_free_asid:
 	arm_smmu_bitmap_free(smmu->asid_map, asid);
 	return ret;
@@ -2569,10 +2576,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
 		master->ats_enabled = arm_smmu_ats_supported(master);
 
-	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
-		arm_smmu_write_ctx_desc(smmu_domain, 0,
-					&smmu_domain->s1_cfg.cd);
-
 	arm_smmu_install_ste_for_dev(master);
 
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (8 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-20  7:37   ` Auger Eric
  2020-01-14 15:04   ` Will Deacon
  2019-12-19 16:30 ` [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling Jean-Philippe Brucker
                   ` (4 subsequent siblings)
  14 siblings, 2 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

The SMMU can support up to 20 bits of SSID. Add a second level of page
tables to accommodate this. Devices that support more than 1024 SSIDs now
have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
descriptors (64kB), allocated on demand.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 154 +++++++++++++++++++++++++++++++++---
 1 file changed, 144 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index b825a5639afc..bf106a7b53eb 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -224,6 +224,7 @@
 
 #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
 #define STRTAB_STE_0_S1FMT_LINEAR	0
+#define STRTAB_STE_0_S1FMT_64K_L2	2
 #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
 #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
 
@@ -263,7 +264,20 @@
 
 #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
 
-/* Context descriptor (stage-1 only) */
+/*
+ * Context descriptors.
+ *
+ * Linear: when less than 1024 SSIDs are supported
+ * 2lvl: at most 1024 L1 entries,
+ *       1024 lazy entries per table.
+ */
+#define CTXDESC_SPLIT			10
+#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
+
+#define CTXDESC_L1_DESC_DWORDS		1
+#define CTXDESC_L1_DESC_VALID		1
+#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)
+
 #define CTXDESC_CD_DWORDS		8
 #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
 #define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
@@ -575,7 +589,12 @@ struct arm_smmu_cd_table {
 };
 
 struct arm_smmu_s1_cfg {
-	struct arm_smmu_cd_table	table;
+	/* Leaf tables or linear table */
+	struct arm_smmu_cd_table	*tables;
+	size_t				num_tables;
+	/* First level tables, when two levels are used */
+	__le64				*l1ptr;
+	dma_addr_t			l1ptr_dma;
 	struct arm_smmu_ctx_desc	cd;
 	u8				s1fmt;
 	u8				s1cdmax;
@@ -1521,9 +1540,48 @@ static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
 {
 	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
 
+	if (!table->ptr)
+		return;
 	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
 }
 
+static void arm_smmu_write_cd_l1_desc(__le64 *dst,
+				      struct arm_smmu_cd_table *table)
+{
+	u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
+		  CTXDESC_L1_DESC_VALID;
+
+	WRITE_ONCE(*dst, cpu_to_le64(val));
+}
+
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
+				   u32 ssid)
+{
+	__le64 *l1ptr;
+	unsigned int idx;
+	struct arm_smmu_cd_table *table;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+
+	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;
+
+	idx = ssid >> CTXDESC_SPLIT;
+	table = &cfg->tables[idx];
+	if (!table->ptr) {
+		if (arm_smmu_alloc_cd_leaf_table(smmu, table,
+						 CTXDESC_L2_ENTRIES))
+			return NULL;
+
+		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
+		arm_smmu_write_cd_l1_desc(l1ptr, table);
+		/* An invalid L1CD can be cached */
+		arm_smmu_sync_cd(smmu_domain, ssid, false);
+	}
+	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
+	return table->ptr + idx * CTXDESC_CD_DWORDS;
+}
+
 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 {
 	u64 val = 0;
@@ -1556,8 +1614,14 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 	u64 val;
 	bool cd_live;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
-			CTXDESC_CD_DWORDS;
+	__le64 *cdptr;
+
+	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+		return -E2BIG;
+
+	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+	if (!cdptr)
+		return -ENOMEM;
 
 	val = le64_to_cpu(cdptr[0]);
 	cd_live = !!(val & CTXDESC_CD_0_V);
@@ -1604,20 +1668,87 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
 {
+	int ret;
+	size_t size = 0;
+	size_t max_contexts;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
-	cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
-	return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table,
-					    1 << cfg->s1cdmax);
+	max_contexts = 1 << cfg->s1cdmax;
+
+	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
+	    max_contexts <= CTXDESC_L2_ENTRIES) {
+		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+		cfg->num_tables = 1;
+	} else {
+		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+		cfg->num_tables = DIV_ROUND_UP(max_contexts,
+					       CTXDESC_L2_ENTRIES);
+
+		size = cfg->num_tables * (CTXDESC_L1_DESC_DWORDS << 3);
+		cfg->l1ptr = dmam_alloc_coherent(smmu->dev, size,
+						 &cfg->l1ptr_dma,
+						 GFP_KERNEL);
+		if (!cfg->l1ptr) {
+			dev_warn(smmu->dev,
+				 "failed to allocate L1 context table\n");
+			return -ENOMEM;
+		}
+	}
+
+	cfg->tables = devm_kzalloc(smmu->dev, sizeof(struct arm_smmu_cd_table) *
+				   cfg->num_tables, GFP_KERNEL);
+	if (!cfg->tables) {
+		ret = -ENOMEM;
+		goto err_free_l1;
+	}
+
+	/*
+	 * Only allocate a leaf table for linear case. With two levels, leaf
+	 * tables are allocated lazily.
+	 */
+	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
+		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],
+						   max_contexts);
+		if (ret)
+			goto err_free_tables;
+	}
+
+	return 0;
+
+err_free_tables:
+	devm_kfree(smmu->dev, cfg->tables);
+	cfg->tables = NULL;
+err_free_l1:
+	if (cfg->l1ptr) {
+		dmam_free_coherent(smmu->dev, size, cfg->l1ptr, cfg->l1ptr_dma);
+		cfg->l1ptr = NULL;
+		cfg->l1ptr_dma = 0;
+	}
+	return ret;
 }
 
 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
 {
+	int i;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+	size_t num_leaf_entries = 1 << cfg->s1cdmax;
+	struct arm_smmu_cd_table *table = cfg->tables;
+
+	if (cfg->l1ptr) {
+		size_t size = cfg->num_tables * (CTXDESC_L1_DESC_DWORDS << 3);
 
-	arm_smmu_free_cd_leaf_table(smmu, &cfg->table, 1 << cfg->s1cdmax);
+		dmam_free_coherent(smmu->dev, size, cfg->l1ptr, cfg->l1ptr_dma);
+		cfg->l1ptr = NULL;
+		cfg->l1ptr_dma = 0;
+		num_leaf_entries = CTXDESC_L2_ENTRIES;
+	}
+
+	for (i = 0; i < cfg->num_tables; i++, table++)
+		arm_smmu_free_cd_leaf_table(smmu, table, num_leaf_entries);
+	devm_kfree(smmu->dev, cfg->tables);
+	cfg->tables = NULL;
 }
 
 /* Stream table manipulation functions */
@@ -1737,6 +1868,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	}
 
 	if (s1_cfg) {
+		dma_addr_t ptr_dma = s1_cfg->l1ptr ? s1_cfg->l1ptr_dma :
+				     s1_cfg->tables[0].ptr_dma;
+
 		BUG_ON(ste_live);
 		dst[1] = cpu_to_le64(
 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
@@ -1749,7 +1883,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
-		val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+		val |= (ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
@@ -2265,7 +2399,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
-		if (cfg->table.ptr) {
+		if (cfg->tables) {
 			arm_smmu_free_cd_tables(smmu_domain);
 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
 		}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (9 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2020-01-14 15:25   ` Will Deacon
  2019-12-19 16:30 ` [PATCH v4 12/13] PCI/ATS: Add PASID stubs Jean-Philippe Brucker
                   ` (3 subsequent siblings)
  14 siblings, 1 reply; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Let add_device() clean up after itself. The iommu_bus_init() function
does call remove_device() on error, but other sites (e.g. of_iommu) do
not.

Don't free level-2 stream tables because we'd have to track if we
allocated each of them or if they are used by other endpoints. It's not
worth the hassle since they are managed resources.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index bf106a7b53eb..e62ca80f2f76 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2837,14 +2837,16 @@ static int arm_smmu_add_device(struct device *dev)
 	for (i = 0; i < master->num_sids; i++) {
 		u32 sid = master->sids[i];
 
-		if (!arm_smmu_sid_in_range(smmu, sid))
-			return -ERANGE;
+		if (!arm_smmu_sid_in_range(smmu, sid)) {
+			ret = -ERANGE;
+			goto err_free_master;
+		}
 
 		/* Ensure l2 strtab is initialised */
 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
 			ret = arm_smmu_init_l2_strtab(smmu, sid);
 			if (ret)
-				return ret;
+				goto err_free_master;
 		}
 	}
 
@@ -2854,13 +2856,25 @@ static int arm_smmu_add_device(struct device *dev)
 		master->ssid_bits = min_t(u8, master->ssid_bits,
 					  CTXDESC_LINEAR_CDMAX);
 
+	ret = iommu_device_link(&smmu->iommu, dev);
+	if (ret)
+		goto err_free_master;
+
 	group = iommu_group_get_for_dev(dev);
-	if (!IS_ERR(group)) {
-		iommu_group_put(group);
-		iommu_device_link(&smmu->iommu, dev);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
+		goto err_unlink;
 	}
 
-	return PTR_ERR_OR_ZERO(group);
+	iommu_group_put(group);
+	return 0;
+
+err_unlink:
+	iommu_device_unlink(&smmu->iommu, dev);
+err_free_master:
+	kfree(master);
+	fwspec->iommu_priv = NULL;
+	return ret;
 }
 
 static void arm_smmu_remove_device(struct device *dev)
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 12/13] PCI/ATS: Add PASID stubs
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (10 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-19 16:30 ` [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

The SMMUv3 driver, which may be built without CONFIG_PCI, will soon gain
PASID support.  Partially revert commit c6e9aefbf9db ("PCI/ATS: Remove
unused PRI and PASID stubs") to re-introduce the PASID stubs, and avoid
adding more #ifdefs to the SMMU driver.

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 include/linux/pci-ats.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 5d62e78946a3..d08f0869f121 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -33,6 +33,9 @@ void pci_disable_pasid(struct pci_dev *pdev);
 int pci_pasid_features(struct pci_dev *pdev);
 int pci_max_pasids(struct pci_dev *pdev);
 #else /* CONFIG_PCI_PASID */
+static inline int pci_enable_pasid(struct pci_dev *pdev, int features)
+{ return -EINVAL; }
+static inline void pci_disable_pasid(struct pci_dev *pdev) { }
 static inline int pci_pasid_features(struct pci_dev *pdev)
 { return -EINVAL; }
 static inline int pci_max_pasids(struct pci_dev *pdev)
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (11 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 12/13] PCI/ATS: Add PASID stubs Jean-Philippe Brucker
@ 2019-12-19 16:30 ` Jean-Philippe Brucker
  2019-12-20  7:37   ` Auger Eric
  2020-01-14 12:45   ` Will Deacon
  2020-01-09 14:36 ` [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
  2020-01-14 15:40 ` Will Deacon
  14 siblings, 2 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2019-12-19 16:30 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	eric.auger, jonathan.cameron, zhangfei.gao

Enable PASID for PCI devices that support it. Since the SSID tables are
allocated by arm_smmu_attach_dev(), PASID has to be enabled early enough.
arm_smmu_dev_feature_enable() would be too late, since by that time the
main DMA domain has already been attached. Do it in add_device() instead.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 55 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index e62ca80f2f76..8e95ecad4c9a 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2644,6 +2644,53 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master)
 	atomic_dec(&smmu_domain->nr_ats_masters);
 }
 
+static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
+{
+	int ret;
+	int features;
+	int num_pasids;
+	struct pci_dev *pdev;
+
+	if (!dev_is_pci(master->dev))
+		return -ENODEV;
+
+	pdev = to_pci_dev(master->dev);
+
+	features = pci_pasid_features(pdev);
+	if (features < 0)
+		return features;
+
+	num_pasids = pci_max_pasids(pdev);
+	if (num_pasids <= 0)
+		return num_pasids;
+
+	ret = pci_enable_pasid(pdev, features);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to enable PASID\n");
+		return ret;
+	}
+
+	master->ssid_bits = min_t(u8, ilog2(num_pasids),
+				  master->smmu->ssid_bits);
+	return 0;
+}
+
+static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
+{
+	struct pci_dev *pdev;
+
+	if (!dev_is_pci(master->dev))
+		return;
+
+	pdev = to_pci_dev(master->dev);
+
+	if (!pdev->pasid_enabled)
+		return;
+
+	master->ssid_bits = 0;
+	pci_disable_pasid(pdev);
+}
+
 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
 {
 	unsigned long flags;
@@ -2852,13 +2899,16 @@ static int arm_smmu_add_device(struct device *dev)
 
 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
 
+	/* Note that PASID must be enabled before, and disabled after ATS */
+	arm_smmu_enable_pasid(master);
+
 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
 		master->ssid_bits = min_t(u8, master->ssid_bits,
 					  CTXDESC_LINEAR_CDMAX);
 
 	ret = iommu_device_link(&smmu->iommu, dev);
 	if (ret)
-		goto err_free_master;
+		goto err_disable_pasid;
 
 	group = iommu_group_get_for_dev(dev);
 	if (IS_ERR(group)) {
@@ -2871,6 +2921,8 @@ static int arm_smmu_add_device(struct device *dev)
 
 err_unlink:
 	iommu_device_unlink(&smmu->iommu, dev);
+err_disable_pasid:
+	arm_smmu_disable_pasid(master);
 err_free_master:
 	kfree(master);
 	fwspec->iommu_priv = NULL;
@@ -2891,6 +2943,7 @@ static void arm_smmu_remove_device(struct device *dev)
 	arm_smmu_detach_dev(master);
 	iommu_group_remove_device(dev);
 	iommu_device_unlink(&smmu->iommu, dev);
+	arm_smmu_disable_pasid(master);
 	kfree(master);
 	iommu_fwspec_free(dev);
 }
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table
  2019-12-19 16:30 ` [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
@ 2019-12-20  7:37   ` Auger Eric
  2020-01-14 15:04   ` Will Deacon
  1 sibling, 0 replies; 34+ messages in thread
From: Auger Eric @ 2019-12-20  7:37 UTC (permalink / raw)
  To: Jean-Philippe Brucker, linux-pci, linux-arm-kernel, linux-acpi,
	devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	jonathan.cameron, zhangfei.gao

Hi Jean,

On 12/19/19 5:30 PM, Jean-Philippe Brucker wrote:
> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric

> ---
>  drivers/iommu/arm-smmu-v3.c | 154 +++++++++++++++++++++++++++++++++---
>  1 file changed, 144 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index b825a5639afc..bf106a7b53eb 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -224,6 +224,7 @@
>  
>  #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR	0
> +#define STRTAB_STE_0_S1FMT_64K_L2	2
>  #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
>  
> @@ -263,7 +264,20 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
>  
> -/* Context descriptor (stage-1 only) */
> +/*
> + * Context descriptors.
> + *
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entries,
> + *       1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT			10
> +#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORDS		1
> +#define CTXDESC_L1_DESC_VALID		1
> +#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)
> +
>  #define CTXDESC_CD_DWORDS		8
>  #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
>  #define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
> @@ -575,7 +589,12 @@ struct arm_smmu_cd_table {
>  };
>  
>  struct arm_smmu_s1_cfg {
> -	struct arm_smmu_cd_table	table;
> +	/* Leaf tables or linear table */
> +	struct arm_smmu_cd_table	*tables;
> +	size_t				num_tables;
> +	/* First level tables, when two levels are used */
> +	__le64				*l1ptr;
> +	dma_addr_t			l1ptr_dma;
>  	struct arm_smmu_ctx_desc	cd;
>  	u8				s1fmt;
>  	u8				s1cdmax;
> @@ -1521,9 +1540,48 @@ static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
>  {
>  	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
>  
> +	if (!table->ptr)
> +		return;
>  	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
>  }
>  
> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> +				      struct arm_smmu_cd_table *table)
> +{
> +	u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
> +		  CTXDESC_L1_DESC_VALID;
> +
> +	WRITE_ONCE(*dst, cpu_to_le64(val));
> +}
> +
> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
> +				   u32 ssid)
> +{
> +	__le64 *l1ptr;
> +	unsigned int idx;
> +	struct arm_smmu_cd_table *table;
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +
> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
> +		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;
> +
> +	idx = ssid >> CTXDESC_SPLIT;
> +	table = &cfg->tables[idx];
> +	if (!table->ptr) {
> +		if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +						 CTXDESC_L2_ENTRIES))
> +			return NULL;
> +
> +		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> +		arm_smmu_write_cd_l1_desc(l1ptr, table);
> +		/* An invalid L1CD can be cached */
> +		arm_smmu_sync_cd(smmu_domain, ssid, false);
> +	}
> +	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> +	return table->ptr + idx * CTXDESC_CD_DWORDS;
> +}
> +
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>  {
>  	u64 val = 0;
> @@ -1556,8 +1614,14 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  	u64 val;
>  	bool cd_live;
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
> -	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
> -			CTXDESC_CD_DWORDS;
> +	__le64 *cdptr;
> +
> +	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
> +		return -E2BIG;
> +
> +	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
> +	if (!cdptr)
> +		return -ENOMEM;
>  
>  	val = le64_to_cpu(cdptr[0]);
>  	cd_live = !!(val & CTXDESC_CD_0_V);
> @@ -1604,20 +1668,87 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
>  {
> +	int ret;
> +	size_t size = 0;
> +	size_t max_contexts;
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
>  
> -	cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
> -	return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table,
> -					    1 << cfg->s1cdmax);
> +	max_contexts = 1 << cfg->s1cdmax;
> +
> +	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
> +	    max_contexts <= CTXDESC_L2_ENTRIES) {
> +		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
> +		cfg->num_tables = 1;
> +	} else {
> +		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
> +		cfg->num_tables = DIV_ROUND_UP(max_contexts,
> +					       CTXDESC_L2_ENTRIES);
> +
> +		size = cfg->num_tables * (CTXDESC_L1_DESC_DWORDS << 3);
> +		cfg->l1ptr = dmam_alloc_coherent(smmu->dev, size,
> +						 &cfg->l1ptr_dma,
> +						 GFP_KERNEL);
> +		if (!cfg->l1ptr) {
> +			dev_warn(smmu->dev,
> +				 "failed to allocate L1 context table\n");
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	cfg->tables = devm_kzalloc(smmu->dev, sizeof(struct arm_smmu_cd_table) *
> +				   cfg->num_tables, GFP_KERNEL);
> +	if (!cfg->tables) {
> +		ret = -ENOMEM;
> +		goto err_free_l1;
> +	}
> +
> +	/*
> +	 * Only allocate a leaf table for linear case. With two levels, leaf
> +	 * tables are allocated lazily.
> +	 */
> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> +		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],
> +						   max_contexts);
> +		if (ret)
> +			goto err_free_tables;
> +	}
> +
> +	return 0;
> +
> +err_free_tables:
> +	devm_kfree(smmu->dev, cfg->tables);
> +	cfg->tables = NULL;
> +err_free_l1:
> +	if (cfg->l1ptr) {
> +		dmam_free_coherent(smmu->dev, size, cfg->l1ptr, cfg->l1ptr_dma);
> +		cfg->l1ptr = NULL;
> +		cfg->l1ptr_dma = 0;
> +	}
> +	return ret;
>  }
>  
>  static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
>  {
> +	int i;
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> +	size_t num_leaf_entries = 1 << cfg->s1cdmax;
> +	struct arm_smmu_cd_table *table = cfg->tables;
> +
> +	if (cfg->l1ptr) {
> +		size_t size = cfg->num_tables * (CTXDESC_L1_DESC_DWORDS << 3);
>  
> -	arm_smmu_free_cd_leaf_table(smmu, &cfg->table, 1 << cfg->s1cdmax);
> +		dmam_free_coherent(smmu->dev, size, cfg->l1ptr, cfg->l1ptr_dma);
> +		cfg->l1ptr = NULL;
> +		cfg->l1ptr_dma = 0;
> +		num_leaf_entries = CTXDESC_L2_ENTRIES;
> +	}
> +
> +	for (i = 0; i < cfg->num_tables; i++, table++)
> +		arm_smmu_free_cd_leaf_table(smmu, table, num_leaf_entries);
> +	devm_kfree(smmu->dev, cfg->tables);
> +	cfg->tables = NULL;
>  }
>  
>  /* Stream table manipulation functions */
> @@ -1737,6 +1868,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
>  	}
>  
>  	if (s1_cfg) {
> +		dma_addr_t ptr_dma = s1_cfg->l1ptr ? s1_cfg->l1ptr_dma :
> +				     s1_cfg->tables[0].ptr_dma;
> +
>  		BUG_ON(ste_live);
>  		dst[1] = cpu_to_le64(
>  			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
> @@ -1749,7 +1883,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
>  		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
>  			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
>  
> -		val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> +		val |= (ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
>  			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
>  			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
>  			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
> @@ -2265,7 +2399,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
>  	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>  		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
>  
> -		if (cfg->table.ptr) {
> +		if (cfg->tables) {
>  			arm_smmu_free_cd_tables(smmu_domain);
>  			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
>  		}
> 


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID
  2019-12-19 16:30 ` [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker
@ 2019-12-20  7:37   ` Auger Eric
  2020-01-14 12:45   ` Will Deacon
  1 sibling, 0 replies; 34+ messages in thread
From: Auger Eric @ 2019-12-20  7:37 UTC (permalink / raw)
  To: Jean-Philippe Brucker, linux-pci, linux-arm-kernel, linux-acpi,
	devicetree, iommu
  Cc: joro, robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, will, robin.murphy, bhelgaas,
	jonathan.cameron, zhangfei.gao

Hi Jean,

On 12/19/19 5:30 PM, Jean-Philippe Brucker wrote:
> Enable PASID for PCI devices that support it. Since the SSID tables are
> allocated by arm_smmu_attach_dev(), PASID has to be enabled early enough.
> arm_smmu_dev_feature_enable() would be too late, since by that time the
> main DMA domain has already been attached. Do it in add_device() instead.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric

> ---
>  drivers/iommu/arm-smmu-v3.c | 55 ++++++++++++++++++++++++++++++++++++-
>  1 file changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index e62ca80f2f76..8e95ecad4c9a 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2644,6 +2644,53 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master)
>  	atomic_dec(&smmu_domain->nr_ats_masters);
>  }
>  
> +static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> +{
> +	int ret;
> +	int features;
> +	int num_pasids;
> +	struct pci_dev *pdev;
> +
> +	if (!dev_is_pci(master->dev))
> +		return -ENODEV;
> +
> +	pdev = to_pci_dev(master->dev);
> +
> +	features = pci_pasid_features(pdev);
> +	if (features < 0)
> +		return features;
> +
> +	num_pasids = pci_max_pasids(pdev);
> +	if (num_pasids <= 0)
> +		return num_pasids;
> +
> +	ret = pci_enable_pasid(pdev, features);
> +	if (ret) {
> +		dev_err(&pdev->dev, "Failed to enable PASID\n");
> +		return ret;
> +	}
> +
> +	master->ssid_bits = min_t(u8, ilog2(num_pasids),
> +				  master->smmu->ssid_bits);
> +	return 0;
> +}
> +
> +static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
> +{
> +	struct pci_dev *pdev;
> +
> +	if (!dev_is_pci(master->dev))
> +		return;
> +
> +	pdev = to_pci_dev(master->dev);
> +
> +	if (!pdev->pasid_enabled)
> +		return;
> +
> +	master->ssid_bits = 0;
> +	pci_disable_pasid(pdev);
> +}
> +
>  static void arm_smmu_detach_dev(struct arm_smmu_master *master)
>  {
>  	unsigned long flags;
> @@ -2852,13 +2899,16 @@ static int arm_smmu_add_device(struct device *dev)
>  
>  	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
>  
> +	/* Note that PASID must be enabled before, and disabled after ATS */
> +	arm_smmu_enable_pasid(master);
> +
>  	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
>  		master->ssid_bits = min_t(u8, master->ssid_bits,
>  					  CTXDESC_LINEAR_CDMAX);
>  
>  	ret = iommu_device_link(&smmu->iommu, dev);
>  	if (ret)
> -		goto err_free_master;
> +		goto err_disable_pasid;
>  
>  	group = iommu_group_get_for_dev(dev);
>  	if (IS_ERR(group)) {
> @@ -2871,6 +2921,8 @@ static int arm_smmu_add_device(struct device *dev)
>  
>  err_unlink:
>  	iommu_device_unlink(&smmu->iommu, dev);
> +err_disable_pasid:
> +	arm_smmu_disable_pasid(master);
>  err_free_master:
>  	kfree(master);
>  	fwspec->iommu_priv = NULL;
> @@ -2891,6 +2943,7 @@ static void arm_smmu_remove_device(struct device *dev)
>  	arm_smmu_detach_dev(master);
>  	iommu_group_remove_device(dev);
>  	iommu_device_unlink(&smmu->iommu, dev);
> +	arm_smmu_disable_pasid(master);
>  	kfree(master);
>  	iommu_fwspec_free(dev);
>  }
> 


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (12 preceding siblings ...)
  2019-12-19 16:30 ` [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker
@ 2020-01-09 14:36 ` Jean-Philippe Brucker
  2020-01-09 14:41   ` Will Deacon
  2020-01-14 15:40 ` Will Deacon
  14 siblings, 1 reply; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-09 14:36 UTC (permalink / raw)
  To: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu
  Cc: mark.rutland, robin.murphy, guohanjun, rjw, robh+dt,
	sudeep.holla, bhelgaas, zhangfei.gao, will, lenb

Hi Will,

On Thu, Dec 19, 2019 at 05:30:20PM +0100, Jean-Philippe Brucker wrote:
> Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
> [1], I added review and tested tags where appropriate and applied the
> suggested changes, shown in the diff below. Thanks all!
> 
> I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
> Zhangfei's uacce module [2]. The full SVA support, which I'll send out
> early next year, is available on my branch sva/zip-devel at
> https://jpbrucker.net/git/linux/

Is there anything more I should do for the PASID support? Ideally I'd like
to get this in v5.6 so I can focus on the rest of the SVA work and on
performance improvements.

Thanks,
Jean

> 
> [1] https://lore.kernel.org/linux-iommu/20191209180514.272727-1-jean-philippe@linaro.org/
> [2] https://lore.kernel.org/linux-iommu/1576465697-27946-1-git-send-email-zhangfei.gao@linaro.org/
> 
> Jean-Philippe Brucker (13):
>   iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
>   dt-bindings: document PASID property for IOMMU masters
>   iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices
>   ACPI/IORT: Parse SSID property of named component node
>   iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
>   iommu/arm-smmu-v3: Add context descriptor tables allocators
>   iommu/arm-smmu-v3: Add support for Substream IDs
>   iommu/arm-smmu-v3: Propagate ssid_bits
>   iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc()
>     failure
>   iommu/arm-smmu-v3: Add second level of context descriptor table
>   iommu/arm-smmu-v3: Improve add_device() error handling
>   PCI/ATS: Add PASID stubs
>   iommu/arm-smmu-v3: Add support for PCI PASID
> 
>  .../devicetree/bindings/iommu/iommu.txt       |   6 +
>  drivers/acpi/arm64/iort.c                     |  18 +
>  drivers/iommu/arm-smmu-v3.c                   | 467 +++++++++++++++---
>  drivers/iommu/of_iommu.c                      |   6 +-
>  include/linux/iommu.h                         |   2 +
>  include/linux/pci-ats.h                       |   3 +
>  6 files changed, 442 insertions(+), 60 deletions(-)
> 
> -- 
> Diff since v3:
> #diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index cde7af39681c..8e95ecad4c9a 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -589,8 +589,10 @@ struct arm_smmu_cd_table {
>  };
> 
>  struct arm_smmu_s1_cfg {
> +	/* Leaf tables or linear table */
>  	struct arm_smmu_cd_table	*tables;
>  	size_t				num_tables;
> +	/* First level tables, when two levels are used */
>  	__le64				*l1ptr;
>  	dma_addr_t			l1ptr_dma;
>  	struct arm_smmu_ctx_desc	cd;
> @@ -1561,27 +1563,22 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> 
> -	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> -		table = &cfg->tables[0];
> -		idx = ssid;
> -	} else {
> -		idx = ssid >> CTXDESC_SPLIT;
> -		if (idx >= cfg->num_tables)
> -			return NULL;
> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
> +		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;
> 
> -		table = &cfg->tables[idx];
> -		if (!table->ptr) {
> -			if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> -							 CTXDESC_L2_ENTRIES))
> -				return NULL;
> +	idx = ssid >> CTXDESC_SPLIT;
> +	table = &cfg->tables[idx];
> +	if (!table->ptr) {
> +		if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +						 CTXDESC_L2_ENTRIES))
> +			return NULL;
> 
> -			l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> -			arm_smmu_write_cd_l1_desc(l1ptr, table);
> -			/* An invalid L1CD can be cached */
> -			arm_smmu_sync_cd(smmu_domain, ssid, false);
> -		}
> -		idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> +		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> +		arm_smmu_write_cd_l1_desc(l1ptr, table);
> +		/* An invalid L1CD can be cached */
> +		arm_smmu_sync_cd(smmu_domain, ssid, false);
>  	}
> +	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
>  	return table->ptr + idx * CTXDESC_CD_DWORDS;
>  }
> 
> @@ -1617,8 +1614,12 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  	u64 val;
>  	bool cd_live;
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
> -	__le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
> +	__le64 *cdptr;
> 
> +	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
> +		return -E2BIG;
> +
> +	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
>  	if (!cdptr)
>  		return -ENOMEM;
> 
> @@ -1640,9 +1641,9 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  		cdptr[3] = cpu_to_le64(cd->mair);
> 
>  		/*
> -		 * STE is live, and the SMMU might fetch this CD at any
> -		 * time. Ensure that it observes the rest of the CD before we
> -		 * enable it.
> +		 * STE is live, and the SMMU might read dwords of this CD in any
> +		 * order. Ensure that it observes valid values before reading
> +		 * V=1.
>  		 */
>  		arm_smmu_sync_cd(smmu_domain, ssid, true);
> 
> @@ -1706,7 +1707,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
>  	 * Only allocate a leaf table for linear case. With two levels, leaf
>  	 * tables are allocated lazily.
>  	 */
> -	if (!cfg->l1ptr) {
> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
>  		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],
>  						   max_contexts);
>  		if (ret)
> @@ -2657,17 +2658,21 @@ static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> 
>  	features = pci_pasid_features(pdev);
>  	if (features < 0)
> -		return -ENODEV;
> +		return features;
> 
>  	num_pasids = pci_max_pasids(pdev);
>  	if (num_pasids <= 0)
> -		return -ENODEV;
> +		return num_pasids;
> 
>  	ret = pci_enable_pasid(pdev, features);
> -	if (!ret)
> -		master->ssid_bits = min_t(u8, ilog2(num_pasids),
> -					  master->smmu->ssid_bits);
> -	return ret;
> +	if (ret) {
> +		dev_err(&pdev->dev, "Failed to enable PASID\n");
> +		return ret;
> +	}
> +
> +	master->ssid_bits = min_t(u8, ilog2(num_pasids),
> +				  master->smmu->ssid_bits);
> +	return 0;
>  }
> 
>  static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
> 
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3
  2020-01-09 14:36 ` [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
@ 2020-01-09 14:41   ` Will Deacon
  2020-01-10  7:15     ` Jean-Philippe Brucker
  0 siblings, 1 reply; 34+ messages in thread
From: Will Deacon @ 2020-01-09 14:41 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu,
	mark.rutland, robin.murphy, guohanjun, rjw, robh+dt,
	sudeep.holla, bhelgaas, zhangfei.gao, lenb

On Thu, Jan 09, 2020 at 03:36:18PM +0100, Jean-Philippe Brucker wrote:
> On Thu, Dec 19, 2019 at 05:30:20PM +0100, Jean-Philippe Brucker wrote:
> > Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
> > [1], I added review and tested tags where appropriate and applied the
> > suggested changes, shown in the diff below. Thanks all!
> > 
> > I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
> > Zhangfei's uacce module [2]. The full SVA support, which I'll send out
> > early next year, is available on my branch sva/zip-devel at
> > https://jpbrucker.net/git/linux/
> 
> Is there anything more I should do for the PASID support? Ideally I'd like
> to get this in v5.6 so I can focus on the rest of the SVA work and on
> performance improvements.

Apologies, I'm just behind with review what with the timing of the new
year. You're on the list, but I was hoping to get Robin's TCR stuff dusted
off so that Jordan doesn't have to depend on patches languishing on the
mailing list and there's also the nvidia stuff to review as well.

Going as fast as I can!

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3
  2020-01-09 14:41   ` Will Deacon
@ 2020-01-10  7:15     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-10  7:15 UTC (permalink / raw)
  To: Will Deacon
  Cc: mark.rutland, devicetree, linux-pci, sudeep.holla, rjw,
	linux-acpi, iommu, robh+dt, guohanjun, bhelgaas, zhangfei.gao,
	robin.murphy, linux-arm-kernel, lenb

On Thu, Jan 09, 2020 at 02:41:01PM +0000, Will Deacon wrote:
> On Thu, Jan 09, 2020 at 03:36:18PM +0100, Jean-Philippe Brucker wrote:
> > On Thu, Dec 19, 2019 at 05:30:20PM +0100, Jean-Philippe Brucker wrote:
> > > Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
> > > [1], I added review and tested tags where appropriate and applied the
> > > suggested changes, shown in the diff below. Thanks all!
> > > 
> > > I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
> > > Zhangfei's uacce module [2]. The full SVA support, which I'll send out
> > > early next year, is available on my branch sva/zip-devel at
> > > https://jpbrucker.net/git/linux/
> > 
> > Is there anything more I should do for the PASID support? Ideally I'd like
> > to get this in v5.6 so I can focus on the rest of the SVA work and on
> > performance improvements.
> 
> Apologies, I'm just behind with review what with the timing of the new
> year. You're on the list, but I was hoping to get Robin's TCR stuff dusted
> off so that Jordan doesn't have to depend on patches languishing on the
> mailing list and there's also the nvidia stuff to review as well.
> 
> Going as fast as I can!

No worries, I just wanted to check that it didn't slip through the cracks.

Thanks,
Jean

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators
  2019-12-19 16:30 ` [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators Jean-Philippe Brucker
@ 2020-01-14 11:06   ` Will Deacon
  2020-01-14 11:52     ` Jean-Philippe Brucker
  0 siblings, 1 reply; 34+ messages in thread
From: Will Deacon @ 2020-01-14 11:06 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:26PM +0100, Jean-Philippe Brucker wrote:
> Support for SSID will require allocating context descriptor tables. Move
> the context descriptor allocation to separate functions.
> 
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 57 ++++++++++++++++++++++++++++++-------
>  1 file changed, 46 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index b287e303b1d7..43d6a7ded6e4 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -568,6 +568,7 @@ struct arm_smmu_cd_table {
>  struct arm_smmu_s1_cfg {
>  	struct arm_smmu_cd_table	table;
>  	struct arm_smmu_ctx_desc	cd;
> +	u8				s1cdmax;
>  };
>  
>  struct arm_smmu_s2_cfg {
> @@ -1455,6 +1456,31 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
>  }
>  
>  /* Context descriptor manipulation functions */
> +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
> +					struct arm_smmu_cd_table *table,
> +					size_t num_entries)
> +{
> +	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> +
> +	table->ptr = dmam_alloc_coherent(smmu->dev, size, &table->ptr_dma,
> +					 GFP_KERNEL);
> +	if (!table->ptr) {
> +		dev_warn(smmu->dev,
> +			 "failed to allocate context descriptor table\n");
> +		return -ENOMEM;
> +	}
> +	return 0;
> +}
> +
> +static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
> +					struct arm_smmu_cd_table *table,
> +					size_t num_entries)
> +{
> +	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> +
> +	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
> +}

I think we'd be better off taking the 'arm_smmu_s1_cfg' as a parameter here
instead of the table pointer and a num_entries value, since the code above
implies that we support partial freeing of the context descriptors.

I can do that as a follow-up patch if you agree. Thoughts?

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators
  2020-01-14 11:06   ` Will Deacon
@ 2020-01-14 11:52     ` Jean-Philippe Brucker
  2020-01-14 11:56       ` Will Deacon
  0 siblings, 1 reply; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-14 11:52 UTC (permalink / raw)
  To: Will Deacon
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Tue, Jan 14, 2020 at 11:06:52AM +0000, Will Deacon wrote:
> >  /* Context descriptor manipulation functions */
> > +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
> > +					struct arm_smmu_cd_table *table,
> > +					size_t num_entries)
> > +{
> > +	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> > +
> > +	table->ptr = dmam_alloc_coherent(smmu->dev, size, &table->ptr_dma,
> > +					 GFP_KERNEL);
> > +	if (!table->ptr) {
> > +		dev_warn(smmu->dev,
> > +			 "failed to allocate context descriptor table\n");
> > +		return -ENOMEM;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
> > +					struct arm_smmu_cd_table *table,
> > +					size_t num_entries)
> > +{
> > +	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> > +
> > +	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
> > +}
> 
> I think we'd be better off taking the 'arm_smmu_s1_cfg' as a parameter here
> instead of the table pointer and a num_entries value, since the code above
> implies that we support partial freeing of the context descriptors.
> 
> I can do that as a follow-up patch if you agree. Thoughts?

Do you mean only changing the arguments of arm_smmu_free_cd_leaf_table(),
or arm_smmu_alloc_cd_leaf_table() as well? For free() I agree, for alloc()
I'm not sure it would look better.

For my tests I have a debug patch that allocates PASIDs randomly which
quickly consumes DMA for leaf tables. So I do have to free the leaves
individually when they aren't used, but it will be easy for me to update.

Thanks,
Jean

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators
  2020-01-14 11:52     ` Jean-Philippe Brucker
@ 2020-01-14 11:56       ` Will Deacon
  0 siblings, 0 replies; 34+ messages in thread
From: Will Deacon @ 2020-01-14 11:56 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Tue, Jan 14, 2020 at 12:52:30PM +0100, Jean-Philippe Brucker wrote:
> On Tue, Jan 14, 2020 at 11:06:52AM +0000, Will Deacon wrote:
> > >  /* Context descriptor manipulation functions */
> > > +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
> > > +					struct arm_smmu_cd_table *table,
> > > +					size_t num_entries)
> > > +{
> > > +	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> > > +
> > > +	table->ptr = dmam_alloc_coherent(smmu->dev, size, &table->ptr_dma,
> > > +					 GFP_KERNEL);
> > > +	if (!table->ptr) {
> > > +		dev_warn(smmu->dev,
> > > +			 "failed to allocate context descriptor table\n");
> > > +		return -ENOMEM;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > > +static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,
> > > +					struct arm_smmu_cd_table *table,
> > > +					size_t num_entries)
> > > +{
> > > +	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);
> > > +
> > > +	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);
> > > +}
> > 
> > I think we'd be better off taking the 'arm_smmu_s1_cfg' as a parameter here
> > instead of the table pointer and a num_entries value, since the code above
> > implies that we support partial freeing of the context descriptors.
> > 
> > I can do that as a follow-up patch if you agree. Thoughts?
> 
> Do you mean only changing the arguments of arm_smmu_free_cd_leaf_table(),
> or arm_smmu_alloc_cd_leaf_table() as well? For free() I agree, for alloc()
> I'm not sure it would look better.

Yeah, just for free(). I'll spin something on top after I've finished
reviewing the series.

> For my tests I have a debug patch that allocates PASIDs randomly which
> quickly consumes DMA for leaf tables. So I do have to free the leaves
> individually when they aren't used, but it will be easy for me to update.

Cool.

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs
  2019-12-19 16:30 ` [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs Jean-Philippe Brucker
@ 2020-01-14 12:38   ` Will Deacon
  2020-01-14 16:30     ` Jean-Philippe Brucker
  0 siblings, 1 reply; 34+ messages in thread
From: Will Deacon @ 2020-01-14 12:38 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:27PM +0100, Jean-Philippe Brucker wrote:
> At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
> page directory per device. However SMMUv3 allows more than one address
> space for some devices, by providing multiple stage-1 page directories. In
> addition to the Stream ID (SID), that identifies a device, we can now have
> Substream IDs (SSID) identifying an address space. In PCIe, SID is called
> Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
> A complete stage-1 walk goes through the context descriptor table:
> 
>       Stream tables       Ctx. Desc. tables       Page tables
>         +--------+   ,------->+-------+   ,------->+-------+
>         :        :   |        :       :   |        :       :
>         +--------+   |        +-------+   |        +-------+
>    SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
>         +--------+            +-------+            +-------+
>         :        :            :       :            :       :
>         +--------+            +-------+            +-------+
> 
> Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table
> entries. To keep things simple we only implement one level of context
> descriptor tables here, but as with stream and page tables, an SSID can
> be split to index multiple levels of tables.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 125 +++++++++++++++++++++++++++++-------
>  1 file changed, 102 insertions(+), 23 deletions(-)

--->8

> @@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
>  }
>  
>  /* Context descriptor manipulation functions */
> +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> +			     int ssid, bool leaf)
> +{
> +	size_t i;
> +	unsigned long flags;
> +	struct arm_smmu_master *master;
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	struct arm_smmu_cmdq_ent cmd = {
> +		.opcode	= CMDQ_OP_CFGI_CD,
> +		.cfgi	= {
> +			.ssid	= ssid,
> +			.leaf	= leaf,
> +		},
> +	};
> +
> +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> +		for (i = 0; i < master->num_sids; i++) {
> +			cmd.cfgi.sid = master->sids[i];
> +			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> +		}
> +	}
> +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +
> +	arm_smmu_cmdq_issue_sync(smmu);

Can you send a follow-up patch converting this to batch submission, please?

> +}
> +
>  static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
>  					struct arm_smmu_cd_table *table,
>  					size_t num_entries)
> @@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>  	return val;
>  }
>  
> -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
> -				    struct arm_smmu_s1_cfg *cfg)
> +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
> +				   int ssid, struct arm_smmu_ctx_desc *cd)
>  {
> -	u64 val;
> -	__le64 *cdptr = cfg->table.ptr;
> -
>  	/*
> -	 * We don't need to issue any invalidation here, as we'll invalidate
> -	 * the STE when installing the new entry anyway.
> +	 * This function handles the following cases:
> +	 *
> +	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
> +	 * (2) Install a secondary CD, for SID+SSID traffic.
> +	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
> +	 *     CD, then invalidate the old entry and mappings.
> +	 * (4) Remove a secondary CD.
>  	 */
> -	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
> -#ifdef __BIG_ENDIAN
> -	      CTXDESC_CD_0_ENDI |
> -#endif
> -	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
> -	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
> -	      CTXDESC_CD_0_V;
> +	u64 val;
> +	bool cd_live;
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
> +			CTXDESC_CD_DWORDS;
>  
> -	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> -	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> -		val |= CTXDESC_CD_0_S;
> +	val = le64_to_cpu(cdptr[0]);
> +	cd_live = !!(val & CTXDESC_CD_0_V);
>  
> -	cdptr[0] = cpu_to_le64(val);
> +	if (!cd) { /* (4) */
> +		val = 0;
> +	} else if (cd_live) { /* (3) */
> +		val &= ~CTXDESC_CD_0_ASID;
> +		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
> +		/*
> +		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
> +		 * this substream's traffic
> +		 */

I don't think you need to change anything here, but I do find it a little
scary that we can modify live CDs like this. However, given that the
hardware is permitted to cache the structures regardless of validity, it
appears to be the only option. Terrifying!

> +	} else { /* (1) and (2) */
> +		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);

Can you use FIELD_PREP here too?

> +		cdptr[2] = 0;
> +		cdptr[3] = cpu_to_le64(cd->mair);
> +
> +		/*
> +		 * STE is live, and the SMMU might read dwords of this CD in any
> +		 * order. Ensure that it observes valid values before reading
> +		 * V=1.
> +		 */
> +		arm_smmu_sync_cd(smmu_domain, ssid, true);
>  
> -	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
> -	cdptr[1] = cpu_to_le64(val);
> +		val = arm_smmu_cpu_tcr_to_cd(cd->tcr) |
> +#ifdef __BIG_ENDIAN
> +			CTXDESC_CD_0_ENDI |
> +#endif
> +			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
> +			CTXDESC_CD_0_AA64 |
> +			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
> +			CTXDESC_CD_0_V;
>  
> -	cdptr[3] = cpu_to_le64(cfg->cd.mair);
> +		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> +		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> +			val |= CTXDESC_CD_0_S;
> +	}
> +
> +	WRITE_ONCE(cdptr[0], cpu_to_le64(val));

Can you add a comment here citing 3.21.3 ("Configuration structures and
configuration invalidation completion") please? Specifically, the note that
states:

  | The size of single-copy atomic reads made by the SMMU is IMPLEMENTATION
  | DEFINED but must be at least 64 bits.

Because that's really crucial to the WRITE_ONCE() above!

Shouldn't we also do the same thing for the STE side of things? I think so,
and you can just comment of them with the quote and cite the comment from
the other callsite.

Thanks,

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure
  2019-12-19 16:30 ` [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure Jean-Philippe Brucker
@ 2020-01-14 12:42   ` Will Deacon
  2020-01-14 16:31     ` Jean-Philippe Brucker
  0 siblings, 1 reply; 34+ messages in thread
From: Will Deacon @ 2020-01-14 12:42 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:29PM +0100, Jean-Philippe Brucker wrote:
> Second-level context descriptor tables will be allocated lazily in
> arm_smmu_write_ctx_desc(). Help with handling allocation failure by
> moving the CD write into arm_smmu_domain_finalise_s1().
> 
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index e147087198ef..b825a5639afc 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2301,8 +2301,15 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>  	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
>  	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
>  	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
> +
> +	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);

Hmm. This ends up calling arm_smmu_sync_cd() but I think that happens before
we've added the master to the devices list of the domain. Does that mean we
miss the new SSID during the invalidation?

> +	if (ret)
> +		goto out_free_tables;
> +
>  	return 0;
>  
> +out_free_tables:

nit: We have more tables in this driver than you can shake a stick at, so
please rename the label "out_free_cd_tables" or something like that.

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID
  2019-12-19 16:30 ` [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker
  2019-12-20  7:37   ` Auger Eric
@ 2020-01-14 12:45   ` Will Deacon
  2020-01-15  7:57     ` Jean-Philippe Brucker
  1 sibling, 1 reply; 34+ messages in thread
From: Will Deacon @ 2020-01-14 12:45 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:33PM +0100, Jean-Philippe Brucker wrote:
> Enable PASID for PCI devices that support it. Since the SSID tables are
> allocated by arm_smmu_attach_dev(), PASID has to be enabled early enough.
> arm_smmu_dev_feature_enable() would be too late, since by that time the

What is arm_smmu_dev_feature_enable()?

> main DMA domain has already been attached. Do it in add_device() instead.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 55 ++++++++++++++++++++++++++++++++++++-
>  1 file changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index e62ca80f2f76..8e95ecad4c9a 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2644,6 +2644,53 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master)
>  	atomic_dec(&smmu_domain->nr_ats_masters);
>  }
>  
> +static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> +{
> +	int ret;
> +	int features;
> +	int num_pasids;
> +	struct pci_dev *pdev;
> +
> +	if (!dev_is_pci(master->dev))
> +		return -ENODEV;
> +
> +	pdev = to_pci_dev(master->dev);
> +
> +	features = pci_pasid_features(pdev);
> +	if (features < 0)
> +		return features;
> +
> +	num_pasids = pci_max_pasids(pdev);
> +	if (num_pasids <= 0)
> +		return num_pasids;
> +
> +	ret = pci_enable_pasid(pdev, features);
> +	if (ret) {
> +		dev_err(&pdev->dev, "Failed to enable PASID\n");
> +		return ret;
> +	}
> +
> +	master->ssid_bits = min_t(u8, ilog2(num_pasids),
> +				  master->smmu->ssid_bits);
> +	return 0;
> +}
> +
> +static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
> +{
> +	struct pci_dev *pdev;
> +
> +	if (!dev_is_pci(master->dev))
> +		return;
> +
> +	pdev = to_pci_dev(master->dev);
> +
> +	if (!pdev->pasid_enabled)
> +		return;
> +
> +	master->ssid_bits = 0;
> +	pci_disable_pasid(pdev);
> +}
> +
>  static void arm_smmu_detach_dev(struct arm_smmu_master *master)
>  {
>  	unsigned long flags;
> @@ -2852,13 +2899,16 @@ static int arm_smmu_add_device(struct device *dev)
>  
>  	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
>  
> +	/* Note that PASID must be enabled before, and disabled after ATS */
> +	arm_smmu_enable_pasid(master);

Is that part of the PCIe specs? If so, please can you add a citation to the
comment?

Are there any other ordering requirements, i.e. with respect to enabling
substreams at the SMMU? For example, can a speculative ATS request provide
a PASID?

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table
  2019-12-19 16:30 ` [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
  2019-12-20  7:37   ` Auger Eric
@ 2020-01-14 15:04   ` Will Deacon
  2020-01-15  9:45     ` Jean-Philippe Brucker
  1 sibling, 1 reply; 34+ messages in thread
From: Will Deacon @ 2020-01-14 15:04 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:30PM +0100, Jean-Philippe Brucker wrote:
> The SMMU can support up to 20 bits of SSID. Add a second level of page
> tables to accommodate this. Devices that support more than 1024 SSIDs now
> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> descriptors (64kB), allocated on demand.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 154 +++++++++++++++++++++++++++++++++---
>  1 file changed, 144 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index b825a5639afc..bf106a7b53eb 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -224,6 +224,7 @@
>  
>  #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR	0
> +#define STRTAB_STE_0_S1FMT_64K_L2	2
>  #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
>  
> @@ -263,7 +264,20 @@
>  
>  #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
>  
> -/* Context descriptor (stage-1 only) */
> +/*
> + * Context descriptors.
> + *
> + * Linear: when less than 1024 SSIDs are supported
> + * 2lvl: at most 1024 L1 entries,
> + *       1024 lazy entries per table.
> + */
> +#define CTXDESC_SPLIT			10
> +#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
> +
> +#define CTXDESC_L1_DESC_DWORDS		1
> +#define CTXDESC_L1_DESC_VALID		1

	#define CTXDESC_L1_DESC_V	(1UL << 0)

fits better with the rest of the driver and also ensures that the thing
is unsigned (we should probably switch over the BIT macros, but that's a
separate cleanup patch).

> +#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)
> +
>  #define CTXDESC_CD_DWORDS		8
>  #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
>  #define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
> @@ -575,7 +589,12 @@ struct arm_smmu_cd_table {
>  };
>  
>  struct arm_smmu_s1_cfg {
> -	struct arm_smmu_cd_table	table;
> +	/* Leaf tables or linear table */
> +	struct arm_smmu_cd_table	*tables;
> +	size_t				num_tables;
> +	/* First level tables, when two levels are used */
> +	__le64				*l1ptr;
> +	dma_addr_t			l1ptr_dma;

It probably feels like a nit, but I think this is all a little hard to read
because it differs unnecessarily from the way the stream table is handled.

Could we align the two as follows? (I've commented things with what they
refer to in your patch):


struct arm_smmu_l1_ctx_desc {				// arm_smmu_cd_table
	__le64				*l2ptr;		// ptr
	dma_addr_t			l2ptr_dma;	// ptr_dma
};

struct arm_smmu_ctx_desc_cfg {
	__le64				*cdtab;		// l1ptr
	dma_addr_t			cdtab_dma;	// l1ptr_dma
	struct arm_smmu_l1_ctx_desc	*l1_desc;	// tables
	unsigned int			num_l1_ents;	// num_tables
};

struct arm_smmu_s1_cfg {
	struct arm_smmu_ctx_desc_cfg	cdcfg;
	struct arm_smmu_ctx_desc	cd;
	u8				s1fmt;
	u8				s1cdmax;
};


I don't know whether you'd then want to move s1fmt and s1cdmax into the
cdcfg, I'll leave that up to you. Similarly if you want any functions
to operate on arm_smmu_ctx_desc_cfg in preference to arm_smmu_s1_cfg.

Thoughts?

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling
  2019-12-19 16:30 ` [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling Jean-Philippe Brucker
@ 2020-01-14 15:25   ` Will Deacon
  2020-01-15 16:17     ` Will Deacon
  2020-01-15 17:44     ` Robin Murphy
  0 siblings, 2 replies; 34+ messages in thread
From: Will Deacon @ 2020-01-14 15:25 UTC (permalink / raw)
  To: Jean-Philippe Brucker, robin.murphy
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:31PM +0100, Jean-Philippe Brucker wrote:
> Let add_device() clean up after itself. The iommu_bus_init() function
> does call remove_device() on error, but other sites (e.g. of_iommu) do
> not.
> 
> Don't free level-2 stream tables because we'd have to track if we
> allocated each of them or if they are used by other endpoints. It's not
> worth the hassle since they are managed resources.
> 
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
>  1 file changed, 21 insertions(+), 7 deletions(-)

I think this is alright, with one caveat relating to:


	/*
	 * We _can_ actually withstand dodgy bus code re-calling add_device()
	 * without an intervening remove_device()/of_xlate() sequence, but
	 * we're not going to do so quietly...
	 */
	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
		master = fwspec->iommu_priv;
		smmu = master->smmu;
	} ...


which may be on shakey ground if the subsequent add_device() call can fail
and free stuff that the first one allocated. At least, I don't know what
we're trying to support with this, so it's hard to tell whether or not it
still works as intended after your change.

How is this supposed to work? I don't recall ever seeing that WARN fire,
so can we just remove this and bail instead? Robin?

Something like below before your changes...

Will

--->8

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index effe72eb89e7..6ae3df2f3495 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2534,28 +2534,23 @@ static int arm_smmu_add_device(struct device *dev)
 
 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return -ENODEV;
-	/*
-	 * We _can_ actually withstand dodgy bus code re-calling add_device()
-	 * without an intervening remove_device()/of_xlate() sequence, but
-	 * we're not going to do so quietly...
-	 */
-	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
-		master = fwspec->iommu_priv;
-		smmu = master->smmu;
-	} else {
-		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
-		if (!smmu)
-			return -ENODEV;
-		master = kzalloc(sizeof(*master), GFP_KERNEL);
-		if (!master)
-			return -ENOMEM;
 
-		master->dev = dev;
-		master->smmu = smmu;
-		master->sids = fwspec->ids;
-		master->num_sids = fwspec->num_ids;
-		fwspec->iommu_priv = master;
-	}
+	if (WARN_ON_ONCE(fwspec->iommu_priv))
+		return -EBUSY;
+
+	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+	if (!smmu)
+		return -ENODEV;
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	master->dev = dev;
+	master->smmu = smmu;
+	master->sids = fwspec->ids;
+	master->num_sids = fwspec->num_ids;
+	fwspec->iommu_priv = master;
 
 	/* Check the SIDs are in range of the SMMU and our stream table */
 	for (i = 0; i < master->num_sids; i++) {

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3
  2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
                   ` (13 preceding siblings ...)
  2020-01-09 14:36 ` [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
@ 2020-01-14 15:40 ` Will Deacon
  14 siblings, 0 replies; 34+ messages in thread
From: Will Deacon @ 2020-01-14 15:40 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Thu, Dec 19, 2019 at 05:30:20PM +0100, Jean-Philippe Brucker wrote:
> Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
> [1], I added review and tested tags where appropriate and applied the
> suggested changes, shown in the diff below. Thanks all!
> 
> I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
> Zhangfei's uacce module [2]. The full SVA support, which I'll send out
> early next year, is available on my branch sva/zip-devel at
> https://jpbrucker.net/git/linux/
> 
> [1] https://lore.kernel.org/linux-iommu/20191209180514.272727-1-jean-philippe@linaro.org/
> [2] https://lore.kernel.org/linux-iommu/1576465697-27946-1-git-send-email-zhangfei.gao@linaro.org/
> 
> Jean-Philippe Brucker (13):
>   iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
>   dt-bindings: document PASID property for IOMMU masters
>   iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices
>   ACPI/IORT: Parse SSID property of named component node
>   iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
>   iommu/arm-smmu-v3: Add context descriptor tables allocators
>   iommu/arm-smmu-v3: Add support for Substream IDs
>   iommu/arm-smmu-v3: Propagate ssid_bits
>   iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc()
>     failure
>   iommu/arm-smmu-v3: Add second level of context descriptor table
>   iommu/arm-smmu-v3: Improve add_device() error handling
>   PCI/ATS: Add PASID stubs
>   iommu/arm-smmu-v3: Add support for PCI PASID
> 
>  .../devicetree/bindings/iommu/iommu.txt       |   6 +
>  drivers/acpi/arm64/iort.c                     |  18 +
>  drivers/iommu/arm-smmu-v3.c                   | 467 +++++++++++++++---
>  drivers/iommu/of_iommu.c                      |   6 +-
>  include/linux/iommu.h                         |   2 +
>  include/linux/pci-ats.h                       |   3 +
>  6 files changed, 442 insertions(+), 60 deletions(-)

This is close, and I've replied to all of the patches I have comments on.
To summarise:

  1-5	I could queue these now
  6	I can make the small change we discussed
  7	I can make the changes if you agree (but I'd prefer you to change to
  	batch submission since I can't test this)
  8	Good to go once above is solved
  9	Need your opinion
  10	Some refactoring needed (sorry)
  11	Needs Robin's input
  12	Good to go once above is solved
  13	Need clarification on PCIe behaviour from you

In other words, I could probably take the first 8 or 9 patches for 5.6 if
you can resolve those issues asap.

Cheers,

Will

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs
  2020-01-14 12:38   ` Will Deacon
@ 2020-01-14 16:30     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-14 16:30 UTC (permalink / raw)
  To: Will Deacon
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Tue, Jan 14, 2020 at 12:38:19PM +0000, Will Deacon wrote:
> > +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> > +			     int ssid, bool leaf)
> > +{
> > +	size_t i;
> > +	unsigned long flags;
> > +	struct arm_smmu_master *master;
> > +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +	struct arm_smmu_cmdq_ent cmd = {
> > +		.opcode	= CMDQ_OP_CFGI_CD,
> > +		.cfgi	= {
> > +			.ssid	= ssid,
> > +			.leaf	= leaf,
> > +		},
> > +	};
> > +
> > +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> > +	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> > +		for (i = 0; i < master->num_sids; i++) {
> > +			cmd.cfgi.sid = master->sids[i];
> > +			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> > +		}
> > +	}
> > +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> > +
> > +	arm_smmu_cmdq_issue_sync(smmu);
> 
> Can you send a follow-up patch converting this to batch submission, please?

Ok

> > +}
> > +
> >  static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
> >  					struct arm_smmu_cd_table *table,
> >  					size_t num_entries)
> > @@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
> >  	return val;
> >  }
> >  
> > -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
> > -				    struct arm_smmu_s1_cfg *cfg)
> > +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
> > +				   int ssid, struct arm_smmu_ctx_desc *cd)
> >  {
> > -	u64 val;
> > -	__le64 *cdptr = cfg->table.ptr;
> > -
> >  	/*
> > -	 * We don't need to issue any invalidation here, as we'll invalidate
> > -	 * the STE when installing the new entry anyway.
> > +	 * This function handles the following cases:
> > +	 *
> > +	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
> > +	 * (2) Install a secondary CD, for SID+SSID traffic.
> > +	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
> > +	 *     CD, then invalidate the old entry and mappings.
> > +	 * (4) Remove a secondary CD.
> >  	 */
> > -	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
> > -#ifdef __BIG_ENDIAN
> > -	      CTXDESC_CD_0_ENDI |
> > -#endif
> > -	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
> > -	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
> > -	      CTXDESC_CD_0_V;
> > +	u64 val;
> > +	bool cd_live;
> > +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
> > +			CTXDESC_CD_DWORDS;
> >  
> > -	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> > -	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> > -		val |= CTXDESC_CD_0_S;
> > +	val = le64_to_cpu(cdptr[0]);
> > +	cd_live = !!(val & CTXDESC_CD_0_V);
> >  
> > -	cdptr[0] = cpu_to_le64(val);
> > +	if (!cd) { /* (4) */
> > +		val = 0;
> > +	} else if (cd_live) { /* (3) */
> > +		val &= ~CTXDESC_CD_0_ASID;
> > +		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
> > +		/*
> > +		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
> > +		 * this substream's traffic
> > +		 */
> 
> I don't think you need to change anything here, but I do find it a little
> scary that we can modify live CDs like this. However, given that the
> hardware is permitted to cache the structures regardless of validity, it
> appears to be the only option. Terrifying!
> 
> > +	} else { /* (1) and (2) */
> > +		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
> 
> Can you use FIELD_PREP here too?

No, FIELD_PREP will shift ttbr left by 4 bits

> > +		cdptr[2] = 0;
> > +		cdptr[3] = cpu_to_le64(cd->mair);
> > +
> > +		/*
> > +		 * STE is live, and the SMMU might read dwords of this CD in any
> > +		 * order. Ensure that it observes valid values before reading
> > +		 * V=1.
> > +		 */
> > +		arm_smmu_sync_cd(smmu_domain, ssid, true);
> >  
> > -	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
> > -	cdptr[1] = cpu_to_le64(val);
> > +		val = arm_smmu_cpu_tcr_to_cd(cd->tcr) |
> > +#ifdef __BIG_ENDIAN
> > +			CTXDESC_CD_0_ENDI |
> > +#endif
> > +			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
> > +			CTXDESC_CD_0_AA64 |
> > +			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
> > +			CTXDESC_CD_0_V;
> >  
> > -	cdptr[3] = cpu_to_le64(cfg->cd.mair);
> > +		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> > +		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> > +			val |= CTXDESC_CD_0_S;
> > +	}
> > +
> > +	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
> 
> Can you add a comment here citing 3.21.3 ("Configuration structures and
> configuration invalidation completion") please? Specifically, the note that
> states:
> 
>   | The size of single-copy atomic reads made by the SMMU is IMPLEMENTATION
>   | DEFINED but must be at least 64 bits.
> 
> Because that's really crucial to the WRITE_ONCE() above!
> 
> Shouldn't we also do the same thing for the STE side of things? I think so,
> and you can just comment of them with the quote and cite the comment from
> the other callsite.

Yes, makes sense

Thanks,
Jean

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure
  2020-01-14 12:42   ` Will Deacon
@ 2020-01-14 16:31     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-14 16:31 UTC (permalink / raw)
  To: Will Deacon
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Tue, Jan 14, 2020 at 12:42:47PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:29PM +0100, Jean-Philippe Brucker wrote:
> > Second-level context descriptor tables will be allocated lazily in
> > arm_smmu_write_ctx_desc(). Help with handling allocation failure by
> > moving the CD write into arm_smmu_domain_finalise_s1().
> > 
> > Reviewed-by: Eric Auger <eric.auger@redhat.com>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 11 +++++++----
> >  1 file changed, 7 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index e147087198ef..b825a5639afc 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -2301,8 +2301,15 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
> >  	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> >  	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
> >  	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
> > +
> > +	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
> 
> Hmm. This ends up calling arm_smmu_sync_cd() but I think that happens before
> we've added the master to the devices list of the domain. Does that mean we
> miss the new SSID during the invalidation?

Yes, the arm_smmu_sync_cd() isn't useful in this case, it's only needed
when the STE is live and arm_smmu_write_ctx_desc() is called for a
ssid!=0. On this path, the CD cache is invalidated by a CFGI_STE executed
later, when arm_smmu_attach_dev() installs the STE. I didn't want to add a
special case that avoids the sync when ssid==0 in because a spurious sync
probably doesn't impact performance here and arm_smmu_write_ctx_desc() is
quite fiddly already.

Thanks,
Jean

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID
  2020-01-14 12:45   ` Will Deacon
@ 2020-01-15  7:57     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-15  7:57 UTC (permalink / raw)
  To: Will Deacon
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Tue, Jan 14, 2020 at 12:45:42PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:33PM +0100, Jean-Philippe Brucker wrote:
> > Enable PASID for PCI devices that support it. Since the SSID tables are
> > allocated by arm_smmu_attach_dev(), PASID has to be enabled early enough.
> > arm_smmu_dev_feature_enable() would be too late, since by that time the
> 
> What is arm_smmu_dev_feature_enable()?

It's the implementation of the IOMMU op .dev_enable_feat(), which I'll add
later (called by a device driver to enable the SVA feature). I'll reword
this comment, since the only real requirement is enabling PASID before
ATS.

> >  static void arm_smmu_detach_dev(struct arm_smmu_master *master)
> >  {
> >  	unsigned long flags;
> > @@ -2852,13 +2899,16 @@ static int arm_smmu_add_device(struct device *dev)
> >  
> >  	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
> >  
> > +	/* Note that PASID must be enabled before, and disabled after ATS */
> > +	arm_smmu_enable_pasid(master);
> 
> Is that part of the PCIe specs? If so, please can you add a citation to the
> comment?

Yes (PCIe 4.0r1.0 10.5.1.3 ATS Control register).

> Are there any other ordering requirements, i.e. with respect to enabling
> substreams at the SMMU? For example, can a speculative ATS request provide
> a PASID?

You recent fix bfff88ec1afe ("iommu/arm-smmu-v3: Rework enabling/disabling
of ATS for PCI masters") should prevent from speculative ATS requests.
More generally both ATS and SSID are enabled and disabled at the same time
in the SMMU, when toggling STE.V, so any request arriving before STE
enablement will be aborted regardless of SSID.

Thanks,
Jean


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table
  2020-01-14 15:04   ` Will Deacon
@ 2020-01-15  9:45     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 34+ messages in thread
From: Jean-Philippe Brucker @ 2020-01-15  9:45 UTC (permalink / raw)
  To: Will Deacon
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, robin.murphy, bhelgaas, eric.auger,
	jonathan.cameron, zhangfei.gao

On Tue, Jan 14, 2020 at 03:04:36PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:30PM +0100, Jean-Philippe Brucker wrote:
> > The SMMU can support up to 20 bits of SSID. Add a second level of page
> > tables to accommodate this. Devices that support more than 1024 SSIDs now
> > have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
> > descriptors (64kB), allocated on demand.
> > 
> > Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 154 +++++++++++++++++++++++++++++++++---
> >  1 file changed, 144 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index b825a5639afc..bf106a7b53eb 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -224,6 +224,7 @@
> >  
> >  #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
> >  #define STRTAB_STE_0_S1FMT_LINEAR	0
> > +#define STRTAB_STE_0_S1FMT_64K_L2	2
> >  #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
> >  #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
> >  
> > @@ -263,7 +264,20 @@
> >  
> >  #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
> >  
> > -/* Context descriptor (stage-1 only) */
> > +/*
> > + * Context descriptors.
> > + *
> > + * Linear: when less than 1024 SSIDs are supported
> > + * 2lvl: at most 1024 L1 entries,
> > + *       1024 lazy entries per table.
> > + */
> > +#define CTXDESC_SPLIT			10
> > +#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
> > +
> > +#define CTXDESC_L1_DESC_DWORDS		1
> > +#define CTXDESC_L1_DESC_VALID		1
> 
> 	#define CTXDESC_L1_DESC_V	(1UL << 0)
> 
> fits better with the rest of the driver and also ensures that the thing
> is unsigned (we should probably switch over the BIT macros, but that's a
> separate cleanup patch).
> 
> > +#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)
> > +
> >  #define CTXDESC_CD_DWORDS		8
> >  #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
> >  #define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
> > @@ -575,7 +589,12 @@ struct arm_smmu_cd_table {
> >  };
> >  
> >  struct arm_smmu_s1_cfg {
> > -	struct arm_smmu_cd_table	table;
> > +	/* Leaf tables or linear table */
> > +	struct arm_smmu_cd_table	*tables;
> > +	size_t				num_tables;
> > +	/* First level tables, when two levels are used */
> > +	__le64				*l1ptr;
> > +	dma_addr_t			l1ptr_dma;
> 
> It probably feels like a nit, but I think this is all a little hard to read
> because it differs unnecessarily from the way the stream table is handled.
> 
> Could we align the two as follows? (I've commented things with what they
> refer to in your patch):
> 
> 
> struct arm_smmu_l1_ctx_desc {				// arm_smmu_cd_table
> 	__le64				*l2ptr;		// ptr
> 	dma_addr_t			l2ptr_dma;	// ptr_dma
> };
> 
> struct arm_smmu_ctx_desc_cfg {
> 	__le64				*cdtab;		// l1ptr
> 	dma_addr_t			cdtab_dma;	// l1ptr_dma
> 	struct arm_smmu_l1_ctx_desc	*l1_desc;	// tables
> 	unsigned int			num_l1_ents;	// num_tables
> };
> 
> struct arm_smmu_s1_cfg {
> 	struct arm_smmu_ctx_desc_cfg	cdcfg;
> 	struct arm_smmu_ctx_desc	cd;
> 	u8				s1fmt;
> 	u8				s1cdmax;
> };
> 
> 
> I don't know whether you'd then want to move s1fmt and s1cdmax into the
> cdcfg, I'll leave that up to you. Similarly if you want any functions
> to operate on arm_smmu_ctx_desc_cfg in preference to arm_smmu_s1_cfg.
> 
> Thoughts?

No problem, it looks cleaner overall.

Thanks,
Jean


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling
  2020-01-14 15:25   ` Will Deacon
@ 2020-01-15 16:17     ` Will Deacon
  2020-01-15 17:44     ` Robin Murphy
  1 sibling, 0 replies; 34+ messages in thread
From: Will Deacon @ 2020-01-15 16:17 UTC (permalink / raw)
  To: Jean-Philippe Brucker, robin.murphy
  Cc: mark.rutland, devicetree, lorenzo.pieralisi, eric.auger,
	linux-pci, joro, sudeep.holla, rjw, linux-acpi, iommu, robh+dt,
	jonathan.cameron, guohanjun, bhelgaas, zhangfei.gao,
	linux-arm-kernel, lenb

On Tue, Jan 14, 2020 at 03:25:39PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:31PM +0100, Jean-Philippe Brucker wrote:
> > Let add_device() clean up after itself. The iommu_bus_init() function
> > does call remove_device() on error, but other sites (e.g. of_iommu) do
> > not.
> > 
> > Don't free level-2 stream tables because we'd have to track if we
> > allocated each of them or if they are used by other endpoints. It's not
> > worth the hassle since they are managed resources.
> > 
> > Reviewed-by: Eric Auger <eric.auger@redhat.com>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
> >  1 file changed, 21 insertions(+), 7 deletions(-)
> 
> I think this is alright, with one caveat relating to:
> 
> 
> 	/*
> 	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> 	 * without an intervening remove_device()/of_xlate() sequence, but
> 	 * we're not going to do so quietly...
> 	 */
> 	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
> 		master = fwspec->iommu_priv;
> 		smmu = master->smmu;
> 	} ...
> 
> 
> which may be on shakey ground if the subsequent add_device() call can fail
> and free stuff that the first one allocated. At least, I don't know what
> we're trying to support with this, so it's hard to tell whether or not it
> still works as intended after your change.
> 
> How is this supposed to work? I don't recall ever seeing that WARN fire,
> so can we just remove this and bail instead? Robin?
> 
> Something like below before your changes...

FWIW, I've written this as a patch locally, since I'd like to apply it
on top of v5 of your series.

Will

--->8

From 6029102f406d4db5e7a465da5fd2e08a5b12c532 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 15 Jan 2020 15:35:16 +0000
Subject: [PATCH] iommu/arm-smmu-v3: Return -EBUSY when trying to re-add a
 device

Although we WARN in arm_smmu_add_device() if the device being added has
been added already without a subsequent call to arm_smmu_remove_device(),
we still continue half-heartedly, initialising the stream-table for any
new StreamIDs that may have magically appeared and re-establishing device
links that should still be there from last time.

Given that calling ->add_device() twice without removing the device in the
meantime is indicative of an error in the caller, just return -EBUSY after
warning.

Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Jean Philippe-Brucker <jean-philippe@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm-smmu-v3.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index efa326601308..cc26e1323da3 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2841,28 +2841,23 @@ static int arm_smmu_add_device(struct device *dev)
 
 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return -ENODEV;
-	/*
-	 * We _can_ actually withstand dodgy bus code re-calling add_device()
-	 * without an intervening remove_device()/of_xlate() sequence, but
-	 * we're not going to do so quietly...
-	 */
-	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
-		master = fwspec->iommu_priv;
-		smmu = master->smmu;
-	} else {
-		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
-		if (!smmu)
-			return -ENODEV;
-		master = kzalloc(sizeof(*master), GFP_KERNEL);
-		if (!master)
-			return -ENOMEM;
 
-		master->dev = dev;
-		master->smmu = smmu;
-		master->sids = fwspec->ids;
-		master->num_sids = fwspec->num_ids;
-		fwspec->iommu_priv = master;
-	}
+	if (WARN_ON_ONCE(fwspec->iommu_priv))
+		return -EBUSY;
+
+	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+	if (!smmu)
+		return -ENODEV;
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	master->dev = dev;
+	master->smmu = smmu;
+	master->sids = fwspec->ids;
+	master->num_sids = fwspec->num_ids;
+	fwspec->iommu_priv = master;
 
 	/* Check the SIDs are in range of the SMMU and our stream table */
 	for (i = 0; i < master->num_sids; i++) {
-- 
2.25.0.rc1.283.g88dfdc4193-goog


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling
  2020-01-14 15:25   ` Will Deacon
  2020-01-15 16:17     ` Will Deacon
@ 2020-01-15 17:44     ` Robin Murphy
  1 sibling, 0 replies; 34+ messages in thread
From: Robin Murphy @ 2020-01-15 17:44 UTC (permalink / raw)
  To: Will Deacon, Jean-Philippe Brucker
  Cc: linux-pci, linux-arm-kernel, linux-acpi, devicetree, iommu, joro,
	robh+dt, mark.rutland, lorenzo.pieralisi, guohanjun,
	sudeep.holla, rjw, lenb, bhelgaas, eric.auger, jonathan.cameron,
	zhangfei.gao

On 14/01/2020 3:25 pm, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:31PM +0100, Jean-Philippe Brucker wrote:
>> Let add_device() clean up after itself. The iommu_bus_init() function
>> does call remove_device() on error, but other sites (e.g. of_iommu) do
>> not.
>>
>> Don't free level-2 stream tables because we'd have to track if we
>> allocated each of them or if they are used by other endpoints. It's not
>> worth the hassle since they are managed resources.
>>
>> Reviewed-by: Eric Auger <eric.auger@redhat.com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
>> ---
>>   drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
>>   1 file changed, 21 insertions(+), 7 deletions(-)
> 
> I think this is alright, with one caveat relating to:
> 
> 
> 	/*
> 	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> 	 * without an intervening remove_device()/of_xlate() sequence, but
> 	 * we're not going to do so quietly...
> 	 */
> 	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
> 		master = fwspec->iommu_priv;
> 		smmu = master->smmu;
> 	} ...
> 
> 
> which may be on shakey ground if the subsequent add_device() call can fail
> and free stuff that the first one allocated. At least, I don't know what
> we're trying to support with this, so it's hard to tell whether or not it
> still works as intended after your change.

Hmm, if add_device() ever did fail it should really be expected to 
return the device back to an un-added state, so I don't believe that 
particular concern should be significant regardless...
> How is this supposed to work? I don't recall ever seeing that WARN fire,
> so can we just remove this and bail instead? Robin?

However, I am inclined to agree that it's probably better to make it all 
moot. Although it indeed should never happen, ISTR at the time there 
appeared to be some possible path somewhere by which the notifier may 
have been triggered a second time - possibly if some other device failed 
or deferred after the first call, triggering the bus code to start all 
over again. Since then, though, we've made a lot of changes to how 
->add_device usually gets called, plus stuff like the 
iommu_device_link() call has snuck in that might not stand up to a 
replay anyway, so I don't see any problem with making this condition a 
hard failure. It's certainly much easier to reason about.

In fact, there will already be a WARN from iommu_probe_device() now 
(because the first call will have set the group), so I don't think we 
need any additional diagnostic in the driver any more.

Robin.

> Something like below before your changes...
> 
> Will
> 
> --->8
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index effe72eb89e7..6ae3df2f3495 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2534,28 +2534,23 @@ static int arm_smmu_add_device(struct device *dev)
>   
>   	if (!fwspec || fwspec->ops != &arm_smmu_ops)
>   		return -ENODEV;
> -	/*
> -	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> -	 * without an intervening remove_device()/of_xlate() sequence, but
> -	 * we're not going to do so quietly...
> -	 */
> -	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
> -		master = fwspec->iommu_priv;
> -		smmu = master->smmu;
> -	} else {
> -		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> -		if (!smmu)
> -			return -ENODEV;
> -		master = kzalloc(sizeof(*master), GFP_KERNEL);
> -		if (!master)
> -			return -ENOMEM;
>   
> -		master->dev = dev;
> -		master->smmu = smmu;
> -		master->sids = fwspec->ids;
> -		master->num_sids = fwspec->num_ids;
> -		fwspec->iommu_priv = master;
> -	}
> +	if (WARN_ON_ONCE(fwspec->iommu_priv))
> +		return -EBUSY;
> +
> +	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> +	if (!smmu)
> +		return -ENODEV;
> +
> +	master = kzalloc(sizeof(*master), GFP_KERNEL);
> +	if (!master)
> +		return -ENOMEM;
> +
> +	master->dev = dev;
> +	master->smmu = smmu;
> +	master->sids = fwspec->ids;
> +	master->num_sids = fwspec->num_ids;
> +	fwspec->iommu_priv = master;
>   
>   	/* Check the SIDs are in range of the SMMU and our stream table */
>   	for (i = 0; i < master->num_sids; i++) {
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2020-01-15 17:45 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 02/13] dt-bindings: document PASID property for IOMMU masters Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 03/13] iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 04/13] ACPI/IORT: Parse SSID property of named component node Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 05/13] iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators Jean-Philippe Brucker
2020-01-14 11:06   ` Will Deacon
2020-01-14 11:52     ` Jean-Philippe Brucker
2020-01-14 11:56       ` Will Deacon
2019-12-19 16:30 ` [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs Jean-Philippe Brucker
2020-01-14 12:38   ` Will Deacon
2020-01-14 16:30     ` Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 08/13] iommu/arm-smmu-v3: Propagate ssid_bits Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure Jean-Philippe Brucker
2020-01-14 12:42   ` Will Deacon
2020-01-14 16:31     ` Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
2019-12-20  7:37   ` Auger Eric
2020-01-14 15:04   ` Will Deacon
2020-01-15  9:45     ` Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling Jean-Philippe Brucker
2020-01-14 15:25   ` Will Deacon
2020-01-15 16:17     ` Will Deacon
2020-01-15 17:44     ` Robin Murphy
2019-12-19 16:30 ` [PATCH v4 12/13] PCI/ATS: Add PASID stubs Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker
2019-12-20  7:37   ` Auger Eric
2020-01-14 12:45   ` Will Deacon
2020-01-15  7:57     ` Jean-Philippe Brucker
2020-01-09 14:36 ` [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
2020-01-09 14:41   ` Will Deacon
2020-01-10  7:15     ` Jean-Philippe Brucker
2020-01-14 15:40 ` Will Deacon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).