linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
To: linux-pci@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	linux-acpi@vger.kernel.org, devicetree@vger.kernel.org,
	iommu@lists.linux-foundation.org
Cc: joro@8bytes.org, robh+dt@kernel.org, mark.rutland@arm.com,
	lorenzo.pieralisi@arm.com, guohanjun@huawei.com,
	sudeep.holla@arm.com, rjw@rjwysocki.net, lenb@kernel.org,
	will@kernel.org, robin.murphy@arm.com, bhelgaas@google.com,
	eric.auger@redhat.com, jonathan.cameron@huawei.com,
	zhangfei.gao@linaro.org
Subject: [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs
Date: Thu, 19 Dec 2019 17:30:27 +0100	[thread overview]
Message-ID: <20191219163033.2608177-8-jean-philippe@linaro.org> (raw)
In-Reply-To: <20191219163033.2608177-1-jean-philippe@linaro.org>

At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
page directory per device. However SMMUv3 allows more than one address
space for some devices, by providing multiple stage-1 page directories. In
addition to the Stream ID (SID), that identifies a device, we can now have
Substream IDs (SSID) identifying an address space. In PCIe, SID is called
Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
A complete stage-1 walk goes through the context descriptor table:

      Stream tables       Ctx. Desc. tables       Page tables
        +--------+   ,------->+-------+   ,------->+-------+
        :        :   |        :       :   |        :       :
        +--------+   |        +-------+   |        +-------+
   SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
        +--------+            +-------+            +-------+
        :        :            :       :            :       :
        +--------+            +-------+            +-------+

Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table
entries. To keep things simple we only implement one level of context
descriptor tables here, but as with stream and page tables, an SSID can
be split to index multiple levels of tables.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 125 +++++++++++++++++++++++++++++-------
 1 file changed, 102 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 43d6a7ded6e4..e1bec7e552b9 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -227,6 +227,11 @@
 #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
 #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
 
+#define STRTAB_STE_1_S1DSS		GENMASK_ULL(1, 0)
+#define STRTAB_STE_1_S1DSS_TERMINATE	0x0
+#define STRTAB_STE_1_S1DSS_BYPASS	0x1
+#define STRTAB_STE_1_S1DSS_SSID0	0x2
+
 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
@@ -329,6 +334,7 @@
 #define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
 #define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
 
+#define CMDQ_CFGI_0_SSID		GENMASK_ULL(31, 12)
 #define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
 #define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
@@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent {
 
 		#define CMDQ_OP_CFGI_STE	0x3
 		#define CMDQ_OP_CFGI_ALL	0x4
+		#define CMDQ_OP_CFGI_CD		0x5
+		#define CMDQ_OP_CFGI_CD_ALL	0x6
 		struct {
 			u32			sid;
+			u32			ssid;
 			union {
 				bool		leaf;
 				u8		span;
@@ -568,6 +577,7 @@ struct arm_smmu_cd_table {
 struct arm_smmu_s1_cfg {
 	struct arm_smmu_cd_table	table;
 	struct arm_smmu_ctx_desc	cd;
+	u8				s1fmt;
 	u8				s1cdmax;
 };
 
@@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
 		break;
+	case CMDQ_OP_CFGI_CD:
+		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
+		/* Fallthrough */
 	case CMDQ_OP_CFGI_STE:
 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 		break;
+	case CMDQ_OP_CFGI_CD_ALL:
+		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+		break;
 	case CMDQ_OP_CFGI_ALL:
 		/* Cover the entire SID range */
 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
@@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 }
 
 /* Context descriptor manipulation functions */
+static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+			     int ssid, bool leaf)
+{
+	size_t i;
+	unsigned long flags;
+	struct arm_smmu_master *master;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cmdq_ent cmd = {
+		.opcode	= CMDQ_OP_CFGI_CD,
+		.cfgi	= {
+			.ssid	= ssid,
+			.leaf	= leaf,
+		},
+	};
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+		for (i = 0; i < master->num_sids; i++) {
+			cmd.cfgi.sid = master->sids[i];
+			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+		}
+	}
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+	arm_smmu_cmdq_issue_sync(smmu);
+}
+
 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
 					struct arm_smmu_cd_table *table,
 					size_t num_entries)
@@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 	return val;
 }
 
-static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
-				    struct arm_smmu_s1_cfg *cfg)
+static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
+				   int ssid, struct arm_smmu_ctx_desc *cd)
 {
-	u64 val;
-	__le64 *cdptr = cfg->table.ptr;
-
 	/*
-	 * We don't need to issue any invalidation here, as we'll invalidate
-	 * the STE when installing the new entry anyway.
+	 * This function handles the following cases:
+	 *
+	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
+	 * (2) Install a secondary CD, for SID+SSID traffic.
+	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
+	 *     CD, then invalidate the old entry and mappings.
+	 * (4) Remove a secondary CD.
 	 */
-	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
-#ifdef __BIG_ENDIAN
-	      CTXDESC_CD_0_ENDI |
-#endif
-	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
-	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
-	      CTXDESC_CD_0_V;
+	u64 val;
+	bool cd_live;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
+			CTXDESC_CD_DWORDS;
 
-	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
-	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
-		val |= CTXDESC_CD_0_S;
+	val = le64_to_cpu(cdptr[0]);
+	cd_live = !!(val & CTXDESC_CD_0_V);
 
-	cdptr[0] = cpu_to_le64(val);
+	if (!cd) { /* (4) */
+		val = 0;
+	} else if (cd_live) { /* (3) */
+		val &= ~CTXDESC_CD_0_ASID;
+		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
+		/*
+		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
+		 * this substream's traffic
+		 */
+	} else { /* (1) and (2) */
+		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
+		cdptr[2] = 0;
+		cdptr[3] = cpu_to_le64(cd->mair);
+
+		/*
+		 * STE is live, and the SMMU might read dwords of this CD in any
+		 * order. Ensure that it observes valid values before reading
+		 * V=1.
+		 */
+		arm_smmu_sync_cd(smmu_domain, ssid, true);
 
-	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
-	cdptr[1] = cpu_to_le64(val);
+		val = arm_smmu_cpu_tcr_to_cd(cd->tcr) |
+#ifdef __BIG_ENDIAN
+			CTXDESC_CD_0_ENDI |
+#endif
+			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
+			CTXDESC_CD_0_AA64 |
+			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
+			CTXDESC_CD_0_V;
 
-	cdptr[3] = cpu_to_le64(cfg->cd.mair);
+		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
+		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+			val |= CTXDESC_CD_0_S;
+	}
+
+	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
+	arm_smmu_sync_cd(smmu_domain, ssid, true);
+	return 0;
 }
 
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
@@ -1533,6 +1607,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
+	cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
 	return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table,
 					    1 << cfg->s1cdmax);
 }
@@ -1664,6 +1739,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	if (s1_cfg) {
 		BUG_ON(ste_live);
 		dst[1] = cpu_to_le64(
+			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
@@ -1674,7 +1750,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
 		val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
-			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
+			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
+			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
 	}
 
 	if (s2_cfg) {
@@ -2479,7 +2557,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		master->ats_enabled = arm_smmu_ats_supported(master);
 
 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
-		arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
+		arm_smmu_write_ctx_desc(smmu_domain, 0,
+					&smmu_domain->s1_cfg.cd);
 
 	arm_smmu_install_ste_for_dev(master);
 
-- 
2.24.1


  parent reply	other threads:[~2019-12-19 16:31 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-19 16:30 [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 02/13] dt-bindings: document PASID property for IOMMU masters Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 03/13] iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 04/13] ACPI/IORT: Parse SSID property of named component node Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 05/13] iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 06/13] iommu/arm-smmu-v3: Add context descriptor tables allocators Jean-Philippe Brucker
2020-01-14 11:06   ` Will Deacon
2020-01-14 11:52     ` Jean-Philippe Brucker
2020-01-14 11:56       ` Will Deacon
2019-12-19 16:30 ` Jean-Philippe Brucker [this message]
2020-01-14 12:38   ` [PATCH v4 07/13] iommu/arm-smmu-v3: Add support for Substream IDs Will Deacon
2020-01-14 16:30     ` Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 08/13] iommu/arm-smmu-v3: Propagate ssid_bits Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 09/13] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure Jean-Philippe Brucker
2020-01-14 12:42   ` Will Deacon
2020-01-14 16:31     ` Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 10/13] iommu/arm-smmu-v3: Add second level of context descriptor table Jean-Philippe Brucker
2019-12-20  7:37   ` Auger Eric
2020-01-14 15:04   ` Will Deacon
2020-01-15  9:45     ` Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 11/13] iommu/arm-smmu-v3: Improve add_device() error handling Jean-Philippe Brucker
2020-01-14 15:25   ` Will Deacon
2020-01-15 16:17     ` Will Deacon
2020-01-15 17:44     ` Robin Murphy
2019-12-19 16:30 ` [PATCH v4 12/13] PCI/ATS: Add PASID stubs Jean-Philippe Brucker
2019-12-19 16:30 ` [PATCH v4 13/13] iommu/arm-smmu-v3: Add support for PCI PASID Jean-Philippe Brucker
2019-12-20  7:37   ` Auger Eric
2020-01-14 12:45   ` Will Deacon
2020-01-15  7:57     ` Jean-Philippe Brucker
2020-01-09 14:36 ` [PATCH v4 00/13] iommu: Add PASID support to Arm SMMUv3 Jean-Philippe Brucker
2020-01-09 14:41   ` Will Deacon
2020-01-10  7:15     ` Jean-Philippe Brucker
2020-01-14 15:40 ` Will Deacon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191219163033.2608177-8-jean-philippe@linaro.org \
    --to=jean-philippe@linaro.org \
    --cc=bhelgaas@google.com \
    --cc=devicetree@vger.kernel.org \
    --cc=eric.auger@redhat.com \
    --cc=guohanjun@huawei.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jonathan.cameron@huawei.com \
    --cc=joro@8bytes.org \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lorenzo.pieralisi@arm.com \
    --cc=mark.rutland@arm.com \
    --cc=rjw@rjwysocki.net \
    --cc=robh+dt@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=sudeep.holla@arm.com \
    --cc=will@kernel.org \
    --cc=zhangfei.gao@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).