iommu.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Will Deacon <will@kernel.org>
To: iommu@lists.linux-foundation.org
Cc: Vijay Kilary <vkilari@codeaurora.org>,
	Jean-Philippe Brucker <jean-philippe.brucker@arm.com>,
	Will Deacon <will@kernel.org>, Jan Glauber <jglauber@marvell.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Jayachandran Chandrasekharan Nair <jnair@marvell.com>,
	Jon Masters <jcm@redhat.com>, Robin Murphy <robin.murphy@arm.com>
Subject: [RFC PATCH v2 19/19] iommu/arm-smmu-v3: Defer TLB invalidation until ->iotlb_sync()
Date: Thu, 11 Jul 2019 18:19:27 +0100	[thread overview]
Message-ID: <20190711171927.28803-20-will@kernel.org> (raw)
In-Reply-To: <20190711171927.28803-1-will@kernel.org>

Update the iommu_iotlb_gather structure passed to ->tlb_add_page() and
use this information to defer all TLB invalidation until ->iotlb_sync().
This drastically reduces contention on the command queue, since we can
insert our commands in batches rather than one-by-one.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm-smmu-v3.c | 71 +++++++++++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5ee2c6389df3..b71ab839db3d 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -303,6 +303,13 @@
 
 #define CMDQ_PROD_OWNED_FLAG		Q_OVERFLOW_FLAG
 
+/*
+ * This is used to size the command queue and therefore must be at least
+ * BITS_PER_LONG so that the valid_map works correctly (it relies on the
+ * total number of queue entries being a multiple of BITS_PER_LONG).
+ */
+#define CMDQ_BATCH_ENTRIES		BITS_PER_LONG
+
 #define CMDQ_0_OP			GENMASK_ULL(7, 0)
 #define CMDQ_0_SSV			(1UL << 11)
 
@@ -1930,15 +1937,17 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 	arm_smmu_cmdq_issue_sync(smmu);
 }
 
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-					  size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
+				   size_t granule, bool leaf,
+				   struct arm_smmu_domain *smmu_domain)
 {
-	struct arm_smmu_domain *smmu_domain = cookie;
+	u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	unsigned long end = iova + size;
+	int i = 0;
 	struct arm_smmu_cmdq_ent cmd = {
 		.tlbi = {
 			.leaf	= leaf,
-			.addr	= iova,
 		},
 	};
 
@@ -1950,37 +1959,41 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
 	}
 
-	do {
-		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-		cmd.tlbi.addr += granule;
-	} while (size -= granule);
+	while (iova < end) {
+		if (i == CMDQ_BATCH_ENTRIES) {
+			arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
+			i = 0;
+		}
+
+		cmd.tlbi.addr = iova;
+		arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
+		iova += granule;
+		i++;
+	}
+
+	arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
 }
 
 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
 					 unsigned long iova, size_t granule,
 					 void *cookie)
 {
-	arm_smmu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
+	struct arm_smmu_domain *smmu_domain = cookie;
+	struct iommu_domain *domain = &smmu_domain->domain;
+
+	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
 }
 
 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
 				  size_t granule, void *cookie)
 {
-	struct arm_smmu_domain *smmu_domain = cookie;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-
-	arm_smmu_tlb_inv_range_nosync(iova, size, granule, false, cookie);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
 }
 
 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
 				  size_t granule, void *cookie)
 {
-	struct arm_smmu_domain *smmu_domain = cookie;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-
-	arm_smmu_tlb_inv_range_nosync(iova, size, granule, true, cookie);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
 }
 
 static const struct iommu_flush_ops arm_smmu_flush_ops = {
@@ -2391,10 +2404,10 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
 				struct iommu_iotlb_gather *gather)
 {
-	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (smmu)
-		arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
+			       gather->pgsize, true, smmu_domain);
 }
 
 static phys_addr_t
@@ -3321,15 +3334,15 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	/* Queue sizes, capped to ensure natural alignment */
 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
 					     FIELD_GET(IDR1_CMDQS, reg));
-	if (smmu->cmdq.q.llq.max_n_shift < ilog2(BITS_PER_LONG)) {
+	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
 		/*
-		 * The cmdq valid_map relies on the total number of entries
-		 * being a multiple of BITS_PER_LONG. There's also no way
-		 * we can handle the weird alignment restrictions on the
-		 * base pointer for a unit-length queue.
+		 * We don't support splitting up batches, so one batch of
+		 * commands plus an extra sync needs to fit inside the command
+		 * queue. There's also no way we can handle the weird alignment
+		 * restrictions on the base pointer for a unit-length queue.
 		 */
-		dev_err(smmu->dev, "command queue size < %d entries not supported\n",
-			BITS_PER_LONG);
+		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
+			CMDQ_BATCH_ENTRIES);
 		return -ENXIO;
 	}
 
-- 
2.11.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  parent reply	other threads:[~2019-07-11 17:30 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-11 17:19 [RFC PATCH v2 00/19] Try to reduce lock contention on the SMMUv3 command queue Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 01/19] iommu: Remove empty iommu_tlb_range_add() callback from iommu_ops Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 02/19] iommu/io-pgtable-arm: Remove redundant call to io_pgtable_tlb_sync() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 03/19] iommu/io-pgtable: Rename iommu_gather_ops to iommu_flush_ops Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 04/19] iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes Will Deacon
2019-07-24  7:19   ` Joerg Roedel
2019-07-24  7:41     ` Will Deacon
2019-07-25  7:58       ` Joerg Roedel
2019-07-11 17:19 ` [RFC PATCH v2 05/19] iommu: Introduce iommu_iotlb_gather_add_page() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 06/19] iommu: Pass struct iommu_iotlb_gather to ->unmap() and ->iotlb_sync() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 07/19] iommu/io-pgtable: Introduce tlb_flush_walk() and tlb_flush_leaf() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 08/19] iommu/io-pgtable: Hook up ->tlb_flush_walk() and ->tlb_flush_leaf() in drivers Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 09/19] iommu/io-pgtable-arm: Call ->tlb_flush_walk() and ->tlb_flush_leaf() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 10/19] iommu/io-pgtable: Replace ->tlb_add_flush() with ->tlb_add_page() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 11/19] iommu/io-pgtable: Remove unused ->tlb_sync() callback Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 12/19] iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->unmap() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 13/19] iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->tlb_add_page() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 14/19] iommu/arm-smmu-v3: Separate s/w and h/w views of prod and cons indexes Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 15/19] iommu/arm-smmu-v3: Drop unused 'q' argument from Q_OVF macro Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 16/19] iommu/arm-smmu-v3: Move low-level queue fields out of arm_smmu_queue Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 17/19] iommu/arm-smmu-v3: Operate directly on low-level queue where possible Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 18/19] iommu/arm-smmu-v3: Reduce contention during command-queue insertion Will Deacon
2019-07-19 11:04   ` John Garry
2019-07-24 12:15     ` Will Deacon
2019-07-24 14:03       ` John Garry
2019-07-24 14:07         ` Will Deacon
2019-07-24  8:20   ` John Garry
2019-07-24 14:33     ` Will Deacon
2019-07-25 11:31       ` John Garry
2019-07-11 17:19 ` Will Deacon [this message]
2019-07-19  4:25 ` [RFC PATCH v2 00/19] Try to reduce lock contention on the SMMUv3 command queue Ganapatrao Kulkarni
2019-07-24 12:28   ` Will Deacon
2019-07-24  9:58 ` John Garry
2019-07-24 12:20   ` Will Deacon
2019-07-24 14:25     ` John Garry
2019-07-24 14:48       ` Will Deacon
2019-07-25 10:11         ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190711171927.28803-20-will@kernel.org \
    --to=will@kernel.org \
    --cc=alex.williamson@redhat.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jcm@redhat.com \
    --cc=jean-philippe.brucker@arm.com \
    --cc=jglauber@marvell.com \
    --cc=jnair@marvell.com \
    --cc=robin.murphy@arm.com \
    --cc=vkilari@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).