* [PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync
@ 2021-07-02  5:37 ` David Stevens
  0 siblings, 0 replies; 10+ messages in thread
From: David Stevens @ 2021-07-02  5:37 UTC (permalink / raw)
  To: Joerg Roedel, Will Deacon
  Cc: Lu Baolu, Tom Murphy, iommu, linux-kernel, David Stevens

From: David Stevens <stevensd@chromium.org>

Make map_swiotlb and unmap_swiotlb perform only the actual mapping and
unmapping, and consistently use the sync_single_for and sync_sg_for
functions for swiotlb sync and arch
sync. This ensures that the same code path is responsible for syncing
regardless of whether or not SKIP_CPU_SYNC is set. In the process, fix
various places where the original physical address and swiotlb tlb_addr
are mixed up:
  - Make sync_sg functions call sync_single functions for untrusted
    devices, so they use tlb_addr when checking is_swiotlb_buffer and
    when doing arch sync if necessary.
  - Use tlb_addr for arch sync in map_page if necessary.
  - In map_sg, map before syncing so that arch sync can target the
    bounce buffer if necessary.
  - Pass SKIP_CPU_SYNC to swiotlb map and unmap to avoid double syncing
    the swiotlb. This had previously only happened in the unmap_page
    case, but is now necessary for all swiotlb cases.
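
Condensed from the diff below, the net effect on the map_page path is
(sketch only; declarations, error paths and surrounding context are
trimmed):

	/* Before: arch sync always targets the original phys, even when
	 * the buffer was bounced into a swiotlb slot. */
	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
			coherent, dir, attrs);
	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    dma_handle != DMA_MAPPING_ERROR)
		arch_sync_dma_for_device(phys, size, dir);

	/* After: adj_phys carries the possibly-bounced address, and one
	 * shared helper performs the swiotlb and/or arch sync exactly once. */
	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size,
			dma_get_mask(dev), coherent, dir, attrs, &adj_phys);
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    dma_handle != DMA_MAPPING_ERROR)
		__iommu_dma_sync_single_for_device(dev, dma_handle, size,
						   dir, adj_phys);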

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens <stevensd@chromium.org>
---
 drivers/iommu/dma-iommu.c | 82 ++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 31 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7bcdd1205535..24d1042cd052 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
 	__iommu_dma_unmap(dev, dma_addr, size);
 
 	if (unlikely(is_swiotlb_buffer(phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+		swiotlb_tbl_unmap_single(dev, phys, size, dir,
+					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -536,7 +537,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 
 static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 		size_t org_size, dma_addr_t dma_mask, bool coherent,
-		enum dma_data_direction dir, unsigned long attrs)
+		enum dma_data_direction dir, unsigned long attrs,
+		phys_addr_t *adj_phys)
 {
 	int prot = dma_info_to_prot(dir, coherent, attrs);
 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -555,7 +557,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 	    iova_offset(iovad, phys | org_size)) {
 		aligned_size = iova_align(iovad, org_size);
 		phys = swiotlb_tbl_map_single(dev, phys, org_size,
-					      aligned_size, dir, attrs);
+					      aligned_size, dir,
+					      attrs | DMA_ATTR_SKIP_CPU_SYNC);
 
 		if (phys == DMA_MAPPING_ERROR)
 			return DMA_MAPPING_ERROR;
@@ -573,6 +576,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 
 		memset(padding_start, 0, padding_size);
 	}
+	if (adj_phys)
+		*adj_phys = phys;
 
 	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
 	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
@@ -785,15 +790,17 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
-static void iommu_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
+static void __iommu_dma_sync_single_for_device(struct device *dev,
+		dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction dir, phys_addr_t phys)
 {
-	phys_addr_t phys;
-
 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 		return;
 
-	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+	if (phys == 0)
+		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
+					  dma_handle);
+
 	if (is_swiotlb_buffer(phys))
 		swiotlb_sync_single_for_device(dev, phys, size, dir);
 
@@ -801,6 +808,12 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 		arch_sync_dma_for_device(phys, size, dir);
 }
 
+static void iommu_dma_sync_single_for_device(struct device *dev,
+		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
+{
+	__iommu_dma_sync_single_for_device(dev, dma_handle, size, dir, 0);
+}
+
 static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 		struct scatterlist *sgl, int nelems,
 		enum dma_data_direction dir)
@@ -811,14 +824,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 		return;
 
-	for_each_sg(sgl, sg, nelems, i) {
-		if (!dev_is_dma_coherent(dev))
+	if (dev_is_untrusted(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+						      sg->length, dir);
+	else
+		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-		if (is_swiotlb_buffer(sg_phys(sg)))
-			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-						    sg->length, dir);
-	}
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -831,29 +843,30 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
 		return;
 
-	for_each_sg(sgl, sg, nelems, i) {
-		if (is_swiotlb_buffer(sg_phys(sg)))
-			swiotlb_sync_single_for_device(dev, sg_phys(sg),
-						       sg->length, dir);
-
-		if (!dev_is_dma_coherent(dev))
+	if (dev_is_untrusted(dev))
+		for_each_sg(sgl, sg, nelems, i)
+			__iommu_dma_sync_single_for_device(dev,
+							   sg_dma_address(sg),
+							   sg->length, dir, 0);
+	else
+		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-	}
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t phys = page_to_phys(page) + offset, adj_phys;
 	bool coherent = dev_is_dma_coherent(dev);
 	dma_addr_t dma_handle;
 
-	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
-			coherent, dir, attrs);
-	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size,
+			dma_get_mask(dev), coherent, dir, attrs, &adj_phys);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    dma_handle != DMA_MAPPING_ERROR)
-		arch_sync_dma_for_device(phys, size, dir);
+		__iommu_dma_sync_single_for_device(dev, dma_handle, size,
+						   dir, adj_phys);
 	return dma_handle;
 }
 
@@ -960,7 +973,7 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 	for_each_sg(sg, s, nents, i) {
 		sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
 				s->length, dma_get_mask(dev),
-				dev_is_dma_coherent(dev), dir, attrs);
+				dev_is_dma_coherent(dev), dir, attrs, NULL);
 		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
 			goto out_unmap;
 		sg_dma_len(s) = s->length;
@@ -991,17 +1004,24 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	dma_addr_t iova;
 	size_t iova_len = 0;
 	unsigned long mask = dma_get_seg_boundary(dev);
-	int i;
+	int i, early_mapped = 0;
 
 	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
 	    iommu_deferred_attach(dev, domain))
 		return 0;
 
+	if (dev_is_untrusted(dev)) {
+		early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
+							dir, attrs);
+		if (!early_mapped)
+			return 0;
+	}
+
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
 
-	if (dev_is_untrusted(dev))
-		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
+	if (early_mapped)
+		return early_mapped;
 
 	/*
 	 * Work out how much IOVA space we need, and align the segments to
-- 
2.32.0.93.g670b81a890-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] dma-iommu: Check CONFIG_SWIOTLB more broadly
  2021-07-02  5:37 ` David Stevens
@ 2021-07-02  5:37   ` David Stevens
  -1 siblings, 0 replies; 10+ messages in thread
From: David Stevens @ 2021-07-02  5:37 UTC (permalink / raw)
  To: Joerg Roedel, Will Deacon
  Cc: Lu Baolu, Tom Murphy, iommu, linux-kernel, David Stevens

From: David Stevens <stevensd@chromium.org>

Add a check for CONFIG_SWIOTLB to dev_is_untrusted(), so that
swiotlb-related code can be removed more aggressively.
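
For illustration: IS_ENABLED(CONFIG_SWIOTLB) is a compile-time constant,
so with CONFIG_SWIOTLB=n the helper (renamed dev_use_swiotlb in the diff
below) collapses to "return false" and the branches that test it become
dead code the compiler can drop (sketch condensed from the diff):

	static bool dev_use_swiotlb(struct device *dev)
	{
		/* Constant false when CONFIG_SWIOTLB is not set. */
		return IS_ENABLED(CONFIG_SWIOTLB) &&
		       dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
	}

	...
	if (dev_use_swiotlb(dev)) {
		/* Whole branch eliminated at compile time when SWIOTLB=n. */
		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
		return;
	}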

Signed-off-by: David Stevens <stevensd@chromium.org>
---
 drivers/iommu/dma-iommu.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 24d1042cd052..614f0dd86b08 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -310,9 +310,10 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
 	domain->ops->flush_iotlb_all(domain);
 }
 
-static bool dev_is_untrusted(struct device *dev)
+static bool dev_use_swiotlb(struct device *dev)
 {
-	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+	return IS_ENABLED(CONFIG_SWIOTLB) &&
+	       dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
 /**
@@ -368,7 +369,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
 
-	if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
+	if (!cookie->fq_domain && (!dev || !dev_use_swiotlb(dev)) &&
 	    domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) {
 		if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
 					  iommu_dma_entry_dtor))
@@ -553,8 +554,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 	 * If both the physical buffer start address and size are
 	 * page aligned, we don't need to use a bounce page.
 	 */
-	if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-	    iova_offset(iovad, phys | org_size)) {
+	if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | org_size)) {
 		aligned_size = iova_align(iovad, org_size);
 		phys = swiotlb_tbl_map_single(dev, phys, org_size,
 					      aligned_size, dir,
@@ -779,7 +779,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 {
 	phys_addr_t phys;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -794,7 +794,7 @@ static void __iommu_dma_sync_single_for_device(struct device *dev,
 		dma_addr_t dma_handle, size_t size,
 		enum dma_data_direction dir, phys_addr_t phys)
 {
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 		return;
 
 	if (phys == 0)
@@ -821,10 +821,10 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 	struct scatterlist *sg;
 	int i;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 		return;
 
-	if (dev_is_untrusted(dev))
+	if (dev_use_swiotlb(dev))
 		for_each_sg(sgl, sg, nelems, i)
 			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
 						      sg->length, dir);
@@ -840,10 +840,10 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 	struct scatterlist *sg;
 	int i;
 
-	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
 		return;
 
-	if (dev_is_untrusted(dev))
+	if (dev_use_swiotlb(dev))
 		for_each_sg(sgl, sg, nelems, i)
 			__iommu_dma_sync_single_for_device(dev,
 							   sg_dma_address(sg),
@@ -1010,7 +1010,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	    iommu_deferred_attach(dev, domain))
 		return 0;
 
-	if (dev_is_untrusted(dev)) {
+	if (dev_use_swiotlb(dev)) {
 		early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
 							dir, attrs);
 		if (!early_mapped)
@@ -1092,7 +1092,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
 
-	if (dev_is_untrusted(dev)) {
+	if (dev_use_swiotlb(dev)) {
 		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
 		return;
 	}
-- 
2.32.0.93.g670b81a890-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync
  2021-07-02  5:37 ` David Stevens
@ 2021-07-08  9:17   ` Joerg Roedel
  -1 siblings, 0 replies; 10+ messages in thread
From: Joerg Roedel @ 2021-07-08  9:17 UTC (permalink / raw)
  To: David Stevens, Robin Murphy
  Cc: Will Deacon, Lu Baolu, Tom Murphy, iommu, linux-kernel

Adding Robin.

On Fri, Jul 02, 2021 at 02:37:41PM +0900, David Stevens wrote:
> From: David Stevens <stevensd@chromium.org>
> 
> Make map_swiotlb and unmap_swiotlb perform only the actual mapping and
> unmapping, and consistently use the sync_single_for and sync_sg_for
> functions for swiotlb sync and arch
> sync. This ensures that the same code path is responsible for syncing
> regardless of whether or not SKIP_CPU_SYNC is set. In the process, fix
> various places where the original physical address and swiotlb tlb_addr
> are mixed up:
>   - Make sync_sg functions call sync_single functions for untrusted
>     devices, so they use tlb_addr when checking is_swiotlb_buffer and
>     when doing arch sync if necessary.
>   - Use tlb_addr for arch sync in map_page if necessary.
>   - In map_sg, map before syncing so that arch sync can target the
>     bounce buffer if necessary.
>   - Pass SKIP_CPU_SYNC to swiotlb map and unmap to avoid double syncing
>     the swiotlb. This had previously only happened in the unmap_page
>     case, but is now necessary for all swiotlb cases.
> 
> Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
> Signed-off-by: David Stevens <stevensd@chromium.org>
> ---
>  drivers/iommu/dma-iommu.c | 82 ++++++++++++++++++++++++---------------
>  1 file changed, 51 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 7bcdd1205535..24d1042cd052 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
>  	__iommu_dma_unmap(dev, dma_addr, size);
>  
>  	if (unlikely(is_swiotlb_buffer(phys)))
> -		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
> +		swiotlb_tbl_unmap_single(dev, phys, size, dir,
> +					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
>  }
>  
>  static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
> @@ -536,7 +537,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
>  
>  static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>  		size_t org_size, dma_addr_t dma_mask, bool coherent,
> -		enum dma_data_direction dir, unsigned long attrs)
> +		enum dma_data_direction dir, unsigned long attrs,
> +		phys_addr_t *adj_phys)
>  {
>  	int prot = dma_info_to_prot(dir, coherent, attrs);
>  	struct iommu_domain *domain = iommu_get_dma_domain(dev);
> @@ -555,7 +557,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>  	    iova_offset(iovad, phys | org_size)) {
>  		aligned_size = iova_align(iovad, org_size);
>  		phys = swiotlb_tbl_map_single(dev, phys, org_size,
> -					      aligned_size, dir, attrs);
> +					      aligned_size, dir,
> +					      attrs | DMA_ATTR_SKIP_CPU_SYNC);
>  
>  		if (phys == DMA_MAPPING_ERROR)
>  			return DMA_MAPPING_ERROR;
> @@ -573,6 +576,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>  
>  		memset(padding_start, 0, padding_size);
>  	}
> +	if (adj_phys)
> +		*adj_phys = phys;
>  
>  	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
>  	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
> @@ -785,15 +790,17 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
>  		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
>  }
>  
> -static void iommu_dma_sync_single_for_device(struct device *dev,
> -		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
> +static void __iommu_dma_sync_single_for_device(struct device *dev,
> +		dma_addr_t dma_handle, size_t size,
> +		enum dma_data_direction dir, phys_addr_t phys)
>  {
> -	phys_addr_t phys;
> -
>  	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>  		return;
>  
> -	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
> +	if (phys == 0)
> +		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
> +					  dma_handle);
> +
>  	if (is_swiotlb_buffer(phys))
>  		swiotlb_sync_single_for_device(dev, phys, size, dir);
>  
> @@ -801,6 +808,12 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
>  		arch_sync_dma_for_device(phys, size, dir);
>  }
>  
> +static void iommu_dma_sync_single_for_device(struct device *dev,
> +		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
> +{
> +	__iommu_dma_sync_single_for_device(dev, dma_handle, size, dir, 0);
> +}
> +
>  static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>  		struct scatterlist *sgl, int nelems,
>  		enum dma_data_direction dir)
> @@ -811,14 +824,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>  	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>  		return;
>  
> -	for_each_sg(sgl, sg, nelems, i) {
> -		if (!dev_is_dma_coherent(dev))
> +	if (dev_is_untrusted(dev))
> +		for_each_sg(sgl, sg, nelems, i)
> +			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
> +						      sg->length, dir);
> +	else
> +		for_each_sg(sgl, sg, nelems, i)
>  			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
> -
> -		if (is_swiotlb_buffer(sg_phys(sg)))
> -			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
> -						    sg->length, dir);
> -	}
>  }
>  
>  static void iommu_dma_sync_sg_for_device(struct device *dev,
> @@ -831,29 +843,30 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
>  	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>  		return;
>  
> -	for_each_sg(sgl, sg, nelems, i) {
> -		if (is_swiotlb_buffer(sg_phys(sg)))
> -			swiotlb_sync_single_for_device(dev, sg_phys(sg),
> -						       sg->length, dir);
> -
> -		if (!dev_is_dma_coherent(dev))
> +	if (dev_is_untrusted(dev))
> +		for_each_sg(sgl, sg, nelems, i)
> +			__iommu_dma_sync_single_for_device(dev,
> +							   sg_dma_address(sg),
> +							   sg->length, dir, 0);
> +	else
> +		for_each_sg(sgl, sg, nelems, i)
>  			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
> -	}
>  }
>  
>  static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
>  		unsigned long offset, size_t size, enum dma_data_direction dir,
>  		unsigned long attrs)
>  {
> -	phys_addr_t phys = page_to_phys(page) + offset;
> +	phys_addr_t phys = page_to_phys(page) + offset, adj_phys;
>  	bool coherent = dev_is_dma_coherent(dev);
>  	dma_addr_t dma_handle;
>  
> -	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
> -			coherent, dir, attrs);
> -	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
> +	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size,
> +			dma_get_mask(dev), coherent, dir, attrs, &adj_phys);
> +	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>  	    dma_handle != DMA_MAPPING_ERROR)
> -		arch_sync_dma_for_device(phys, size, dir);
> +		__iommu_dma_sync_single_for_device(dev, dma_handle, size,
> +						   dir, adj_phys);
>  	return dma_handle;
>  }
>  
> @@ -960,7 +973,7 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
>  	for_each_sg(sg, s, nents, i) {
>  		sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
>  				s->length, dma_get_mask(dev),
> -				dev_is_dma_coherent(dev), dir, attrs);
> +				dev_is_dma_coherent(dev), dir, attrs, NULL);
>  		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
>  			goto out_unmap;
>  		sg_dma_len(s) = s->length;
> @@ -991,17 +1004,24 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
>  	dma_addr_t iova;
>  	size_t iova_len = 0;
>  	unsigned long mask = dma_get_seg_boundary(dev);
> -	int i;
> +	int i, early_mapped = 0;
>  
>  	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
>  	    iommu_deferred_attach(dev, domain))
>  		return 0;
>  
> +	if (dev_is_untrusted(dev)) {
> +		early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
> +							dir, attrs);
> +		if (!early_mapped)
> +			return 0;
> +	}
> +
>  	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
>  		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
>  
> -	if (dev_is_untrusted(dev))
> -		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
> +	if (early_mapped)
> +		return early_mapped;
>  
>  	/*
>  	 * Work out how much IOVA space we need, and align the segments to
> -- 
> 2.32.0.93.g670b81a890-goog

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync
  2021-07-08  9:17   ` Joerg Roedel
@ 2021-07-08 15:43     ` Robin Murphy
  -1 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2021-07-08 15:43 UTC (permalink / raw)
  To: Joerg Roedel, David Stevens; +Cc: iommu, Will Deacon, linux-kernel, Tom Murphy

On 2021-07-08 10:17, Joerg Roedel wrote:
> Adding Robin.
> 
> On Fri, Jul 02, 2021 at 02:37:41PM +0900, David Stevens wrote:
>> From: David Stevens <stevensd@chromium.org>
>>
>> Make map_swiotlb and unmap_swiotlb perform only the actual mapping and
>> unmapping, and consistently use the sync_single_for and sync_sg_for
>> functions for swiotlb sync and arch
>> sync. This ensures that the same code path is responsible for syncing
>> regardless of whether or not SKIP_CPU_SYNC is set. In the process, fix
>> various places where the original physical address and swiotlb tlb_addr
>> are mixed up:
>>    - Make sync_sg functions call sync_single functions for untrusted
>>      devices, so they use tlb_addr when checking is_swiotlb_buffer and
>>      when doing arch sync if necessary.
>>    - Use tlb_addr for arch sync in map_page if necessary.
>>    - In map_sg, map before syncing so that arch sync can target the
>>      bounce buffer if necessary.
>>    - Pass SKIP_CPU_SYNC to swiotlb map and unmap to avoid double syncing
>>      the swiotlb. This had previously only happened in the unmap_page
>>      case, but is now necessary for all swiotlb cases.
>>
>> Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")

Hmm, there's a lot going on here, and it's not really clear to me what's 
what, especially WRT the comment about addresses being mixed up. Is 
there an actual correctness bug anywhere, i.e. an address which *should* 
be synced *isn't*? If so, please fix that minimally in a standalone 
patch without all the additional complication. If it's purely an 
efficiency improvement - i.e. the bounce page getting synced twice 
(which shouldn't have *too* much impact) or the original copy getting 
redundantly synced *as well* as the bounce page, then please clarify 
whether you've measured significant impact in practice or if this is 
just an observation from code inspection.

TBH I don't find the overall justification in the commit message 
particularly convincing - to a less generous reading it kind of comes 
across as "move things around for the sake of it". Yes one can argue for 
keeping all the cache maintenance in one place, but one can equally 
argue for the keeping all the low-level practicalities of bouncing under 
SWIOTLB's umbrella such that it doesn't clutter up the IOMMU-focused 
code. The point of utilising the SWIOTLB machinery in the first place 
was to share and reuse as much of its "normal" flow as possible to keep 
things simple (see the thread at [1] for where the history began).

Now, what is true is that the integration into iommu-dma is a bit rough 
and bolted-on in places. IIRC it was done expediently to un-block 
further development on the Intel driver, and it was always the plan to 
come back and improve it later (like only bouncing the unaligned start 
and end of larger buffers such that whole pages in the middle can still 
be mapped in-place). As such I'm very happy to see that someone has the 
time and interest to improve things, but some of this patch looks like 
adding more mess to work around the existing mess, rather than more 
fundamentally cleaning it up. For example, rather than further cementing 
the bodge of having two separate map_sg implementations, I think the 
bouncing could be streamlined into iommu-dma's own flow as part of its 
scatterlist transformation (AFAICS temporarily swizzling sg_page so that 
iommu_map_sg() maps the bounced copy would be no worse than what it's 
already doing).

Thanks,
Robin.

[1] 
https://lore.kernel.org/linux-iommu/20190312163845.GA13141@infradead.org/
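
For concreteness, a rough and purely hypothetical sketch of the sg_page
swizzling idea (nothing like this is being proposed here; the
orig_page/orig_off arrays are placeholder storage and error unwinding is
elided):

	/* Bounce each segment first, then point the scatterlist at the
	 * bounce pages so the normal iommu_map_sg() flow maps the copies. */
	for_each_sg(sg, s, nents, i) {
		phys_addr_t tlb = swiotlb_tbl_map_single(dev, sg_phys(s),
				s->length, iova_align(iovad, s->length),
				dir, attrs);
		if (tlb == DMA_MAPPING_ERROR)
			break;	/* unwinding of earlier segments elided */
		orig_page[i] = sg_page(s);
		orig_off[i] = s->offset;
		sg_set_page(s, pfn_to_page(PHYS_PFN(tlb)), s->length,
			    offset_in_page(tlb));
	}

	/* ...the existing IOVA sizing and iommu_map_sg() run here... */

	/* Restore the caller's view of the scatterlist afterwards. */
	for_each_sg(sg, s, nents, i)
		sg_set_page(s, orig_page[i], s->length, orig_off[i]);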

>> Signed-off-by: David Stevens <stevensd@chromium.org>
>> ---
>>   drivers/iommu/dma-iommu.c | 82 ++++++++++++++++++++++++---------------
>>   1 file changed, 51 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 7bcdd1205535..24d1042cd052 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
>>   	__iommu_dma_unmap(dev, dma_addr, size);
>>   
>>   	if (unlikely(is_swiotlb_buffer(phys)))
>> -		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
>> +		swiotlb_tbl_unmap_single(dev, phys, size, dir,
>> +					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
>>   }
>>   
>>   static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
>> @@ -536,7 +537,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
>>   
>>   static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>>   		size_t org_size, dma_addr_t dma_mask, bool coherent,
>> -		enum dma_data_direction dir, unsigned long attrs)
>> +		enum dma_data_direction dir, unsigned long attrs,
>> +		phys_addr_t *adj_phys)
>>   {
>>   	int prot = dma_info_to_prot(dir, coherent, attrs);
>>   	struct iommu_domain *domain = iommu_get_dma_domain(dev);
>> @@ -555,7 +557,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>>   	    iova_offset(iovad, phys | org_size)) {
>>   		aligned_size = iova_align(iovad, org_size);
>>   		phys = swiotlb_tbl_map_single(dev, phys, org_size,
>> -					      aligned_size, dir, attrs);
>> +					      aligned_size, dir,
>> +					      attrs | DMA_ATTR_SKIP_CPU_SYNC);
>>   
>>   		if (phys == DMA_MAPPING_ERROR)
>>   			return DMA_MAPPING_ERROR;
>> @@ -573,6 +576,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>>   
>>   		memset(padding_start, 0, padding_size);
>>   	}
>> +	if (adj_phys)
>> +		*adj_phys = phys;
>>   
>>   	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
>>   	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
>> @@ -785,15 +790,17 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
>>   		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
>>   }
>>   
>> -static void iommu_dma_sync_single_for_device(struct device *dev,
>> -		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
>> +static void __iommu_dma_sync_single_for_device(struct device *dev,
>> +		dma_addr_t dma_handle, size_t size,
>> +		enum dma_data_direction dir, phys_addr_t phys)
>>   {
>> -	phys_addr_t phys;
>> -
>>   	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>>   		return;
>>   
>> -	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
>> +	if (phys == 0)
>> +		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
>> +					  dma_handle);
>> +
>>   	if (is_swiotlb_buffer(phys))
>>   		swiotlb_sync_single_for_device(dev, phys, size, dir);
>>   
>> @@ -801,6 +808,12 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
>>   		arch_sync_dma_for_device(phys, size, dir);
>>   }
>>   
>> +static void iommu_dma_sync_single_for_device(struct device *dev,
>> +		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
>> +{
>> +	__iommu_dma_sync_single_for_device(dev, dma_handle, size, dir, 0);
>> +}
>> +
>>   static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>>   		struct scatterlist *sgl, int nelems,
>>   		enum dma_data_direction dir)
>> @@ -811,14 +824,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>>   	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>>   		return;
>>   
>> -	for_each_sg(sgl, sg, nelems, i) {
>> -		if (!dev_is_dma_coherent(dev))
>> +	if (dev_is_untrusted(dev))
>> +		for_each_sg(sgl, sg, nelems, i)
>> +			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
>> +						      sg->length, dir);
>> +	else
>> +		for_each_sg(sgl, sg, nelems, i)
>>   			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
>> -
>> -		if (is_swiotlb_buffer(sg_phys(sg)))
>> -			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
>> -						    sg->length, dir);
>> -	}
>>   }
>>   
>>   static void iommu_dma_sync_sg_for_device(struct device *dev,
>> @@ -831,29 +843,30 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
>>   	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>>   		return;
>>   
>> -	for_each_sg(sgl, sg, nelems, i) {
>> -		if (is_swiotlb_buffer(sg_phys(sg)))
>> -			swiotlb_sync_single_for_device(dev, sg_phys(sg),
>> -						       sg->length, dir);
>> -
>> -		if (!dev_is_dma_coherent(dev))
>> +	if (dev_is_untrusted(dev))
>> +		for_each_sg(sgl, sg, nelems, i)
>> +			__iommu_dma_sync_single_for_device(dev,
>> +							   sg_dma_address(sg),
>> +							   sg->length, dir, 0);
>> +	else
>> +		for_each_sg(sgl, sg, nelems, i)
>>   			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
>> -	}
>>   }
>>   
>>   static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
>>   		unsigned long offset, size_t size, enum dma_data_direction dir,
>>   		unsigned long attrs)
>>   {
>> -	phys_addr_t phys = page_to_phys(page) + offset;
>> +	phys_addr_t phys = page_to_phys(page) + offset, adj_phys;
>>   	bool coherent = dev_is_dma_coherent(dev);
>>   	dma_addr_t dma_handle;
>>   
>> -	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
>> -			coherent, dir, attrs);
>> -	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>> +	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size,
>> +			dma_get_mask(dev), coherent, dir, attrs, &adj_phys);
>> +	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>>   	    dma_handle != DMA_MAPPING_ERROR)
>> -		arch_sync_dma_for_device(phys, size, dir);
>> +		__iommu_dma_sync_single_for_device(dev, dma_handle, size,
>> +						   dir, adj_phys);
>>   	return dma_handle;
>>   }
>>   
>> @@ -960,7 +973,7 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
>>   	for_each_sg(sg, s, nents, i) {
>>   		sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
>>   				s->length, dma_get_mask(dev),
>> -				dev_is_dma_coherent(dev), dir, attrs);
>> +				dev_is_dma_coherent(dev), dir, attrs, NULL);
>>   		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
>>   			goto out_unmap;
>>   		sg_dma_len(s) = s->length;
>> @@ -991,17 +1004,24 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
>>   	dma_addr_t iova;
>>   	size_t iova_len = 0;
>>   	unsigned long mask = dma_get_seg_boundary(dev);
>> -	int i;
>> +	int i, early_mapped = 0;
>>   
>>   	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
>>   	    iommu_deferred_attach(dev, domain))
>>   		return 0;
>>   
>> +	if (dev_is_untrusted(dev)) {
>> +		early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
>> +							dir, attrs);
>> +		if (!early_mapped)
>> +			return 0;
>> +	}
>> +
>>   	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
>>   		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
>>   
>> -	if (dev_is_untrusted(dev))
>> -		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
>> +	if (early_mapped)
>> +		return early_mapped;
>>   
>>   	/*
>>   	 * Work out how much IOVA space we need, and align the segments to
>> -- 
>> 2.32.0.93.g670b81a890-goog

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync
@ 2021-07-08 15:43     ` Robin Murphy
  0 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2021-07-08 15:43 UTC (permalink / raw)
  To: Joerg Roedel, David Stevens; +Cc: iommu, Will Deacon, linux-kernel, Tom Murphy

On 2021-07-08 10:17, Joerg Roedel wrote:
> Adding Robin.
> 
> On Fri, Jul 02, 2021 at 02:37:41PM +0900, David Stevens wrote:
>> From: David Stevens <stevensd@chromium.org>
>>
>> Make map_swiotlb and unmap_swiotlb only for mapping, and consistently
>> use sync_single_for and sync_sg_for functions for swiotlb sync and arch
>> sync. This ensures that the same code path is responsible for syncing
>> regardless of whether or not SKIP_CPU_SYNC is set. In the process, fix
>> various places where the original physical address and swiotlb tlb_addr
>> are mixed up:
>>    - Make sync_sg functions call sync_single functions for untrusted
>>      devices, so they use tlb_addr when checking is_swiotlb_buffer and
>>      when doing arch sync if necessary.
>>    - Use tlb_addr for arch sync in map_page if necessary.
>>    - In map_sg, map before syncing so that arch sync can target the
>>      bounce buffer if necessary.
>>    - Pass SKIP_CPU_SYNC to swiotlb map and unmap to avoid double syncing
>>      the swiotlb. This had previously only happened in the unmap_page
>>      case, but is now necessary for all swiotlb cases.
>>
>> Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")

Hmm, there's a lot going on here, and it's not really clear to me what's 
what, especially WRT the comment about addresses being mixed up. Is 
there an actual correctness bug anywhere, i.e. an address which *should* 
be synced *isn't*? If so, please fix that minimally in a standalone 
patch without all the additional complication. If it's purely an 
efficiency improvement - i.e. the bounce page getting synced twice 
(which shouldn't have *too* much impact) or the original copy geting 
redundantly synced *as well* as the bounce page, then please clarify 
whether you've measured significant impact in practice or if this is 
just an observation from code inspection.

TBH I don't find the overall justification in the commit message 
particularly convincing - to a less generous reading it kind of comes 
across as "move things around for the sake of it". Yes one can argue for 
keeping all the cache maintenance in one place, but one can equally 
argue for the keeping all the low-level practicalities of bouncing under 
SWIOTLB's umbrella such that it doesn't clutter up the IOMMU-focused 
code. The point of utilising the SWIOTLB machinery in the first place 
was to share and reuse as much of its "normal" flow as possible to keep 
things simple (see the thread at [1] for where the history began).

Now, what is true is that the integration into iommu-dma is a bit rough 
and bolted-on in places. IIRC it was done expediently to unblock 
further development on the Intel driver, and it was always the plan to 
come back and improve it later (like only bouncing the unaligned start 
and end of larger buffers such that whole pages in the middle can still 
be mapped in-place). As such I'm very happy to see that someone has the 
time and interest to improve things, but some of this patch looks like 
adding more mess to work around the existing mess, rather than more 
fundamentally cleaning it up. For example, rather than further cementing 
the bodge of having two separate map_sg implementations, I think the 
bouncing could be streamlined into iommu-dma's own flow as part of its 
scatterlist transformation (AFAICS temporarily swizzling sg_page so that 
iommu_map_sg() maps the bounced copy would be no worse than what it's 
already doing).
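
To make that a bit more concrete, below is roughly the shape I have in 
mind - completely untested, the helper name and the explicit iovad 
parameter are made up for the sketch, and it glosses over the iova 
padding, the error unwinding and the unmap side that 
__iommu_dma_map_swiotlb() deals with today, so treat it as an 
illustration only (it assumes the existing includes in 
drivers/iommu/dma-iommu.c):

/*
 * Sketch only: bounce the unaligned segments of an untrusted device's
 * scatterlist up front and temporarily point sg_page at the bounce
 * buffer, so that the single iommu_dma_map_sg() flow (and the normal
 * sync paths) only ever see the bounced copy.
 */
static int iommu_dma_sg_bounce(struct device *dev, struct iova_domain *iovad,
			       struct scatterlist *sgl, int nents,
			       enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t phys = sg_phys(sg);

		/* Fully aligned segments can still be mapped in place */
		if (!iova_offset(iovad, phys | sg->length))
			continue;

		phys = swiotlb_tbl_map_single(dev, phys, sg->length,
					      iova_align(iovad, sg->length),
					      dir,
					      attrs | DMA_ATTR_SKIP_CPU_SYNC);
		if (phys == DMA_MAPPING_ERROR)
			return -ENOMEM;	/* caller unwinds what was bounced */

		/* Swizzle the segment so iommu_map_sg() maps the bounce copy */
		sg_set_page(sg, pfn_to_page(PHYS_PFN(phys)), sg->length,
			    offset_in_page(phys));
	}

	return 0;
}

The unmap side would then restore the original pages before handing 
each bounced segment back to swiotlb_tbl_unmap_single(), but hopefully 
that's enough to show the idea.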

Thanks,
Robin.

[1] 
https://lore.kernel.org/linux-iommu/20190312163845.GA13141@infradead.org/

>> Signed-off-by: David Stevens <stevensd@chromium.org>
>> ---
>>   drivers/iommu/dma-iommu.c | 82 ++++++++++++++++++++++++---------------
>>   1 file changed, 51 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 7bcdd1205535..24d1042cd052 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
>>   	__iommu_dma_unmap(dev, dma_addr, size);
>>   
>>   	if (unlikely(is_swiotlb_buffer(phys)))
>> -		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
>> +		swiotlb_tbl_unmap_single(dev, phys, size, dir,
>> +					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
>>   }
>>   
>>   static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
>> @@ -536,7 +537,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
>>   
>>   static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>>   		size_t org_size, dma_addr_t dma_mask, bool coherent,
>> -		enum dma_data_direction dir, unsigned long attrs)
>> +		enum dma_data_direction dir, unsigned long attrs,
>> +		phys_addr_t *adj_phys)
>>   {
>>   	int prot = dma_info_to_prot(dir, coherent, attrs);
>>   	struct iommu_domain *domain = iommu_get_dma_domain(dev);
>> @@ -555,7 +557,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>>   	    iova_offset(iovad, phys | org_size)) {
>>   		aligned_size = iova_align(iovad, org_size);
>>   		phys = swiotlb_tbl_map_single(dev, phys, org_size,
>> -					      aligned_size, dir, attrs);
>> +					      aligned_size, dir,
>> +					      attrs | DMA_ATTR_SKIP_CPU_SYNC);
>>   
>>   		if (phys == DMA_MAPPING_ERROR)
>>   			return DMA_MAPPING_ERROR;
>> @@ -573,6 +576,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>>   
>>   		memset(padding_start, 0, padding_size);
>>   	}
>> +	if (adj_phys)
>> +		*adj_phys = phys;
>>   
>>   	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
>>   	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
>> @@ -785,15 +790,17 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
>>   		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
>>   }
>>   
>> -static void iommu_dma_sync_single_for_device(struct device *dev,
>> -		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
>> +static void __iommu_dma_sync_single_for_device(struct device *dev,
>> +		dma_addr_t dma_handle, size_t size,
>> +		enum dma_data_direction dir, phys_addr_t phys)
>>   {
>> -	phys_addr_t phys;
>> -
>>   	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>>   		return;
>>   
>> -	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
>> +	if (phys == 0)
>> +		phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
>> +					  dma_handle);
>> +
>>   	if (is_swiotlb_buffer(phys))
>>   		swiotlb_sync_single_for_device(dev, phys, size, dir);
>>   
>> @@ -801,6 +808,12 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
>>   		arch_sync_dma_for_device(phys, size, dir);
>>   }
>>   
>> +static void iommu_dma_sync_single_for_device(struct device *dev,
>> +		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
>> +{
>> +	__iommu_dma_sync_single_for_device(dev, dma_handle, size, dir, 0);
>> +}
>> +
>>   static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>>   		struct scatterlist *sgl, int nelems,
>>   		enum dma_data_direction dir)
>> @@ -811,14 +824,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>>   	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>>   		return;
>>   
>> -	for_each_sg(sgl, sg, nelems, i) {
>> -		if (!dev_is_dma_coherent(dev))
>> +	if (dev_is_untrusted(dev))
>> +		for_each_sg(sgl, sg, nelems, i)
>> +			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
>> +						      sg->length, dir);
>> +	else
>> +		for_each_sg(sgl, sg, nelems, i)
>>   			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
>> -
>> -		if (is_swiotlb_buffer(sg_phys(sg)))
>> -			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
>> -						    sg->length, dir);
>> -	}
>>   }
>>   
>>   static void iommu_dma_sync_sg_for_device(struct device *dev,
>> @@ -831,29 +843,30 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
>>   	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
>>   		return;
>>   
>> -	for_each_sg(sgl, sg, nelems, i) {
>> -		if (is_swiotlb_buffer(sg_phys(sg)))
>> -			swiotlb_sync_single_for_device(dev, sg_phys(sg),
>> -						       sg->length, dir);
>> -
>> -		if (!dev_is_dma_coherent(dev))
>> +	if (dev_is_untrusted(dev))
>> +		for_each_sg(sgl, sg, nelems, i)
>> +			__iommu_dma_sync_single_for_device(dev,
>> +							   sg_dma_address(sg),
>> +							   sg->length, dir, 0);
>> +	else
>> +		for_each_sg(sgl, sg, nelems, i)
>>   			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
>> -	}
>>   }
>>   
>>   static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
>>   		unsigned long offset, size_t size, enum dma_data_direction dir,
>>   		unsigned long attrs)
>>   {
>> -	phys_addr_t phys = page_to_phys(page) + offset;
>> +	phys_addr_t phys = page_to_phys(page) + offset, adj_phys;
>>   	bool coherent = dev_is_dma_coherent(dev);
>>   	dma_addr_t dma_handle;
>>   
>> -	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
>> -			coherent, dir, attrs);
>> -	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>> +	dma_handle = __iommu_dma_map_swiotlb(dev, phys, size,
>> +			dma_get_mask(dev), coherent, dir, attrs, &adj_phys);
>> +	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>>   	    dma_handle != DMA_MAPPING_ERROR)
>> -		arch_sync_dma_for_device(phys, size, dir);
>> +		__iommu_dma_sync_single_for_device(dev, dma_handle, size,
>> +						   dir, adj_phys);
>>   	return dma_handle;
>>   }
>>   
>> @@ -960,7 +973,7 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
>>   	for_each_sg(sg, s, nents, i) {
>>   		sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
>>   				s->length, dma_get_mask(dev),
>> -				dev_is_dma_coherent(dev), dir, attrs);
>> +				dev_is_dma_coherent(dev), dir, attrs, NULL);
>>   		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
>>   			goto out_unmap;
>>   		sg_dma_len(s) = s->length;
>> @@ -991,17 +1004,24 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
>>   	dma_addr_t iova;
>>   	size_t iova_len = 0;
>>   	unsigned long mask = dma_get_seg_boundary(dev);
>> -	int i;
>> +	int i, early_mapped = 0;
>>   
>>   	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
>>   	    iommu_deferred_attach(dev, domain))
>>   		return 0;
>>   
>> +	if (dev_is_untrusted(dev)) {
>> +		early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
>> +							dir, attrs);
>> +		if (!early_mapped)
>> +			return 0;
>> +	}
>> +
>>   	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
>>   		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
>>   
>> -	if (dev_is_untrusted(dev))
>> -		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
>> +	if (early_mapped)
>> +		return early_mapped;
>>   
>>   	/*
>>   	 * Work out how much IOVA space we need, and align the segments to
>> -- 
>> 2.32.0.93.g670b81a890-goog
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
> 
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] dma-iommu: Check CONFIG_SWIOTLB more broadly
  2021-07-02  5:37   ` David Stevens
@ 2021-07-08 17:49     ` Robin Murphy
  0 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2021-07-08 17:49 UTC (permalink / raw)
  To: David Stevens, Joerg Roedel, Will Deacon; +Cc: iommu, linux-kernel, Tom Murphy

On 2021-07-02 06:37, David Stevens wrote:
> From: David Stevens <stevensd@chromium.org>
> 
> Add check for CONFIG_SWIOTLB to dev_is_untrusted, so that swiotlb
> related code can be removed more aggressively.

Seems logical, and I think the new name is secretly the best part since 
it clarifies the intent of 90% of the callers. However...

> Signed-off-by: David Stevens <stevensd@chromium.org>
> ---
>   drivers/iommu/dma-iommu.c | 26 +++++++++++++-------------
>   1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 24d1042cd052..614f0dd86b08 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -310,9 +310,10 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
>   	domain->ops->flush_iotlb_all(domain);
>   }
>   
> -static bool dev_is_untrusted(struct device *dev)
> +static bool dev_use_swiotlb(struct device *dev)
>   {
> -	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
> +	return IS_ENABLED(CONFIG_SWIOTLB) &&
> +	       dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
>   }
>   
>   /**
> @@ -368,7 +369,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>   
>   	init_iova_domain(iovad, 1UL << order, base_pfn);
>   
> -	if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
> +	if (!cookie->fq_domain && (!dev || !dev_use_swiotlb(dev)) &&

...this one is unrelated to SWIOTLB. Even when we can't use bouncing to 
fully mitigate untrusted devices, it still makes sense to impose strict 
invalidation on them. Maybe we can keep dev_is_untrusted() and define 
dev_use_swiotlb() in terms of it?
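
i.e. something like this - untested, but it's just the two existing 
checks composed, plus the IS_ENABLED() test from this patch:

static bool dev_is_untrusted(struct device *dev)
{
	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}

static bool dev_use_swiotlb(struct device *dev)
{
	return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
}

with iommu_dma_init_domain() continuing to use dev_is_untrusted() for 
the strict-invalidation decision, and everything else switching to 
dev_use_swiotlb() as you have it here.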

Robin.

>   	    domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) {
>   		if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
>   					  iommu_dma_entry_dtor))
> @@ -553,8 +554,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
>   	 * If both the physical buffer start address and size are
>   	 * page aligned, we don't need to use a bounce page.
>   	 */
> -	if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
> -	    iova_offset(iovad, phys | org_size)) {
> +	if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | org_size)) {
>   		aligned_size = iova_align(iovad, org_size);
>   		phys = swiotlb_tbl_map_single(dev, phys, org_size,
>   					      aligned_size, dir,
> @@ -779,7 +779,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
>   {
>   	phys_addr_t phys;
>   
> -	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
> +	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
>   		return;
>   
>   	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
> @@ -794,7 +794,7 @@ static void __iommu_dma_sync_single_for_device(struct device *dev,
>   		dma_addr_t dma_handle, size_t size,
>   		enum dma_data_direction dir, phys_addr_t phys)
>   {
> -	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
> +	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
>   		return;
>   
>   	if (phys == 0)
> @@ -821,10 +821,10 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
>   	struct scatterlist *sg;
>   	int i;
>   
> -	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
> +	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
>   		return;
>   
> -	if (dev_is_untrusted(dev))
> +	if (dev_use_swiotlb(dev))
>   		for_each_sg(sgl, sg, nelems, i)
>   			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
>   						      sg->length, dir);
> @@ -840,10 +840,10 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
>   	struct scatterlist *sg;
>   	int i;
>   
> -	if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
> +	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
>   		return;
>   
> -	if (dev_is_untrusted(dev))
> +	if (dev_use_swiotlb(dev))
>   		for_each_sg(sgl, sg, nelems, i)
>   			__iommu_dma_sync_single_for_device(dev,
>   							   sg_dma_address(sg),
> @@ -1010,7 +1010,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
>   	    iommu_deferred_attach(dev, domain))
>   		return 0;
>   
> -	if (dev_is_untrusted(dev)) {
> +	if (dev_use_swiotlb(dev)) {
>   		early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
>   							dir, attrs);
>   		if (!early_mapped)
> @@ -1092,7 +1092,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
>   	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
>   		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
>   
> -	if (dev_is_untrusted(dev)) {
> +	if (dev_use_swiotlb(dev)) {
>   		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
>   		return;
>   	}
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-07-08 17:49 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-02  5:37 [PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync David Stevens
2021-07-02  5:37 ` David Stevens
2021-07-02  5:37 ` [PATCH 2/2] dma-iommu: Check CONFIG_SWIOTLB more broadly David Stevens
2021-07-02  5:37   ` David Stevens
2021-07-08 17:49   ` Robin Murphy
2021-07-08 17:49     ` Robin Murphy
2021-07-08  9:17 ` [PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync Joerg Roedel
2021-07-08  9:17   ` Joerg Roedel
2021-07-08 15:43   ` Robin Murphy
2021-07-08 15:43     ` Robin Murphy
