Subject: [PATCH 1/2] dma-mapping: add a dma_ops_bypass flag to struct device
From: Christoph Hellwig <hch@lst.de>
Date: 2019-11-13 13:37 UTC
To: iommu, Alexey Kardashevskiy
Cc: Greg Kroah-Hartman, linuxppc-dev, Robin Murphy, linux-kernel
Several IOMMU drivers have a bypass mode where they can use a direct
mapping if the device's DMA mask is large enough. Add generic support
for this to the core dma-mapping code so that those drivers can be
switched to a common solution.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
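As an illustration of the intended usage (not part of this patch): an
IOMMU driver opts in from its ->dma_supported method, which per the
kernel-doc below owns the flag. A minimal sketch, with the foo_iommu
naming and the 64-bit mask policy made up for illustration:

	#include <linux/device.h>
	#include <linux/dma-mapping.h>

	static int foo_iommu_dma_supported(struct device *dev, u64 mask)
	{
		/*
		 * Hypothetical policy: if the device can address all of
		 * memory directly, skip the indirect calls through the
		 * dma_map_ops for streaming DMA entirely.
		 */
		if (mask == DMA_BIT_MASK(64)) {
			dev->dma_ops_bypass = true;
			return 1;
		}

		/* Otherwise keep using dynamic IOMMU mappings. */
		dev->dma_ops_bypass = false;
		return 1;
	}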
include/linux/device.h | 4 ++++
include/linux/dma-mapping.h | 30 ++++++++++++++++++------------
kernel/dma/mapping.c | 35 ++++++++++++++++++++++++++---------
3 files changed, 48 insertions(+), 21 deletions(-)
diff --git a/include/linux/device.h b/include/linux/device.h
index 297239a08bb7..b8a3b4ec46bd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1217,6 +1217,9 @@ struct dev_links_info {
* device.
* @dma_coherent: this particular device is dma coherent, even if the
* architecture supports non-coherent devices.
+ * @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the
+ * streaming DMA operations (->map_* / ->unmap_* / ->sync_*).
+ * This flag is managed by the dma_ops from ->dma_supported.
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
@@ -1316,6 +1319,7 @@ struct device {
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
bool dma_coherent:1;
#endif
+ bool dma_ops_bypass:1;
};
static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4d450672b7d6..22fe74179e02 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -191,9 +191,15 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma,
}
#endif /* CONFIG_DMA_DECLARE_COHERENT */
-static inline bool dma_is_direct(const struct dma_map_ops *ops)
+/*
+ * Check if the device uses a direct mapping for streaming DMA operations.
+ * This allows IOMMU drivers to set a bypass mode if the DMA mask is large
+ * enough.
+ */
+static inline bool dma_map_direct(struct device *dev,
+ const struct dma_map_ops *ops)
{
- return likely(!ops);
+ return likely(!ops) || dev->dma_ops_bypass;
}
/*
@@ -282,7 +288,7 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev,
dma_addr_t addr;
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
else
addr = ops->map_page(dev, page, offset, size, dir, attrs);
@@ -297,7 +303,7 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_unmap_page(dev, addr, size, dir, attrs);
else if (ops->unmap_page)
ops->unmap_page(dev, addr, size, dir, attrs);
@@ -316,7 +322,7 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
int ents;
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
else
ents = ops->map_sg(dev, sg, nents, dir, attrs);
@@ -334,7 +340,7 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
BUG_ON(!valid_dma_direction(dir));
debug_dma_unmap_sg(dev, sg, nents, dir);
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
else if (ops->unmap_sg)
ops->unmap_sg(dev, sg, nents, dir, attrs);
@@ -355,7 +361,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
return DMA_MAPPING_ERROR;
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
else if (ops->map_resource)
addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
@@ -371,7 +377,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (!dma_is_direct(ops) && ops->unmap_resource)
+ if (!dma_map_direct(dev, ops) && ops->unmap_resource)
ops->unmap_resource(dev, addr, size, dir, attrs);
debug_dma_unmap_resource(dev, addr, size, dir);
}
@@ -383,7 +389,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu)
ops->sync_single_for_cpu(dev, addr, size, dir);
@@ -397,7 +403,7 @@ static inline void dma_sync_single_for_device(struct device *dev,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_single_for_device(dev, addr, size, dir);
else if (ops->sync_single_for_device)
ops->sync_single_for_device(dev, addr, size, dir);
@@ -411,7 +417,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir);
else if (ops->sync_sg_for_cpu)
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
@@ -425,7 +431,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
dma_direct_sync_sg_for_device(dev, sg, nelems, dir);
else if (ops->sync_sg_for_device)
ops->sync_sg_for_device(dev, sg, nelems, dir);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 12ff766ec1fa..fdb6e16c1b00 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -105,6 +105,19 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
EXPORT_SYMBOL(dmam_alloc_attrs);
+/*
+ * Check if the device uses a direct mapping for DMA allocations.
+ * This allows IOMMU drivers to set a bypass mode if the DMA mask is large
+ * enough.
+ */
+static inline bool dma_alloc_direct(struct device *dev,
+ const struct dma_map_ops *ops)
+{
+ return likely(!ops) ||
+ (dev->dma_ops_bypass &&
+ dma_direct_supported(dev, dev->coherent_dma_mask));
+}
+
/*
* Create scatter-list for the already allocated DMA buffer.
*/
@@ -138,7 +151,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr,
size, attrs);
if (!ops->get_sgtable)
@@ -206,7 +219,7 @@ bool dma_can_mmap(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_can_mmap(dev);
return ops->mmap != NULL;
}
@@ -231,7 +244,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size,
attrs);
if (!ops->mmap)
@@ -244,7 +257,7 @@ u64 dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
return dma_direct_get_required_mask(dev);
if (ops->get_required_mask)
return ops->get_required_mask(dev);
@@ -275,7 +288,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
else if (ops->alloc)
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
@@ -307,7 +320,7 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
else if (ops->free)
ops->free(dev, size, cpu_addr, dma_handle, attrs);
@@ -318,7 +331,11 @@ int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ /*
+ * Only call the dma-direct version if we really do not have any ops
+ * set, as the dma_supported op will set the dma_ops_bypass flag.
+ */
+ if (!ops)
return dma_direct_supported(dev, mask);
if (!ops->dma_supported)
return 1;
@@ -374,7 +391,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
BUG_ON(!valid_dma_direction(dir));
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
arch_dma_cache_sync(dev, vaddr, size, dir);
else if (ops->cache_sync)
ops->cache_sync(dev, vaddr, size, dir);
@@ -386,7 +403,7 @@ size_t dma_max_mapping_size(struct device *dev)
const struct dma_map_ops *ops = get_dma_ops(dev);
size_t size = SIZE_MAX;
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
size = dma_direct_max_mapping_size(dev);
else if (ops && ops->max_mapping_size)
size = ops->max_mapping_size(dev);
--
2.20.1
Subject: [PATCH 2/2] powerpc: use the generic dma_ops_bypass mode
From: Christoph Hellwig <hch@lst.de>
Date: 2019-11-13 13:37 UTC
To: iommu, Alexey Kardashevskiy
Cc: Greg Kroah-Hartman, linuxppc-dev, Robin Murphy, linux-kernel
Use the DMA API bypass mechanism for direct window mappings. This uses
common code and speeds up the direct mapping case by avoiding indirect
calls entirely when not using dma ops at all. It also fixes a problem
where the sync_* methods were using the bypass check meant for DMA
allocations, even though they are part of the streaming ops.
Note that this patch loses the DMA_ATTR_WEAK_ORDERING override, which
has never been well defined and is only used by a few drivers that
IIRC never showed up in the typical Cell blade setups affected by the
ordering workaround.
Fixes: efd176a04bef ("powerpc/pseries/dma: Allow SWIOTLB")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
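A rough usage sketch (not part of this patch; foo_probe and the mask
values are made up) of the asymmetry the generic helpers preserve: with
dma_ops_bypass set, streaming maps short-circuit to dma-direct, while
coherent allocations still re-check the possibly smaller coherent mask
via dma_alloc_direct():

	#include <linux/dma-mapping.h>

	static int foo_probe(struct device *dev, struct page *page)
	{
		dma_addr_t handle, dma;
		void *buf;

		if (dma_set_mask(dev, DMA_BIT_MASK(64)) ||
		    dma_set_coherent_mask(dev, DMA_BIT_MASK(32)))
			return -EIO;

		/* Streaming: dma_map_direct() is true, no indirect call. */
		handle = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, handle))
			return -ENOMEM;

		/*
		 * Coherent: dma_alloc_direct() only bypasses if the 32-bit
		 * coherent mask passes dma_direct_supported(), so this may
		 * still go through iommu_alloc_coherent() on powerpc.
		 */
		buf = dma_alloc_coherent(dev, PAGE_SIZE, &dma, GFP_KERNEL);
		if (!buf) {
			dma_unmap_page(dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
			return -ENOMEM;
		}
		return 0;
	}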
arch/powerpc/include/asm/device.h | 5 --
arch/powerpc/kernel/dma-iommu.c | 90 ++++---------------------------
2 files changed, 9 insertions(+), 86 deletions(-)
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 266542769e4b..452402215e12 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -18,11 +18,6 @@ struct iommu_table;
* drivers/macintosh/macio_asic.c
*/
struct dev_archdata {
- /*
- * Set to %true if the dma_iommu_ops are requested to use a direct
- * window instead of dynamically mapping memory.
- */
- bool iommu_bypass : 1;
/*
* These two used to be a union. However, with the hybrid ops we need
* both so here we store both a DMA offset for direct mappings and
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index e486d1d78de2..569fecd7b5b2 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -14,23 +14,6 @@
* Generic iommu implementation
*/
-/*
- * The coherent mask may be smaller than the real mask, check if we can
- * really use a direct window.
- */
-static inline bool dma_iommu_alloc_bypass(struct device *dev)
-{
- return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
- dma_direct_supported(dev, dev->coherent_dma_mask);
-}
-
-static inline bool dma_iommu_map_bypass(struct device *dev,
- unsigned long attrs)
-{
- return dev->archdata.iommu_bypass &&
- (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
-}
-
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
@@ -39,8 +22,6 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
- if (dma_iommu_alloc_bypass(dev))
- return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
@@ -50,11 +31,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_free(dev, size, vaddr, dma_handle, attrs);
- else
- iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
- dma_handle);
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -67,9 +44,6 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction direction,
unsigned long attrs)
{
- if (dma_iommu_map_bypass(dev, attrs))
- return dma_direct_map_page(dev, page, offset, size, direction,
- attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
size, dma_get_mask(dev), direction, attrs);
}
@@ -79,11 +53,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
- if (!dma_iommu_map_bypass(dev, attrs))
- iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
- direction, attrs);
- else
- dma_direct_unmap_page(dev, dma_handle, size, direction, attrs);
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+ attrs);
}
@@ -91,8 +62,6 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- if (dma_iommu_map_bypass(dev, attrs))
- return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
dma_get_mask(dev), direction, attrs);
}
@@ -101,11 +70,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- if (!dma_iommu_map_bypass(dev, attrs))
- ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
- else
- dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs);
}
static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
@@ -113,8 +79,9 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- return phb->controller_ops.iommu_bypass_supported &&
- phb->controller_ops.iommu_bypass_supported(pdev, mask);
+ if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported)
+ return false;
+ return phb->controller_ops.iommu_bypass_supported(pdev, mask);
}
/* We support DMA to/from any memory page via the iommu */
@@ -123,7 +90,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
struct iommu_table *tbl = get_iommu_table_base(dev);
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
- dev->archdata.iommu_bypass = true;
+ dev->dma_ops_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
@@ -141,7 +108,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
}
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
- dev->archdata.iommu_bypass = false;
+ dev->dma_ops_bypass = false;
return 1;
}
@@ -153,47 +120,12 @@ u64 dma_iommu_get_required_mask(struct device *dev)
if (!tbl)
return 0;
- if (dev_is_pci(dev)) {
- u64 bypass_mask = dma_direct_get_required_mask(dev);
-
- if (dma_iommu_bypass_supported(dev, bypass_mask))
- return bypass_mask;
- }
-
mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) - 1);
mask += mask - 1;
return mask;
}
-static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr,
- size_t sz, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_single_for_device(dev, addr, sz, dir);
-}
-
-extern void dma_iommu_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
-}
-
-extern void dma_iommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
-}
-
const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
@@ -203,10 +135,6 @@ const struct dma_map_ops dma_iommu_ops = {
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
- .sync_single_for_cpu = dma_iommu_sync_for_cpu,
- .sync_single_for_device = dma_iommu_sync_for_device,
- .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
- .sync_sg_for_device = dma_iommu_sync_sg_for_device,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
};
--
2.20.1