[1/4] dma-mapping: Add bounced DMA ops

Message ID 20200713091211.2183368-2-tientzu@chromium.org
State New
Series
  • Bounced DMA support

Commit Message

Claire Chang July 13, 2020, 9:12 a.m. UTC
The bounced DMA ops provide an implementation of DMA ops that bounce
streaming DMA in and out of a specially allocated region. Only the
operations relevant to streaming DMA are supported.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/device.h      |   3 +
 include/linux/dma-mapping.h |   1 +
 kernel/dma/Kconfig          |  17 +++
 kernel/dma/Makefile         |   1 +
 kernel/dma/bounced.c        | 215 ++++++++++++++++++++++++++++++++++++
 5 files changed, 237 insertions(+)
 create mode 100644 kernel/dma/bounced.c

Comments

Robin Murphy July 13, 2020, 11:55 a.m. UTC | #1
On 2020-07-13 10:12, Claire Chang wrote:
> The bounced DMA ops provide an implementation of DMA ops that bounce
> streaming DMA in and out of a specially allocated region. Only the
> operations relevant to streaming DMA are supported.

I think there are too many implicit assumptions here - apparently that 
coherent allocations will always be intercepted by 
dma_*_from_dev_coherent(), and that calling into dma-direct won't 
actually bounce things a second time beyond where you thought they were 
going, manage coherency for a different address, and make it all go 
subtly wrong. Consider "swiotlb=force", for instance...

Again, plumbing this straight into dma-direct so that SWIOTLB can simply 
target a different buffer and always bounce regardless of masks would 
seem a far better option.

Robin.

Christoph Hellwig July 14, 2020, 11:01 a.m. UTC | #2
On Mon, Jul 13, 2020 at 12:55:43PM +0100, Robin Murphy wrote:
> On 2020-07-13 10:12, Claire Chang wrote:
>> The bounced DMA ops provide an implementation of DMA ops that bounce
>> streaming DMA in and out of a specially allocated region. Only the
>> operations relevant to streaming DMA are supported.
>
> I think there are too many implicit assumptions here - apparently that 
> coherent allocations will always be intercepted by 
> dma_*_from_dev_coherent(), and that calling into dma-direct won't actually 
> bounce things a second time beyond where you thought they were going, 
> manage coherency for a different address, and make it all go subtly wrong. 
> Consider "swiotlb=force", for instance...
>
> Again, plumbing this straight into dma-direct so that SWIOTLB can simply 
> target a different buffer and always bounce regardless of masks would seem 
> a far better option.

I haven't really had time to read through the details, but I agree that
any bouncing scheme should reuse the swiotlb code and not invent a
parallel infrastructure.
Claire Chang July 15, 2020, 3:46 a.m. UTC | #3
On Tue, Jul 14, 2020 at 7:01 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Jul 13, 2020 at 12:55:43PM +0100, Robin Murphy wrote:
> > On 2020-07-13 10:12, Claire Chang wrote:
> >> The bounced DMA ops provide an implementation of DMA ops that bounce
> >> streaming DMA in and out of a specially allocated region. Only the
> >> operations relevant to streaming DMA are supported.
> >
> > I think there are too many implicit assumptions here - apparently that
> > coherent allocations will always be intercepted by
> > dma_*_from_dev_coherent(), and that calling into dma-direct won't actually
> > bounce things a second time beyond where you thought they were going,
> > manage coherency for a different address, and make it all go subtly wrong.
> > Consider "swiotlb=force", for instance...
> >
> > Again, plumbing this straight into dma-direct so that SWIOTLB can simply
> > target a different buffer and always bounce regardless of masks would seem
> > a far better option.
>
> I haven't really had time to read through the details, but I agree that
> any bouncing scheme should reuse the swiotlb code and not invent a
> parallel infrastructure.
Thanks for the feedback. I'll try to reuse SWIOTLB.
Claire Chang July 15, 2020, 9:04 a.m. UTC | #4
On Wed, Jul 15, 2020 at 11:46 AM Claire Chang <tientzu@chromium.org> wrote:
>
> On Tue, Jul 14, 2020 at 7:01 PM Christoph Hellwig <hch@lst.de> wrote:
> >
> > On Mon, Jul 13, 2020 at 12:55:43PM +0100, Robin Murphy wrote:
> > > On 2020-07-13 10:12, Claire Chang wrote:
> > >> The bounced DMA ops provide an implementation of DMA ops that bounce
> > >> streaming DMA in and out of a specially allocated region. Only the
> > >> operations relevant to streaming DMA are supported.
> > >
> > > I think there are too many implicit assumptions here - apparently that
> > > coherent allocations will always be intercepted by
> > > dma_*_from_dev_coherent(), and that calling into dma-direct won't actually
> > > bounce things a second time beyond where you thought they were going,
> > > manage coherency for a different address, and make it all go subtly wrong.
> > > Consider "swiotlb=force", for instance...
If I understand it correctly, reusing SWIOTLB won't change the fact that
coherent allocations are always intercepted by dma_*_from_dev_coherent(),
right? Since we can't bounce coherent memory, we still need to rely on
dma_*_from_dev_coherent() and a reserved-memory region for coherent DMA
to restrict the device's DMA access.
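(For context, that interception happens before the installed DMA ops are
ever consulted; paraphrased roughly from dma_alloc_attrs() in
kernel/dma/mapping.c, so this is a sketch from memory rather than the exact
source:)

/*
 * Rough paraphrase of kernel/dma/mapping.c: the per-device coherent
 * area (dev->dma_mem, typically set up from a reserved-memory node)
 * is tried before the device's dma_map_ops ever get involved.
 */
void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
		      gfp_t flag, unsigned long attrs)
{
	void *cpu_addr;

	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
		return cpu_addr;

	/* ... otherwise fall through to dma-direct or the dma_map_ops ... */
}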

As for calling into dma-direct: in this version I use set_dma_ops() to
install dma_bounced_ops, so dma-direct and SWIOTLB are bypassed entirely.
"swiotlb=force" therefore won't bounce things a second time, and the data
is still bounced to the region set in the device tree.
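(Roughly, the hookup looks like the sketch below. The helper name here is
made up; the real setup is done via of_dma_set_bounce_buffer in patch #4.)

/*
 * Sketch only, not the literal code from patch #4: once the device's
 * reserved bounce region has been initialized, the per-device ops are
 * installed, so dma-direct and SWIOTLB are never entered for this
 * device's streaming DMA.
 */
static void dma_bounced_install(struct device *dev,
				struct dma_bounced_mem *mem)
{
	dev->dma_bounced_mem = mem;		/* field added in this patch */
	set_dma_ops(dev, &dma_bounced_ops);	/* bypasses dma-direct/SWIOTLB */
}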
Besides, I did a quick search and found that only two *-iommu.c files use
dma_direct_map_page directly:
https://elixir.bootlin.com/linux/latest/C/ident/dma_direct_map_page
Since bounced DMA is meant to mitigate the lack of DMA access control on
systems without an IOMMU (see patch #4, which calls of_dma_set_bounce_buffer
only for devices not behind an IOMMU), can we assume nothing will call into
dma-direct for these devices?
(I understand that if we build bounced DMA on top of SWIOTLB, we don't need
to worry about this.)

> > >
> > > Again, plumbing this straight into dma-direct so that SWIOTLB can simply
> > > target a different buffer and always bounce regardless of masks would seem
> > > a far better option.
> >
> > I haven't really had time to read through the details, but I agree that
> > any bouncing scheme should reuse the swiotlb code and not invent a
> > parallel infrastructure.
> Thanks for the feedback. I'll try to reuse SWIOTLB.
My current plan is to first change the buffer management logic in SWIOTLB
to use gen_pool, as this patch does (i.e., gen_pool_dma_alloc,
gen_pool_free, etc.), and then make SWIOTLB use the device's private pool
for regular DMA to/from system memory where possible.
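(In other words, something along the lines of the sketch below. It only
shows the pool setup side using the existing gen_pool API; the helper name
is made up and the SWIOTLB integration isn't shown.)

/*
 * Sketch of the direction only: carve the device's reserved bounce region
 * into a gen_pool, then allocate and free bounce buffers from it the same
 * way bounced.c above does with gen_pool_dma_alloc()/gen_pool_free().
 * The region is registered with its device-visible address as "phys" so
 * gen_pool_dma_alloc() hands back the matching dma_addr_t.
 */
static struct gen_pool *bounce_pool_init(void *virt_base,
					 phys_addr_t device_base, size_t size)
{
	struct gen_pool *pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);

	if (!pool)
		return NULL;

	if (gen_pool_add_virt(pool, (unsigned long)virt_base, device_base,
			      size, NUMA_NO_NODE)) {
		gen_pool_destroy(pool);
		return NULL;
	}

	return pool;
}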
Does this sound right?

Thanks!
Claire Chang July 28, 2020, 5:05 a.m. UTC | #5
v2 that reuses SWIOTLB here: https://lore.kernel.org/patchwork/cover/1280705/

Thanks,
Claire

Patch

diff --git a/include/linux/device.h b/include/linux/device.h
index 7322c51e9c0c..868b9a364003 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -588,6 +588,9 @@ struct device {
 
 	struct list_head	dma_pools;	/* dma pools (if dma'ble) */
 
+#ifdef CONFIG_DMA_BOUNCED
+	struct dma_bounced_mem  *dma_bounced_mem;
+#endif
 #ifdef CONFIG_DMA_DECLARE_COHERENT
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
 					     override */
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 2328f451a45d..86089424dafd 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -135,6 +135,7 @@ struct dma_map_ops {
 
 extern const struct dma_map_ops dma_virt_ops;
 extern const struct dma_map_ops dma_dummy_ops;
+extern const struct dma_map_ops dma_bounced_ops;
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 1da3f44f2565..148734c8748b 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -88,6 +88,23 @@ config DMA_DIRECT_REMAP
 	select DMA_REMAP
 	select DMA_COHERENT_POOL
 
+config DMA_BOUNCED
+	bool "DMA Bounced"
+	depends on !HIGHMEM
+	select OF_RESERVED_MEM
+	help
+	  This enables support for bounced DMA pools which provide a level of
+	  DMA memory protection on systems with limited hardware protection
+	  capabilities, such as those lacking an IOMMU. It does so by bouncing
+	  the data to a specially allocated DMA-accessible protected region
+	  before mapping and unmapping. One can assign the protected memory
+	  region in the device tree by using reserved-memory.
+
+	  For more information see
+	  <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt>
+	  and <kernel/dma/bounced.c>.
+	  If unsure, say "n".
+
 config DMA_CMA
 	bool "DMA Contiguous Memory Allocator"
 	depends on HAVE_DMA_CONTIGUOUS && CMA
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 370f63344e9c..f5fb4f42326a 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_HAS_DMA)			+= mapping.o direct.o dummy.o
+obj-$(CONFIG_DMA_BOUNCED)		+= bounced.o
 obj-$(CONFIG_DMA_CMA)			+= contiguous.o
 obj-$(CONFIG_DMA_DECLARE_COHERENT)	+= coherent.o
 obj-$(CONFIG_DMA_VIRT_OPS)		+= virt.o
diff --git a/kernel/dma/bounced.c b/kernel/dma/bounced.c
new file mode 100644
index 000000000000..fcaabb5eccf2
--- /dev/null
+++ b/kernel/dma/bounced.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Bounced DMA support.
+ *
+ * This implements the mitigations for lack of IOMMU by bouncing the data to a
+ * specially allocated region before mapping and unmapping.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <linux/dma-direct.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/io.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+
+struct dma_bounced_mem {
+	void		**orig_addr;
+	void		*virt_base;
+	dma_addr_t	device_base;
+	dma_addr_t	device_end;
+	struct gen_pool	*pool;
+	size_t		size;
+};
+
+static void dma_bounced_set_orig_virt(struct device *dev, dma_addr_t dma_addr,
+				      void *orig_addr)
+{
+	struct dma_bounced_mem *mem = dev->dma_bounced_mem;
+	int idx = (dma_addr - mem->device_base) >> PAGE_SHIFT;
+
+	if (dma_addr < mem->device_base || dma_addr >= mem->device_end)
+		return;
+
+	mem->orig_addr[idx] = orig_addr;
+}
+
+static void *dma_bounced_get_orig_virt(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_bounced_mem *mem = dev->dma_bounced_mem;
+	int idx = (dma_addr - mem->device_base) >> PAGE_SHIFT;
+
+	if (dma_addr < mem->device_base || dma_addr >= mem->device_end)
+		return NULL;
+
+	return mem->orig_addr[idx];
+}
+
+static void *dma_bounced_get_virt(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_bounced_mem *mem = dev->dma_bounced_mem;
+
+	if (dma_addr < mem->device_base || dma_addr >= mem->device_end)
+		return NULL;
+
+	return (dma_addr - mem->device_base) + mem->virt_base;
+}
+
+static void dma_bounced_sync_single_for_cpu(struct device *dev,
+					    dma_addr_t dma_addr, size_t size,
+					    enum dma_data_direction dir)
+{
+	void *orig_virt = dma_bounced_get_orig_virt(dev, dma_addr);
+	void *bounced_virt = dma_bounced_get_virt(dev, dma_addr);
+
+	if (!orig_virt || !bounced_virt)
+		return;
+
+	dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
+
+	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+		memcpy(orig_virt, bounced_virt, size);
+}
+
+static void dma_bounced_sync_single_for_device(struct device *dev,
+					       dma_addr_t dma_addr, size_t size,
+					       enum dma_data_direction dir)
+{
+	void *orig_virt = dma_bounced_get_orig_virt(dev, dma_addr);
+	void *bounced_virt = dma_bounced_get_virt(dev, dma_addr);
+
+	if (!orig_virt || !bounced_virt)
+		return;
+
+	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+		memcpy(bounced_virt, orig_virt, size);
+
+	dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
+}
+
+static void dma_bounced_sync_sg_for_cpu(struct device *dev,
+					struct scatterlist *sgl, int nents,
+					enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i) {
+		dma_bounced_sync_single_for_cpu(dev, sg->dma_address,
+						sg->length, dir);
+	}
+}
+
+static void dma_bounced_sync_sg_for_device(struct device *dev,
+					   struct scatterlist *sgl, int nents,
+					   enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i) {
+		dma_bounced_sync_single_for_device(dev, sg->dma_address,
+						   sg->length, dir);
+	}
+}
+
+static void dma_bounced_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				   size_t size, enum dma_data_direction dir,
+				   unsigned long attrs)
+{
+	struct dma_bounced_mem *mem = dev->dma_bounced_mem;
+
+	if (dma_addr < mem->device_base || dma_addr >= mem->device_end)
+		return;
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_bounced_sync_single_for_cpu(dev, dma_addr, size, dir);
+
+	dma_bounced_set_orig_virt(dev, dma_addr, NULL);
+	gen_pool_free(mem->pool,
+		      (unsigned long)dma_bounced_get_virt(dev, dma_addr), size);
+}
+
+static dma_addr_t dma_bounced_map_page(struct device *dev, struct page *page,
+				       unsigned long offset, size_t size,
+				       enum dma_data_direction dir,
+				       unsigned long attrs)
+{
+	struct dma_bounced_mem *mem = dev->dma_bounced_mem;
+	dma_addr_t dma_addr;
+	void *orig_virt;
+
+	if (unlikely(!gen_pool_dma_alloc(mem->pool, size, &dma_addr)))
+		return DMA_MAPPING_ERROR;
+
+	orig_virt = page_to_virt(page) + offset;
+	dma_bounced_set_orig_virt(dev, dma_addr, orig_virt);
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_bounced_sync_single_for_device(dev, dma_addr, size, dir);
+
+	return dma_addr;
+}
+
+static void dma_bounced_unmap_sg(struct device *dev, struct scatterlist *sgl,
+				 int nents, enum dma_data_direction dir,
+				 unsigned long attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i) {
+		dma_bounced_unmap_page(dev, sg->dma_address, sg_dma_len(sg),
+				       dir, attrs);
+	}
+}
+
+static int dma_bounced_map_sg(struct device *dev, struct scatterlist *sgl,
+			      int nents, enum dma_data_direction dir,
+			      unsigned long attrs)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgl, sg, nents, i) {
+		sg->dma_address = dma_bounced_map_page(
+			dev, sg_page(sg), sg->offset, sg->length, dir, attrs);
+		if (sg->dma_address == DMA_MAPPING_ERROR)
+			goto out_unmap;
+		sg_dma_len(sg) = sg->length;
+	}
+
+	return nents;
+
+out_unmap:
+	dma_bounced_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+	return 0;
+}
+
+static size_t dma_bounced_max_mapping_size(struct device *dev)
+{
+	return dev->dma_bounced_mem->size;
+}
+
+const struct dma_map_ops dma_bounced_ops = {
+	.alloc			= NULL,
+	.free			= NULL,
+	.mmap			= NULL,
+	.get_sgtable		= NULL,
+	.sync_single_for_cpu	= dma_bounced_sync_single_for_cpu,
+	.sync_single_for_device = dma_bounced_sync_single_for_device,
+	.sync_sg_for_cpu	= dma_bounced_sync_sg_for_cpu,
+	.sync_sg_for_device	= dma_bounced_sync_sg_for_device,
+	.map_page		= dma_bounced_map_page,
+	.unmap_page		= dma_bounced_unmap_page,
+	.map_sg			= dma_bounced_map_sg,
+	.unmap_sg		= dma_bounced_unmap_sg,
+	.unmap_resource		= NULL,
+	.map_resource		= NULL,
+	.cache_sync		= NULL,
+	.dma_supported		= dma_direct_supported,
+	.get_required_mask	= dma_direct_get_required_mask,
+	.max_mapping_size	= dma_bounced_max_mapping_size,
+	.get_merge_boundary	= NULL,
+};