From: Hiroshi Doyu <hdoyu@nvidia.com>
To: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: "linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>,
	"linaro-mm-sig@lists.linaro.org" <linaro-mm-sig@lists.linaro.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
	"linux-samsung-soc@vger.kernel.org"
	<linux-samsung-soc@vger.kernel.org>,
	"iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	Shariq Hasnain <shariq.hasnain@linaro.org>,
	Arnd Bergmann <arnd@arndb.de>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Krishna Reddy <vdumpa@nvidia.com>,
	Kyungmin Park <kyungmin.park@samsung.com>,
	Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
	Russell King - ARM Linux <linux@arm.linux.org.uk>,
	KyongHo Cho <pullip.cho@samsung.com>,
	Chunsang Jeong <chunsang.jeong@linaro.org>
Subject: Re: [PATCHv7 9/9] ARM: dma-mapping: add support for IOMMU mapper
Date: Mon, 5 Mar 2012 13:47:21 +0200
Message-ID: <20120305134721.0ab0d0e6de56fa30250059b1@nvidia.com>
In-Reply-To: <1330527862-16234-10-git-send-email-m.szyprowski@samsung.com>

Hi Marek,

On Wed, 29 Feb 2012 16:04:22 +0100
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> This patch adds a complete implementation of the DMA-mapping API for
> devices that have IOMMU support. All DMA-mapping calls are supported.
>
> This patch contains some of the code kindly provided by Krishna Reddy
> <vdumpa@nvidia.com> and Andrzej Pietrasiewicz <andrzej.p@samsung.com>
>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
>  arch/arm/Kconfig                 |    8 +
>  arch/arm/include/asm/device.h    |    3 +
>  arch/arm/include/asm/dma-iommu.h |   34 ++
>  arch/arm/mm/dma-mapping.c        |  726 +++++++++++++++++++++++++++++++++++++-
>  arch/arm/mm/vmregion.h           |    2 +-
>  5 files changed, 758 insertions(+), 15 deletions(-)
>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 59102fb..5d9a0b6 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -44,6 +44,14 @@ config ARM
>  config ARM_HAS_SG_CHAIN
>         bool
>
> +config NEED_SG_DMA_LENGTH
> +       bool
> +
> +config ARM_DMA_USE_IOMMU
> +       select NEED_SG_DMA_LENGTH
> +       select ARM_HAS_SG_CHAIN
> +       bool
> +
>  config HAVE_PWM
>         bool
>
> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
> index 6e2cb0e..b69c0d3 100644
> --- a/arch/arm/include/asm/device.h
> +++ b/arch/arm/include/asm/device.h
> @@ -14,6 +14,9 @@ struct dev_archdata {
>  #ifdef CONFIG_IOMMU_API
>         void *iommu; /* private IOMMU data */
>  #endif
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +       struct dma_iommu_mapping        *mapping;
> +#endif
>  };
>
>  struct omap_device;
> diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
> new file mode 100644
> index 0000000..799b094
> --- /dev/null
> +++ b/arch/arm/include/asm/dma-iommu.h
> @@ -0,0 +1,34 @@
> +#ifndef ASMARM_DMA_IOMMU_H
> +#define ASMARM_DMA_IOMMU_H
> +
> +#ifdef __KERNEL__
> +
> +#include <linux/mm_types.h>
> +#include <linux/scatterlist.h>
> +#include <linux/dma-debug.h>
> +#include <linux/kmemcheck.h>
> +
> +struct dma_iommu_mapping {
> +       /* iommu specific data */
> +       struct iommu_domain     *domain;
> +
> +       void                    *bitmap;
> +       size_t                  bits;
> +       unsigned int            order;
> +       dma_addr_t              base;
> +
> +       spinlock_t              lock;
> +       struct kref             kref;
> +};
> +
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order);
> +
> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
> +
> +int arm_iommu_attach_device(struct device *dev,
> +                                       struct dma_iommu_mapping *mapping);
> +
> +#endif /* __KERNEL__ */
> +#endif
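
(As an aside, a minimal sketch of how an SoC/bus layer would be expected
to consume this API. The window base/size/order and the use of
platform_bus_type are my illustrative assumptions, not part of this
patch; the error-return convention is guessed from the error paths
further down:)

	#include <linux/err.h>
	#include <linux/platform_device.h>
	#include <asm/dma-iommu.h>
	#include <asm/sizes.h>

	/* Wire 'dev' to a 128MiB IOVA window at 0x80000000 with
	 * 1-page allocation granularity (order = 0). */
	static int example_attach(struct device *dev)
	{
		struct dma_iommu_mapping *mapping;
		int err;

		mapping = arm_iommu_create_mapping(&platform_bus_type,
						   0x80000000, SZ_128M, 0);
		if (IS_ERR_OR_NULL(mapping))
			return -ENOMEM;

		err = arm_iommu_attach_device(dev, mapping);
		if (err) {
			arm_iommu_release_mapping(mapping);
			return err;
		}
		return 0;
	}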
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 67fe7e2..9b0db81 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -19,6 +19,8 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/highmem.h>
>  #include <linux/slab.h>
> +#include <linux/iommu.h>
> +#include <linux/vmalloc.h>
>
>  #include <asm/memory.h>
>  #include <asm/highmem.h>
> @@ -26,6 +28,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/sizes.h>
>  #include <asm/mach/arch.h>
> +#include <asm/dma-iommu.h>
>
>  #include "mm.h"
>
> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>         return mask;
>  }
>
> +static void __dma_clear_buffer(struct page *page, size_t size)
> +{
> +       void *ptr;
> +       /*
> +        * Ensure that the allocated pages are zeroed, and that any data
> +        * lurking in the kernel direct-mapped region is invalidated.
> +        */
> +       ptr = page_address(page);
> +       if (ptr) {
> +               memset(ptr, 0, size);
> +               dmac_flush_range(ptr, ptr + size);
> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       }
> +}
> +
>  /*
>   * Allocate a DMA buffer for 'dev' of size 'size' using the
>   * specified gfp mask.  Note that 'size' must be page aligned.
> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>  {
>         unsigned long order = get_order(size);
>         struct page *page, *p, *e;
> -       void *ptr;
>         u64 mask = get_coherent_dma_mask(dev);
>
>  #ifdef CONFIG_DMA_API_DEBUG
> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>         for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
>                 __free_page(p);
>
> -       /*
> -        * Ensure that the allocated pages are zeroed, and that any data
> -        * lurking in the kernel direct-mapped region is invalidated.
> -        */
> -       ptr = page_address(page);
> -       memset(ptr, 0, size);
> -       dmac_flush_range(ptr, ptr + size);
> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       __dma_clear_buffer(page, size);
>
>         return page;
>  }
> @@ -347,7 +357,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
>                 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
>
>                 pte = consistent_pte[idx] + off;
> -               c->vm_pages = page;
> +               c->priv = page;
>
>                 do {
>                         BUG_ON(!pte_none(*pte));
> @@ -460,6 +470,14 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>         return addr;
>  }
>
> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
> +{
> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> +                           pgprot_writecombine(prot) :
> +                           pgprot_dmacoherent(prot);
> +       return prot;
> +}
> +
>  /*
>   * Allocate DMA-coherent memory space and return both the kernel remapped
>   * virtual and bus address for that space.
> @@ -467,9 +485,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>                     gfp_t gfp, struct dma_attrs *attrs)
>  {
> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> -                       pgprot_writecombine(pgprot_kernel) :
> -                       pgprot_dmacoherent(pgprot_kernel);
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>         void *memory;
>
>         if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -498,13 +514,14 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
>         c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>         if (c) {
>                 unsigned long off = vma->vm_pgoff;
> +               struct page *pages = c->priv;
>
>                 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>
>                 if (off < kern_size &&
>                     user_size <= (kern_size - off)) {
>                         ret = remap_pfn_range(vma, vma->vm_start,
> -                                             page_to_pfn(c->vm_pages) + off,
> +                                             page_to_pfn(pages) + off,
>                                               user_size << PAGE_SHIFT,
>                                               vma->vm_page_prot);
>                 }
> @@ -643,6 +660,9 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
>         int i, j;
>
>         for_each_sg(sg, s, nents, i) {
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> +               s->dma_length = s->length;
> +#endif
>                 s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>                                                 s->length, dir, attrs);
>                 if (dma_mapping_error(dev, s->dma_address))
> @@ -748,3 +768,681 @@ static int __init dma_debug_do_init(void)
>         return 0;
>  }
>  fs_initcall(dma_debug_do_init);
> +
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +
> +/* IOMMU */
> +
> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
> +                                     size_t size)
> +{
> +       unsigned int order = get_order(size);
> +       unsigned int align = 0;
> +       unsigned int count, start;
> +       unsigned long flags;
> +
> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
> +                (1 << mapping->order) - 1) >> mapping->order;
> +
> +       if (order > mapping->order)
> +               align = (1 << (order - mapping->order)) - 1;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
> +                                          count, align);
> +       if (start > mapping->bits) {
> +               spin_unlock_irqrestore(&mapping->lock, flags);
> +               return ARM_DMA_ERROR;
> +       }
> +
> +       bitmap_set(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +
> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
> +}
> +
> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
> +                              dma_addr_t addr, size_t size)
> +{
> +       unsigned int start = (addr - mapping->base) >>
> +                            (mapping->order + PAGE_SHIFT);
> +       unsigned int count = ((size >> PAGE_SHIFT) +
> +                             (1 << mapping->order) - 1) >> mapping->order;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       bitmap_clear(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +}
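
(To make the granularity arithmetic above concrete, a worked example
with my assumed values, PAGE_SHIFT == 12:

	mapping->order = 1             -> one bitmap bit covers 8KiB of IOVA
	size = 20KiB                   -> PAGE_ALIGN(size) >> PAGE_SHIFT = 5 pages
	count = (5 + 2 - 1) >> 1 = 3   -> 3 bits set, 24KiB reserved
	get_order(20KiB) = 3 > 1       -> align = (1 << 2) - 1 = 3,
	                                  so 'start' lands on a 32KiB boundary
	returned iova = base + (start << (1 + PAGE_SHIFT))
)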
> +
> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
> +{
> +       struct page **pages;
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i=0;
> +
> +       if (array_size <= PAGE_SIZE)
> +               pages = kzalloc(array_size, gfp);
> +       else
> +               pages = vzalloc(array_size);
> +       if (!pages)
> +               return NULL;
> +
> +       while (count) {
> +               int j, order = __ffs(count);
> +
> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
> +               while (!pages[i] && order)
> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
> +               if (!pages[i])
> +                       goto error;
> +
> +               if (order)
> +                       split_page(pages[i], order);
> +               j = 1 << order;
> +               while (--j)
> +                       pages[i + j] = pages[i] + j;
> +
> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
> +               i += 1 << order;
> +               count -= 1 << order;
> +       }
> +
> +       return pages;
> +error:
> +       while (--i)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return NULL;
> +}
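
(A worked trace of the chunking loop above, with my numbers: for
size = 48KiB, count = 12 pages = 0b1100, so

	pass 1: __ffs(12) = 2 -> try an order-2 block (4 pages), count -> 8
	pass 2: __ffs(8)  = 3 -> try an order-3 block (8 pages), count -> 0

each block is split_page()d so pages[] always holds order-0 pages, and
under memory pressure the order is stepped down towards 0 before
failing.)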
> +
> +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
> +{
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i;
> +       for (i=0; i < count; i++)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return 0;
> +}
> +
> +/*
> + * Create a CPU mapping for a specified pages
> + */
> +static void *
> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
> +{
> +       struct arm_vmregion *c;
> +       size_t align;
> +       size_t count = size >> PAGE_SHIFT;
> +       int bit;
> +
> +       if (!consistent_pte[0]) {
> +               pr_err("%s: not initialised\n", __func__);
> +               dump_stack();
> +               return NULL;
> +       }
> +
> +       /*
> +        * Align the virtual region allocation - maximum alignment is
> +        * a section size, minimum is a page size.  This helps reduce
> +        * fragmentation of the DMA space, and also prevents allocations
> +        * smaller than a section from crossing a section boundary.
> +        */
> +       bit = fls(size - 1);
> +       if (bit > SECTION_SHIFT)
> +               bit = SECTION_SHIFT;
> +       align = 1 << bit;
> +
> +       /*
> +        * Allocate a virtual address in the consistent mapping region.
> +        */
> +       c = arm_vmregion_alloc(&consistent_head, align, size,
> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
> +       if (c) {
> +               pte_t *pte;
> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
> +               int i = 0;
> +               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
> +
> +               pte = consistent_pte[idx] + off;
> +               c->priv = pages;
> +
> +               do {
> +                       BUG_ON(!pte_none(*pte));
> +
> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
> +                       pte++;
> +                       off++;
> +                       i++;
> +                       if (off >= PTRS_PER_PTE) {
> +                               off = 0;
> +                               pte = consistent_pte[++idx];
> +                       }
> +               } while (i < count);
> +
> +               dsb();
> +
> +               return (void *)c->vm_start;
> +       }
> +       return NULL;
> +}
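
(Two assumed data points for the alignment rule above, taking
SECTION_SHIFT == 20, i.e. 1MiB sections:

	size = 80KiB -> fls(80KiB - 1) = 17       -> align = 128KiB
	size = 3MiB  -> fls(3MiB - 1) = 22 -> 20  -> align = 1MiB
)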
> +
> +/*
> + * Create a mapping in device IO address space for specified pages
> + */
> +static dma_addr_t
> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +       dma_addr_t dma_addr, iova;
> +       int i, ret = ARM_DMA_ERROR;
> +
> +       dma_addr = __alloc_iova(mapping, size);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       iova = dma_addr;
> +       for (i=0; i<count; ) {
> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
> +               phys_addr_t phys = page_to_phys(pages[i]);
> +               unsigned int len, j;
> +
> +               for (j = i + 1; j < count; j++, next_pfn++)
> +                       if (page_to_pfn(pages[j]) != next_pfn)
> +                               break;
> +
> +               len = (j - i) << PAGE_SHIFT;
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               iova += len;
> +               i = j;
> +       }
> +       return dma_addr;
> +fail:
> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
> +       __free_iova(mapping, dma_addr, size);
> +       return ARM_DMA_ERROR;
> +}
> +
> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +
> +       /*
> +        * add optional in-page offset from iova to size and align
> +        * result to page size
> +        */
> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
> +       iova &= PAGE_MASK;
> +
> +       iommu_unmap(mapping->domain, iova, size);
> +       __free_iova(mapping, iova, size);
> +       return 0;
> +}
> +
> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> +       struct page **pages;
> +       void *addr = NULL;
> +
> +       *handle = ARM_DMA_ERROR;
> +       size = PAGE_ALIGN(size);
> +
> +       pages = __iommu_alloc_buffer(dev, size, gfp);
> +       if (!pages)
> +               return NULL;
> +
> +       *handle = __iommu_create_mapping(dev, pages, size);
> +       if (*handle == ARM_DMA_ERROR)
> +               goto err_buffer;
> +
> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
> +       if (!addr)
> +               goto err_mapping;
> +
> +       return addr;
> +
> +err_mapping:
> +       __iommu_remove_mapping(dev, *handle, size);
> +err_buffer:
> +       __iommu_free_buffer(dev, pages, size);
> +       return NULL;
> +}
> +
> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long usize = vma->vm_end - vma->vm_start;
> +               int i = 0;
> +
> +               do {
> +                       int ret;
> +
> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n", ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +                       usize -= PAGE_SIZE;
> +               } while (usize > 0);
> +       }
> +       return 0;
> +}
> +
> +/*
> + * free a page as defined by the above mapping.
> + * Must not be called with IRQs disabled.
> + */
> +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> +                         dma_addr_t handle, struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +       size = PAGE_ALIGN(size);
> +
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +       if (c) {
> +               struct page **pages = c->priv;
> +               __dma_free_remap(cpu_addr, size);
> +               __iommu_remove_mapping(dev, handle, size);
> +               __iommu_free_buffer(dev, pages, size);
> +       }
> +}
> +
> +/*
> + * Map a part of the scatter-gather list into contiguous io address space
> + */
> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
> +                         size_t size, dma_addr_t *handle,
> +                         enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova, iova_base;
> +       int ret = 0;
> +       unsigned int count;
> +       struct scatterlist *s;
> +
> +       size = PAGE_ALIGN(size);
> +       *handle = ARM_DMA_ERROR;
> +
> +       iova_base = iova = __alloc_iova(mapping, size);
> +       if (iova == ARM_DMA_ERROR)
> +               return -ENOMEM;
> +
> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s))
> +       {
> +               phys_addr_t phys = page_to_phys(sg_page(s));
> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
> +
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               count += len >> PAGE_SHIFT;
> +               iova += len;
> +       }
> +       *handle = iova_base;
> +
> +       return 0;
> +fail:
> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
> +       __free_iova(mapping, iova_base, size);
> +       return ret;
> +}
> +
> +/**
> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map
> + * @dir: DMA transfer direction
> + *
> + * Map a set of buffers described by scatterlist in streaming mode for DMA.
> + * The scatter gather list elements are merged together (if possible) and
> + * tagged with the appropriate dma address and length. They are obtained via
> + * sg_dma_{address,length}.
> + */
> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
> +       int i, count = 0;
> +       unsigned int offset = s->offset;
> +       unsigned int size = s->offset + s->length;
> +       unsigned int max = dma_get_max_seg_size(dev);
> +
> +       for (i = 1; i < nents; i++) {
> +               s->dma_address = ARM_DMA_ERROR;
> +               s->dma_length = 0;
> +
> +               s = sg_next(s);
> +
> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
> +                       if (__map_sg_chunk(dev, start, size, &dma->dma_address,
> +                           dir) < 0)
> +                               goto bad_mapping;
> +
> +                       dma->dma_address += offset;
> +                       dma->dma_length = size - offset;
> +
> +                       size = offset = s->offset;
> +                       start = s;
> +                       dma = sg_next(dma);
> +                       count += 1;
> +               }
> +               size += s->length;
> +       }
> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
> +               goto bad_mapping;
> +
> +       dma->dma_address += offset;
> +       dma->dma_length = size - offset;
> +
> +       return count+1;
> +
> +bad_mapping:
> +       for_each_sg(sg, s, count, i)
> +               __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
> +       return 0;
> +}
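
(Since the merging means the call can return fewer DMA segments than
nents, a consumer-side sketch may help; hw_queue_segment() is a
hypothetical driver hook of mine, not anything in this patch:)

	#include <linux/dma-mapping.h>
	#include <linux/scatterlist.h>

	static int example_submit(struct device *dev, struct scatterlist *sgl,
				  int nents)
	{
		struct scatterlist *s;
		int i, mapped;

		mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
		if (!mapped)
			return -ENOMEM;

		/* Walk only the 'mapped' merged segments... */
		for_each_sg(sgl, s, mapped, i)
			hw_queue_segment(sg_dma_address(s), sg_dma_len(s));

		/* ...but unmap with the original nents, per the DMA API. */
		dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
		return 0;
	}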
> +
> +/**
> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + *
> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
> + * rules concerning calls here are the same as for dma_unmap_single().
> + */
> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                       enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i) {
> +               if (sg_dma_len(s))
> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
> +                                              sg_dma_len(s));
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> +                                             s->length, dir);
> +       }
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_cpu
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
> +
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_device
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +}
> +
> +
> +/**
> + * arm_iommu_map_page
> + * @dev: valid struct device pointer
> + * @page: page that buffer resides in
> + * @offset: offset into page for start of buffer
> + * @size: size of buffer to map
> + * @dir: DMA transfer direction
> + *
> + * IOMMU aware version of arm_dma_map_page()
> + */
> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
> +            unsigned long offset, size_t size, enum dma_data_direction dir,
> +            struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t dma_addr;
> +       int ret, len = PAGE_ALIGN(size + offset);
> +
> +       if (!arch_is_coherent())
> +               __dma_page_cpu_to_dev(page, offset, size, dir);
> +
> +       dma_addr = __alloc_iova(mapping, len);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
> +       if (ret < 0)
> +               goto fail;
> +
> +       return dma_addr + offset;
> +fail:
> +       __free_iova(mapping, dma_addr, len);
> +       return ARM_DMA_ERROR;
> +}
> +
> +/**
> + * arm_iommu_unmap_page
> + * @dev: valid struct device pointer
> + * @handle: DMA address of buffer
> + * @size: size of buffer (same as passed to dma_map_page)
> + * @dir: DMA transfer direction (same as passed to dma_map_page)
> + *
> + * IOMMU aware version of arm_dma_unmap_page()
> + */
> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
> +               size_t size, enum dma_data_direction dir,
> +               struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       int offset = handle & ~PAGE_MASK;
> +       int len = PAGE_ALIGN(size + offset);
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +
> +       iommu_unmap(mapping->domain, iova, len);
> +       __free_iova(mapping, iova, len);
> +}
> +
> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +}
> +
> +static void arm_iommu_sync_single_for_device(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       __dma_page_cpu_to_dev(page, offset, size, dir);
> +}
> +
> +struct dma_map_ops iommu_ops = {
> +       .alloc          = arm_iommu_alloc_attrs,
> +       .free           = arm_iommu_free_attrs,
> +       .mmap           = arm_iommu_mmap_attrs,
> +
> +       .map_page               = arm_iommu_map_page,
> +       .unmap_page             = arm_iommu_unmap_page,
> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
> +
> +       .map_sg                 = arm_iommu_map_sg,
> +       .unmap_sg               = arm_iommu_unmap_sg,
> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
> +};
> +
> +/**
> + * arm_iommu_create_mapping
> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
> + * @base: start address of the valid IO address space
> + * @size: size of the valid IO address space
> + * @order: accuracy of the IO addresses allocations
> + *
> + * Creates a mapping structure which holds information about used/unused
> + * IO address ranges, which is required to perform memory allocation and
> + * mapping with IOMMU aware functions.
> + *
> + * The client device need to be attached to the mapping with
> + * arm_iommu_attach_device function.
> + */
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order)
> +{
> +       unsigned int count = (size >> PAGE_SHIFT) - order;
> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
> +       struct dma_iommu_mapping *mapping;
> +       int err = -ENOMEM;
> +
> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
> +       if (!mapping)
> +               goto err;
> +
> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +       if (!mapping->bitmap)
> +               goto err2;
> +
> +       mapping->base = base;
> +       mapping->bits = bitmap_size;

Shouldn't the above be as below? "bitmap_size" is a size in bytes, but
"mapping->bits" is used as a bit count by bitmap_find_next_zero_area()
in __alloc_iova().

From 093c77ac6f19899679f2f2447a9d2c684eab7b2e Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <hdoyu@nvidia.com>
Date: Mon, 5 Mar 2012 13:04:38 +0200
Subject: [PATCH 1/1] dma-mapping: Fix mapping->bits size

The number of bits should be multiplied by BITS_PER_BYTE.

Signed-off-by: Hiroshi DOYU <hdoyu@nvidia.com>
---
 arch/arm/mm/dma-mapping.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e55f425..5ec7747 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1495,7 +1495,7 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
 		goto err2;
 
 	mapping->base = base;
-	mapping->bits = bitmap_size;
+	mapping->bits = BITS_PER_BYTE * bitmap_size;
 	mapping->order = order;
 	spin_lock_init(&mapping->lock);
 
-- 
1.7.5.4
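
To put numbers on the impact (my example): for a 128MiB window with
order = 0, count = 32768 bits, and BITS_TO_LONGS(32768) * sizeof(long)
= 4096 bytes on 32-bit ARM. With the current code mapping->bits becomes
4096 instead of 32768, so __alloc_iova() can never allocate beyond the
first 16MiB, i.e. 1/BITS_PER_BYTE of the window.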

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Hiroshi Doyu <hdoyu@nvidia.com>
To: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: "linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>,
	"linaro-mm-sig@lists.linaro.org" <linaro-mm-sig@lists.linaro.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
	"linux-samsung-soc@vger.kernel.org"
	<linux-samsung-soc@vger.kernel.org>,
	"iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	Shariq Hasnain <shariq.hasnain@linaro.org>,
	Arnd Bergmann <arnd@arndb.de>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Krishna Reddy <vdumpa@nvidia.com>,
	Kyungmin Park <kyungmin.park@samsung.com>,
	Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
	Russell King - ARM Linux <linux@arm.linux.org.uk>,
	KyongHo Cho <pullip.cho@samsung.com>,
	Chunsang Jeong <chunsang.jeong@linaro.org>
Subject: Re: [PATCHv7 9/9] ARM: dma-mapping: add support for IOMMU mapper
Date: Mon, 5 Mar 2012 13:47:21 +0200	[thread overview]
Message-ID: <20120305134721.0ab0d0e6de56fa30250059b1@nvidia.com> (raw)
Message-ID: <20120305114721.rAp9uX8AgFTTln6LisrBMOEK-bsHMqVL1zZ91UfWCgI@z> (raw)
In-Reply-To: <1330527862-16234-10-git-send-email-m.szyprowski@samsung.com>

Hi Marek,

On Wed, 29 Feb 2012 16:04:22 +0100
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> This patch add a complete implementation of DMA-mapping API for
> devices that have IOMMU support. All DMA-mapping calls are supported.
>
> This patch contains some of the code kindly provided by Krishna Reddy
> <vdumpa@nvidia.com> and Andrzej Pietrasiewicz <andrzej.p@samsung.com>
>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
>  arch/arm/Kconfig                 |    8 +
>  arch/arm/include/asm/device.h    |    3 +
>  arch/arm/include/asm/dma-iommu.h |   34 ++
>  arch/arm/mm/dma-mapping.c        |  726 +++++++++++++++++++++++++++++++++++++-
>  arch/arm/mm/vmregion.h           |    2 +-
>  5 files changed, 758 insertions(+), 15 deletions(-)
>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 59102fb..5d9a0b6 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -44,6 +44,14 @@ config ARM
>  config ARM_HAS_SG_CHAIN
>         bool
>
> +config NEED_SG_DMA_LENGTH
> +       bool
> +
> +config ARM_DMA_USE_IOMMU
> +       select NEED_SG_DMA_LENGTH
> +       select ARM_HAS_SG_CHAIN
> +       bool
> +
>  config HAVE_PWM
>         bool
>
> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
> index 6e2cb0e..b69c0d3 100644
> --- a/arch/arm/include/asm/device.h
> +++ b/arch/arm/include/asm/device.h
> @@ -14,6 +14,9 @@ struct dev_archdata {
>  #ifdef CONFIG_IOMMU_API
>         void *iommu; /* private IOMMU data */
>  #endif
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +       struct dma_iommu_mapping        *mapping;
> +#endif
>  };
>
>  struct omap_device;
> diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
> new file mode 100644
> index 0000000..799b094
> --- /dev/null
> +++ b/arch/arm/include/asm/dma-iommu.h
> @@ -0,0 +1,34 @@
> +#ifndef ASMARM_DMA_IOMMU_H
> +#define ASMARM_DMA_IOMMU_H
> +
> +#ifdef __KERNEL__
> +
> +#include <linux/mm_types.h>
> +#include <linux/scatterlist.h>
> +#include <linux/dma-debug.h>
> +#include <linux/kmemcheck.h>
> +
> +struct dma_iommu_mapping {
> +       /* iommu specific data */
> +       struct iommu_domain     *domain;
> +
> +       void                    *bitmap;
> +       size_t                  bits;
> +       unsigned int            order;
> +       dma_addr_t              base;
> +
> +       spinlock_t              lock;
> +       struct kref             kref;
> +};
> +
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order);
> +
> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
> +
> +int arm_iommu_attach_device(struct device *dev,
> +                                       struct dma_iommu_mapping *mapping);
> +
> +#endif /* __KERNEL__ */
> +#endif
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 67fe7e2..9b0db81 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -19,6 +19,8 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/highmem.h>
>  #include <linux/slab.h>
> +#include <linux/iommu.h>
> +#include <linux/vmalloc.h>
>
>  #include <asm/memory.h>
>  #include <asm/highmem.h>
> @@ -26,6 +28,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/sizes.h>
>  #include <asm/mach/arch.h>
> +#include <asm/dma-iommu.h>
>
>  #include "mm.h"
>
> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>         return mask;
>  }
>
> +static void __dma_clear_buffer(struct page *page, size_t size)
> +{
> +       void *ptr;
> +       /*
> +        * Ensure that the allocated pages are zeroed, and that any data
> +        * lurking in the kernel direct-mapped region is invalidated.
> +        */
> +       ptr = page_address(page);
> +       if (ptr) {
> +               memset(ptr, 0, size);
> +               dmac_flush_range(ptr, ptr + size);
> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       }
> +}
> +
>  /*
>   * Allocate a DMA buffer for 'dev' of size 'size' using the
>   * specified gfp mask.  Note that 'size' must be page aligned.
> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>  {
>         unsigned long order = get_order(size);
>         struct page *page, *p, *e;
> -       void *ptr;
>         u64 mask = get_coherent_dma_mask(dev);
>
>  #ifdef CONFIG_DMA_API_DEBUG
> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>         for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
>                 __free_page(p);
>
> -       /*
> -        * Ensure that the allocated pages are zeroed, and that any data
> -        * lurking in the kernel direct-mapped region is invalidated.
> -        */
> -       ptr = page_address(page);
> -       memset(ptr, 0, size);
> -       dmac_flush_range(ptr, ptr + size);
> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       __dma_clear_buffer(page, size);
>
>         return page;
>  }
> @@ -347,7 +357,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
>                 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
>
>                 pte = consistent_pte[idx] + off;
> -               c->vm_pages = page;
> +               c->priv = page;
>
>                 do {
>                         BUG_ON(!pte_none(*pte));
> @@ -460,6 +470,14 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>         return addr;
>  }
>
> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
> +{
> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> +                           pgprot_writecombine(prot) :
> +                           pgprot_dmacoherent(prot);
> +       return prot;
> +}
> +
>  /*
>   * Allocate DMA-coherent memory space and return both the kernel remapped
>   * virtual and bus address for that space.
> @@ -467,9 +485,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>                     gfp_t gfp, struct dma_attrs *attrs)
>  {
> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> -                       pgprot_writecombine(pgprot_kernel) :
> -                       pgprot_dmacoherent(pgprot_kernel);
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>         void *memory;
>
>         if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -498,13 +514,14 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
>         c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>         if (c) {
>                 unsigned long off = vma->vm_pgoff;
> +               struct page *pages = c->priv;
>
>                 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>
>                 if (off < kern_size &&
>                     user_size <= (kern_size - off)) {
>                         ret = remap_pfn_range(vma, vma->vm_start,
> -                                             page_to_pfn(c->vm_pages) + off,
> +                                             page_to_pfn(pages) + off,
>                                               user_size << PAGE_SHIFT,
>                                               vma->vm_page_prot);
>                 }
> @@ -643,6 +660,9 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
>         int i, j;
>
>         for_each_sg(sg, s, nents, i) {
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> +               s->dma_length = s->length;
> +#endif
>                 s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>                                                 s->length, dir, attrs);
>                 if (dma_mapping_error(dev, s->dma_address))
> @@ -748,3 +768,681 @@ static int __init dma_debug_do_init(void)
>         return 0;
>  }
>  fs_initcall(dma_debug_do_init);
> +
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +
> +/* IOMMU */
> +
> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
> +                                     size_t size)
> +{
> +       unsigned int order = get_order(size);
> +       unsigned int align = 0;
> +       unsigned int count, start;
> +       unsigned long flags;
> +
> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
> +                (1 << mapping->order) - 1) >> mapping->order;
> +
> +       if (order > mapping->order)
> +               align = (1 << (order - mapping->order)) - 1;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
> +                                          count, align);
> +       if (start > mapping->bits) {
> +               spin_unlock_irqrestore(&mapping->lock, flags);
> +               return ARM_DMA_ERROR;
> +       }
> +
> +       bitmap_set(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +
> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
> +}
> +
> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
> +                              dma_addr_t addr, size_t size)
> +{
> +       unsigned int start = (addr - mapping->base) >>
> +                            (mapping->order + PAGE_SHIFT);
> +       unsigned int count = ((size >> PAGE_SHIFT) +
> +                             (1 << mapping->order) - 1) >> mapping->order;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       bitmap_clear(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +}
> +
> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
> +{
> +       struct page **pages;
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i=0;
> +
> +       if (array_size <= PAGE_SIZE)
> +               pages = kzalloc(array_size, gfp);
> +       else
> +               pages = vzalloc(array_size);
> +       if (!pages)
> +               return NULL;
> +
> +       while (count) {
> +               int j, order = __ffs(count);
> +
> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
> +               while (!pages[i] && order)
> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
> +               if (!pages[i])
> +                       goto error;
> +
> +               if (order)
> +                       split_page(pages[i], order);
> +               j = 1 << order;
> +               while (--j)
> +                       pages[i + j] = pages[i] + j;
> +
> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
> +               i += 1 << order;
> +               count -= 1 << order;
> +       }
> +
> +       return pages;
> +error:
> +       while (--i)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return NULL;
> +}
> +
> +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
> +{
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i;
> +       for (i=0; i < count; i++)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return 0;
> +}
> +
> +/*
> + * Create a CPU mapping for a specified pages
> + */
> +static void *
> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
> +{
> +       struct arm_vmregion *c;
> +       size_t align;
> +       size_t count = size >> PAGE_SHIFT;
> +       int bit;
> +
> +       if (!consistent_pte[0]) {
> +               pr_err("%s: not initialised\n", __func__);
> +               dump_stack();
> +               return NULL;
> +       }
> +
> +       /*
> +        * Align the virtual region allocation - maximum alignment is
> +        * a section size, minimum is a page size.  This helps reduce
> +        * fragmentation of the DMA space, and also prevents allocations
> +        * smaller than a section from crossing a section boundary.
> +        */
> +       bit = fls(size - 1);
> +       if (bit > SECTION_SHIFT)
> +               bit = SECTION_SHIFT;
> +       align = 1 << bit;
> +
> +       /*
> +        * Allocate a virtual address in the consistent mapping region.
> +        */
> +       c = arm_vmregion_alloc(&consistent_head, align, size,
> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
> +       if (c) {
> +               pte_t *pte;
> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
> +               int i = 0;
> +               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
> +
> +               pte = consistent_pte[idx] + off;
> +               c->priv = pages;
> +
> +               do {
> +                       BUG_ON(!pte_none(*pte));
> +
> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
> +                       pte++;
> +                       off++;
> +                       i++;
> +                       if (off >= PTRS_PER_PTE) {
> +                               off = 0;
> +                               pte = consistent_pte[++idx];
> +                       }
> +               } while (i < count);
> +
> +               dsb();
> +
> +               return (void *)c->vm_start;
> +       }
> +       return NULL;
> +}
> +
> +/*
> + * Create a mapping in device IO address space for specified pages
> + */
> +static dma_addr_t
> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +       dma_addr_t dma_addr, iova;
> +       int i, ret = ARM_DMA_ERROR;
> +
> +       dma_addr = __alloc_iova(mapping, size);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       iova = dma_addr;
> +       for (i=0; i<count; ) {
> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
> +               phys_addr_t phys = page_to_phys(pages[i]);
> +               unsigned int len, j;
> +
> +               for (j = i + 1; j < count; j++, next_pfn++)
> +                       if (page_to_pfn(pages[j]) != next_pfn)
> +                               break;
> +
> +               len = (j - i) << PAGE_SHIFT;
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               iova += len;
> +               i = j;
> +       }
> +       return dma_addr;
> +fail:
> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
> +       __free_iova(mapping, dma_addr, size);
> +       return ARM_DMA_ERROR;
> +}
> +
> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +
> +       /*
> +        * add optional in-page offset from iova to size and align
> +        * result to page size
> +        */
> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
> +       iova &= PAGE_MASK;
> +
> +       iommu_unmap(mapping->domain, iova, size);
> +       __free_iova(mapping, iova, size);
> +       return 0;
> +}
> +
> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> +       struct page **pages;
> +       void *addr = NULL;
> +
> +       *handle = ARM_DMA_ERROR;
> +       size = PAGE_ALIGN(size);
> +
> +       pages = __iommu_alloc_buffer(dev, size, gfp);
> +       if (!pages)
> +               return NULL;
> +
> +       *handle = __iommu_create_mapping(dev, pages, size);
> +       if (*handle == ARM_DMA_ERROR)
> +               goto err_buffer;
> +
> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
> +       if (!addr)
> +               goto err_mapping;
> +
> +       return addr;
> +
> +err_mapping:
> +       __iommu_remove_mapping(dev, *handle, size);
> +err_buffer:
> +       __iommu_free_buffer(dev, pages, size);
> +       return NULL;
> +}
> +
> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long usize = vma->vm_end - vma->vm_start;
> +               int i = 0;
> +
> +               do {
> +                       int ret;
> +
> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n", ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +                       usize -= PAGE_SIZE;
> +               } while (usize > 0);
> +       }
> +       return 0;
> +}
> +
> +/*
> + * free a page as defined by the above mapping.
> + * Must not be called with IRQs disabled.
> + */
> +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> +                         dma_addr_t handle, struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +       size = PAGE_ALIGN(size);
> +
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +       if (c) {
> +               struct page **pages = c->priv;
> +               __dma_free_remap(cpu_addr, size);
> +               __iommu_remove_mapping(dev, handle, size);
> +               __iommu_free_buffer(dev, pages, size);
> +       }
> +}
> +
> +/*
> + * Map a part of the scatter-gather list into contiguous io address space
> + */
> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
> +                         size_t size, dma_addr_t *handle,
> +                         enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova, iova_base;
> +       int ret = 0;
> +       unsigned int count;
> +       struct scatterlist *s;
> +
> +       size = PAGE_ALIGN(size);
> +       *handle = ARM_DMA_ERROR;
> +
> +       iova_base = iova = __alloc_iova(mapping, size);
> +       if (iova == ARM_DMA_ERROR)
> +               return -ENOMEM;
> +
> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s))
> +       {
> +               phys_addr_t phys = page_to_phys(sg_page(s));
> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
> +
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               count += len >> PAGE_SHIFT;
> +               iova += len;
> +       }
> +       *handle = iova_base;
> +
> +       return 0;
> +fail:
> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
> +       __free_iova(mapping, iova_base, size);
> +       return ret;
> +}
> +
> +/**
> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map
> + * @dir: DMA transfer direction
> + *
> + * Map a set of buffers described by scatterlist in streaming mode for DMA.
> + * The scatter gather list elements are merged together (if possible) and
> + * tagged with the appropriate dma address and length. They are obtained via
> + * sg_dma_{address,length}.
> + */
> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
> +       int i, count = 0;
> +       unsigned int offset = s->offset;
> +       unsigned int size = s->offset + s->length;
> +       unsigned int max = dma_get_max_seg_size(dev);
> +
> +       for (i = 1; i < nents; i++) {
> +               s->dma_address = ARM_DMA_ERROR;
> +               s->dma_length = 0;
> +
> +               s = sg_next(s);
> +
> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
> +                       if (__map_sg_chunk(dev, start, size, &dma->dma_address,
> +                           dir) < 0)
> +                               goto bad_mapping;
> +
> +                       dma->dma_address += offset;
> +                       dma->dma_length = size - offset;
> +
> +                       size = offset = s->offset;
> +                       start = s;
> +                       dma = sg_next(dma);
> +                       count += 1;
> +               }
> +               size += s->length;
> +       }
> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
> +               goto bad_mapping;
> +
> +       dma->dma_address += offset;
> +       dma->dma_length = size - offset;
> +
> +       return count+1;
> +
> +bad_mapping:
> +       for_each_sg(sg, s, count, i)
> +               __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
> +       return 0;
> +}
> +
> +/**
> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + *
> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
> + * rules concerning calls here are the same as for dma_unmap_single().
> + */
> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                       enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i) {
> +               if (sg_dma_len(s))
> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
> +                                              sg_dma_len(s));
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> +                                             s->length, dir);
> +       }
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_cpu
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_device
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +}
> +
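For reference, the intended pairing of the two sg sync hooks when a
non-coherent CPU touches the buffers between transfers (generic DMA API
names, same sgl/nents as at map time):

	dma_sync_sg_for_cpu(dev, sgl, nents, DMA_FROM_DEVICE);
	/* the CPU may now safely read/modify the buffers */
	dma_sync_sg_for_device(dev, sgl, nents, DMA_FROM_DEVICE);
	/* ownership is back with the device for the next transfer */
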
> +/**
> + * arm_iommu_map_page
> + * @dev: valid struct device pointer
> + * @page: page that buffer resides in
> + * @offset: offset into page for start of buffer
> + * @size: size of buffer to map
> + * @dir: DMA transfer direction
> + *
> + * IOMMU aware version of arm_dma_map_page()
> + */
> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
> +            unsigned long offset, size_t size, enum dma_data_direction dir,
> +            struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t dma_addr;
> +       int ret, len = PAGE_ALIGN(size + offset);
> +
> +       if (!arch_is_coherent())
> +               __dma_page_cpu_to_dev(page, offset, size, dir);
> +
> +       dma_addr = __alloc_iova(mapping, len);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
> +       if (ret < 0)
> +               goto fail;
> +
> +       return dma_addr + offset;
> +fail:
> +       __free_iova(mapping, dma_addr, len);
> +       return ARM_DMA_ERROR;
> +}
> +
> +/**
> + * arm_iommu_unmap_page
> + * @dev: valid struct device pointer
> + * @handle: DMA address of buffer
> + * @size: size of buffer (same as passed to dma_map_page)
> + * @dir: DMA transfer direction (same as passed to dma_map_page)
> + *
> + * IOMMU aware version of arm_dma_unmap_page()
> + */
> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
> +               size_t size, enum dma_data_direction dir,
> +               struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       int offset = handle & ~PAGE_MASK;
> +       int len = PAGE_ALIGN(size + offset);
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +
> +       iommu_unmap(mapping->domain, iova, len);
> +       __free_iova(mapping, iova, len);
> +}
> +
> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +}
> +
> +static void arm_iommu_sync_single_for_device(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       __dma_page_cpu_to_dev(page, offset, size, dir);
> +}
> +
> +struct dma_map_ops iommu_ops = {
> +       .alloc          = arm_iommu_alloc_attrs,
> +       .free           = arm_iommu_free_attrs,
> +       .mmap           = arm_iommu_mmap_attrs,
> +
> +       .map_page               = arm_iommu_map_page,
> +       .unmap_page             = arm_iommu_unmap_page,
> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
> +
> +       .map_sg                 = arm_iommu_map_sg,
> +       .unmap_sg               = arm_iommu_unmap_sg,
> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
> +};
> +
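Once a device has been attached to a mapping, the generic dma_* calls on
that device land in the ops above. The platform-side sequence would look
roughly like this (error conventions of arm_iommu_create_mapping()
assumed; base/size values illustrative):

	struct dma_iommu_mapping *mapping;

	mapping = arm_iommu_create_mapping(&platform_bus_type,
					   0x80000000, SZ_128M, 0);
	if (IS_ERR_OR_NULL(mapping))
		return -ENOMEM;

	if (arm_iommu_attach_device(dev, mapping) < 0)
		arm_iommu_release_mapping(mapping);
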
> +/**
> + * arm_iommu_create_mapping
> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
> + * @base: start address of the valid IO address space
> + * @size: size of the valid IO address space
> + * @order: granularity of the IO address allocations (2^order pages)
> + *
> + * Creates a mapping structure which holds information about used/unused
> + * IO address ranges, which is required to perform memory allocation and
> + * mapping with IOMMU aware functions.
> + *
> + * The client device needs to be attached to the mapping with the
> + * arm_iommu_attach_device() function.
> + */
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order)
> +{
> +       unsigned int count = (size >> PAGE_SHIFT) - order;
> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
> +       struct dma_iommu_mapping *mapping;
> +       int err = -ENOMEM;
> +
> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
> +       if (!mapping)
> +               goto err;
> +
> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +       if (!mapping->bitmap)
> +               goto err2;
> +
> +       mapping->base = base;
> +       mapping->bits = bitmap_size;

Shouldn't the above be fixed as below?

From 093c77ac6f19899679f2f2447a9d2c684eab7b2e Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <hdoyu@nvidia.com>
Date: Mon, 5 Mar 2012 13:04:38 +0200
Subject: [PATCH 1/1] dma-mapping: Fix mapping->bits size

The number of bits should be multiplied by BITS_PER_BYTE.

Signed-off-by: Hiroshi DOYU <hdoyu@nvidia.com>
---
 arch/arm/mm/dma-mapping.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e55f425..5ec7747 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1495,7 +1495,7 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
 		goto err2;
 
 	mapping->base = base;
-	mapping->bits = bitmap_size;
+	mapping->bits = BITS_PER_BYTE * bitmap_size;
 	mapping->order = order;
 	spin_lock_init(&mapping->lock);
 
-- 
1.7.5.4
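
To put numbers on it: bitmap_size is in bytes, but __alloc_iova() passes
mapping->bits straight to bitmap_find_next_zero_area() as a bit count.
Taking a 64MiB IO space with order 0 on 32-bit ARM (sizeof(long) == 4):

	count         = SZ_64M >> PAGE_SHIFT       = 16384 bits needed
	bitmap_size   = BITS_TO_LONGS(16384) * 4   =  2048 bytes
	mapping->bits = 2048 as is                 -> only 1/8 of the IO
	                                              space is ever handed out
	mapping->bits = 8 * 2048 = 16384 with fix  -> full range usable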


WARNING: multiple messages have this Message-ID (diff)
From: Hiroshi Doyu <hdoyu@nvidia.com>
To: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: "linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>,
	"linaro-mm-sig@lists.linaro.org" <linaro-mm-sig@lists.linaro.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
	"linux-samsung-soc@vger.kernel.org"
	<linux-samsung-soc@vger.kernel.org>,
	"iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	Shariq Hasnain <shariq.hasnain@linaro.org>,
	Arnd Bergmann <arnd@arndb.de>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Krishna Reddy <vdumpa@nvidia.com>,
	Kyungmin Park <kyungmin.park@samsung.com>,
	Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
	Russell King - ARM Linux <linux@arm.linux.org.uk>,
	KyongHo Cho <pullip.cho@samsung.com>,
	Chunsang Jeong <chunsang.jeong@linaro.org>
Subject: Re: [PATCHv7 9/9] ARM: dma-mapping: add support for IOMMU mapper
Date: Mon, 5 Mar 2012 13:47:21 +0200	[thread overview]
Message-ID: <20120305134721.0ab0d0e6de56fa30250059b1@nvidia.com> (raw)
In-Reply-To: <1330527862-16234-10-git-send-email-m.szyprowski@samsung.com>

Hi Marek,

On Wed, 29 Feb 2012 16:04:22 +0100
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> This patch add a complete implementation of DMA-mapping API for
> devices that have IOMMU support. All DMA-mapping calls are supported.
>
> This patch contains some of the code kindly provided by Krishna Reddy
> <vdumpa@nvidia.com> and Andrzej Pietrasiewicz <andrzej.p@samsung.com>
>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
>  arch/arm/Kconfig                 |    8 +
>  arch/arm/include/asm/device.h    |    3 +
>  arch/arm/include/asm/dma-iommu.h |   34 ++
>  arch/arm/mm/dma-mapping.c        |  726 +++++++++++++++++++++++++++++++++++++-
>  arch/arm/mm/vmregion.h           |    2 +-
>  5 files changed, 758 insertions(+), 15 deletions(-)
>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 59102fb..5d9a0b6 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -44,6 +44,14 @@ config ARM
>  config ARM_HAS_SG_CHAIN
>         bool
>
> +config NEED_SG_DMA_LENGTH
> +       bool
> +
> +config ARM_DMA_USE_IOMMU
> +       select NEED_SG_DMA_LENGTH
> +       select ARM_HAS_SG_CHAIN
> +       bool
> +
>  config HAVE_PWM
>         bool
>
> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
> index 6e2cb0e..b69c0d3 100644
> --- a/arch/arm/include/asm/device.h
> +++ b/arch/arm/include/asm/device.h
> @@ -14,6 +14,9 @@ struct dev_archdata {
>  #ifdef CONFIG_IOMMU_API
>         void *iommu; /* private IOMMU data */
>  #endif
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +       struct dma_iommu_mapping        *mapping;
> +#endif
>  };
>
>  struct omap_device;
> diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
> new file mode 100644
> index 0000000..799b094
> --- /dev/null
> +++ b/arch/arm/include/asm/dma-iommu.h
> @@ -0,0 +1,34 @@
> +#ifndef ASMARM_DMA_IOMMU_H
> +#define ASMARM_DMA_IOMMU_H
> +
> +#ifdef __KERNEL__
> +
> +#include <linux/mm_types.h>
> +#include <linux/scatterlist.h>
> +#include <linux/dma-debug.h>
> +#include <linux/kmemcheck.h>
> +
> +struct dma_iommu_mapping {
> +       /* iommu specific data */
> +       struct iommu_domain     *domain;
> +
> +       void                    *bitmap;
> +       size_t                  bits;
> +       unsigned int            order;
> +       dma_addr_t              base;
> +
> +       spinlock_t              lock;
> +       struct kref             kref;
> +};
> +
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order);
> +
> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
> +
> +int arm_iommu_attach_device(struct device *dev,
> +                                       struct dma_iommu_mapping *mapping);
> +
> +#endif /* __KERNEL__ */
> +#endif
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 67fe7e2..9b0db81 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -19,6 +19,8 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/highmem.h>
>  #include <linux/slab.h>
> +#include <linux/iommu.h>
> +#include <linux/vmalloc.h>
>
>  #include <asm/memory.h>
>  #include <asm/highmem.h>
> @@ -26,6 +28,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/sizes.h>
>  #include <asm/mach/arch.h>
> +#include <asm/dma-iommu.h>
>
>  #include "mm.h"
>
> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>         return mask;
>  }
>
> +static void __dma_clear_buffer(struct page *page, size_t size)
> +{
> +       void *ptr;
> +       /*
> +        * Ensure that the allocated pages are zeroed, and that any data
> +        * lurking in the kernel direct-mapped region is invalidated.
> +        */
> +       ptr = page_address(page);
> +       if (ptr) {
> +               memset(ptr, 0, size);
> +               dmac_flush_range(ptr, ptr + size);
> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       }
> +}
> +
>  /*
>   * Allocate a DMA buffer for 'dev' of size 'size' using the
>   * specified gfp mask.  Note that 'size' must be page aligned.
> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>  {
>         unsigned long order = get_order(size);
>         struct page *page, *p, *e;
> -       void *ptr;
>         u64 mask = get_coherent_dma_mask(dev);
>
>  #ifdef CONFIG_DMA_API_DEBUG
> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>         for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
>                 __free_page(p);
>
> -       /*
> -        * Ensure that the allocated pages are zeroed, and that any data
> -        * lurking in the kernel direct-mapped region is invalidated.
> -        */
> -       ptr = page_address(page);
> -       memset(ptr, 0, size);
> -       dmac_flush_range(ptr, ptr + size);
> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       __dma_clear_buffer(page, size);
>
>         return page;
>  }
> @@ -347,7 +357,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
>                 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
>
>                 pte = consistent_pte[idx] + off;
> -               c->vm_pages = page;
> +               c->priv = page;
>
>                 do {
>                         BUG_ON(!pte_none(*pte));
> @@ -460,6 +470,14 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>         return addr;
>  }
>
> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
> +{
> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> +                           pgprot_writecombine(prot) :
> +                           pgprot_dmacoherent(prot);
> +       return prot;
> +}
> +
>  /*
>   * Allocate DMA-coherent memory space and return both the kernel remapped
>   * virtual and bus address for that space.
> @@ -467,9 +485,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>                     gfp_t gfp, struct dma_attrs *attrs)
>  {
> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> -                       pgprot_writecombine(pgprot_kernel) :
> -                       pgprot_dmacoherent(pgprot_kernel);
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>         void *memory;
>
>         if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -498,13 +514,14 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
>         c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>         if (c) {
>                 unsigned long off = vma->vm_pgoff;
> +               struct page *pages = c->priv;
>
>                 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>
>                 if (off < kern_size &&
>                     user_size <= (kern_size - off)) {
>                         ret = remap_pfn_range(vma, vma->vm_start,
> -                                             page_to_pfn(c->vm_pages) + off,
> +                                             page_to_pfn(pages) + off,
>                                               user_size << PAGE_SHIFT,
>                                               vma->vm_page_prot);
>                 }
> @@ -643,6 +660,9 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
>         int i, j;
>
>         for_each_sg(sg, s, nents, i) {
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> +               s->dma_length = s->length;
> +#endif
>                 s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>                                                 s->length, dir, attrs);
>                 if (dma_mapping_error(dev, s->dma_address))
> @@ -748,3 +768,681 @@ static int __init dma_debug_do_init(void)
>         return 0;
>  }
>  fs_initcall(dma_debug_do_init);
> +
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +
> +/* IOMMU */
> +
> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
> +                                     size_t size)
> +{
> +       unsigned int order = get_order(size);
> +       unsigned int align = 0;
> +       unsigned int count, start;
> +       unsigned long flags;
> +
> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
> +                (1 << mapping->order) - 1) >> mapping->order;
> +
> +       if (order > mapping->order)
> +               align = (1 << (order - mapping->order)) - 1;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
> +                                          count, align);
> +       if (start > mapping->bits) {
> +               spin_unlock_irqrestore(&mapping->lock, flags);
> +               return ARM_DMA_ERROR;
> +       }
> +
> +       bitmap_set(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +
> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
> +}
> +
> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
> +                              dma_addr_t addr, size_t size)
> +{
> +       unsigned int start = (addr - mapping->base) >>
> +                            (mapping->order + PAGE_SHIFT);
> +       unsigned int count = ((size >> PAGE_SHIFT) +
> +                             (1 << mapping->order) - 1) >> mapping->order;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       bitmap_clear(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +}
> +
> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
> +{
> +       struct page **pages;
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i=0;
> +
> +       if (array_size <= PAGE_SIZE)
> +               pages = kzalloc(array_size, gfp);
> +       else
> +               pages = vzalloc(array_size);
> +       if (!pages)
> +               return NULL;
> +
> +       while (count) {
> +               int j, order = __ffs(count);
> +
> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
> +               while (!pages[i] && order)
> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
> +               if (!pages[i])
> +                       goto error;
> +
> +               if (order)
> +                       split_page(pages[i], order);
> +               j = 1 << order;
> +               while (--j)
> +                       pages[i + j] = pages[i] + j;
> +
> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
> +               i += 1 << order;
> +               count -= 1 << order;
> +       }
> +
> +       return pages;
> +error:
> +       while (--i)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return NULL;
> +}
> +
> +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
> +{
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i;
> +       for (i=0; i < count; i++)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return 0;
> +}
> +
> +/*
> + * Create a CPU mapping for a specified pages
> + */
> +static void *
> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
> +{
> +       struct arm_vmregion *c;
> +       size_t align;
> +       size_t count = size >> PAGE_SHIFT;
> +       int bit;
> +
> +       if (!consistent_pte[0]) {
> +               pr_err("%s: not initialised\n", __func__);
> +               dump_stack();
> +               return NULL;
> +       }
> +
> +       /*
> +        * Align the virtual region allocation - maximum alignment is
> +        * a section size, minimum is a page size.  This helps reduce
> +        * fragmentation of the DMA space, and also prevents allocations
> +        * smaller than a section from crossing a section boundary.
> +        */
> +       bit = fls(size - 1);
> +       if (bit > SECTION_SHIFT)
> +               bit = SECTION_SHIFT;
> +       align = 1 << bit;
> +
> +       /*
> +        * Allocate a virtual address in the consistent mapping region.
> +        */
> +       c = arm_vmregion_alloc(&consistent_head, align, size,
> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
> +       if (c) {
> +               pte_t *pte;
> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
> +               int i = 0;
> +               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
> +
> +               pte = consistent_pte[idx] + off;
> +               c->priv = pages;
> +
> +               do {
> +                       BUG_ON(!pte_none(*pte));
> +
> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
> +                       pte++;
> +                       off++;
> +                       i++;
> +                       if (off >= PTRS_PER_PTE) {
> +                               off = 0;
> +                               pte = consistent_pte[++idx];
> +                       }
> +               } while (i < count);
> +
> +               dsb();
> +
> +               return (void *)c->vm_start;
> +       }
> +       return NULL;
> +}
> +
> +/*
> + * Create a mapping in device IO address space for specified pages
> + */
> +static dma_addr_t
> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +       dma_addr_t dma_addr, iova;
> +       int i, ret = ARM_DMA_ERROR;
> +
> +       dma_addr = __alloc_iova(mapping, size);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       iova = dma_addr;
> +       for (i=0; i<count; ) {
> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
> +               phys_addr_t phys = page_to_phys(pages[i]);
> +               unsigned int len, j;
> +
> +               for (j = i + 1; j < count; j++, next_pfn++)
> +                       if (page_to_pfn(pages[j]) != next_pfn)
> +                               break;
> +
> +               len = (j - i) << PAGE_SHIFT;
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               iova += len;
> +               i = j;
> +       }
> +       return dma_addr;
> +fail:
> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
> +       __free_iova(mapping, dma_addr, size);
> +       return ARM_DMA_ERROR;
> +}
> +
> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +
> +       /*
> +        * add optional in-page offset from iova to size and align
> +        * result to page size
> +        */
> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
> +       iova &= PAGE_MASK;
> +
> +       iommu_unmap(mapping->domain, iova, size);
> +       __free_iova(mapping, iova, size);
> +       return 0;
> +}
> +
> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> +       struct page **pages;
> +       void *addr = NULL;
> +
> +       *handle = ARM_DMA_ERROR;
> +       size = PAGE_ALIGN(size);
> +
> +       pages = __iommu_alloc_buffer(dev, size, gfp);
> +       if (!pages)
> +               return NULL;
> +
> +       *handle = __iommu_create_mapping(dev, pages, size);
> +       if (*handle == ARM_DMA_ERROR)
> +               goto err_buffer;
> +
> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
> +       if (!addr)
> +               goto err_mapping;
> +
> +       return addr;
> +
> +err_mapping:
> +       __iommu_remove_mapping(dev, *handle, size);
> +err_buffer:
> +       __iommu_free_buffer(dev, pages, size);
> +       return NULL;
> +}
> +
> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long usize = vma->vm_end - vma->vm_start;
> +               int i = 0;
> +
> +               do {
> +                       int ret;
> +
> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n", ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +                       usize -= PAGE_SIZE;
> +               } while (usize > 0);
> +       }
> +       return 0;
> +}
> +
> +/*
> + * free a page as defined by the above mapping.
> + * Must not be called with IRQs disabled.
> + */
> +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> +                         dma_addr_t handle, struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +       size = PAGE_ALIGN(size);
> +
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +       if (c) {
> +               struct page **pages = c->priv;
> +               __dma_free_remap(cpu_addr, size);
> +               __iommu_remove_mapping(dev, handle, size);
> +               __iommu_free_buffer(dev, pages, size);
> +       }
> +}
> +
> +/*
> + * Map a part of the scatter-gather list into contiguous io address space
> + */
> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
> +                         size_t size, dma_addr_t *handle,
> +                         enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova, iova_base;
> +       int ret = 0;
> +       unsigned int count;
> +       struct scatterlist *s;
> +
> +       size = PAGE_ALIGN(size);
> +       *handle = ARM_DMA_ERROR;
> +
> +       iova_base = iova = __alloc_iova(mapping, size);
> +       if (iova == ARM_DMA_ERROR)
> +               return -ENOMEM;
> +
> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s))
> +       {
> +               phys_addr_t phys = page_to_phys(sg_page(s));
> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
> +
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               count += len >> PAGE_SHIFT;
> +               iova += len;
> +       }
> +       *handle = iova_base;
> +
> +       return 0;
> +fail:
> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
> +       __free_iova(mapping, iova_base, size);
> +       return ret;
> +}
> +
> +/**
> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map
> + * @dir: DMA transfer direction
> + *
> + * Map a set of buffers described by scatterlist in streaming mode for DMA.
> + * The scatter gather list elements are merged together (if possible) and
> + * tagged with the appropriate dma address and length. They are obtained via
> + * sg_dma_{address,length}.
> + */
> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
> +       int i, count = 0;
> +       unsigned int offset = s->offset;
> +       unsigned int size = s->offset + s->length;
> +       unsigned int max = dma_get_max_seg_size(dev);
> +
> +       for (i = 1; i < nents; i++) {
> +               s->dma_address = ARM_DMA_ERROR;
> +               s->dma_length = 0;
> +
> +               s = sg_next(s);
> +
> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
> +                       if (__map_sg_chunk(dev, start, size, &dma->dma_address,
> +                           dir) < 0)
> +                               goto bad_mapping;
> +
> +                       dma->dma_address += offset;
> +                       dma->dma_length = size - offset;
> +
> +                       size = offset = s->offset;
> +                       start = s;
> +                       dma = sg_next(dma);
> +                       count += 1;
> +               }
> +               size += s->length;
> +       }
> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
> +               goto bad_mapping;
> +
> +       dma->dma_address += offset;
> +       dma->dma_length = size - offset;
> +
> +       return count+1;
> +
> +bad_mapping:
> +       for_each_sg(sg, s, count, i)
> +               __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
> +       return 0;
> +}
> +
> +/**
> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + *
> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
> + * rules concerning calls here are the same as for dma_unmap_single().
> + */
> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                       enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i) {
> +               if (sg_dma_len(s))
> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
> +                                              sg_dma_len(s));
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> +                                             s->length, dir);
> +       }
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_cpu
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
> +
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_device
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +}
> +
> +
> +/**
> + * arm_iommu_map_page
> + * @dev: valid struct device pointer
> + * @page: page that buffer resides in
> + * @offset: offset into page for start of buffer
> + * @size: size of buffer to map
> + * @dir: DMA transfer direction
> + *
> + * IOMMU aware version of arm_dma_map_page()
> + */
> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
> +            unsigned long offset, size_t size, enum dma_data_direction dir,
> +            struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t dma_addr;
> +       int ret, len = PAGE_ALIGN(size + offset);
> +
> +       if (!arch_is_coherent())
> +               __dma_page_cpu_to_dev(page, offset, size, dir);
> +
> +       dma_addr = __alloc_iova(mapping, len);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
> +       if (ret < 0)
> +               goto fail;
> +
> +       return dma_addr + offset;
> +fail:
> +       __free_iova(mapping, dma_addr, len);
> +       return ARM_DMA_ERROR;
> +}
> +
> +/**
> + * arm_iommu_unmap_page
> + * @dev: valid struct device pointer
> + * @handle: DMA address of buffer
> + * @size: size of buffer (same as passed to dma_map_page)
> + * @dir: DMA transfer direction (same as passed to dma_map_page)
> + *
> + * IOMMU aware version of arm_dma_unmap_page()
> + */
> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
> +               size_t size, enum dma_data_direction dir,
> +               struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       int offset = handle & ~PAGE_MASK;
> +       int len = PAGE_ALIGN(size + offset);
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +
> +       iommu_unmap(mapping->domain, iova, len);
> +       __free_iova(mapping, iova, len);
> +}
> +
> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +}
> +
> +static void arm_iommu_sync_single_for_device(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       __dma_page_cpu_to_dev(page, offset, size, dir);
> +}
> +
> +struct dma_map_ops iommu_ops = {
> +       .alloc          = arm_iommu_alloc_attrs,
> +       .free           = arm_iommu_free_attrs,
> +       .mmap           = arm_iommu_mmap_attrs,
> +
> +       .map_page               = arm_iommu_map_page,
> +       .unmap_page             = arm_iommu_unmap_page,
> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
> +
> +       .map_sg                 = arm_iommu_map_sg,
> +       .unmap_sg               = arm_iommu_unmap_sg,
> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
> +};
> +
> +/**
> + * arm_iommu_create_mapping
> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
> + * @base: start address of the valid IO address space
> + * @size: size of the valid IO address space
> + * @order: accuracy of the IO addresses allocations
> + *
> + * Creates a mapping structure which holds information about used/unused
> + * IO address ranges, which is required to perform memory allocation and
> + * mapping with IOMMU aware functions.
> + *
> + * The client device need to be attached to the mapping with
> + * arm_iommu_attach_device function.
> + */
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order)
> +{
> +       unsigned int count = (size >> PAGE_SHIFT) - order;
> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
> +       struct dma_iommu_mapping *mapping;
> +       int err = -ENOMEM;
> +
> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
> +       if (!mapping)
> +               goto err;
> +
> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +       if (!mapping->bitmap)
> +               goto err2;
> +
> +       mapping->base = base;
> +       mapping->bits = bitmap_size;

Shouldn't the above be as below?

>From 093c77ac6f19899679f2f2447a9d2c684eab7b2e Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <hdoyu@nvidia.com>
Date: Mon, 5 Mar 2012 13:04:38 +0200
Subject: [PATCH 1/1] dma-mapping: Fix mapping->bits size

Amount of bits should be mutiplied by BITS_PER_BITE.

Signed-off-by: Hiroshi DOYU <hdoyu@nvidia.com>
---
 arch/arm/mm/dma-mapping.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e55f425..5ec7747 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1495,7 +1495,7 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
 		goto err2;
 
 	mapping->base = base;
-	mapping->bits = bitmap_size;
+	mapping->bits = BITS_PER_BYTE * bitmap_size;
 	mapping->order = order;
 	spin_lock_init(&mapping->lock);
 
-- 
1.7.5.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Hiroshi Doyu <hdoyu@nvidia.com>
To: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: "linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>,
	"linaro-mm-sig@lists.linaro.org" <linaro-mm-sig@lists.linaro.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
	"linux-samsung-soc@vger.kernel.org"
	<linux-samsung-soc@vger.kernel.org>,
	"iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	Shariq Hasnain <shariq.hasnain@linaro.org>,
	Arnd Bergmann <arnd@arndb.de>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Krishna Reddy <vdumpa@nvidia.com>,
	Kyungmin Park <kyungmin.park@samsung.com>,
	Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
	Russell King - ARM Linux <linux@arm.linux.org.uk>,
	KyongHo Cho <pullip.cho@samsung.com>,
	Chunsang Jeong <chunsang.jeong@linaro.org>
Subject: Re: [PATCHv7 9/9] ARM: dma-mapping: add support for IOMMU mapper
Date: Mon, 5 Mar 2012 13:47:21 +0200	[thread overview]
Message-ID: <20120305134721.0ab0d0e6de56fa30250059b1@nvidia.com> (raw)
In-Reply-To: <1330527862-16234-10-git-send-email-m.szyprowski@samsung.com>

Hi Marek,

On Wed, 29 Feb 2012 16:04:22 +0100
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> This patch add a complete implementation of DMA-mapping API for
> devices that have IOMMU support. All DMA-mapping calls are supported.
>
> This patch contains some of the code kindly provided by Krishna Reddy
> <vdumpa@nvidia.com> and Andrzej Pietrasiewicz <andrzej.p@samsung.com>
>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
>  arch/arm/Kconfig                 |    8 +
>  arch/arm/include/asm/device.h    |    3 +
>  arch/arm/include/asm/dma-iommu.h |   34 ++
>  arch/arm/mm/dma-mapping.c        |  726 +++++++++++++++++++++++++++++++++++++-
>  arch/arm/mm/vmregion.h           |    2 +-
>  5 files changed, 758 insertions(+), 15 deletions(-)
>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 59102fb..5d9a0b6 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -44,6 +44,14 @@ config ARM
>  config ARM_HAS_SG_CHAIN
>         bool
>
> +config NEED_SG_DMA_LENGTH
> +       bool
> +
> +config ARM_DMA_USE_IOMMU
> +       select NEED_SG_DMA_LENGTH
> +       select ARM_HAS_SG_CHAIN
> +       bool
> +
>  config HAVE_PWM
>         bool
>
> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
> index 6e2cb0e..b69c0d3 100644
> --- a/arch/arm/include/asm/device.h
> +++ b/arch/arm/include/asm/device.h
> @@ -14,6 +14,9 @@ struct dev_archdata {
>  #ifdef CONFIG_IOMMU_API
>         void *iommu; /* private IOMMU data */
>  #endif
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +       struct dma_iommu_mapping        *mapping;
> +#endif
>  };
>
>  struct omap_device;
> diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
> new file mode 100644
> index 0000000..799b094
> --- /dev/null
> +++ b/arch/arm/include/asm/dma-iommu.h
> @@ -0,0 +1,34 @@
> +#ifndef ASMARM_DMA_IOMMU_H
> +#define ASMARM_DMA_IOMMU_H
> +
> +#ifdef __KERNEL__
> +
> +#include <linux/mm_types.h>
> +#include <linux/scatterlist.h>
> +#include <linux/dma-debug.h>
> +#include <linux/kmemcheck.h>
> +
> +struct dma_iommu_mapping {
> +       /* iommu specific data */
> +       struct iommu_domain     *domain;
> +
> +       void                    *bitmap;
> +       size_t                  bits;
> +       unsigned int            order;
> +       dma_addr_t              base;
> +
> +       spinlock_t              lock;
> +       struct kref             kref;
> +};
> +
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order);
> +
> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
> +
> +int arm_iommu_attach_device(struct device *dev,
> +                                       struct dma_iommu_mapping *mapping);
> +
> +#endif /* __KERNEL__ */
> +#endif
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 67fe7e2..9b0db81 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -19,6 +19,8 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/highmem.h>
>  #include <linux/slab.h>
> +#include <linux/iommu.h>
> +#include <linux/vmalloc.h>
>
>  #include <asm/memory.h>
>  #include <asm/highmem.h>
> @@ -26,6 +28,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/sizes.h>
>  #include <asm/mach/arch.h>
> +#include <asm/dma-iommu.h>
>
>  #include "mm.h"
>
> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>         return mask;
>  }
>
> +static void __dma_clear_buffer(struct page *page, size_t size)
> +{
> +       void *ptr;
> +       /*
> +        * Ensure that the allocated pages are zeroed, and that any data
> +        * lurking in the kernel direct-mapped region is invalidated.
> +        */
> +       ptr = page_address(page);
> +       if (ptr) {
> +               memset(ptr, 0, size);
> +               dmac_flush_range(ptr, ptr + size);
> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       }
> +}
> +
>  /*
>   * Allocate a DMA buffer for 'dev' of size 'size' using the
>   * specified gfp mask.  Note that 'size' must be page aligned.
> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>  {
>         unsigned long order = get_order(size);
>         struct page *page, *p, *e;
> -       void *ptr;
>         u64 mask = get_coherent_dma_mask(dev);
>
>  #ifdef CONFIG_DMA_API_DEBUG
> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
>         for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
>                 __free_page(p);
>
> -       /*
> -        * Ensure that the allocated pages are zeroed, and that any data
> -        * lurking in the kernel direct-mapped region is invalidated.
> -        */
> -       ptr = page_address(page);
> -       memset(ptr, 0, size);
> -       dmac_flush_range(ptr, ptr + size);
> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       __dma_clear_buffer(page, size);
>
>         return page;
>  }
> @@ -347,7 +357,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
>                 u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
>
>                 pte = consistent_pte[idx] + off;
> -               c->vm_pages = page;
> +               c->priv = page;
>
>                 do {
>                         BUG_ON(!pte_none(*pte));
> @@ -460,6 +470,14 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>         return addr;
>  }
>
> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
> +{
> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> +                           pgprot_writecombine(prot) :
> +                           pgprot_dmacoherent(prot);
> +       return prot;
> +}
> +
>  /*
>   * Allocate DMA-coherent memory space and return both the kernel remapped
>   * virtual and bus address for that space.
> @@ -467,9 +485,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>                     gfp_t gfp, struct dma_attrs *attrs)
>  {
> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> -                       pgprot_writecombine(pgprot_kernel) :
> -                       pgprot_dmacoherent(pgprot_kernel);
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>         void *memory;
>
>         if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -498,13 +514,14 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
>         c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>         if (c) {
>                 unsigned long off = vma->vm_pgoff;
> +               struct page *pages = c->priv;
>
>                 kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>
>                 if (off < kern_size &&
>                     user_size <= (kern_size - off)) {
>                         ret = remap_pfn_range(vma, vma->vm_start,
> -                                             page_to_pfn(c->vm_pages) + off,
> +                                             page_to_pfn(pages) + off,
>                                               user_size << PAGE_SHIFT,
>                                               vma->vm_page_prot);
>                 }
> @@ -643,6 +660,9 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
>         int i, j;
>
>         for_each_sg(sg, s, nents, i) {
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> +               s->dma_length = s->length;
> +#endif
>                 s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>                                                 s->length, dir, attrs);
>                 if (dma_mapping_error(dev, s->dma_address))
> @@ -748,3 +768,681 @@ static int __init dma_debug_do_init(void)
>         return 0;
>  }
>  fs_initcall(dma_debug_do_init);
> +
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +
> +/* IOMMU */
> +
> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
> +                                     size_t size)
> +{
> +       unsigned int order = get_order(size);
> +       unsigned int align = 0;
> +       unsigned int count, start;
> +       unsigned long flags;
> +
> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
> +                (1 << mapping->order) - 1) >> mapping->order;
> +
> +       if (order > mapping->order)
> +               align = (1 << (order - mapping->order)) - 1;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
> +                                          count, align);
> +       if (start > mapping->bits) {
> +               spin_unlock_irqrestore(&mapping->lock, flags);
> +               return ARM_DMA_ERROR;
> +       }
> +
> +       bitmap_set(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +
> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
> +}
> +
> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
> +                              dma_addr_t addr, size_t size)
> +{
> +       unsigned int start = (addr - mapping->base) >>
> +                            (mapping->order + PAGE_SHIFT);
> +       unsigned int count = ((size >> PAGE_SHIFT) +
> +                             (1 << mapping->order) - 1) >> mapping->order;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       bitmap_clear(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +}
> +
> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
> +{
> +       struct page **pages;
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i=0;
> +
> +       if (array_size <= PAGE_SIZE)
> +               pages = kzalloc(array_size, gfp);
> +       else
> +               pages = vzalloc(array_size);
> +       if (!pages)
> +               return NULL;
> +
> +       while (count) {
> +               int j, order = __ffs(count);
> +
> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
> +               while (!pages[i] && order)
> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
> +               if (!pages[i])
> +                       goto error;
> +
> +               if (order)
> +                       split_page(pages[i], order);
> +               j = 1 << order;
> +               while (--j)
> +                       pages[i + j] = pages[i] + j;
> +
> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
> +               i += 1 << order;
> +               count -= 1 << order;
> +       }
> +
> +       return pages;
> +error:
> +       while (--i)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return NULL;
> +}
> +
> +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
> +{
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i;
> +       for (i=0; i < count; i++)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return 0;
> +}
> +
> +/*
> + * Create a CPU mapping for a specified pages
> + */
> +static void *
> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
> +{
> +       struct arm_vmregion *c;
> +       size_t align;
> +       size_t count = size >> PAGE_SHIFT;
> +       int bit;
> +
> +       if (!consistent_pte[0]) {
> +               pr_err("%s: not initialised\n", __func__);
> +               dump_stack();
> +               return NULL;
> +       }
> +
> +       /*
> +        * Align the virtual region allocation - maximum alignment is
> +        * a section size, minimum is a page size.  This helps reduce
> +        * fragmentation of the DMA space, and also prevents allocations
> +        * smaller than a section from crossing a section boundary.
> +        */
> +       bit = fls(size - 1);
> +       if (bit > SECTION_SHIFT)
> +               bit = SECTION_SHIFT;
> +       align = 1 << bit;
> +
> +       /*
> +        * Allocate a virtual address in the consistent mapping region.
> +        */
> +       c = arm_vmregion_alloc(&consistent_head, align, size,
> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
> +       if (c) {
> +               pte_t *pte;
> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
> +               int i = 0;
> +               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
> +
> +               pte = consistent_pte[idx] + off;
> +               c->priv = pages;
> +
> +               do {
> +                       BUG_ON(!pte_none(*pte));
> +
> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
> +                       pte++;
> +                       off++;
> +                       i++;
> +                       if (off >= PTRS_PER_PTE) {
> +                               off = 0;
> +                               pte = consistent_pte[++idx];
> +                       }
> +               } while (i < count);
> +
> +               dsb();
> +
> +               return (void *)c->vm_start;
> +       }
> +       return NULL;
> +}
> +
> +/*
> + * Create a mapping in device IO address space for specified pages
> + */
> +static dma_addr_t
> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +       dma_addr_t dma_addr, iova;
> +       int i, ret = ARM_DMA_ERROR;
> +
> +       dma_addr = __alloc_iova(mapping, size);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       iova = dma_addr;
> +       for (i=0; i<count; ) {
> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
> +               phys_addr_t phys = page_to_phys(pages[i]);
> +               unsigned int len, j;
> +
> +               for (j = i + 1; j < count; j++, next_pfn++)
> +                       if (page_to_pfn(pages[j]) != next_pfn)
> +                               break;
> +
> +               len = (j - i) << PAGE_SHIFT;
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               iova += len;
> +               i = j;
> +       }
> +       return dma_addr;
> +fail:
> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
> +       __free_iova(mapping, dma_addr, size);
> +       return ARM_DMA_ERROR;
> +}
> +
> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +
> +       /*
> +        * add optional in-page offset from iova to size and align
> +        * result to page size
> +        */
> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
> +       iova &= PAGE_MASK;
> +
> +       iommu_unmap(mapping->domain, iova, size);
> +       __free_iova(mapping, iova, size);
> +       return 0;
> +}
> +
> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> +       struct page **pages;
> +       void *addr = NULL;
> +
> +       *handle = ARM_DMA_ERROR;
> +       size = PAGE_ALIGN(size);
> +
> +       pages = __iommu_alloc_buffer(dev, size, gfp);
> +       if (!pages)
> +               return NULL;
> +
> +       *handle = __iommu_create_mapping(dev, pages, size);
> +       if (*handle == ARM_DMA_ERROR)
> +               goto err_buffer;
> +
> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
> +       if (!addr)
> +               goto err_mapping;
> +
> +       return addr;
> +
> +err_mapping:
> +       __iommu_remove_mapping(dev, *handle, size);
> +err_buffer:
> +       __iommu_free_buffer(dev, pages, size);
> +       return NULL;
> +}
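
For context, a hedged caller-side fragment (not standalone: it assumes
the generic dma_alloc_attrs()/dma_free_attrs() wrappers that the
companion patches in this series route to the .alloc/.free ops, and
error handling is elided):

  DEFINE_DMA_ATTRS(attrs);
  dma_addr_t dma;
  void *cpu;

  /* ask for a write-combined CPU mapping instead of the default */
  dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);

  cpu = dma_alloc_attrs(dev, SZ_64K, &dma, GFP_KERNEL, &attrs);
  if (cpu)
          dma_free_attrs(dev, SZ_64K, cpu, dma, &attrs);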
> +
> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long usize = vma->vm_end - vma->vm_start;
> +               int i = 0;
> +
> +               do {
> +                       int ret;
> +
> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n", ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +                       usize -= PAGE_SIZE;
> +               } while (usize > 0);
> +       }
> +       return 0;
> +}
> +
> +/*
> + * free a page as defined by the above mapping.
> + * Must not be called with IRQs disabled.
> + */
> +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> +                         dma_addr_t handle, struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +       size = PAGE_ALIGN(size);
> +
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +       if (c) {
> +               struct page **pages = c->priv;
> +               __dma_free_remap(cpu_addr, size);
> +               __iommu_remove_mapping(dev, handle, size);
> +               __iommu_free_buffer(dev, pages, size);
> +       }
> +}
> +
> +/*
> + * Map a part of the scatter-gather list into contiguous io address space
> + */
> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
> +                         size_t size, dma_addr_t *handle,
> +                         enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova, iova_base;
> +       int ret = 0;
> +       unsigned int count;
> +       struct scatterlist *s;
> +
> +       size = PAGE_ALIGN(size);
> +       *handle = ARM_DMA_ERROR;
> +
> +       iova_base = iova = __alloc_iova(mapping, size);
> +       if (iova == ARM_DMA_ERROR)
> +               return -ENOMEM;
> +
> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s))
> +       {
> +               phys_addr_t phys = page_to_phys(sg_page(s));
> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
> +
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               count += len >> PAGE_SHIFT;
> +               iova += len;
> +       }
> +       *handle = iova_base;
> +
> +       return 0;
> +fail:
> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
> +       __free_iova(mapping, iova_base, size);
> +       return ret;
> +}
> +
> +/**
> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map
> + * @dir: DMA transfer direction
> + *
> + * Map a set of buffers described by scatterlist in streaming mode for DMA.
> + * The scatter gather list elements are merged together (if possible) and
> + * tagged with the appropriate dma address and length. They are obtained via
> + * sg_dma_{address,length}.
> + */
> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
> +       int i, count = 0;
> +       unsigned int offset = s->offset;
> +       unsigned int size = s->offset + s->length;
> +       unsigned int max = dma_get_max_seg_size(dev);
> +
> +       for (i = 1; i < nents; i++) {
> +               s->dma_address = ARM_DMA_ERROR;
> +               s->dma_length = 0;
> +
> +               s = sg_next(s);
> +
> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
> +                       if (__map_sg_chunk(dev, start, size, &dma->dma_address,
> +                           dir) < 0)
> +                               goto bad_mapping;
> +
> +                       dma->dma_address += offset;
> +                       dma->dma_length = size - offset;
> +
> +                       size = offset = s->offset;
> +                       start = s;
> +                       dma = sg_next(dma);
> +                       count += 1;
> +               }
> +               size += s->length;
> +       }
> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
> +               goto bad_mapping;
> +
> +       dma->dma_address += offset;
> +       dma->dma_length = size - offset;
> +
> +       return count+1;
> +
> +bad_mapping:
> +       for_each_sg(sg, s, count, i)
> +               __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
> +       return 0;
> +}
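
The merge condition in the loop is the subtle part: a new IOVA chunk is
started whenever the next segment does not begin on a page boundary,
the bytes accumulated so far are not a whole number of pages, or adding
the segment would exceed the device's segment-size limit. A standalone
model of just that decision (segment list and limit are made up):

  #include <stdio.h>

  #define PAGE_SIZE 4096U
  #define PAGE_MASK (~(PAGE_SIZE - 1))

  struct seg { unsigned int offset, length; };

  int main(void)
  {
          struct seg segs[] = {
                  { 0, 4096 }, { 0, 8192 }, { 512, 1024 }, { 0, 4096 },
          };
          unsigned int n = sizeof(segs) / sizeof(segs[0]);
          unsigned int max = 65536; /* stand-in for dma_get_max_seg_size() */
          unsigned int size = segs[0].offset + segs[0].length;
          unsigned int i;

          for (i = 1; i < n; i++) {
                  if (segs[i].offset || (size & ~PAGE_MASK) ||
                      size + segs[i].length > max) {
                          printf("chunk of %u bytes\n", size);
                          size = segs[i].offset;
                  }
                  size += segs[i].length;
          }
          printf("chunk of %u bytes\n", size);
          return 0;
  }

Here the first two segments merge into one 12 KiB chunk, while the
segment with the in-page offset and the one after it each end up in
chunks of their own.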
> +
> +/**
> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + *
> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
> + * rules concerning calls here are the same as for dma_unmap_single().
> + */
> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
> +                       enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i) {
> +               if (sg_dma_len(s))
> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
> +                                              sg_dma_len(s));
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> +                                             s->length, dir);
> +       }
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_cpu
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
> +
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_device
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
> +}
> +
> +
> +/**
> + * arm_iommu_map_page
> + * @dev: valid struct device pointer
> + * @page: page that buffer resides in
> + * @offset: offset into page for start of buffer
> + * @size: size of buffer to map
> + * @dir: DMA transfer direction
> + *
> + * IOMMU aware version of arm_dma_map_page()
> + */
> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
> +            unsigned long offset, size_t size, enum dma_data_direction dir,
> +            struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t dma_addr;
> +       int ret, len = PAGE_ALIGN(size + offset);
> +
> +       if (!arch_is_coherent())
> +               __dma_page_cpu_to_dev(page, offset, size, dir);
> +
> +       dma_addr = __alloc_iova(mapping, len);
> +       if (dma_addr == ARM_DMA_ERROR)
> +               return dma_addr;
> +
> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
> +       if (ret < 0)
> +               goto fail;
> +
> +       return dma_addr + offset;
> +fail:
> +       __free_iova(mapping, dma_addr, len);
> +       return ARM_DMA_ERROR;
> +}
> +
> +/**
> + * arm_iommu_unmap_page
> + * @dev: valid struct device pointer
> + * @handle: DMA address of buffer
> + * @size: size of buffer (same as passed to dma_map_page)
> + * @dir: DMA transfer direction (same as passed to dma_map_page)
> + *
> + * IOMMU aware version of arm_dma_unmap_page()
> + */
> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
> +               size_t size, enum dma_data_direction dir,
> +               struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       int offset = handle & ~PAGE_MASK;
> +       int len = PAGE_ALIGN(size + offset);
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +
> +       iommu_unmap(mapping->domain, iova, len);
> +       __free_iova(mapping, iova, len);
> +}
> +
> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +}
> +
> +static void arm_iommu_sync_single_for_device(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       __dma_page_cpu_to_dev(page, offset, size, dir);
> +}
> +
> +struct dma_map_ops iommu_ops = {
> +       .alloc          = arm_iommu_alloc_attrs,
> +       .free           = arm_iommu_free_attrs,
> +       .mmap           = arm_iommu_mmap_attrs,
> +
> +       .map_page               = arm_iommu_map_page,
> +       .unmap_page             = arm_iommu_unmap_page,
> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
> +
> +       .map_sg                 = arm_iommu_map_sg,
> +       .unmap_sg               = arm_iommu_unmap_sg,
> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
> +};
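
For completeness, a hedged sketch of how a platform might wire this up
(kernel fragment, not standalone; the bus, base, size and order values
are invented for illustration, and arm_iommu_create_mapping() is
assumed to return ERR_PTR() on failure):

  struct dma_iommu_mapping *mapping;

  /* one 128 MiB IOVA window at 0x80000000, page-granular allocation */
  mapping = arm_iommu_create_mapping(&platform_bus_type,
                                     0x80000000, SZ_128M, 0);
  if (!IS_ERR(mapping) && !arm_iommu_attach_device(dev, mapping))
          dev_info(dev, "using IOMMU-backed DMA ops\n");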
> +
> +/**
> + * arm_iommu_create_mapping
> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
> + * @base: start address of the valid IO address space
> + * @size: size of the valid IO address space
> + * @order: accuracy of the IO addresses allocations
> + *
> + * Creates a mapping structure which holds information about used/unused
> + * IO address ranges, which is required to perform memory allocation and
> + * mapping with IOMMU aware functions.
> + *
> + * The client device needs to be attached to the mapping with the
> + * arm_iommu_attach_device() function.
> + */
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
> +                        int order)
> +{
> +       unsigned int count = (size >> PAGE_SHIFT) - order;
> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
> +       struct dma_iommu_mapping *mapping;
> +       int err = -ENOMEM;
> +
> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
> +       if (!mapping)
> +               goto err;
> +
> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +       if (!mapping->bitmap)
> +               goto err2;
> +
> +       mapping->base = base;
> +       mapping->bits = bitmap_size;

Shouldn't the mapping->bits assignment above be as in the patch below?

From 093c77ac6f19899679f2f2447a9d2c684eab7b2e Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <hdoyu@nvidia.com>
Date: Mon, 5 Mar 2012 13:04:38 +0200
Subject: [PATCH 1/1] dma-mapping: Fix mapping->bits size

The number of bits should be multiplied by BITS_PER_BYTE.

Signed-off-by: Hiroshi DOYU <hdoyu@nvidia.com>
---
 arch/arm/mm/dma-mapping.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e55f425..5ec7747 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1495,7 +1495,7 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
 		goto err2;
 
 	mapping->base = base;
-	mapping->bits = bitmap_size;
+	mapping->bits = BITS_PER_BYTE * bitmap_size;
 	mapping->order = order;
 	spin_lock_init(&mapping->lock);
 
-- 
1.7.5.4
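
To make the off-by-8x concrete, a standalone model of the two
calculations (an illustrative 128 MiB window with 4 KiB pages and
order 0):

  #include <stdio.h>

  #define BITS_PER_BYTE 8
  #define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
  #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

  int main(void)
  {
          unsigned long count = (128UL << 20) >> 12;  /* 32768 pages */
          unsigned long bitmap_size = BITS_TO_LONGS(count) * sizeof(long);

          /* bitmap_size is in bytes; the bitmap helpers consume bits */
          printf("count       = %lu pages\n", count);
          printf("broken bits = %lu\n", bitmap_size);
          printf("fixed bits  = %lu\n", BITS_PER_BYTE * bitmap_size);
          return 0;
  }

Without the fix, bitmap_find_next_zero_area() is told the window holds
only 4096 pages (16 MiB) instead of 32768 (128 MiB).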
