linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem
@ 2019-04-22 16:51 laurentiu.tudor
  2019-04-22 18:11 ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: laurentiu.tudor @ 2019-04-22 16:51 UTC (permalink / raw)
  To: hch, robin.murphy, m.szyprowski, iommu
  Cc: leoyang.li, linux-arm-kernel, linux-kernel, Laurentiu Tudor

From: Laurentiu Tudor <laurentiu.tudor@nxp.com>

If possible / available call into the DMA API to get a proper iommu
mapping and a dma address for the newly allocated coherent dma memory.

Signed-off-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
---
 arch/arm/mm/dma-mapping-nommu.c |  3 ++-
 include/linux/dma-mapping.h     | 12 ++++++---
 kernel/dma/coherent.c           | 45 +++++++++++++++++++++++----------
 kernel/dma/mapping.c            |  3 ++-
 4 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index f304b10e23a4..2c42e83a6995 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -74,7 +74,8 @@ static void arm_nommu_dma_free(struct device *dev, size_t size,
 		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
 	} else {
 		int ret = dma_release_from_global_coherent(get_order(size),
-							   cpu_addr);
+							   cpu_addr, size,
+							   dma_addr);
 
 		WARN_ON_ONCE(ret == 0);
 	}
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 6309a721394b..cb23334608a7 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -161,19 +161,21 @@ static inline int is_device_dma_capable(struct device *dev)
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 				       dma_addr_t *dma_handle, void **ret);
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
+int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr,
+				  ssize_t size, dma_addr_t dma_handle);
 
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
 			    void *cpu_addr, size_t size, int *ret);
 
 void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle);
-int dma_release_from_global_coherent(int order, void *vaddr);
+int dma_release_from_global_coherent(int order, void *vaddr, ssize_t size,
+				     dma_addr_t dma_handle);
 int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
 				  size_t size, int *ret);
 
 #else
 #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
-#define dma_release_from_dev_coherent(dev, order, vaddr) (0)
+#define dma_release_from_dev_coherent(dev, order, vaddr, size, dma_handle) (0)
 #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
 
 static inline void *dma_alloc_from_global_coherent(ssize_t size,
@@ -182,7 +184,9 @@ static inline void *dma_alloc_from_global_coherent(ssize_t size,
 	return NULL;
 }
 
-static inline int dma_release_from_global_coherent(int order, void *vaddr)
+static inline int dma_release_from_global_coherent(int order, void *vaddr
+						   ssize_t size,
+						   dma_addr_t dma_handle)
 {
 	return 0;
 }
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 29fd6590dc1e..b40439d6feaa 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -135,13 +135,15 @@ void dma_release_declared_memory(struct device *dev)
 }
 EXPORT_SYMBOL(dma_release_declared_memory);
 
-static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
-		ssize_t size, dma_addr_t *dma_handle)
+static void *__dma_alloc_from_coherent(struct device *dev,
+				       struct dma_coherent_mem *mem,
+				       ssize_t size, dma_addr_t *dma_handle)
 {
 	int order = get_order(size);
 	unsigned long flags;
 	int pageno;
 	void *ret;
+	const struct dma_map_ops *ops = dev ? get_dma_ops(dev) : NULL;
 
 	spin_lock_irqsave(&mem->spinlock, flags);
 
@@ -155,10 +157,16 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 	/*
 	 * Memory was found in the coherent area.
 	 */
-	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	ret = mem->virt_base + (pageno << PAGE_SHIFT);
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 	memset(ret, 0, size);
+	if (ops && ops->map_resource)
+		*dma_handle = ops->map_resource(dev,
+						mem->device_base +
+							(pageno << PAGE_SHIFT),
+						size, DMA_BIDIRECTIONAL, 0);
+	else
+		*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	return ret;
 err:
 	spin_unlock_irqrestore(&mem->spinlock, flags);
@@ -187,7 +195,7 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 	if (!mem)
 		return 0;
 
-	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
+	*ret = __dma_alloc_from_coherent(dev, mem, size, dma_handle);
 	return 1;
 }
 
@@ -196,18 +204,26 @@ void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
 	if (!dma_coherent_default_memory)
 		return NULL;
 
-	return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
-			dma_handle);
+	return __dma_alloc_from_coherent(NULL, dma_coherent_default_memory,
+			size, dma_handle);
 }
 
-static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
-				       int order, void *vaddr)
+static int __dma_release_from_coherent(struct device *dev,
+				       struct dma_coherent_mem *mem,
+				       int order, void *vaddr, ssize_t size,
+				       dma_addr_t dma_handle)
 {
+	const struct dma_map_ops *ops = dev ? get_dma_ops(dev) : NULL;
+
 	if (mem && vaddr >= mem->virt_base && vaddr <
 		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
 		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
 		unsigned long flags;
 
+		if (ops && ops->unmap_resource)
+			ops->unmap_resource(dev, dma_handle, size,
+					    DMA_BIDIRECTIONAL, 0);
+
 		spin_lock_irqsave(&mem->spinlock, flags);
 		bitmap_release_region(mem->bitmap, page, order);
 		spin_unlock_irqrestore(&mem->spinlock, flags);
@@ -228,20 +244,23 @@ static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
  * Returns 1 if we correctly released the memory, or 0 if the caller should
  * proceed with releasing memory from generic pools.
  */
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
+int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr,
+				  ssize_t size, dma_addr_t dma_handle)
 {
 	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 
-	return __dma_release_from_coherent(mem, order, vaddr);
+	return __dma_release_from_coherent(dev, mem, order, vaddr, size,
+					   dma_handle);
 }
 
-int dma_release_from_global_coherent(int order, void *vaddr)
+int dma_release_from_global_coherent(int order, void *vaddr, ssize_t size,
+				     dma_addr_t dma_handle)
 {
 	if (!dma_coherent_default_memory)
 		return 0;
 
-	return __dma_release_from_coherent(dma_coherent_default_memory, order,
-			vaddr);
+	return __dma_release_from_coherent(NULL, dma_coherent_default_memory,
+					   order, vaddr, size, dma_handle);
 }
 
 static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 685a53f2a793..398bf838b7d7 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -269,7 +269,8 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
-	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
+	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr,
+					  size, dma_handle))
 		return;
 	/*
 	 * On non-coherent platforms which implement DMA-coherent buffers via
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem
  2019-04-22 16:51 [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem laurentiu.tudor
@ 2019-04-22 18:11 ` Christoph Hellwig
  2019-04-23 22:09   ` Laurentiu Tudor
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2019-04-22 18:11 UTC (permalink / raw)
  To: laurentiu.tudor
  Cc: hch, robin.murphy, m.szyprowski, iommu, leoyang.li,
	linux-arm-kernel, linux-kernel

On Mon, Apr 22, 2019 at 07:51:25PM +0300, laurentiu.tudor@nxp.com wrote:
> From: Laurentiu Tudor <laurentiu.tudor@nxp.com>
> 
> If possible / available call into the DMA API to get a proper iommu
> mapping and a dma address for the newly allocated coherent dma memory.

I don't think this is so simple.  The original use case of
dma_declare_coherent_memory was memory that is local to a device, where
we copy in data through a MMIO mapping and the device can then access
it.  This use case still seems to be alive in the ohci-sm501 and
ohci-tmio drivers.  Going through the iommu in those cases would be
counter productive.

The other use cases, including the OF ones seem to be (and Marek who
added that support should correct me if I'm wrong) normal host DRAM
that is just set aside for the device.

So if we want to support these prealloc pools with an iommu we need to
split the use cases.  I have to say I really hate the way we do the DMA
"coherent" allocations, so I'm all in favor of that, it just hasn't
bubbled up towards the front of my todo list yet.

My highlevel plan here would be:

 a) move the two OHCI drivers away from our current DMA declare
    coherent code, and just use a trivial genalloc allocator for
    their MMIO space, given that the USB layer already wraps the
    dma_alloc/free APIs anyway
 b) move the rest of the DMA coherent stuff into the actual dma
    map ops, similar to how we handle CMA allocations.  In fact
    we should probably do this by a new page allocation helper
    that tries CMA, "coherent" or the page allocator as fallback
 b) also merge the OF-side handling of CMA vs "coherent" into a
    single implementation.  Preferably dropping the misleading
    "coherent" name while we are at it.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem
  2019-04-22 18:11 ` Christoph Hellwig
@ 2019-04-23 22:09   ` Laurentiu Tudor
  2019-04-24 14:57     ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: Laurentiu Tudor @ 2019-04-23 22:09 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: robin.murphy, m.szyprowski, iommu, Leo Li, linux-arm-kernel,
	linux-kernel

Hello,

> -----Original Message-----
> From: Christoph Hellwig <hch@lst.de>
> Sent: Monday, April 22, 2019 9:11 PM
> 
> On Mon, Apr 22, 2019 at 07:51:25PM +0300, laurentiu.tudor@nxp.com wrote:
> > From: Laurentiu Tudor <laurentiu.tudor@nxp.com>
> >
> > If possible / available call into the DMA API to get a proper iommu
> > mapping and a dma address for the newly allocated coherent dma memory.
> 
> I don't think this is so simple.  The original use case of
> dma_declare_coherent_memory was memory that is local to a device, where
> we copy in data through a MMIO mapping and the device can then access
> it.  This use case still seems to be alive in the ohci-sm501 and
> ohci-tmio drivers.  Going through the iommu in those cases would be
> counter productive.

I had a feeling that I didn't get the whole story and something isn't quite right with this patch. 😊 But I'm happy that we have a discussion started on the topic and, I must say, I'm very interested in getting to the bottom of it (I have some patches enabling SMMU on a couple of NXP chips and depend on resolving this).
I'll try to understand what you're planning but in the meantime please let me know if you think I can be of any help.

---
Thanks & Best Regards, Laurentiu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem
  2019-04-23 22:09   ` Laurentiu Tudor
@ 2019-04-24 14:57     ` Christoph Hellwig
  2019-04-25 11:30       ` Laurentiu Tudor
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2019-04-24 14:57 UTC (permalink / raw)
  To: Laurentiu Tudor
  Cc: Christoph Hellwig, robin.murphy, m.szyprowski, iommu, Leo Li,
	linux-arm-kernel, linux-kernel

I'd be happy to offload all of the mentioned tasks to you if you
volunteer.

I think the first step is to move the two USB controller that can only
DMA to their own BAR off the existing DMA coherent infrastructure.  The
controllers are already identified using the HCD_LOCAL_MEM flag, so we
just need to key off that in the hcd_buffer_* routines and call into a
genalloc that has been fed using the bar, replacing the current
dma_declare_coherent usage.  Take a look at drivers/pci/p2pdma.c
for another example of allocating bits of a BAR using genalloc.

Another issue in that are just popped up, which is remoteproc_virtio.c,
which looks like a complete trainwreck.  I'll need to spend time to
figure out what the hell they are trying to first, though.

Then we need to kill the dma_declare_coherent_memory and
dma_release_declared_memory exports ASAP to avoid growing more users.

Next is figuring out how we want to plumb the OF / early boot coherent
regions into the iommu drivers.  I think I want to handle them similar
to the per-device CMA regions, that is each of the DMA ops has to
manually call into it instead of the page allocator.  Fortunately we
are down to only about a hand full of instances that are relevant
for the reserved memory now.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem
  2019-04-24 14:57     ` Christoph Hellwig
@ 2019-04-25 11:30       ` Laurentiu Tudor
  2019-04-26 15:13         ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: Laurentiu Tudor @ 2019-04-25 11:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: robin.murphy, m.szyprowski, iommu, Leo Li, linux-arm-kernel,
	linux-kernel

Hi Christoph,

On 24.04.2019 17:57, Christoph Hellwig wrote:
> I'd be happy to offload all of the mentioned tasks to you if you
> volunteer.

Alright, I think I mostly got it and can start with the two usb drivers 
you mentioned. Just need a few clarifications, please see inline.

> I think the first step is to move the two USB controller that can only
> DMA to their own BAR off the existing DMA coherent infrastructure.  The
> controllers are already identified using the HCD_LOCAL_MEM flag, so we
> just need to key off that in the hcd_buffer_* routines and call into a

So if HCD_LOCAL_MEM is set I should call into the gen_pool api instead 
of existing dma_{alloc,free}_coherent().

> genalloc that has been fed using the bar, replacing the current
> dma_declare_coherent usage.  Take a look at drivers/pci/p2pdma.c
> for another example of allocating bits of a BAR using genalloc.

Where should I place the reference to the gen_pool? How does local_pool 
in 'struct hcd_usb' sound?

---
Thanks & Best Regards, Laurentiu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem
  2019-04-25 11:30       ` Laurentiu Tudor
@ 2019-04-26 15:13         ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2019-04-26 15:13 UTC (permalink / raw)
  To: Laurentiu Tudor
  Cc: Christoph Hellwig, robin.murphy, m.szyprowski, iommu, Leo Li,
	linux-arm-kernel, linux-kernel

On Thu, Apr 25, 2019 at 11:30:54AM +0000, Laurentiu Tudor wrote:
> > I think the first step is to move the two USB controller that can only
> > DMA to their own BAR off the existing DMA coherent infrastructure.  The
> > controllers are already identified using the HCD_LOCAL_MEM flag, so we
> > just need to key off that in the hcd_buffer_* routines and call into a
> 
> So if HCD_LOCAL_MEM is set I should call into the gen_pool api instead 
> of existing dma_{alloc,free}_coherent().

Yes.

> > genalloc that has been fed using the bar, replacing the current
> > dma_declare_coherent usage.  Take a look at drivers/pci/p2pdma.c
> > for another example of allocating bits of a BAR using genalloc.
> 
> Where should I place the reference to the gen_pool? How does local_pool 
> in 'struct hcd_usb' sound?

Sounds fine to me, but in the end the usb maintainers will have
to decide.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-04-26 15:13 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-22 16:51 [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem laurentiu.tudor
2019-04-22 18:11 ` Christoph Hellwig
2019-04-23 22:09   ` Laurentiu Tudor
2019-04-24 14:57     ` Christoph Hellwig
2019-04-25 11:30       ` Laurentiu Tudor
2019-04-26 15:13         ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).