* [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
@ 2014-04-30 20:33 Bjorn Helgaas
  2014-05-01  0:07 ` Greg Kroah-Hartman
  2014-05-01 14:08 ` James Bottomley
  0 siblings, 2 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2014-04-30 20:33 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-pci, Randy Dunlap, linux-kernel, James E.J. Bottomley, linux-doc

dma_declare_coherent_memory() takes two addresses for a region of memory: a
"bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
the physical address a *CPU* would use to access the region, and
"device_addr" is the bus address the *device* would use to address the
region.

Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 Documentation/DMA-API.txt          |    9 ++++-----
 drivers/base/dma-coherent.c        |    6 +++---
 drivers/base/dma-mapping.c         |    6 +++---
 include/asm-generic/dma-coherent.h |   13 +++++--------
 include/linux/dma-mapping.h        |    7 ++++---
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 0371ad0f37e7..97318a365efc 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -491,19 +491,18 @@ continuing on for size.  Again, you *must* observe the cache line
 boundaries when doing this.
 
 int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 			    dma_addr_t device_addr, size_t size, int
 			    flags)
 
 Declare region of memory to be handed out by dma_alloc_coherent when
 it's asked for coherent memory for this device.
 
-bus_addr is the physical address to which the memory is currently
-assigned in the bus responding region (this will be used by the
-platform to perform the mapping).
+phys_addr is the cpu physical address to which the memory is currently
+assigned (this will be used by the platform to perform the mapping).
 
 device_addr is the bus address the device needs to be programmed
-with actually to address this memory (this will be handed out as the
+with to actually address this memory (this will be handed out as the
 dma_addr_t in dma_alloc_coherent()).
 
 size is the size of the area (must be multiples of PAGE_SIZE).
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index bc256b641027..4d7979863ba4 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -16,7 +16,7 @@ struct dma_coherent_mem {
 	unsigned long	*bitmap;
 };
 
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				dma_addr_t device_addr, size_t size, int flags)
 {
 	void __iomem *mem_base = NULL;
@@ -32,7 +32,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 
 	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
 
-	mem_base = ioremap(bus_addr, size);
+	mem_base = ioremap(phys_addr, size);
 	if (!mem_base)
 		goto out;
 
@@ -45,7 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 
 	dev->dma_mem->virt_base = mem_base;
 	dev->dma_mem->device_base = device_addr;
-	dev->dma_mem->pfn_base = PFN_DOWN(bus_addr);
+	dev->dma_mem->pfn_base = PFN_DOWN(phys_addr);
 	dev->dma_mem->size = pages;
 	dev->dma_mem->flags = flags;
 
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 0ce39a33b3c2..6cd08e145bfa 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -175,7 +175,7 @@ static void dmam_coherent_decl_release(struct device *dev, void *res)
 /**
  * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
  * @dev: Device to declare coherent memory for
- * @bus_addr: Bus address of coherent memory to be declared
+ * @phys_addr: Physical address of coherent memory to be declared
  * @device_addr: Device address of coherent memory to be declared
  * @size: Size of coherent memory to be declared
  * @flags: Flags
@@ -185,7 +185,7 @@ static void dmam_coherent_decl_release(struct device *dev, void *res)
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				 dma_addr_t device_addr, size_t size, int flags)
 {
 	void *res;
@@ -195,7 +195,7 @@ int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 	if (!res)
 		return -ENOMEM;
 
-	rc = dma_declare_coherent_memory(dev, bus_addr, device_addr, size,
+	rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
 					 flags);
 	if (rc == 0)
 		devres_add(dev, res);
diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h
index 2be8a2dbc868..0297e5875798 100644
--- a/include/asm-generic/dma-coherent.h
+++ b/include/asm-generic/dma-coherent.h
@@ -16,16 +16,13 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
  * Standard interface
  */
 #define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-extern int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-			    dma_addr_t device_addr, size_t size, int flags);
+int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
+				dma_addr_t device_addr, size_t size, int flags);
 
-extern void
-dma_release_declared_memory(struct device *dev);
+void dma_release_declared_memory(struct device *dev);
 
-extern void *
-dma_mark_declared_memory_occupied(struct device *dev,
-				  dma_addr_t device_addr, size_t size);
+void *dma_mark_declared_memory_occupied(struct device *dev,
+					dma_addr_t device_addr, size_t size);
 #else
 #define dma_alloc_from_coherent(dev, size, handle, ret) (0)
 #define dma_release_from_coherent(dev, order, vaddr) (0)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index fd4aee29ad10..a266a1d7e77b 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -186,7 +186,7 @@ static inline int dma_get_cache_alignment(void)
 
 #ifndef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
 static inline int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 			    dma_addr_t device_addr, size_t size, int flags)
 {
 	return 0;
@@ -217,13 +217,14 @@ extern void *dmam_alloc_noncoherent(struct device *dev, size_t size,
 extern void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr,
 				  dma_addr_t dma_handle);
 #ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-extern int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+extern int dmam_declare_coherent_memory(struct device *dev,
+					phys_addr_t phys_addr,
 					dma_addr_t device_addr, size_t size,
 					int flags);
 extern void dmam_release_declared_memory(struct device *dev);
 #else /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
 static inline int dmam_declare_coherent_memory(struct device *dev,
-				dma_addr_t bus_addr, dma_addr_t device_addr,
+				phys_addr_t phys_addr, dma_addr_t device_addr,
 				size_t size, gfp_t gfp)
 {
 	return 0;



* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-04-30 20:33 [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t Bjorn Helgaas
@ 2014-05-01  0:07 ` Greg Kroah-Hartman
  2014-05-01  7:48   ` Bjorn Helgaas
  2014-05-01 14:08 ` James Bottomley
  1 sibling, 1 reply; 19+ messages in thread
From: Greg Kroah-Hartman @ 2014-05-01  0:07 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: linux-pci, Randy Dunlap, linux-kernel, James E.J. Bottomley, linux-doc

On Wed, Apr 30, 2014 at 02:33:21PM -0600, Bjorn Helgaas wrote:
> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> the physical address a *CPU* would use to access the region, and
> "device_addr" is the bus address the *device* would use to address the
> region.
> 
> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.

Isn't this going to cause problems with the callers of this function?
You aren't changing them...

greg k-h


* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-01  0:07 ` Greg Kroah-Hartman
@ 2014-05-01  7:48   ` Bjorn Helgaas
  0 siblings, 0 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-01  7:48 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-pci, Randy Dunlap, linux-kernel, James E.J. Bottomley, linux-doc

On Wed, Apr 30, 2014 at 6:07 PM, Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Wed, Apr 30, 2014 at 02:33:21PM -0600, Bjorn Helgaas wrote:
>> dma_declare_coherent_memory() takes two addresses for a region of memory: a
>> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
>> the physical address a *CPU* would use to access the region, and
>> "device_addr" is the bus address the *device* would use to address the
>> region.
>>
>> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
>
> Isn't this going to cause problems with the callers of this function?
> You aren't changing them...

Sorry, I should have mentioned that in the changelog.

This could cause a problem if it changed "bus_addr" to a smaller type,
e.g., in a config where dma_addr_t was 64 bits but phys_addr_t was
only 32.  In that config (if it exists), this change would truncate
the address if a caller supplied a 64-bit dma_addr_t.  But most of
the callers in the tree already supply a phys_addr_t (or the
equivalent resource_size_t), and the rest supply 32-bit int values, so
I don't think there's a problem here.

This change could theoretically *fix* a problem in a config with
32-bit dma_addr_t and 64-bit phys_addr_t.  In that case we could
previously have truncated a phys_addr_t, and this change would fix it.
But I don't know of any issues like this.
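
For illustration only, a user-space sketch of that truncation hazard
(the typedefs stand in for a hypothetical config, not the kernel's
real ones):

	#include <stdio.h>
	#include <stdint.h>

	typedef uint64_t dma_addr_t;	/* 64-bit DMA addresses... */
	typedef uint32_t phys_addr_t;	/* ...but 32-bit physical ones */

	static void declare(phys_addr_t phys_addr)
	{
		printf("phys_addr = %#x\n", phys_addr);
	}

	int main(void)
	{
		dma_addr_t bus_addr = 0x100000000ULL;	/* above 4GB */

		declare(bus_addr);	/* high 32 bits dropped; prints 0 */
		return 0;
	}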

Here are the 16 callers in the tree:

4 in arch/arm/mach-imx/mach-imx27_visstrim_m10.c (phys_addr_t)
3 in arch/arm/mach-imx/mach-mx31_3ds.c (phys_addr_t)
1 in arch/arm/mach-imx/mach-pcm037.c (phys_addr_t)
1 in arch/arm/plat-samsung/s5p-dev-mfc.c (phys_addr_t)
1 in arch/sh/drivers/pci/fixups-dreamcast.c (#define constant)
2 in drivers/media/platform/s5p-mfc/s5p_mfc.c (unsigned int from OF property)
1 in drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c (struct resource.start)
1 in drivers/scsi/NCR_Q720.c (__u32 memory address computed from I/O port values)
1 in drivers/usb/host/ohci-sm501.c (struct resource.start)
1 in drivers/usb/host/ohci-tmio.c (struct resource.start)

Bjorn


* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-04-30 20:33 [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t Bjorn Helgaas
  2014-05-01  0:07 ` Greg Kroah-Hartman
@ 2014-05-01 14:08 ` James Bottomley
  2014-05-01 19:05   ` Bjorn Helgaas
  2014-05-02 11:02   ` Liviu Dudau
  1 sibling, 2 replies; 19+ messages in thread
From: James Bottomley @ 2014-05-01 14:08 UTC (permalink / raw)
  To: bhelgaas; +Cc: rdunlap, linux-pci, gregkh, linux-doc, linux-kernel

On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> the physical address a *CPU* would use to access the region, and
> "device_addr" is the bus address the *device* would use to address the
> region.
> 
> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.

Remind me what the difference between phys_addr_t and dma_addr_t is.

I thought phys_addr_t was the maximum address the CPU could reach after
address translation and dma_addr_t was the maximum physical address any
bus attached memory mapped devices could appear at. (of course, mostly
they're the same).

The intent of dma_declare_coherent_memory() is to take a range of memory
provided by some device on the bus and allow the CPU to allocate regions
from it for use as things like descriptors.  The CPU treats it as real
memory, but, in fact, it is a mapped region of an attached device.

If my definition is correct, then bus_addr should be dma_addr_t because
it has to be mapped from a device and dma_addr_t is the correct type for
device addresses.  If I've got the definition wrong, then we should
document it somewhere, because it's probably confusing other people as
well.
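
For reference, the usage pattern that intent implies looks roughly like
this (a sketch only; SRAM_PHYS, SRAM_DEV and SRAM_SIZE are made-up
names, and error handling is elided):

	if (dma_declare_coherent_memory(&pdev->dev, SRAM_PHYS, SRAM_DEV,
					SRAM_SIZE, DMA_MEMORY_MAP)) {
		dma_addr_t dma;
		/* satisfied from the declared region, not system RAM */
		void *desc = dma_alloc_coherent(&pdev->dev, 512, &dma,
						GFP_KERNEL);
		/* the CPU writes through "desc"; the device gets "dma" */
	}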

James



* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-01 14:08 ` James Bottomley
@ 2014-05-01 19:05   ` Bjorn Helgaas
  2014-05-02  6:11     ` James Bottomley
  2014-05-02 11:18     ` Liviu Dudau
  2014-05-02 11:02   ` Liviu Dudau
  1 sibling, 2 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-01 19:05 UTC (permalink / raw)
  To: James Bottomley
  Cc: rdunlap, linux-pci, gregkh, linux-doc, linux-kernel, Arnd Bergmann

[+cc Arnd]

On Thu, May 1, 2014 at 8:08 AM, James Bottomley
<jbottomley@parallels.com> wrote:
> On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
>> dma_declare_coherent_memory() takes two addresses for a region of memory: a
>> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
>> the physical address a *CPU* would use to access the region, and
>> "device_addr" is the bus address the *device* would use to address the
>> region.
>>
>> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
>
> Remind me what the difference between phys_addr_t and dma_addr_t is.
>
> I thought phys_addr_t was the maximum address the CPU could reach after
> address translation and dma_addr_t was the maximum physical address any
> bus attached memory mapped devices could appear at. (of course, mostly
> they're the same).

I assumed phys_addr_t was for physical addresses generated by a CPU
and dma_addr_t was for addresses generated by a device, e.g.,
addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
things, e.g., ARM_LPAE, than to device bus properties like "support
64-bit PCI, not just 32-bit PCI."

DMA-API-HOWTO.txt contains this:

  ... the definition of the dma_addr_t (which can hold any valid DMA
  address for the platform) type which should be used everywhere you
  hold a DMA (bus) address returned from the DMA mapping functions.

and clearly you use a dma_addr_t from dma_alloc_coherent(), etc., to
program the device.  It seems silly to have phys_addr_t and dma_addr_t
for two (possibly slightly different) flavors of CPU physical
addresses, and then sort of overload dma_addr_t by also using it to
hold the addresses devices use for DMA.

I think the "CPU generates phys_addr_t" and "device generates
dma_addr_t" distinction seems like a useful idea.  I'm not a CPU
person, so I don't understand the usefulness of dma_addr_t as a
separate type to hold CPU addresses at which memory-mapped devices can
appear.  If that's all it is, why not just use phys_addr_t everywhere
and get rid of dma_addr_t altogether?

> The intent of dma_declare_coherent_memory() is to take a range of memory
> provided by some device on the bus and allow the CPU to allocate regions
> from it for use as things like descriptors.  The CPU treats it as real
> memory, but, in fact, it is a mapped region of an attached device.
>
> If my definition is correct, then bus_addr should be dma_addr_t because
> it has to be mapped from a device and dma_addr_t is the correct type for
> device addresses.  If I've got the definition wrong, then we should
> document it somewhere, because it's probably confusing other people as
> well.

dma_declare_coherent_memory() calls ioremap() on bus_addr, which seems
a clear indication that bus_addr is really a physical address usable
by the CPU.  But I do see your point that bus_addr is a CPU address
for a memory-mapped device, and would thus fit into your idea of a
dma_addr_t.

I think this is the only part of the DMA API that deals with CPU
physical addresses.  I suppose we could side-step this question by
having the caller do the ioremap; then dma_declare_coherent_memory()
could just take a CPU virtual address (a void *) and a device address
(a dma_addr_t).
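
Roughly like this (a hypothetical signature, not proposed code):

	void __iomem *virt = ioremap(phys_addr, size);	/* caller maps */
	rc = dma_declare_coherent_memory(dev, virt, device_addr, size,
					 flags);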

But I'd still have the question of whether we're using dma_addr_t
correctly in the PCI core.  We use it to hold BAR values and the like,
and I've been making assumptions like "if dma_addr_t is only 32 bits,
we can't handle BARs above 4GB."

Bjorn


* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-01 19:05   ` Bjorn Helgaas
@ 2014-05-02  6:11     ` James Bottomley
  2014-05-02  8:42       ` Arnd Bergmann
  2014-05-02 18:34       ` Bjorn Helgaas
  2014-05-02 11:18     ` Liviu Dudau
  1 sibling, 2 replies; 19+ messages in thread
From: James Bottomley @ 2014-05-02  6:11 UTC (permalink / raw)
  To: bhelgaas; +Cc: rdunlap, linux-pci, gregkh, arnd, linux-doc, linux-kernel

On Thu, 2014-05-01 at 13:05 -0600, Bjorn Helgaas wrote:
> [+cc Arnd]
> 
> On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> <jbottomley@parallels.com> wrote:
> > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> >> the physical address a *CPU* would use to access the region, and
> >> "device_addr" is the bus address the *device* would use to address the
> >> region.
> >>
> >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> >
> > Remind me what the difference between phys_addr_t and dma_addr_t is.
> >
> > I thought phys_addr_t was the maximum address the CPU could reach after
> > address translation and dma_addr_t was the maximum physical address any
> > bus attached memory mapped devices could appear at. (of course, mostly
> > they're the same).
> 
> I assumed phys_addr_t was for physical addresses generated by a CPU
> and dma_addr_t was for addresses generated by a device, e.g.,
> addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> things, e.g., ARM_LPAE, than to device bus properties like "support
> 64-bit PCI, not just 32-bit PCI."

OK, but even in your definition dma_declare_coherent_memory() should
operate on dma_addr_t because the input is a region of memory you got
from the device.  Somehow you generate an idea from the device
configuration of where this piece of memory sits on the device and
that's what you feed into dma_declare_coherent_memory().  It maps that
region and makes it available to the CPU allocators.

> 
> DMA-API-HOWTO.txt contains this:
> 
>   ... the definition of the dma_addr_t (which can hold any valid DMA
>   address for the platform) type which should be used everywhere you
>   hold a DMA (bus) address returned from the DMA mapping functions.
> 
> and clearly you use a dma_addr_t from dma_alloc_coherent(), etc., to
> program the device.  It seems silly to have phys_addr_t and dma_addr_t
> for two (possibly slightly different) flavors of CPU physical
> addresses, and then sort of overload dma_addr_t by also using it to
> hold the addresses devices use for DMA.
> 
> I think the "CPU generates phys_addr_t" and "device generates
> dma_addr_t" distinction seems like a useful idea.  I'm not a CPU
> person, so I don't understand the usefulness of dma_addr_t as a
> separate type to hold CPU addresses at which memory-mapped devices can
> appear.  If that's all it is, why not just use phys_addr_t everywhere
> and get rid of dma_addr_t altogether?

Actually, on this one, I think I agree ... we really just need the idea
of physical and virtual.  The original idea of dma_addr_t is that the
type would yell if you tried to use it where you could use a virtual
address and the users were supposed to treat it as an opaque cookie for
a region on the device, but I suppose it's generating more confusion
than solving problems.

> > The intent of dma_declare_coherent_memory() is to take a range of memory
> > provided by some device on the bus and allow the CPU to allocate regions
> > from it for use as things like descriptors.  The CPU treats it as real
> > memory, but, in fact, it is a mapped region of an attached device.
> >
> > If my definition is correct, then bus_addr should be dma_addr_t because
> > it has to be mapped from a device and dma_addr_t is the correct type for
> > device addresses.  If I've got the definition wrong, then we should
> > document it somewhere, because it's probably confusing other people as
> > well.
> 
> dma_declare_coherent_memory() calls ioremap() on bus_addr, which seems
> a clear indication that bus_addr is really a physical address usable
> by the CPU.  But I do see your point that bus_addr is a CPU address
> for a memory-mapped device, and would thus fit into your idea of a
> dma_addr_t.

Well, yes, but it's declaring that the device has a region at bus_addr
(on the device) we want to make available to the CPU to use.  On the
Q720 it's about a megabyte of SRAM behind a bridge.  The CPU puts script
fragments into it and the 720's script engines execute them.  The reason
for it being on the device behind a bridge is that fetching from CPU
main memory causes too much bus traffic.  I think all the other uses are
something similar.  The point, though, is that the address for the
memory comes from the device.  It has to be ioremapped because the
system doesn't know about it and we're going to allocate from it for
buffers mapped into the CPU virtual space.

> I think this is the only part of the DMA API that deals with CPU
> physical addresses.  I suppose we could side-step this question by
> having the caller do the ioremap; then dma_declare_coherent_memory()
> could just take a CPU virtual address (a void *) and a device address
> (a dma_addr_t).

But we have to do the ioremap.  Without that there's no mapping for the
memory region we've just declared, so I think it does make sense to do
it within the dma_declare_coherent_memory() API.

> But I'd still have the question of whether we're using dma_addr_t
> correctly in the PCI core.  We use it to hold BAR values and the like,
> and I've been making assumptions like "if dma_addr_t is only 32 bits,
> we can't handle BARs above 4GB."

Well, first we need to answer whether we need separate phys_addr_t and
dma_addr_t ... we know the former represents physical memory addressed
by the CPU and the latter bus physical addresses from devices ... but
they're essentially the same.

James



* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-02  6:11     ` James Bottomley
@ 2014-05-02  8:42       ` Arnd Bergmann
  2014-05-05 23:01         ` Bjorn Helgaas
  2014-05-02 18:34       ` Bjorn Helgaas
  1 sibling, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2014-05-02  8:42 UTC (permalink / raw)
  To: James Bottomley
  Cc: bhelgaas, rdunlap, linux-pci, gregkh, linux-doc, linux-kernel

On Friday 02 May 2014 06:11:42 James Bottomley wrote:
> On Thu, 2014-05-01 at 13:05 -0600, Bjorn Helgaas wrote:
> > [+cc Arnd]
> > 
> > On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> > <jbottomley@parallels.com> wrote:
> > > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> > >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> > >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> > >> the physical address a *CPU* would use to access the region, and
> > >> "device_addr" is the bus address the *device* would use to address the
> > >> region.
> > >>
> > >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> > >
> > > Remind me what the difference between phys_addr_t and dma_addr_t is.
> > >
> > > I thought phys_addr_t was the maximum address the CPU could reach after
> > > address translation and dma_addr_t was the maximum physical address any
> > > bus attached memory mapped devices could appear at. (of course, mostly
> > > they're the same).
> > 
> > I assumed phys_addr_t was for physical addresses generated by a CPU
> > and dma_addr_t was for addresses generated by a device, e.g.,
> > addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> > ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> > things, e.g., ARM_LPAE, than to device bus properties like "support
> > 64-bit PCI, not just 32-bit PCI."
> 
> OK, but even in your definition dma_declare_coherent_memory() should
> operate on dma_addr_t because the input is a region of memory you got
> from the device.  Somehow you generate an idea from the device
> configuration of where this piece of memory sits on the device and
> that's what you feed into dma_declare_coherent_memory().  It maps that
> region and makes it available to the CPU allocators.

I think it's ambiguous at best at the moment; there is no clear
answer as to what it should be until we define the semantics more clearly.

At the moment, we have these callers

arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-mx31_3ds.c:      dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-mx31moboard.c:   dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-mx35_3ds.c:      dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/mach-imx/mach-pcm037.c:        dma = dma_declare_coherent_memory(&pdev->dev,
arch/arm/plat-samsung/s5p-dev-mfc.c:            if (dma_declare_coherent_memory(area->dev, area->base,
arch/sh/drivers/pci/fixups-dreamcast.c:         BUG_ON(!dma_declare_coherent_memory(&dev->dev,
drivers/media/platform/s5p-mfc/s5p_mfc.c:       if (dma_declare_coherent_memory(dev->mem_dev_l, mem_info[
drivers/media/platform/s5p-mfc/s5p_mfc.c:       if (dma_declare_coherent_memory(dev->mem_dev_r, mem_info[
drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c:               err = dma_declare_coherent_memory
drivers/scsi/NCR_Q720.c:        if (dma_declare_coherent_memory(dev, base_addr, base_addr,
drivers/usb/host/ohci-sm501.c:  if (!dma_declare_coherent_memory(dev, mem->start,
drivers/usb/host/ohci-tmio.c:   if (!dma_declare_coherent_memory(&dev->dev, sram->start,

I don't know about NCR_Q720, but all others are only used on machines
where physical addresses and bus addresses are in the same space.

There are other machines that may always have to go through an IOMMU,
or that have a fixed offset between dma addresses and physical addresses
and that need to call a phys_to_dma() to convert between the two, but
none of those call dma_declare_coherent_memory.
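
For example, the fixed-offset machines implement phys_to_dma() roughly
like this (a sketch; the offset value is made up):

	#define BUS_OFFSET	0x80000000	/* hypothetical */

	static inline dma_addr_t phys_to_dma(struct device *dev,
					     phys_addr_t paddr)
	{
		return (dma_addr_t)(paddr - BUS_OFFSET);
	}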

> > DMA-API-HOWTO.txt contains this:
> > 
> >   ... the definition of the dma_addr_t (which can hold any valid DMA
> >   address for the platform) type which should be used everywhere you
> >   hold a DMA (bus) address returned from the DMA mapping functions.
> > 
> > and clearly you use a dma_addr_t from dma_alloc_coherent(), etc., to
> > program the device.  It seems silly to have phys_addr_t and dma_addr_t
> > for two (possibly slightly different) flavors of CPU physical
> > addresses, and then sort of overload dma_addr_t by also using it to
> > hold the addresses devices use for DMA.
> > 
> > I think the "CPU generates phys_addr_t" and "device generates
> > dma_addr_t" distinction seems like a useful idea.  I'm not a CPU
> > person, so I don't understand the usefulness of dma_addr_t as a
> > separate type to hold CPU addresses at which memory-mapped devices can
> > appear.  If that's all it is, why not just use phys_addr_t everywhere
> > and get rid of dma_addr_t altogether?
> 
> Actually, on this one, I think I agree ... we really just need the idea
> of physical and virtual.  The original idea of dma_addr_t is that the
> type would yell if you tried to use it where you could use a virtual
> address and the users were supposed to treat it as an opaque cookie for
> a region on the device, but I suppose it's generating more confusion
> than solving problems.

We also have machines on ARM that can never do DMA to addresses above the
32-bit boundary but can have more than 4GB RAM (using swiotlb or iommu).
Building a kernel for these machines uses a 32-bit dma_addr_t and
a 64-bit phys_addr_t.

It's not clear if that distinction has any practical benefits, since
most configurations that enable 64-bit phys_addr_t also enable machines
that can do DMA to high addresses.

> > > The intent of dma_declare_coherent_memory() is to take a range of memory
> > > provided by some device on the bus and allow the CPU to allocate regions
> > > from it for use as things like descriptors.  The CPU treats it as real
> > > memory, but, in fact, it is a mapped region of an attached device.
> > >
> > > If my definition is correct, then bus_addr should be dma_addr_t because
> > > it has to be mapped from a device and dma_addr_t is the correct type for
> > > device addresses.  If I've got the definition wrong, then we should
> > > document it somewhere, because it's probably confusing other people as
> > > well.
> > 
> > dma_declare_coherent_memory() calls ioremap() on bus_addr, which seems
> > a clear indication that bus_addr is really a physical address usable
> > by the CPU.  But I do see your point that bus_addr is a CPU address
> > for a memory-mapped device, and would thus fit into your idea of a
> > dma_addr_t.
> 
> Well, yes, but it's declaring that the device has a region at bus_addr
> (on the device) we want to make available to the CPU to use.  On the
> Q720 it's about a megabyte of SRAM behind a bridge.  The CPU puts script
> fragments into it and the 720's script engines execute them.  The reason
> for it being on the device behind a bridge is that fetching from CPU
> main memory causes too much bus traffic.  I think all the other uses are
> something similar.  The point, though, is that the address for the
> memory comes from the device.  It has to be ioremapped because the
> system doesn't know about it and we're going to allocate from it for
> buffers mapped into the CPU virtual space.

I can see three different use cases:

* drivers/usb/host/ohci-*: SRAM, same as Q720
* s5p_mfc: a hack to guarantee that there are two DMA areas on
  different memory banks, so the device can use them in parallel
  for higher throughput.
* arch/arm/mach-imx: A workaround for limited amount of DMA-coherent
  memory, which is traditionally limited to 2MB total on ARM.
  Modern systems would use CMA for this, and imx could be converted
  if someone wants to do the work.

There is also pci-dreamcast.c, but I'm not familiar with that, and
I can't tell from briefly reading the code.

> > I think this is the only part of the DMA API that deals with CPU
> > physical addresses.  I suppose we could side-step this question by
> > having the caller do the ioremap; then dma_declare_coherent_memory()
> > could just take a CPU virtual address (a void *) and a device address
> > (a dma_addr_t).
> 
> But we have to do the ioremap.  Without that there's no mapping for the
> memory region we've just declared, so I think it does make sense to do
> it within the dma_declare_coherent_memory() API.

Doing ioremap() on memory may not be ideal though, at least on ARM,
where subtle differences exist between doing an MT_DEVICE mapping
for MMIO and MT_UNCACHED for RAM that is meant for coherent access.

> > But I'd still have the question of whether we're using dma_addr_t
> > correctly in the PCI core.  We use it to hold BAR values and the like,
> > and I've been making assumptions like "if dma_addr_t is only 32 bits,
> > we can't handle BARs above 4GB."
> 
> Well, first we need to answer whether we need separate phys_addr_t and
> dma_addr_t ... we know the former represents physical memory addressed
> by the CPU and the latter bus physical addresses from devices ... but
> they're essentially the same.

I think BARs should be phys_addr_t: in the example with 32-bit dma_addr_t
and 64-bit phys_addr_t, you would be able to put the MMIO registers
into a high address if the PCI host supports that; the only limitation
would be that no device could DMA into a high address for doing inter-device
DMA.

	Arnd


* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-01 14:08 ` James Bottomley
  2014-05-01 19:05   ` Bjorn Helgaas
@ 2014-05-02 11:02   ` Liviu Dudau
  1 sibling, 0 replies; 19+ messages in thread
From: Liviu Dudau @ 2014-05-02 11:02 UTC (permalink / raw)
  To: James Bottomley
  Cc: bhelgaas, rdunlap, linux-pci, gregkh, linux-doc, linux-kernel

On Thu, May 01, 2014 at 03:08:12PM +0100, James Bottomley wrote:
> On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> > dma_declare_coherent_memory() takes two addresses for a region of memory: a
> > "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> > the physical address a *CPU* would use to access the region, and
> > "device_addr" is the bus address the *device* would use to address the
> > region.
> > 
> > Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> 
> Remind me what the difference between phys_addr_t and dma_addr_t is.
> 
> I thought phys_addr_t was the maximum address the CPU could reach after
> address translation and dma_addr_t was the maximum physical address any
> bus attached memory mapped devices could appear at. (of course, mostly
> they're the same).

My understanding is that dma_addr_t has nothing to do (directly) with the
bus the device is on. In the way I understand things, dma_addr_t is the
range of addresses that a DMA engine (possibly inside a device) can access.

So it is possible to have a device sitting on a PCI bus that uses addresses
for dma_addr_t that are 32bit but those addresses are then translated by
the bus translation setup into phys_addr_t and that can be any size
(preferably >= dma_addr_t).

One way of seeing dma_addr_t is as a translated IO physical address. And yes,
that means that there are two ways of expressing a phys_addr_t, except
that dma_addr_t is further restricted to the space that the DMA engine can
access and how the address output of the DMA engine gets mapped on the
physical busses.

Best regards,
Liviu

> 
> The intent of dma_declare_coherent_memory() is to take a range of memory
> provided by some device on the bus and allow the CPU to allocate regions
> from it for use as things like descriptors.  The CPU treats it as real
> memory, but, in fact, it is a mapped region of an attached device.
> 
> If my definition is correct, then bus_addr should be dma_addr_t because
> it has to be mapped from a device and dma_addr_t is the correct type for
> device addresses.  If I've got the definition wrong, then we should
> document it somewhere, because it's probably confusing other people as
> well.
> 
> James
> 

-- 
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯



* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-01 19:05   ` Bjorn Helgaas
  2014-05-02  6:11     ` James Bottomley
@ 2014-05-02 11:18     ` Liviu Dudau
  2014-05-05 22:42       ` Bjorn Helgaas
  1 sibling, 1 reply; 19+ messages in thread
From: Liviu Dudau @ 2014-05-02 11:18 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: James Bottomley, rdunlap, linux-pci, gregkh, linux-doc,
	linux-kernel, Arnd Bergmann

On Thu, May 01, 2014 at 08:05:04PM +0100, Bjorn Helgaas wrote:
> [+cc Arnd]
> 
> On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> <jbottomley@parallels.com> wrote:
> > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> >> the physical address a *CPU* would use to access the region, and
> >> "device_addr" is the bus address the *device* would use to address the
> >> region.
> >>
> >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> >
> > Remind me what the difference between phys_addr_t and dma_addr_t is.
> >
> > I thought phys_addr_t was the maximum address the CPU could reach after
> > address translation and dma_addr_t was the maximum physical address any
> > bus attached memory mapped devices could appear at. (of course, mostly
> > they're the same).
> 
> I assumed phys_addr_t was for physical addresses generated by a CPU
> and dma_addr_t was for addresses generated by a device, e.g.,
> addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> things, e.g., ARM_LPAE, than to device bus properties like "support
> 64-bit PCI, not just 32-bit PCI."
> 
> DMA-API-HOWTO.txt contains this:
> 
>   ... the definition of the dma_addr_t (which can hold any valid DMA
>   address for the platform) type which should be used everywhere you
>   hold a DMA (bus) address returned from the DMA mapping functions.
> 
> and clearly you use a dma_addr_t from dma_alloc_coherent(), etc., to
> program the device.  It seems silly to have phys_addr_t and dma_addr_t
> for two (possibly slightly different) flavors of CPU physical
> addresses, and then sort of overload dma_addr_t by also using it to
> hold the addresses devices use for DMA.

dma_addr_t is a phys_addr_t masked according to the capabilities of the
*DMA engine* (not device). Quite a lot of PCI devices (all?) now contain
a DMA engine, so the distinction is somewhat blurred nowadays.

> 
> I think the "CPU generates phys_addr_t" and "device generates
> dma_addr_t" distinction seems like a useful idea.  

s/device/DMA engine inside device/

> I'm not a CPU
> person, so I don't understand the usefulness of dma_addr_t as a
> separate type to hold CPU addresses at which memory-mapped devices can
> appear.  If that's all it is, why not just use phys_addr_t everywhere
> and get rid of dma_addr_t altogether?

Because dma_addr_t imposes further restrictions based on what the DMA engine
can access. You could have a DMA engine that can only access 32-bit addresses
in a system with 40+ bit CPU addresses.

> 
> > The intent of dma_declare_coherent_memory() is to take a range of memory
> > provided by some device on the bus and allow the CPU to allocate regions
> > from it for use as things like descriptors.  The CPU treats it as real
> > memory, but, in fact, it is a mapped region of an attached device.

It is not the CPU that is the user of the bus_addr but the DMA engine. And
that can be on the bus as well, so it needs to generate bus addresses.

> >
> > If my definition is correct, then bus_addr should be dma_addr_t because
> > it has to be mapped from a device and dma_addr_t is the correct type for
> > device addresses.  If I've got the definition wrong, then we should
> > document it somewhere, because it's probably confusing other people as
> > well.
> 
> dma_declare_coherent_memory() calls ioremap() on bus_addr, which seems
> a clear indication that bus_addr is really a physical address usable
> by the CPU.  But I do see your point that bus_addr is a CPU address
> for a memory-mapped device, and would thus fit into your idea of a
> dma_addr_t.
> 
> I think this is the only part of the DMA API that deals with CPU
> physical addresses.  I suppose we could side-step this question by
> having the caller do the ioremap; then dma_declare_coherent_memory()
> could just take a CPU virtual address (a void *) and a device address
> (a dma_addr_t).

And how would that change the meaning of the function?

Best regards,
Liviu

> 
> But I'd still have the question of whether we're using dma_addr_t
> correctly in the PCI core.  We use it to hold BAR values and the like,
> and I've been making assumptions like "if dma_addr_t is only 32 bits,
> we can't handle BARs above 4GB."
> 
> Bjorn

-- 
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯



* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-02  6:11     ` James Bottomley
  2014-05-02  8:42       ` Arnd Bergmann
@ 2014-05-02 18:34       ` Bjorn Helgaas
  1 sibling, 0 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-02 18:34 UTC (permalink / raw)
  To: James Bottomley
  Cc: rdunlap, linux-pci, gregkh, arnd, linux-doc, linux-kernel, Magnus Damm

[+cc Magnus]

On Fri, May 02, 2014 at 06:11:42AM +0000, James Bottomley wrote:
> On Thu, 2014-05-01 at 13:05 -0600, Bjorn Helgaas wrote:
> > On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> > <jbottomley@parallels.com> wrote:
> > > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> > >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> > >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> > >> the physical address a *CPU* would use to access the region, and
> > >> "device_addr" is the bus address the *device* would use to address the
> > >> region.
> > >>
> > >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> > >
> > > Remind me what the difference between phys_addr_t and dma_addr_t is.
> > >
> > > I thought phys_addr_t was the maximum address the CPU could reach after
> > > address translation and dma_addr_t was the maximum physical address any
> > > bus attached memory mapped devices could appear at. (of course, mostly
> > > they're the same).
> > 
> > I assumed phys_addr_t was for physical addresses generated by a CPU
> > and dma_addr_t was for addresses generated by a device, e.g.,
> > addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> > ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> > things, e.g., ARM_LPAE, than to device bus properties like "support
> > 64-bit PCI, not just 32-bit PCI."
> 
> OK, but even in your definition dma_declare_coherent_memory() should
> operate on dma_addr_t because the input is a region of memory you got
> from the device.  Somehow you generate an idea from the device
> configuration of where this piece of memory sits on the device and
> that's what you feed into dma_declare_coherent_memory().  It maps that
> region and makes it available to the CPU allocators.

The device configuration tells you where the region is on the bus.  To
find the corresponding CPU physical address, we have to know about any
address translation performed by the host bridge.  That's a bus-
specific concept, so maybe it belongs in the driver.

The only user of dma_declare_coherent_memory() on a PCI device is
gapspci_fixup_resources(), so we could leave
dma_declare_coherent_memory() alone and do something like this:

-   dma_declare_coherent_memory(&dev->dev, GAPSPCI_DMA_BASE, GAPSPCI_DMA_BASE,

+   struct pci_bus_region region;
+   struct resource res;

+   region.start = GAPSPCI_DMA_BASE;
+   region.end = GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1;
+   pcibios_bus_to_resource(dev->bus, &res, &region);
+   dma_declare_coherent_memory(&dev->dev, res.start, GAPSPCI_DMA_BASE,

But res.start is a resource_size_t, which is the same as phys_addr_t
and theoretically might not fit in dma_addr_t.  If we had some generic
way of doing pcibios_bus_to_resource(), we could pass just a single
address (the current "device_addr") and compute the CPU physical
address inside dma_declare_coherent_memory().

The other users implicitly assume the CPU physical address is the same
as the bus address.  I'm a little uneasy because I assume it's
possible to have a platform_device behind an address-translating
bridge, too.  But maybe nobody actually does that.

Bjorn


* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-02 11:18     ` Liviu Dudau
@ 2014-05-05 22:42       ` Bjorn Helgaas
  2014-05-06  8:59         ` Liviu Dudau
  0 siblings, 1 reply; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-05 22:42 UTC (permalink / raw)
  To: James Bottomley, rdunlap, linux-pci, gregkh, linux-doc,
	linux-kernel, Arnd Bergmann

On Fri, May 02, 2014 at 12:18:07PM +0100, Liviu Dudau wrote:
> On Thu, May 01, 2014 at 08:05:04PM +0100, Bjorn Helgaas wrote:
> > On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> > <jbottomley@parallels.com> wrote:
> > > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> > >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> > >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> > >> the physical address a *CPU* would use to access the region, and
> > >> "device_addr" is the bus address the *device* would use to address the
> > >> region.
> > >>
> > >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> > >
> > > Remind me what the difference between phys_addr_t and dma_addr_t is.
> > >
> > > I thought phys_addr_t was the maximum address the CPU could reach after
> > > address translation and dma_addr_t was the maximum physical address any
> > > bus attached memory mapped devices could appear at. (of course, mostly
> > > they're the same).
> > 
> > I assumed phys_addr_t was for physical addresses generated by a CPU
> > and dma_addr_t was for addresses generated by a device, e.g.,
> > addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> > ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> > things, e.g., ARM_LPAE, than to device bus properties like "support
> > 64-bit PCI, not just 32-bit PCI."
> > 
> > DMA-API-HOWTO.txt contains this:
> > 
> >   ... the definition of the dma_addr_t (which can hold any valid DMA
> >   address for the platform) type which should be used everywhere you
> >   hold a DMA (bus) address returned from the DMA mapping functions.
> > 
> > and clearly you use a dma_addr_t from dma_alloc_coherent(), etc., to
> > program the device.  It seems silly to have phys_addr_t and dma_addr_t
> > for two (possibly slightly different) flavors of CPU physical
> > addresses, and then sort of overload dma_addr_t by also using it to
> > hold the addresses devices use for DMA.
> 
> dma_addr_t is a phys_addr_t masked according to the capabilities of the
> *DMA engine* (not device). 

I don't quite agree with this.  phys_addr_t is used for physical
addresses generated by a CPU, e.g., ioremap() takes a phys_addr_t and
returns a void * (a virtual address).  A dma_addr_t is an address you
get from dma_map_page() or a similar interface, and you program into a
device (or DMA engine if you prefer).  You can't generate a dma_addr_t
simply by masking a phys_addr_t because an IOMMU can make arbitrary
mappings of bus addresses to physical memory addresses.

It is meaningless for a CPU to generate a reference to a dma_addr_t.
It will work in some simple cases where there's no IOMMU, but it won't
work in general.
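
For comparison, this is how a dma_addr_t is normally produced (standard
DMA API usage; error handling elided):

	struct page *page = alloc_page(GFP_KERNEL);
	dma_addr_t handle = dma_map_page(dev, page, 0, PAGE_SIZE,
					 DMA_TO_DEVICE);
	/* "handle" is whatever mapping the IOMMU set up; only the
	 * device may use it, never a CPU dereference */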

> Quite a lot of PCI devices (all?) now contain
> a DMA engine, so the distinction is somewhat blurred nowadays.

I'll take your word for the DMA engine/device distinction, but I don't
see how it's relevant here.  Linux doesn't have a generic idea of a
DMA engine; we just treat it as part of the device capabilities.

> > I'm not a CPU
> > person, so I don't understand the usefulness of dma_addr_t as a
> > separate type to hold CPU addresses at which memory-mapped devices can
> > appear.  If that's all it is, why not just use phys_addr_t everywhere
> > and get rid of dma_addr_t altogether?
> 
> Because dma_addr_t imposes further restrictions based on what the DMA engine
> can access. You could have a DMA engine that can only access 32bit addresses
> in a 40+ bit CPU addresses system.

Yes; this is an argument for having separate types for bus addresses
and CPU addresses.  I think dma_addr_t *is* for bus addresses.  I
thought James was suggesting that it was a subset of CPU physical
address space, and that any valid dma_addr_t is also a valid
phys_addr_t.  That doesn't seem right to me.

> > dma_declare_coherent_memory() calls ioremap() on bus_addr, which seems
> > a clear indication that bus_addr is really a physical address usable
> > by the CPU.  But I do see your point that bus_addr is a CPU address
> > for a memory-mapped device, and would thus fit into your idea of a
> > dma_addr_t.
> > 
> > I think this is the only part of the DMA API that deals with CPU
> > physical addresses.  I suppose we could side-step this question by
> > having the caller do the ioremap; then dma_declare_coherent_memory()
> > could just take a CPU virtual address (a void *) and a device address
> > (a dma_addr_t).
> 
> And how would that change the meaning of the function?

I think the interesting thing here is that we ioremap() a bus address.
That doesn't work in general because ioremap() is expecting a CPU
physical address, not a bus address.  We don't have a generic way for
dma_declare_coherent_memory() to convert a bus address to a CPU
physical address.  The caller knows a little more and probably *could*
do that, e.g., a PCI driver could use pcibios_bus_to_resource().  If
we made a generic way to do the conversion, it would definitely be
nicer to leave the ioremap() inside dma_declare_coherent_memory().
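
Something like this, if such a helper existed (bus_to_phys() below is
purely hypothetical):

	/* hypothetical generic bus-to-CPU address conversion */
	phys_addr_t phys = bus_to_phys(dev, bus_addr);
	void __iomem *mem_base = ioremap(phys, size);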

Bjorn


* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-02  8:42       ` Arnd Bergmann
@ 2014-05-05 23:01         ` Bjorn Helgaas
  2014-05-06  2:42           ` James Bottomley
  0 siblings, 1 reply; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-05 23:01 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: James Bottomley, rdunlap, linux-pci, gregkh, linux-doc, linux-kernel

On Fri, May 02, 2014 at 10:42:18AM +0200, Arnd Bergmann wrote:
> On Friday 02 May 2014 06:11:42 James Bottomley wrote:
> > On Thu, 2014-05-01 at 13:05 -0600, Bjorn Helgaas wrote:
> > > On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> > > <jbottomley@parallels.com> wrote:
> > > > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> > > >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> > > >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> > > >> the physical address a *CPU* would use to access the region, and
> > > >> "device_addr" is the bus address the *device* would use to address the
> > > >> region.
> > > >>
> > > >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> > > >
> > > > Remind me what the difference between phys_addr_t and dma_addr_t is.
> > > >
> > > > I thought phys_addr_t was the maximum address the CPU could reach after
> > > > address translation and dma_addr_t was the maximum physical address any
> > > > bus attached memory mapped devices could appear at. (of course, mostly
> > > > they're the same).
> > > 
> > > I assumed phys_addr_t was for physical addresses generated by a CPU
> > > and dma_addr_t was for addresses generated by a device, e.g.,
> > > addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> > > ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> > > things, e.g., ARM_LPAE, than to device bus properties like "support
> > > 64-bit PCI, not just 32-bit PCI."
> > 
> > OK, but even in your definition dma_declare_coherent_memory() should
> > operate on dma_addr_t because the input is a region of memory you got
> > from the device.  Somehow you generate an idea from the device
> > configuration of where this piece of memory sits on the device and
> > that's what you feed into dma_declare_coherent_memory().  It maps that
> > region and makes it available to the CPU allocators.
> 
> I think it's ambiguous at best at the moment; there is no clear
> answer as to what it should be until we define the semantics more clearly.
> 
> At the moment, we have these callers
> 
> arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-imx27_visstrim_m10.c:    dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-mx31_3ds.c:      dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-mx31moboard.c:   dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-mx35_3ds.c:      dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/mach-imx/mach-pcm037.c:        dma = dma_declare_coherent_memory(&pdev->dev,
> arch/arm/plat-samsung/s5p-dev-mfc.c:            if (dma_declare_coherent_memory(area->dev, area->base,
> arch/sh/drivers/pci/fixups-dreamcast.c:         BUG_ON(!dma_declare_coherent_memory(&dev->dev,
> drivers/media/platform/s5p-mfc/s5p_mfc.c:       if (dma_declare_coherent_memory(dev->mem_dev_l, mem_info[
> drivers/media/platform/s5p-mfc/s5p_mfc.c:       if (dma_declare_coherent_memory(dev->mem_dev_r, mem_info[
> drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c:               err = dma_declare_coherent_memory
> drivers/scsi/NCR_Q720.c:        if (dma_declare_coherent_memory(dev, base_addr, base_addr,
> drivers/usb/host/ohci-sm501.c:  if (!dma_declare_coherent_memory(dev, mem->start,
> drivers/usb/host/ohci-tmio.c:   if (!dma_declare_coherent_memory(&dev->dev, sram->start,
> 
> I don't know about NCR_Q720, but all others are only used on machines
> where physical addresses and bus addresses are in the same space.

In general, the driver doesn't know whether physical and bus addresses
are in the same space.  At least, I *hope* it doesn't have to know,
because it can't be very generic if it does.

> There are other machines that may always have to go through an IOMMU,
> or that have a fixed offset between dma addresses and physical addresses
> and that need to call a phys_to_dma() to convert between the two, but
> none of those call dma_declare_coherent_memory.
> ...

> We also have machines on ARM that can never do DMA to addresses above the
> 32-bit boundary but can have more than 4GB RAM (using swiotlb or iommu).
> Building a kernel for these machines uses a 32-bit dma_addr_t and
> a 64-bit phys_addr_t.

The PCI core currently prevents those machines from having MMIO space
above 4GB on the bus, which doesn't sound strictly necessary.

> > Well, first we need to answer whether we need separate phys_addr_t and
> > dma_addr_t ... we know the former represents physical memory addressed
> > by the CPU and the latter bus physical addresses from devices ... but
> > they're essentially the same.
> 
> I think BARs should be phys_addr_t: in the example with 32-bit dma_addr_t
> and 64-bit phys_addr_t, you would be able to put the MMIO registers
> into a high address if the PCI host supports that; the only limitation
> would be that no device could DMA into a high address for doing inter-device
> DMA.

I could imagine using u64 for BARs, but I don't see why we would use
phys_addr_t.  I think it's simpler to describe dma_addr_t as "sufficient
to hold any address on the bus, whether for DMA or MMIO accesses."

There isn't really a connection between the CPU address width and the
bus address width.  A 64-bit CPU could have a 32-bit conventional PCI
bus, and a 32-bit CPU could use a bus with all MMIO space above 4GB
given a host bridge that did appropriate translation.  (I don't know
why one would do the latter, but I don't know why it wouldn't work.)

Bjorn

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-05 23:01         ` Bjorn Helgaas
@ 2014-05-06  2:42           ` James Bottomley
  2014-05-06  9:29             ` Arnd Bergmann
  2014-05-06 13:18             ` Bjorn Helgaas
  0 siblings, 2 replies; 19+ messages in thread
From: James Bottomley @ 2014-05-06  2:42 UTC (permalink / raw)
  To: bhelgaas; +Cc: rdunlap, linux-pci, gregkh, arnd, linux-doc, linux-kernel

On Mon, 2014-05-05 at 17:01 -0600, Bjorn Helgaas wrote:
> On Fri, May 02, 2014 at 10:42:18AM +0200, Arnd Bergmann wrote:

> > I don't know about NCR_Q720, but all others are only used on machines
> > where physical addresses and bus addresses are in the same space.
> 
> In general, the driver doesn't know whether physical and bus addresses
> are in the same space.  At least, I *hope* it doesn't have to know,
> because it can't be very generic if it does.

The API was designed for the case where the memory resides on a PCI
device (the Q720 case), the card config gives us a bus address, but if
the system has an IOMMU, we'd have to do a dma_map of the entire region
to set up the IOMMU before we can touch it.  The address it gets back
from the dma_map (the dma_addr_t handle for the IOMMU mapping) is what
we pass into dma_declare_coherent_memory().  The reason it does an
ioremap is because this IOMMU mapped address is now physical to the CPU
and we want to make the region available to virtual space.  Essentially
the memory the allocator hands out behaves as proper virtual memory but
it's backed by physical memory on the card behind the PCI bridge.

I'm still not that fussed about the difference between phys_addr_t and
dma_addr_t, but if the cookie returned from a dma_map is a dma_addr_t
then that's what dma_declare_coherent_memory() should use.  If it's a
phys_addr_t, then likewise.

James


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-05 22:42       ` Bjorn Helgaas
@ 2014-05-06  8:59         ` Liviu Dudau
  2014-05-06 20:22           ` Bjorn Helgaas
  0 siblings, 1 reply; 19+ messages in thread
From: Liviu Dudau @ 2014-05-06  8:59 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: James Bottomley, rdunlap, linux-pci, gregkh, linux-doc,
	linux-kernel, Arnd Bergmann

On Mon, May 05, 2014 at 11:42:40PM +0100, Bjorn Helgaas wrote:
> On Fri, May 02, 2014 at 12:18:07PM +0100, Liviu Dudau wrote:
> > On Thu, May 01, 2014 at 08:05:04PM +0100, Bjorn Helgaas wrote:
> > > On Thu, May 1, 2014 at 8:08 AM, James Bottomley
> > > <jbottomley@parallels.com> wrote:
> > > > On Wed, 2014-04-30 at 14:33 -0600, Bjorn Helgaas wrote:
> > > >> dma_declare_coherent_memory() takes two addresses for a region of memory: a
> > > >> "bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
> > > >> the physical address a *CPU* would use to access the region, and
> > > >> "device_addr" is the bus address the *device* would use to address the
> > > >> region.
> > > >>
> > > >> Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
> > > >
> > > > Remind me what the difference between phys_addr_t and dma_addr_t are.
> > > >
> > > > I thought phys_addr_t was the maximum address the CPU could reach after
> > > > address translation and dma_addr_t was the maximum physical address any
> > > > bus attached memory mapped devices could appear at. (of course, mostly
> > > > they're the same).
> > > 
> > > I assumed phys_addr_t was for physical addresses generated by a CPU
> > > and dma_addr_t was for addresses generated by a device, e.g.,
> > > addresses you would see with a PCI bus analyzer or in a PCI BAR.  But
> > > ARCH_DMA_ADDR_T_64BIT does seem more connected to processor-specific
> > > things, e.g., ARM_LPAE, than to device bus properties like "support
> > > 64-bit PCI, not just 32-bit PCI."
> > > 
> > > DMA-API-HOWTO.txt contains this:
> > > 
> > >   ... the definition of the dma_addr_t (which can hold any valid DMA
> > >   address for the platform) type which should be used everywhere you
> > >   hold a DMA (bus) address returned from the DMA mapping functions.
> > > 
> > > and clearly you use a dma_addr_t from dma_alloc_coherent(), etc., to
> > > program the device.  It seems silly to have phys_addr_t and dma_addr_t
> > > for two (possibly slightly different) flavors of CPU physical
> > > addresses, and then sort of overload dma_addr_t by also using it to
> > > hold the addresses devices use for DMA.
> > 
> > dma_addr_t is a phys_addr_t masked according to the capabilities of the
> > *DMA engine* (not device). 

Sorry, I should've said restricted rather than masked.

> 
> I don't quite agree with this.  phys_addr_t is used for physical
> addresses generated by a CPU, e.g., ioremap() takes a phys_addr_t and
> returns a void * (a virtual address).  A dma_addr_t is an address you
> get from dma_map_page() or a similar interface, and you program into a
> device (or DMA engine if you prefer).  You can't generate a dma_addr_t
> simply by masking a phys_addr_t because an IOMMU can make arbitrary
> mappings of bus addresses to physical memory addresses.

Agree, sorry for the confusion I've generated.

> 
> It is meaningless for a CPU to generate a reference to a dma_addr_t.
> It will work in some simple cases where there's no IOMMU, but it won't
> work in general.
> 
> > Quite a lot of PCI devices (all?) now contain
> > a DMA engine, so the distinction is somewhat blurred nowadays.
> 
> I'll take your word for the DMA engine/device distinction, but I don't
> see how it's relevant here.  Linux doesn't have a generic idea of a
> DMA engine; we just treat it as part of the device capabilities.

Well, ARM has a separate DMA engine that can be used by multiple devices.
Hence for (my) sanity I prefer to keep the distinction clear.

> 
> > > I'm not a CPU
> > > person, so I don't understand the usefulness of dma_addr_t as a
> > > separate type to hold CPU addresses at which memory-mapped devices can
> > > appear.  If that's all it is, why not just use phys_addr_t everywhere
> > > and get rid of dma_addr_t altogether?
> > 
> > Because dma_addr_t imposes further restrictions based on what the DMA engine
> > can access. You could have a DMA engine that can only access 32bit addresses
> > in a 40+ bit CPU addresses system.
> 
> Yes; this is an argument for having separate types for bus addresses
> and CPU addresses.  I think dma_addr_t *is* for bus addresses.  I
> thought James was suggesting that it was a subset of CPU physical
> address space, and that any valid dma_addr_t is also a valid
> phys_addr_t.  That doesn't seem right to me.

I'm actually more inclined towards James' view. To me the DMA engine is a very
specialised CPU that does memcpy(). You can pair it with an IOMMU engine and
you can have address translation into virtual space of any size you want, or
you can have the engine sitting behind a bus, in which case it will use bus
addresses. But its use of addresses is similar to a CPU's (whether you want to
call them phys_addr_t or not is a different discussion).

ARM has a PL330 DMA engine. It's a SoC component and usually doesn't get attached
to any PCI bus. It uses a system bus called AXI in the same way a CPU would. So
for that, having a *bus* address does not fit the way the hardware is connected.

My mental picture for this discussion is a hypothetical DMA engine that sits on
a PCI bus but has access to the system memory that the CPU can see (sort of like
a DMA engine inside a host bridge). Would you use a phys_addr_t when programming
the device, or a bus address? I would say the former, because you know how to
translate that into bus addresses and back in the host bridge.

> 
> > > dma_declare_coherent_memory() calls ioremap() on bus_addr, which seems
> > > a clear indication that bus_addr is really a physical address usable
> > > by the CPU.  But I do see your point that bus_addr is a CPU address
> > > for a memory-mapped device, and would thus fit into your idea of a
> > > dma_addr_t.
> > > 
> > > I think this is the only part of the DMA API that deals with CPU
> > > physical addresses.  I suppose we could side-step this question by
> > > having the caller do the ioremap; then dma_declare_coherent_memory()
> > > could just take a CPU virtual address (a void *) and a device address
> > > (a dma_addr_t).
> > 
> > And how would that change the meaning of the function?
> 
> I think the interesting thing here is that we ioremap() a bus address.
> That doesn't work in general because ioremap() is expecting a CPU
> physical address, not a bus address.  We don't have a generic way for
> dma_declare_coherent_memory() to convert a bus address to a CPU
> physical address.  The caller knows a little more and probably *could*
> do that, e.g., a PCI driver could use pcibios_bus_to_resource().  If
> we made a generic way to do the conversion, it would definitely be
> nicer to leave the ioremap() inside dma_declare_coherent_memory().

Agree, ioremap()-ing a bus address sounds to me like a bug.

Best regards,
Liviu

> 
> Bjorn
> 

-- 
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-06  2:42           ` James Bottomley
@ 2014-05-06  9:29             ` Arnd Bergmann
  2014-05-06 13:18             ` Bjorn Helgaas
  1 sibling, 0 replies; 19+ messages in thread
From: Arnd Bergmann @ 2014-05-06  9:29 UTC (permalink / raw)
  To: James Bottomley
  Cc: bhelgaas, rdunlap, linux-pci, gregkh, linux-doc, linux-kernel

On Tuesday 06 May 2014 02:42:22 James Bottomley wrote:
> On Mon, 2014-05-05 at 17:01 -0600, Bjorn Helgaas wrote:
> > On Fri, May 02, 2014 at 10:42:18AM +0200, Arnd Bergmann wrote:
> 
> > > I don't know about NCR_Q720, but all others are only used on machines
> > > where physical addresses and bus addresses are in the same space.
> > 
> > In general, the driver doesn't know whether physical and bus addresses
> > are in the same space.  At least, I *hope* it doesn't have to know,
> > because it can't be very generic if it does.
> 
> The API was designed for the case where the memory resides on a PCI
> device (the Q720 case), the card config gives us a bus address, but if
> the system has an IOMMU, we'd have to do a dma_map of the entire region
> to set up the IOMMU before we can touch it.  The address it gets back
> from the dma_map (the dma_addr_t handle for the IOMMU mapping) is what
> we pass into dma_declare_coherent_memory().  The reason it does an
> ioremap is because this IOMMU mapped address is now physical to the CPU
> and we want to make the region available to virtual space.  Essentially
> the memory the allocator hands out behaves as proper virtual memory but
> it's backed by physical memory on the card behind the PCI bridge.
> 
> I'm still not that fussed about the difference between phys_addr_t and
> dma_addr_t, but if the cookie returned from a dma_map is a dma_addr_t
> then that's what dma_declare_coherent_memory() should use.  If it's a
> phys_addr_t, then likewise.

I think the 'bus_addr' argument should be renamed to 'phys_addr' and
changed from its current type to 'phys_addr_t'; the device_addr should
stay as dma_addr_t. This is what the code looks like today:

struct dma_coherent_mem {
        void            *virt_base;
        dma_addr_t      device_base;
        phys_addr_t     pfn_base;
	...
};

int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
                                dma_addr_t device_addr, size_t size, int flags)
{
	...
	dev->dma_mem->virt_base = ioremap(bus_addr, size);
	dev->dma_mem->pfn_base = PFN_DOWN(bus_addr);
	dev->dma_mem->device_base = device_addr;
	...
}

* ioremap() takes a phys_addr_t argument, but we feed it a dma_addr_t,
  which should get corrected.
* a PFN by convention is an 'unsigned long', not a phys_addr_t;
  PFN_DOWN essentially converts a phys_addr_t to an 'unsigned long'.

If we change these, it's all consistent.
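
A minimal sketch of the consistent version (combining the rename from
Bjorn's patch at the top of the thread with the PFN type fix above; the
elided bookkeeping is unchanged):

struct dma_coherent_mem {
        void            *virt_base;
        dma_addr_t      device_base;
        unsigned long   pfn_base;       /* a PFN, so plain unsigned long */
	...
};

int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
                                dma_addr_t device_addr, size_t size, int flags)
{
	...
	/* ioremap() now gets the phys_addr_t it expects */
	dev->dma_mem->virt_base = ioremap(phys_addr, size);
	dev->dma_mem->pfn_base = PFN_DOWN(phys_addr);
	dev->dma_mem->device_base = device_addr;
	...
}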

Unrelated to that, there is the problem that most callers (including Q720)
pass the same address as bus_addr and device_addr into
dma_declare_coherent_memory(), but that would be an issue with the caller,
and it's sort of ok when the caller knows they are the same (i.e. no IOMMU).

Normally dma_map_single() is used to convert from phys_addr_t to dma_addr_t,
and you seem to argue that this is what the callers should do. However,
that doesn't work here, because dma_map_single() is only defined for
actual RAM in the linear kernel mapping, not for a physical address on
a device. We have a similar problem in passing FIFO addresses from slave
devices to dma engines; that is also unsolved right now, and we rely on
them being on the same bus, with no IOMMU between them.
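
To make that distinction concrete, a hypothetical sketch (not code from
any actual driver; 'dev' and 'res' are assumed to be in scope):

	/* fine: kmalloc() hands back RAM in the kernel linear mapping */
	void *buf = kmalloc(SZ_4K, GFP_KERNEL);
	dma_addr_t handle = dma_map_single(dev, buf, SZ_4K, DMA_TO_DEVICE);

	/* not fine: a BAR is device memory with no struct page behind it,
	 * so dma_map_single() is undefined for it -- exactly the gap
	 * described above */
	void __iomem *regs = ioremap(res->start, resource_size(res));
	/* dma_map_single(dev, (void __force *)regs, ...) is invalid */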

	Arnd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-06  2:42           ` James Bottomley
  2014-05-06  9:29             ` Arnd Bergmann
@ 2014-05-06 13:18             ` Bjorn Helgaas
  2014-05-06 13:42               ` James Bottomley
  1 sibling, 1 reply; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-06 13:18 UTC (permalink / raw)
  To: James Bottomley; +Cc: rdunlap, linux-pci, gregkh, arnd, linux-doc, linux-kernel

On Mon, May 5, 2014 at 8:42 PM, James Bottomley
<jbottomley@parallels.com> wrote:
> On Mon, 2014-05-05 at 17:01 -0600, Bjorn Helgaas wrote:
>> On Fri, May 02, 2014 at 10:42:18AM +0200, Arnd Bergmann wrote:
>
>> > I don't know about NCR_Q720, but all others are only used on machines
>> > where physical addresses and bus addresses are in the same space.
>>
>> In general, the driver doesn't know whether physical and bus addresses
>> are in the same space.  At least, I *hope* it doesn't have to know,
>> because it can't be very generic if it does.
>
> The API was designed for the case where the memory resides on a PCI
> device (the Q720 case), the card config gives us a bus address, but if
> the system has an IOMMU, we'd have to do a dma_map of the entire region
> to set up the IOMMU before we can touch it.  The address it gets back
> from the dma_map (the dma_addr_t handle for the IOMMU mapping) is what
> we pass into dma_declare_coherent_memory().

The IOMMU (if any) is only involved for DMA to system memory, and
there is no system memory in this picture.  The device does DMA to its
own memory; no dma_map is required for this.  We use
dma_declare_coherent_memory() to set things up so the CPU can also do
programmed I/O to the memory.
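
(For illustration, the ohci-sm501 caller from the list earlier in the
thread has roughly this shape -- paraphrased, not quoted; "mem" is the
resource describing the on-chip SRAM, and the second and third arguments
are the CPU and device views of the same memory:)

	if (!dma_declare_coherent_memory(dev, mem->start,
					 mem->start - mem->parent->start,
					 resource_size(mem),
					 DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE))
		return -EBUSY;

	/* dma_alloc_coherent() now hands out chunks of the device memory */
	buf = dma_alloc_coherent(dev, SZ_4K, &dma_handle, GFP_KERNEL);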

> The reason it does an
> ioremap is because this IOMMU mapped address is now physical to the CPU
> and we want to make the region available to virtual space.  Essentially
> the memory the allocator hands out behaves as proper virtual memory but
> it's backed by physical memory on the card behind the PCI bridge.

Yep, the programmed I/O depends on the ioremap().  But I don't think
it depends on any IOMMU mapping.

> I'm still not that fussed about the difference between phys_addr_t and
> dma_addr_t, but if the cookie returned from a dma_map is a dma_addr_t
> then that's what dma_declare_coherent_memory() should use.  If it's a
> phys_addr_t, then likewise.
>
> James
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-06 13:18             ` Bjorn Helgaas
@ 2014-05-06 13:42               ` James Bottomley
  2014-05-06 14:09                 ` Arnd Bergmann
  0 siblings, 1 reply; 19+ messages in thread
From: James Bottomley @ 2014-05-06 13:42 UTC (permalink / raw)
  To: bhelgaas; +Cc: rdunlap, linux-pci, gregkh, arnd, linux-doc, linux-kernel

On Tue, 2014-05-06 at 07:18 -0600, Bjorn Helgaas wrote:
> On Mon, May 5, 2014 at 8:42 PM, James Bottomley
> <jbottomley@parallels.com> wrote:
> > On Mon, 2014-05-05 at 17:01 -0600, Bjorn Helgaas wrote:
> >> On Fri, May 02, 2014 at 10:42:18AM +0200, Arnd Bergmann wrote:
> >
> >> > I don't know about NCR_Q720, but all others are only used on machines
> >> > where physical addresses and bus addresses are in the same space.
> >>
> >> In general, the driver doesn't know whether physical and bus addresses
> >> are in the same space.  At least, I *hope* it doesn't have to know,
> >> because it can't be very generic if it does.
> >
> > The API was designed for the case where the memory resides on a PCI
> > device (the Q720 case), the card config gives us a bus address, but if
> > the system has an IOMMU, we'd have to do a dma_map of the entire region
> > to set up the IOMMU before we can touch it.  The address it gets back
> > from the dma_map (the dma_addr_t handle for the IOMMU mapping) is what
> > we pass into dma_declare_coherent_memory().
> 
> The IOMMU (if any) is only involved for DMA to system memory, and
> there is no system memory in this picture.  The device does DMA to its
> own memory; no dma_map is required for this.  We use
> dma_declare_coherent_memory() to set things up so the CPU can also do
> programmed I/O to the memory.

Right, but for the CPU to access memory on a device, the access has to
go through the IOMMU: it has to be programmed to map the memory on the
bus to a physical address.

> > The reason it does an
> > ioremap is because this IOMMU mapped address is now physical to the CPU
> > and we want to make the region available to virtual space.  Essentially
> > the memory the allocator hands out behaves as proper virtual memory but
> > it's backed by physical memory on the card behind the PCI bridge.
> 
> Yep, the programmed I/O depends on the ioremap().  But I don't think
> it depends on any IOMMU mapping.

At least on Parisc with U2/Uturn, unless there are IOMMU entries, you
won't be able to address the memory on the device because it's behind
the IOMMU.  For regions like this, the necessary IOMMU entries are set
up at init time because without them you don't get memory mapped access
to register space either.

So like I said, I can go either way.  The entries arrive properly set
up, so perhaps we should use phys_addr_t, because that's what's on the
other side of the IOMMU.  On the other hand, it is a handle for something
on the device, so perhaps it should be dma_addr_t.

James

> > I'm still not that fussed about the difference between phys_addr_t and
> > dma_addr_t, but if the cookie returned from a dma_map is a dma_addr_t
> > then that's what dma_declare_coherent_memory() should use.  If it's a
> > phys_addr_t, then likewise.
> >
> > James
> >



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-06 13:42               ` James Bottomley
@ 2014-05-06 14:09                 ` Arnd Bergmann
  0 siblings, 0 replies; 19+ messages in thread
From: Arnd Bergmann @ 2014-05-06 14:09 UTC (permalink / raw)
  To: James Bottomley
  Cc: bhelgaas, rdunlap, linux-pci, gregkh, linux-doc, linux-kernel

On Tuesday 06 May 2014 13:42:06 James Bottomley wrote:
> On Tue, 2014-05-06 at 07:18 -0600, Bjorn Helgaas wrote:
> > On Mon, May 5, 2014 at 8:42 PM, James Bottomley
> > <jbottomley@parallels.com> wrote:
> > > On Mon, 2014-05-05 at 17:01 -0600, Bjorn Helgaas wrote:
> > >> On Fri, May 02, 2014 at 10:42:18AM +0200, Arnd Bergmann wrote:
> > >
> > >> > I don't know about NCR_Q720, but all others are only used on machines
> > >> > where physical addresses and bus addresses are in the same space.
> > >>
> > >> In general, the driver doesn't know whether physical and bus addresses
> > >> are in the same space.  At least, I *hope* it doesn't have to know,
> > >> because it can't be very generic if it does.
> > >
> > > The API was designed for the case where the memory resides on a PCI
> > > device (the Q720 case), the card config gives us a bus address, but if
> > > the system has an IOMMU, we'd have to do a dma_map of the entire region
> > > to set up the IOMMU before we can touch it.  The address it gets back
> > > from the dma_map (the dma_addr_t handle for the IOMMU mapping) is what
> > > we pass into dma_declare_coherent_memory().
> > 
> > The IOMMU (if any) is only involved for DMA to system memory, and
> > there is no system memory in this picture.  The device does DMA to its
> > own memory; no dma_map is required for this.  We use
> > dma_declare_coherent_memory() to set things up so the CPU can also do
> > programmed I/O to the memory.
> 
> Right, but for the CPU to access memory on a device, the access has to
> go through the IOMMU: it has to be programmed to map the memory on the
> bus to a physical address.

That's not how most IOMMUs work; I haven't actually seen any system that
requires using the IOMMU for an MMIO access on the architectures I've
worked on. The IOMMU may be required for the device to access its own
memory, though, if it issues a DMA request that goes up the bus hierarchy
into the IOMMU and gets translated into an MMIO address there.

> > > The reason it does an
> > > ioremap is because this IOMMU mapped address is now physical to the CPU
> > > and we want to make the region available to virtual space.  Essentially
> > > the memory the allocator hands out behaves as proper virtual memory but
> > > it's backed by physical memory on the card behind the PCI bridge.
> > 
> > Yep, the programmed I/O depends on the ioremap().  But I don't think
> > it depends on any IOMMU mapping.
> 
> At least on Parisc with U2/Uturn, unless there are IOMMU entries, you
> won't be able to address the memory on the device because it's behind
> the IOMMU.  For regions like this, the necessary IOMMU entries are set
> up at init time because without them you don't get memory mapped access
> to register space either.

I would treat that as a specific property of that system. The IOMMU
and dma-mapping APIs we have in Linux normally assume that the IOMMU
is strictly one-way, translating dma_addr_t (used by the device) into
phys_addr_t (normally for RAM), but not for memory mapped access
initiated by the CPU. Most architectures (not s390 though) rely
on the MMU to set up a page table entry converting from a virtual
address to the physical address. There may be offsets between the
physical address seen by the CPU and the address seen on the bus,
but not a second set of page tables.
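
On the fixed-offset platforms mentioned earlier in the thread, that
conversion is pure arithmetic; a sketch of what such an arch's helper
pair looks like (BUS_OFFSET is a made-up constant for illustration):

static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
	return paddr - BUS_OFFSET;	/* fixed offset, no page tables */
}

static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
	return daddr + BUS_OFFSET;
}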

	Arnd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t
  2014-05-06  8:59         ` Liviu Dudau
@ 2014-05-06 20:22           ` Bjorn Helgaas
  0 siblings, 0 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2014-05-06 20:22 UTC (permalink / raw)
  To: Bjorn Helgaas, James Bottomley, rdunlap, linux-pci, gregkh,
	linux-doc, linux-kernel, Arnd Bergmann

On Tue, May 6, 2014 at 2:59 AM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Mon, May 05, 2014 at 11:42:40PM +0100, Bjorn Helgaas wrote:
>> On Fri, May 02, 2014 at 12:18:07PM +0100, Liviu Dudau wrote:
>> > On Thu, May 01, 2014 at 08:05:04PM +0100, Bjorn Helgaas wrote:

>> > > I'm not a CPU
>> > > person, so I don't understand the usefulness of dma_addr_t as a
>> > > separate type to hold CPU addresses at which memory-mapped devices can
>> > > appear.  If that's all it is, why not just use phys_addr_t everywhere
>> > > and get rid of dma_addr_t altogether?
>> >
>> > Because dma_addr_t imposes further restrictions based on what the DMA engine
>> > can access. You could have a DMA engine that can only access 32bit addresses
>> > in a 40+ bit CPU addresses system.
>>
>> Yes; this is an argument for having separate types for bus addresses
>> and CPU addresses.  I think dma_addr_t *is* for bus addresses.  I
>> thought James was suggesting that it was a subset of CPU physical
>> address space, and that any valid dma_addr_t is also a valid
>> phys_addr_t.  That doesn't seem right to me.
>
> I'm actually more inclined towards James' view. To me the DMA engine is a very
> specialised CPU that does memcpy(). You can pair it with an IOMMU engine and
> you can have address translation into virtual space of any size you want, or
> you can have the engine sitting behind a bus, in which case it will use bus
> addresses. But its use of addresses is similar to a CPU's (whether you want to
> call them phys_addr_t or not is a different discussion).
>
> ARM has a PL330 DMA engine. It's a SoC component and usually doesn't get attached
> to any PCI bus. It uses a system bus called AXI in the same way a CPU would. So
> for that, having a *bus* address does not fit the way the hardware is connected.
>
> My mental picture for this discussion is a hypothetical DMA engine that sits on
> a PCI bus but has access to the system memory that the CPU can see (sort of like
> a DMA engine inside a host bridge). Would you use a phys_addr_t when programming
> the device, or a bus address? I would say the former, because you know how to
> translate that into bus addresses and back in the host bridge.

I don't think the physical location of the DMA engine is important.  I
think what matters is how you program it.  If it fully participates in
the single shared virtual address space used by the main CPUs, i.e.,
it uses the same page tables and TLB management and is cache-coherent
with them, then you'd probably give it virtual source and destination
addresses.  But my guess is the DMA engine operates on some flavor of
physical address and is outside the coherency domain.  In that case,
you probably have to allocate wired buffers to avoid faults, and you
probably have to use the DMA API (DMA_TO_DEVICE, DMA_FROM_DEVICE,
etc.) to manage cache coherency.  The DMA API gives you back a bus
address just like it does for ordinary devices, even if the value of
that address happens to be identical to the CPU physical address.
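
(That is, the usual streaming pattern, sketched here with hypothetical
buf/len and a made-up engine call:)

	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;
	/* 'handle' is a bus address; program it into the engine */
	start_engine_copy(handle, len);		/* hypothetical */
	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);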

Should that bus address be a phys_addr_t instead of a dma_addr_t?  I
dunno.  We have a pretty long history of using dma_addr_t to hold bus
addresses, and I don't see a compelling reason to change, and the
separate type does help me understand the code.

I'll write some text about dma_addr_t in the docs when I repost these
patches and maybe we can converge on something.

Bjorn

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2014-05-06 20:23 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-30 20:33 [PATCH] DMA-API: Change dma_declare_coherent_memory() CPU address to phys_addr_t Bjorn Helgaas
2014-05-01  0:07 ` Greg Kroah-Hartman
2014-05-01  7:48   ` Bjorn Helgaas
2014-05-01 14:08 ` James Bottomley
2014-05-01 19:05   ` Bjorn Helgaas
2014-05-02  6:11     ` James Bottomley
2014-05-02  8:42       ` Arnd Bergmann
2014-05-05 23:01         ` Bjorn Helgaas
2014-05-06  2:42           ` James Bottomley
2014-05-06  9:29             ` Arnd Bergmann
2014-05-06 13:18             ` Bjorn Helgaas
2014-05-06 13:42               ` James Bottomley
2014-05-06 14:09                 ` Arnd Bergmann
2014-05-02 18:34       ` Bjorn Helgaas
2014-05-02 11:18     ` Liviu Dudau
2014-05-05 22:42       ` Bjorn Helgaas
2014-05-06  8:59         ` Liviu Dudau
2014-05-06 20:22           ` Bjorn Helgaas
2014-05-02 11:02   ` Liviu Dudau
