linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 1/3] powerpc: Use sg->dma_length in sg_dma_len() macro on 32-bit
@ 2009-05-14 22:42 Becky Bruce
  2009-05-14 22:42 ` [PATCH V2 2/3] powerpc: Add support for swiotlb " Becky Bruce
  0 siblings, 1 reply; 30+ messages in thread
From: Becky Bruce @ 2009-05-14 22:42 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: fujita.tomonori

Currently, the 32-bit code uses sg->length instead of sg->dma_length
to report sg_dma_len.  However, since the default dma code for 32-bit
(the dma_direct case) sets dma_length and length to the same thing,
we should be able to use dma_length there as well.  This gets rid of
some 32-vs-64-bit ifdefs, and is needed by the swiotlb code which
actually distinguishes between dma_length and length.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/include/asm/scatterlist.h |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
index fcf7d55..912bf59 100644
--- a/arch/powerpc/include/asm/scatterlist.h
+++ b/arch/powerpc/include/asm/scatterlist.h
@@ -21,7 +21,7 @@ struct scatterlist {
 	unsigned int offset;
 	unsigned int length;
 
-	/* For TCE support */
+	/* For TCE or SWIOTLB support */
 	dma_addr_t dma_address;
 	u32 dma_length;
 };
@@ -34,11 +34,7 @@ struct scatterlist {
  * is 0.
  */
 #define sg_dma_address(sg)	((sg)->dma_address)
-#ifdef __powerpc64__
 #define sg_dma_len(sg)		((sg)->dma_length)
-#else
-#define sg_dma_len(sg)		((sg)->length)
-#endif
 
 #ifdef __powerpc64__
 #define ISA_DMA_THRESHOLD	(~0UL)
-- 
1.6.0.6

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-14 22:42 [PATCH V2 1/3] powerpc: Use sg->dma_length in sg_dma_len() macro on 32-bit Becky Bruce
@ 2009-05-14 22:42 ` Becky Bruce
  2009-05-14 22:42   ` [PATCH V2 3/3] powerpc: Add 86xx support for SWIOTLB Becky Bruce
                     ` (3 more replies)
  0 siblings, 4 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-14 22:42 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: fujita.tomonori

This patch includes the basic infrastructure to use swiotlb
bounce buffering on 32-bit powerpc.  It is not yet enabled on
any platforms.  Probably the most interesting bit is the
addition of addr_needs_map to dma_ops - we need this as
a dma_op because the decision of whether or not an addr
can be mapped by a device is device-specific.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/Kconfig                   |   12 ++-
 arch/powerpc/include/asm/dma-mapping.h |   11 ++
 arch/powerpc/include/asm/swiotlb.h     |   27 +++++
 arch/powerpc/kernel/Makefile           |    1 +
 arch/powerpc/kernel/dma-swiotlb.c      |  163 ++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/dma.c              |    2 +-
 arch/powerpc/kernel/setup_32.c         |    6 +
 arch/powerpc/kernel/setup_64.c         |    6 +
 8 files changed, 226 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/include/asm/swiotlb.h
 create mode 100644 arch/powerpc/kernel/dma-swiotlb.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a0d1146..54e519a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -296,9 +296,19 @@ config IOMMU_VMERGE
 config IOMMU_HELPER
 	def_bool PPC64
 
+config SWIOTLB
+	bool "SWIOTLB support"
+	default n
+	select IOMMU_HELPER
+	---help---
+	  Support for IO bounce buffering for systems without an IOMMU.
+	  This allows us to DMA to the full physical address space on
+	  platforms where the size of a physical address is larger
+	  than the bus address.  Not all platforms support this.
+
 config PPC_NEED_DMA_SYNC_OPS
 	def_bool y
-	depends on NOT_COHERENT_CACHE
+	depends on (NOT_COHERENT_CACHE || SWIOTLB)
 
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index c69f2b5..71bbc17 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -15,9 +15,18 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-attrs.h>
 #include <asm/io.h>
+#include <asm/swiotlb.h>
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
+/* Some dma direct funcs must be visible for use in other dma_ops */
+extern void *dma_direct_alloc_coherent(struct device *dev, size_t size,
+				       dma_addr_t *dma_handle, gfp_t flag);
+extern void dma_direct_free_coherent(struct device *dev, size_t size,
+				     void *vaddr, dma_addr_t dma_handle);
+
+extern unsigned long get_dma_direct_offset(struct device *dev);
+
 #ifdef CONFIG_NOT_COHERENT_CACHE
 /*
  * DMA-consistent mapping functions for PowerPCs that don't support
@@ -76,6 +85,8 @@ struct dma_mapping_ops {
 				dma_addr_t dma_address, size_t size,
 				enum dma_data_direction direction,
 				struct dma_attrs *attrs);
+	int		(*addr_needs_map)(struct device *dev, dma_addr_t addr,
+				size_t size);
 #ifdef CONFIG_PPC_NEED_DMA_SYNC_OPS
 	void            (*sync_single_range_for_cpu)(struct device *hwdev,
 				dma_addr_t dma_handle, unsigned long offset,
diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h
new file mode 100644
index 0000000..30891d6
--- /dev/null
+++ b/arch/powerpc/include/asm/swiotlb.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __ASM_SWIOTLB_H
+#define __ASM_SWIOTLB_H
+
+#include <linux/swiotlb.h>
+
+extern struct dma_mapping_ops swiotlb_dma_ops;
+extern struct dma_mapping_ops swiotlb_pci_dma_ops;
+
+int swiotlb_arch_address_needs_mapping(struct device *, dma_addr_t,
+				       size_t size);
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+extern unsigned int ppc_swiotlb_enable;
+int __init swiotlb_setup_bus_notifier(void);
+
+#endif /* __ASM_SWIOTLB_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fb..34c0a95 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+obj-$(CONFIG_SWIOTLB)		+= dma-swiotlb.o
 
 pci64-$(CONFIG_PPC64)		+= pci_dn.o isa-bridge.o
 obj-$(CONFIG_PCI)		+= pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
new file mode 100644
index 0000000..68ccf11
--- /dev/null
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -0,0 +1,163 @@
+/*
+ * Contains routines needed to support swiotlb for ppc.
+ *
+ * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/pfn.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/abs_addr.h>
+
+int swiotlb __read_mostly;
+unsigned int ppc_swiotlb_enable;
+
+void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr)
+{
+	unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr));
+	void *pageaddr = page_address(pfn_to_page(pfn));
+
+	if (pageaddr != NULL)
+		return pageaddr + (addr % PAGE_SIZE);
+	return NULL;
+}
+
+dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
+{
+	return paddr + get_dma_direct_offset(hwdev);
+}
+
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
+
+{
+	return baddr - get_dma_direct_offset(hwdev);
+}
+
+/*
+ * Determine if an address needs bounce buffering via swiotlb.
+ * Going forward I expect the swiotlb code to generalize on using
+ * a dma_ops->addr_needs_map, and this function will move from here to the
+ * generic swiotlb code.
+ */
+int
+swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr,
+				   size_t size)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev);
+
+	BUG_ON(!dma_ops);
+	return dma_ops->addr_needs_map(hwdev, addr, size);
+}
+
+/*
+ * Determine if an address is reachable by a pci device, or if we must bounce.
+ */
+static int
+swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
+{
+	u64 mask = dma_get_mask(hwdev);
+	dma_addr_t max;
+	struct pci_controller *hose;
+	struct pci_dev *pdev = to_pci_dev(hwdev);
+
+	hose = pci_bus_to_host(pdev->bus);
+	max = hose->dma_window_base_cur + hose->dma_window_size;
+
+	/* check that we're within mapped pci window space */
+	if ((addr + size > max) | (addr < hose->dma_window_base_cur))
+		return 1;
+
+	return !is_buffer_dma_capable(mask, addr, size);
+}
+
+static int
+swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
+{
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+}
+
+
+/*
+ * At the moment, all platforms that use this code only require
+ * swiotlb to be used if we're operating on HIGHMEM.  Since
+ * we don't ever call anything other than map_sg, unmap_sg,
+ * map_page, and unmap_page on highmem, use normal dma_ops
+ * for everything else.
+ */
+struct dma_mapping_ops swiotlb_dma_ops = {
+	.alloc_coherent = dma_direct_alloc_coherent,
+	.free_coherent = dma_direct_free_coherent,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.dma_supported = swiotlb_dma_supported,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.addr_needs_map = swiotlb_addr_needs_map,
+	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
+	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device
+};
+
+struct dma_mapping_ops swiotlb_pci_dma_ops = {
+	.alloc_coherent = dma_direct_alloc_coherent,
+	.free_coherent = dma_direct_free_coherent,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.dma_supported = swiotlb_dma_supported,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.addr_needs_map = swiotlb_pci_addr_needs_map,
+	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
+	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device
+};
+
+static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
+				  unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	/* We are only interested in device addition */
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	/* May need to bounce if the device can't address all of DRAM */
+	if (dma_get_mask(dev) < lmb_end_of_DRAM())
+		set_dma_ops(dev, &swiotlb_dma_ops);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
+	.notifier_call = ppc_swiotlb_bus_notify,
+	.priority = 0,
+};
+
+static struct notifier_block ppc_swiotlb_of_bus_notifier = {
+	.notifier_call = ppc_swiotlb_bus_notify,
+	.priority = 0,
+};
+
+int __init swiotlb_setup_bus_notifier(void)
+{
+	bus_register_notifier(&platform_bus_type,
+			      &ppc_swiotlb_plat_bus_notifier);
+	bus_register_notifier(&of_platform_bus_type,
+			      &ppc_swiotlb_of_bus_notifier);
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 53c7788..62d80c4 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -19,7 +19,7 @@
  * default the offset is PCI_DRAM_OFFSET.
  */
 
-static unsigned long get_dma_direct_offset(struct device *dev)
+unsigned long get_dma_direct_offset(struct device *dev)
 {
 	if (dev)
 		return (unsigned long)dev->archdata.dma_data;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 9e1ca74..1d15424 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -39,6 +39,7 @@
 #include <asm/serial.h>
 #include <asm/udbg.h>
 #include <asm/mmu_context.h>
+#include <asm/swiotlb.h>
 
 #include "setup.h"
 
@@ -332,6 +333,11 @@ void __init setup_arch(char **cmdline_p)
 		ppc_md.setup_arch();
 	if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
 
+#ifdef CONFIG_SWIOTLB
+	if (ppc_swiotlb_enable)
+		swiotlb_init();
+#endif
+
 	paging_init();
 
 	/* Initialize the MMU context management stuff */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c410c60..fbcca72 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -61,6 +61,7 @@
 #include <asm/xmon.h>
 #include <asm/udbg.h>
 #include <asm/kexec.h>
+#include <asm/swiotlb.h>
 
 #include "setup.h"
 
@@ -524,6 +525,11 @@ void __init setup_arch(char **cmdline_p)
 	if (ppc_md.setup_arch)
 		ppc_md.setup_arch();
 
+#ifdef CONFIG_SWIOTLB
+	if (ppc_swiotlb_enable)
+		swiotlb_init();
+#endif
+
 	paging_init();
 	ppc64_boot_msg(0x15, "Setup Done");
 }
-- 
1.6.0.6

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH V2 3/3] powerpc: Add 86xx support for SWIOTLB
  2009-05-14 22:42 ` [PATCH V2 2/3] powerpc: Add support for swiotlb " Becky Bruce
@ 2009-05-14 22:42   ` Becky Bruce
  2009-05-15  4:49   ` [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit Kumar Gala
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-14 22:42 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: fujita.tomonori

This is the final bit of code to allow enabling swiotlb on
mpc86xx.  The platform-specific code is very small and consists
of enabling SWIOTLB in the config file, registering the
swiotlb_setup_bus_notifier initcall, and setting pci_dma_ops
to point to swiotlb_pci_dma_ops if we have more memory than
can be mapped by the inbound PCI windows.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/platforms/86xx/Kconfig        |    1 +
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c |   15 +++++++++++++++
 2 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index fdaf4dd..9c7b64a 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -15,6 +15,7 @@ config MPC8641_HPCN
 	select DEFAULT_UIMAGE
 	select FSL_ULI1575
 	select HAS_RAPIDIO
+	select SWIOTLB
 	help
 	  This option enables support for the MPC8641 HPCN board.
 
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 7e9e83c..6632702 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -19,6 +19,7 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/of_platform.h>
+#include <linux/lmb.h>
 
 #include <asm/system.h>
 #include <asm/time.h>
@@ -27,6 +28,7 @@
 #include <asm/prom.h>
 #include <mm/mmu_decl.h>
 #include <asm/udbg.h>
+#include <asm/swiotlb.h>
 
 #include <asm/mpic.h>
 
@@ -70,7 +72,9 @@ mpc86xx_hpcn_setup_arch(void)
 {
 #ifdef CONFIG_PCI
 	struct device_node *np;
+	struct pci_controller *hose;
 #endif
+	dma_addr_t max = 0xffffffff;
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc86xx_hpcn_setup_arch()", 0);
@@ -83,6 +87,9 @@ mpc86xx_hpcn_setup_arch(void)
 			fsl_add_bridge(np, 1);
 		else
 			fsl_add_bridge(np, 0);
+		hose = pci_find_hose_for_OF_device(np);
+		max = min(max, hose->dma_window_base_cur +
+			  hose->dma_window_size);
 	}
 
 	ppc_md.pci_exclude_device = mpc86xx_exclude_device;
@@ -94,6 +101,13 @@ mpc86xx_hpcn_setup_arch(void)
 #ifdef CONFIG_SMP
 	mpc86xx_smp_init();
 #endif
+
+#ifdef CONFIG_SWIOTLB
+	if (lmb_end_of_DRAM() > max) {
+		ppc_swiotlb_enable = 1;
+		set_pci_dma_ops(&swiotlb_pci_dma_ops);
+	}
+#endif
 }
 
 
@@ -158,6 +172,7 @@ static int __init declare_of_platform_devices(void)
 	return 0;
 }
 machine_device_initcall(mpc86xx_hpcn, declare_of_platform_devices);
+machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier);
 
 define_machine(mpc86xx_hpcn) {
 	.name			= "MPC86xx HPCN",
-- 
1.6.0.6

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-14 22:42 ` [PATCH V2 2/3] powerpc: Add support for swiotlb " Becky Bruce
  2009-05-14 22:42   ` [PATCH V2 3/3] powerpc: Add 86xx support for SWIOTLB Becky Bruce
@ 2009-05-15  4:49   ` Kumar Gala
  2009-05-18  4:49   ` Benjamin Herrenschmidt
  2009-05-19  5:27   ` FUJITA Tomonori
  3 siblings, 0 replies; 30+ messages in thread
From: Kumar Gala @ 2009-05-15  4:49 UTC (permalink / raw)
  To: Becky Bruce; +Cc: fujita.tomonori, linuxppc-dev


On May 14, 2009, at 5:42 PM, Becky Bruce wrote:

> This patch includes the basic infrastructure to use swiotlb
> bounce buffering on 32-bit powerpc.  It is not yet enabled on
> any platforms.  Probably the most interesting bit is the
> addition of addr_needs_map to dma_ops - we need this as
> a dma_op because the decision of whether or not an addr
> can be mapped by a device is device-specific.
>
> Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
> ---
> arch/powerpc/Kconfig                   |   12 ++-
> arch/powerpc/include/asm/dma-mapping.h |   11 ++
> arch/powerpc/include/asm/swiotlb.h     |   27 +++++
> arch/powerpc/kernel/Makefile           |    1 +
> arch/powerpc/kernel/dma-swiotlb.c      |  163 +++++++++++++++++++++++ 
> +++++++++
> arch/powerpc/kernel/dma.c              |    2 +-
> arch/powerpc/kernel/setup_32.c         |    6 +
> arch/powerpc/kernel/setup_64.c         |    6 +
> 8 files changed, 226 insertions(+), 2 deletions(-)
> create mode 100644 arch/powerpc/include/asm/swiotlb.h
> create mode 100644 arch/powerpc/kernel/dma-swiotlb.c

Acked-by: Kumar Gala <galak@kernel.crashing.org>

- k

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-14 22:42 ` [PATCH V2 2/3] powerpc: Add support for swiotlb " Becky Bruce
  2009-05-14 22:42   ` [PATCH V2 3/3] powerpc: Add 86xx support for SWIOTLB Becky Bruce
  2009-05-15  4:49   ` [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit Kumar Gala
@ 2009-05-18  4:49   ` Benjamin Herrenschmidt
  2009-05-18 13:25     ` Kumar Gala
  2009-05-19  5:27   ` FUJITA Tomonori
  3 siblings, 1 reply; 30+ messages in thread
From: Benjamin Herrenschmidt @ 2009-05-18  4:49 UTC (permalink / raw)
  To: Becky Bruce; +Cc: fujita.tomonori, linuxppc-dev

On Thu, 2009-05-14 at 17:42 -0500, Becky Bruce wrote:
> This patch includes the basic infrastructure to use swiotlb
> bounce buffering on 32-bit powerpc.  It is not yet enabled on
> any platforms.  Probably the most interesting bit is the
> addition of addr_needs_map to dma_ops - we need this as
> a dma_op because the decision of whether or not an addr
> can be mapped by a device is device-specific.
> 
> Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>

Hi Becky !

Finally I got to look at your patch :-)

A few comments below...

>  #ifdef CONFIG_NOT_COHERENT_CACHE
>  /*
>   * DMA-consistent mapping functions for PowerPCs that don't support
> @@ -76,6 +85,8 @@ struct dma_mapping_ops {
>  				dma_addr_t dma_address, size_t size,
>  				enum dma_data_direction direction,
>  				struct dma_attrs *attrs);
> +	int		(*addr_needs_map)(struct device *dev, dma_addr_t addr,
> +				size_t size);

What annoys me here is that we basically end up with two indirect
function calls for pretty much any DMA map. One was bad enough on low
end processors or very intensive networking, but this is getting real
bad don't you think ?

Granted, this is only used when swiotlb is used too, but still...

So the problem is that the region that can pass-through is somewhat
a mix of bus specific (incoming DMA window location & size) and
device specific (device addressing limitations).

Now, if we can always reduce it to a single range though, which I
think is practically the case, can't we instead pre-calculate that
range and stick -that- in the struct dev archdata or similar thus
speeding up the decision for a given address as to whether it needs
a swiotlb mapping or not ? Or does it gets too messy ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-18  4:49   ` Benjamin Herrenschmidt
@ 2009-05-18 13:25     ` Kumar Gala
  2009-05-18 21:49       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 30+ messages in thread
From: Kumar Gala @ 2009-05-18 13:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: fujita.tomonori, linuxppc-dev


On May 17, 2009, at 11:49 PM, Benjamin Herrenschmidt wrote:

> On Thu, 2009-05-14 at 17:42 -0500, Becky Bruce wrote:
>> This patch includes the basic infrastructure to use swiotlb
>> bounce buffering on 32-bit powerpc.  It is not yet enabled on
>> any platforms.  Probably the most interesting bit is the
>> addition of addr_needs_map to dma_ops - we need this as
>> a dma_op because the decision of whether or not an addr
>> can be mapped by a device is device-specific.
>>
>> Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
>
> Hi Becky !
>
> Finally I got to look at your patch :-)
>
> A few comments below...
>
>> #ifdef CONFIG_NOT_COHERENT_CACHE
>> /*
>>  * DMA-consistent mapping functions for PowerPCs that don't support
>> @@ -76,6 +85,8 @@ struct dma_mapping_ops {
>> 				dma_addr_t dma_address, size_t size,
>> 				enum dma_data_direction direction,
>> 				struct dma_attrs *attrs);
>> +	int		(*addr_needs_map)(struct device *dev, dma_addr_t addr,
>> +				size_t size);
>
> What annoys me here is that we basically end up with two indirect
> function calls for pretty much any DMA map. One was bad enough on low
> end processors or very intensive networking, but this is getting real
> bad don't you think ?
>
> Granted, this is only used when swiotlb is used too, but still...
>
> So the problem is that the region that can pass-through is somewhat
> a mix of bus specific (incoming DMA window location & size) and
> device specific (device addressing limitations).
>
> Now, if we can always reduce it to a single range though, which I
> think is practically the case, can't we instead pre-calculate that
> range and stick -that- in the struct dev archdata or similar thus
> speeding up the decision for a given address as to whether it needs
> a swiotlb mapping or not ? Or does it gets too messy ?

Part of this is how the generic swiotlb code works and part of it was  
our desire not to bloat dev archdata by adding such info that as you  
say is either bus specific or conveyed in the dma addr mask.

- k

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-18 13:25     ` Kumar Gala
@ 2009-05-18 21:49       ` Benjamin Herrenschmidt
  2009-05-19 13:04         ` Kumar Gala
  0 siblings, 1 reply; 30+ messages in thread
From: Benjamin Herrenschmidt @ 2009-05-18 21:49 UTC (permalink / raw)
  To: Kumar Gala; +Cc: fujita.tomonori, linuxppc-dev

On Mon, 2009-05-18 at 08:25 -0500, Kumar Gala wrote:
> 
> Part of this is how the generic swiotlb code works and part of it
> was  
> our desire not to bloat dev archdata by adding such info that as you  
> say is either bus specific or conveyed in the dma addr mask.

Right but perf sucks :-)

Maybe an option is to clamp the DMA mask when it's set by the driver to
limit it to the available inbound window ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-14 22:42 ` [PATCH V2 2/3] powerpc: Add support for swiotlb " Becky Bruce
                     ` (2 preceding siblings ...)
  2009-05-18  4:49   ` Benjamin Herrenschmidt
@ 2009-05-19  5:27   ` FUJITA Tomonori
  2009-05-19 20:57     ` Becky Bruce
  3 siblings, 1 reply; 30+ messages in thread
From: FUJITA Tomonori @ 2009-05-19  5:27 UTC (permalink / raw)
  To: beckyb; +Cc: fujita.tomonori, linuxppc-dev, linux-kernel

CC'ed linux-kernel

On Thu, 14 May 2009 17:42:28 -0500
Becky Bruce <beckyb@kernel.crashing.org> wrote:

> This patch includes the basic infrastructure to use swiotlb
> bounce buffering on 32-bit powerpc.  It is not yet enabled on
> any platforms.  Probably the most interesting bit is the
> addition of addr_needs_map to dma_ops - we need this as
> a dma_op because the decision of whether or not an addr
> can be mapped by a device is device-specific.
> 
> Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
> ---
>  arch/powerpc/Kconfig                   |   12 ++-
>  arch/powerpc/include/asm/dma-mapping.h |   11 ++
>  arch/powerpc/include/asm/swiotlb.h     |   27 +++++
>  arch/powerpc/kernel/Makefile           |    1 +
>  arch/powerpc/kernel/dma-swiotlb.c      |  163 ++++++++++++++++++++++++++++++++
>  arch/powerpc/kernel/dma.c              |    2 +-
>  arch/powerpc/kernel/setup_32.c         |    6 +
>  arch/powerpc/kernel/setup_64.c         |    6 +
>  8 files changed, 226 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/swiotlb.h
>  create mode 100644 arch/powerpc/kernel/dma-swiotlb.c
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index a0d1146..54e519a 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -296,9 +296,19 @@ config IOMMU_VMERGE
>  config IOMMU_HELPER
>  	def_bool PPC64
>  
> +config SWIOTLB
> +	bool "SWIOTLB support"
> +	default n
> +	select IOMMU_HELPER
> +	---help---
> +	  Support for IO bounce buffering for systems without an IOMMU.
> +	  This allows us to DMA to the full physical address space on
> +	  platforms where the size of a physical address is larger
> +	  than the bus address.  Not all platforms support this.
> +
>  config PPC_NEED_DMA_SYNC_OPS
>  	def_bool y
> -	depends on NOT_COHERENT_CACHE
> +	depends on (NOT_COHERENT_CACHE || SWIOTLB)
>  
>  config HOTPLUG_CPU
>  	bool "Support for enabling/disabling CPUs"
> diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
> index c69f2b5..71bbc17 100644
> --- a/arch/powerpc/include/asm/dma-mapping.h
> +++ b/arch/powerpc/include/asm/dma-mapping.h
> @@ -15,9 +15,18 @@
>  #include <linux/scatterlist.h>
>  #include <linux/dma-attrs.h>
>  #include <asm/io.h>
> +#include <asm/swiotlb.h>
>  
>  #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
>  
> +/* Some dma direct funcs must be visible for use in other dma_ops */
> +extern void *dma_direct_alloc_coherent(struct device *dev, size_t size,
> +				       dma_addr_t *dma_handle, gfp_t flag);
> +extern void dma_direct_free_coherent(struct device *dev, size_t size,
> +				     void *vaddr, dma_addr_t dma_handle);
> +
> +extern unsigned long get_dma_direct_offset(struct device *dev);
> +
>  #ifdef CONFIG_NOT_COHERENT_CACHE
>  /*
>   * DMA-consistent mapping functions for PowerPCs that don't support
> @@ -76,6 +85,8 @@ struct dma_mapping_ops {
>  				dma_addr_t dma_address, size_t size,
>  				enum dma_data_direction direction,
>  				struct dma_attrs *attrs);
> +	int		(*addr_needs_map)(struct device *dev, dma_addr_t addr,
> +				size_t size);
>  #ifdef CONFIG_PPC_NEED_DMA_SYNC_OPS
>  	void            (*sync_single_range_for_cpu)(struct device *hwdev,
>  				dma_addr_t dma_handle, unsigned long offset,
> diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h
> new file mode 100644
> index 0000000..30891d6
> --- /dev/null
> +++ b/arch/powerpc/include/asm/swiotlb.h
> @@ -0,0 +1,27 @@
> +/*
> + * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
> + *
> + * This program is free software; you can redistribute  it and/or modify it
> + * under  the terms of  the GNU General  Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + *
> + */
> +
> +#ifndef __ASM_SWIOTLB_H
> +#define __ASM_SWIOTLB_H
> +
> +#include <linux/swiotlb.h>
> +
> +extern struct dma_mapping_ops swiotlb_dma_ops;
> +extern struct dma_mapping_ops swiotlb_pci_dma_ops;
> +
> +int swiotlb_arch_address_needs_mapping(struct device *, dma_addr_t,
> +				       size_t size);
> +
> +static inline void dma_mark_clean(void *addr, size_t size) {}
> +
> +extern unsigned int ppc_swiotlb_enable;
> +int __init swiotlb_setup_bus_notifier(void);
> +
> +#endif /* __ASM_SWIOTLB_H */
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 71901fb..34c0a95 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -82,6 +82,7 @@ obj-$(CONFIG_SMP)		+= smp.o
>  obj-$(CONFIG_KPROBES)		+= kprobes.o
>  obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
>  obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
> +obj-$(CONFIG_SWIOTLB)		+= dma-swiotlb.o
>  
>  pci64-$(CONFIG_PPC64)		+= pci_dn.o isa-bridge.o
>  obj-$(CONFIG_PCI)		+= pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
> new file mode 100644
> index 0000000..68ccf11
> --- /dev/null
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -0,0 +1,163 @@
> +/*
> + * Contains routines needed to support swiotlb for ppc.
> + *
> + * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
> + *
> + * This program is free software; you can redistribute  it and/or modify it
> + * under  the terms of  the GNU General  Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + *
> + */
> +
> +#include <linux/dma-mapping.h>
> +#include <linux/pfn.h>
> +#include <linux/of_platform.h>
> +#include <linux/platform_device.h>
> +#include <linux/pci.h>
> +
> +#include <asm/machdep.h>
> +#include <asm/swiotlb.h>
> +#include <asm/dma.h>
> +#include <asm/abs_addr.h>
> +
> +int swiotlb __read_mostly;
> +unsigned int ppc_swiotlb_enable;
> +
> +void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr)
> +{
> +	unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr));
> +	void *pageaddr = page_address(pfn_to_page(pfn));
> +
> +	if (pageaddr != NULL)
> +		return pageaddr + (addr % PAGE_SIZE);
> +	return NULL;
> +}
> +
> +dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
> +{
> +	return paddr + get_dma_direct_offset(hwdev);
> +}
> +
> +phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
> +
> +{
> +	return baddr - get_dma_direct_offset(hwdev);
> +}
> +
> +/*
> + * Determine if an address needs bounce buffering via swiotlb.
> + * Going forward I expect the swiotlb code to generalize on using
> + * a dma_ops->addr_needs_map, and this function will move from here to the
> + * generic swiotlb code.
> + */
> +int
> +swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr,
> +				   size_t size)
> +{
> +	struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev);
> +
> +	BUG_ON(!dma_ops);
> +	return dma_ops->addr_needs_map(hwdev, addr, size);
> +}
> +
> +/*
> + * Determine if an address is reachable by a pci device, or if we must bounce.
> + */
> +static int
> +swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
> +{
> +	u64 mask = dma_get_mask(hwdev);
> +	dma_addr_t max;
> +	struct pci_controller *hose;
> +	struct pci_dev *pdev = to_pci_dev(hwdev);
> +
> +	hose = pci_bus_to_host(pdev->bus);
> +	max = hose->dma_window_base_cur + hose->dma_window_size;
> +
> +	/* check that we're within mapped pci window space */
> +	if ((addr + size > max) | (addr < hose->dma_window_base_cur))
> +		return 1;
> +
> +	return !is_buffer_dma_capable(mask, addr, size);
> +}
> +
> +static int
> +swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
> +{
> +	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
> +}

I think that swiotlb_pci_addr_needs_map and swiotlb_addr_needs_map
don't need swiotlb_arch_range_needs_mapping() since

- swiotlb_arch_range_needs_mapping() is always used with
swiotlb_arch_address_needs_mapping().

- swiotlb_arch_address_needs_mapping() uses is_buffer_dma_capable()
and powerpc doesn't overwrite swiotlb_arch_address_needs_mapping()


Do I miss something?

Anyway, we need to fix the swiotlb checking code to determine whether an
area is DMA-capable or not.

swiotlb_arch_address_needs_mapping() calls is_buffer_dma_capable() in
dma-mapping.h but it should not. It should live in an arch-specific
place such as arch's dma-mapping.h or something since
is_buffer_dma_capable() is arch-specific. I didn't know that
is_buffer_dma_capable() is arch-specific when I added it to the
generic place.

If we have something like in arch/{x86|ia64|powerpc}/dma-mapping.h:

static inline int is_buffer_dma_capable(struct device *dev, dma_addr_t addr, size_t size)

then we don't need two checking functions, address_needs_mapping and
range_needs_mapping.

But I guess the bad thing is that we can't use the arch abstraction due to
dom0 support.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-18 21:49       ` Benjamin Herrenschmidt
@ 2009-05-19 13:04         ` Kumar Gala
  2009-05-19 20:06           ` Becky Bruce
  2009-05-28  4:42           ` Kumar Gala
  0 siblings, 2 replies; 30+ messages in thread
From: Kumar Gala @ 2009-05-19 13:04 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: fujita.tomonori, linuxppc-dev


On May 18, 2009, at 4:49 PM, Benjamin Herrenschmidt wrote:

> On Mon, 2009-05-18 at 08:25 -0500, Kumar Gala wrote:
>>
>> Part of this is how the generic swiotlb code works and part of it
>> was
>> our desire not to bloat dev archdata by adding such info that as you
>> say is either bus specific or conveyed in the dma addr mask.
>
> Right but perf sucks :-)
>
> Maybe an option is to clamp the DMA mask when it's set by the driver  
> to
> limit it to the available inbound window ?

Clamping the DMA mask is even worse than the additional indirection  
for us.  We have valid scenarios in which we'd have 512M of outbound  
PCI address space and 4G of mem and thus 3.5G of inbound PCI address  
space.  With the DMA mask we'd be limited to 2G and bouncing from  
2..3.5G when we don't need to.

I think our options are to change archdata as follows:

Option 1 - just add a new data member to dev_archdata

struct dev_archdata {
         /* Optional pointer to an OF device node */
         struct device_node      *of_node;

         /* DMA operations on that device */
         struct dma_mapping_ops  *dma_ops;
         void 		        *dma_data;
	dma_addr_t		direct_dma_addr;
};

Option 2 - introduce a proper container for how we use dma_data.  This  
may just be moving the indirection from an indirection function call  
to an indirection data reference:

struct dma_data {
         dma_addr_t      offset;
         dma_addr_t      direct_dma_addr;
         struct          iommu_table *iommu_table;
};

struct dev_archdata {
         /* Optional pointer to an OF device node */
         struct device_node      *of_node;

         /* DMA operations on that device */
         struct dma_mapping_ops  *dma_ops;
         struct dma_data         *dma_data;
};

Option 3 - use dma_data to keep the addr at which we need to bounce vs  
not for SWIOTLB - this has potential issues w/conflicting with  
dma_data being used as the dma_offset. (need to think on that a bit  
more).  Additionally this has the benefit in that we need dma_data to  
be a 64-bit quantity on ppc32 w/>32-bit phys addr.

struct dev_archdata {
         /* Optional pointer to an OF device node */
         struct device_node      *of_node;

         /* DMA operations on that device */
         struct dma_mapping_ops  *dma_ops;
         dma_addr_t 	        dma_data;
};

others??

- k

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-19 13:04         ` Kumar Gala
@ 2009-05-19 20:06           ` Becky Bruce
  2009-05-28  4:42           ` Kumar Gala
  1 sibling, 0 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-19 20:06 UTC (permalink / raw)
  To: Kumar Gala; +Cc: fujita.tomonori, linuxppc-dev


On May 19, 2009, at 8:04 AM, Kumar Gala wrote:

>
> On May 18, 2009, at 4:49 PM, Benjamin Herrenschmidt wrote:
>
>> On Mon, 2009-05-18 at 08:25 -0500, Kumar Gala wrote:
>>>
>>> Part of this is how the generic swiotlb code works and part of it
>>> was
>>> our desire not to bloat dev archdata by adding such info that as you
>>> say is either bus specific or conveyed in the dma addr mask.
>>
>> Right but perf sucks :-)
>>
>> Maybe an option is to clamp the DMA mask when it's set by the  
>> driver to
>> limit it to the available inbound window ?
>
> Clamping the DMA mask is even worse than the additional indirection  
> for us.  We have valid scenarios in which we'd have 512M of outbound  
> PCI address space and 4G of mem and thus 3.5G of inbound PCI address  
> space.  With the DMA mask we'd be limited to 2G and bouncing from  
> 2..3.5G when we don't need to.

>
>
> I think our options are to change archdata as follows:
>
> Option 1 - just add a new data member to dev_archdata
>
> struct dev_archdata {
>        /* Optional pointer to an OF device node */
>        struct device_node      *of_node;
>
>        /* DMA operations on that device */
>        struct dma_mapping_ops  *dma_ops;
>        void 		        *dma_data;
> 	dma_addr_t		direct_dma_addr;
> };
>
> Option 2 - introduce a proper container for how we use dma_data.   
> This may just be moving the indirection from an indirection function  
> call to an indirection data reference:
>
> struct dma_data {
>        dma_addr_t      offset;
>        dma_addr_t      direct_dma_addr;
>        struct          iommu_table *iommu_table;
> };
>
> struct dev_archdata {
>        /* Optional pointer to an OF device node */
>        struct device_node      *of_node;
>
>        /* DMA operations on that device */
>        struct dma_mapping_ops  *dma_ops;
>        struct dma_data         *dma_data;
> };

Personally, I prefer this.  However, I understand Ben has some  
objection here.   At a minimum, I *do* need to change dma_data to be  
able to contain a 64-bit number on a 32-bit platform as part of the  
optimization for 64-bit PCI devices.   It should probably be a  
dma_addr_t for that, looking at how it's being used.  However, that is  
a bit of a philosophical break from the type of dma_data being generic  
void *.  I'm open to suggestions here.

>
>
> Option 3 - use dma_data to keep the addr at which we need to bounce  
> vs not for SWIOTLB - this has potential issues w/conflicting with  
> dma_data being used as the dma_offset. (need to think on that a bit  
> more).  Additionally this has the benefit in that we need dma_data  
> to be a 64-bit quantity on ppc32 w/>32-bit phys addr.
>
> struct dev_archdata {
>        /* Optional pointer to an OF device node */
>        struct device_node      *of_node;
>
>        /* DMA operations on that device */
>        struct dma_mapping_ops  *dma_ops;
>        dma_addr_t 	        dma_data;
> };
>

This won't work.  I need the dma offset that's currently stored there  
for 64-bit PCI devices.

It sounds like we want to do some combination of the above:

1) add a max_dma_addr field to archdata to indicate the boundary  
beyond which a device must bounce buffer (we currently don't have  
anything that requires a start/size type paradigm here - does anyone  
know of any cases that I don't know about here?)

2) change the type of dma_data so it can hold 64 bits on a 32-bit  
platform.

-Becky

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-19  5:27   ` FUJITA Tomonori
@ 2009-05-19 20:57     ` Becky Bruce
  2009-05-21 17:43       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 30+ messages in thread
From: Becky Bruce @ 2009-05-19 20:57 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linuxppc-dev, linux-kernel


On May 19, 2009, at 12:27 AM, FUJITA Tomonori wrote:

> CC'ed linux-kernel
>
> On Thu, 14 May 2009 17:42:28 -0500
> Becky Bruce <beckyb@kernel.crashing.org> wrote:

>
>
>> This patch includes the basic infrastructure to use swiotlb
>> bounce buffering on 32-bit powerpc.  It is not yet enabled on
>> any platforms.  Probably the most interesting bit is the
>> addition of addr_needs_map to dma_ops - we need this as
>> a dma_op because the decision of whether or not an addr
>> can be mapped by a device is device-specific.
>>
>> Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>

<snip>

>> +/*
>> + * Determine if an address needs bounce buffering via swiotlb.
>> + * Going forward I expect the swiotlb code to generalize on using
>> + * a dma_ops->addr_needs_map, and this function will move from  
>> here to the
>> + * generic swiotlb code.
>> + */
>> +int
>> +swiotlb_arch_address_needs_mapping(struct device *hwdev,  
>> dma_addr_t addr,
>> +				   size_t size)
>> +{
>> +	struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev);
>> +
>> +	BUG_ON(!dma_ops);
>> +	return dma_ops->addr_needs_map(hwdev, addr, size);
>> +}
>> +
>> +/*
>> + * Determine if an address is reachable by a pci device, or if we  
>> must bounce.
>> + */
>> +static int
>> +swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr,  
>> size_t size)
>> +{
>> +	u64 mask = dma_get_mask(hwdev);
>> +	dma_addr_t max;
>> +	struct pci_controller *hose;
>> +	struct pci_dev *pdev = to_pci_dev(hwdev);
>> +
>> +	hose = pci_bus_to_host(pdev->bus);
>> +	max = hose->dma_window_base_cur + hose->dma_window_size;
>> +
>> +	/* check that we're within mapped pci window space */
>> +	if ((addr + size > max) | (addr < hose->dma_window_base_cur))
>> +		return 1;
>> +
>> +	return !is_buffer_dma_capable(mask, addr, size);
>> +}
>> +
>> +static int
>> +swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr,  
>> size_t size)
>> +{
>> +	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
>> +}
>
> I think that swiotlb_pci_addr_needs_map and swiotlb_addr_needs_map
> don't need swiotlb_arch_range_needs_mapping() since
>
> - swiotlb_arch_range_needs_mapping() is always used with
> swiotlb_arch_address_needs_mapping().

Yes.  I don't need range_needs_mapping() on ppc, so I let it default  
to the lib/swiotlb.c implementation, which just returns 0.  All the  
determination of whether an address needs mapping comes from  
swiotlb_arch_address_needs_mapping().

>
>
> - swiotlb_arch_address_needs_mapping() uses is_buffer_dma_capable()
> and powerpc doesn't overwrite swiotlb_arch_address_needs_mapping()

I *do* overwrite it.  But I still use is_buffer_dma_capable().  I just  
add some other checks to it in the pci case, because I need to know  
what the controller has mapped as it changes the point at which I must  
begin to bounce buffer.

>
>
>
> Do I miss something?

I don't think so.  But let me know if I've misunderstood you.

>
>
> Anyway, we need to fix swiotlb checking code to if an area is
> DMA-capable or not.
>
> swiotlb_arch_address_needs_mapping() calls is_buffer_dma_capable() in
> dma-mapping.h but it should not. It should live in an arch-specific
> place such as arch's dma-mapping.h or something since
> is_buffer_dma_capable() is arch-specific. I didn't know that
> is_buffer_dma_capable() is arch-specific when I added it to the
> generic place.

Errrr, is_buffer_dma_capable is generic right now, unless I'm missing  
something.  It's in include/linux/dma-mapping.h, unless I'm getting  
bitten by a slightly stale tree.

>
>
> If we have something like in arch/{x86|ia64|powerpc}/dma-mapping.h:
>
> static inline int is_buffer_dma_capable(struct device *dev,  
> dma_addr_t addr, size_t size)
>
> then we don't need two checking functions, address_needs_mapping and
> range_needs_mapping.

It's never been clear to me *why* we had both in the first place - if  
you can explain this, I'd be grateful :)

It actually looks like we want to remove the dma_op that I created for  
addr_needs_map because of the perf hit.... it gets called a ton of  
times, many times on addresses that don't actually require bounce  
buffering (and thus, are more sensitive to the minor performance  
hit).  We are looking instead at adding a per-device variable that is  
used to determine if we need to bounce (in addition to  
is_buffer_dma_capable) that would live inside archdata - see the other  
posts on this thread for details (we're still hashing out exactly how  
we want to do this).

Cheers,
Becky

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-19 20:57     ` Becky Bruce
@ 2009-05-21 17:43       ` Jeremy Fitzhardinge
  2009-05-21 18:27         ` Becky Bruce
  0 siblings, 1 reply; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-05-21 17:43 UTC (permalink / raw)
  To: Becky Bruce; +Cc: FUJITA Tomonori, linuxppc-dev, linux-kernel, Ian Campbell

Becky Bruce wrote:
>>
>>
>> If we have something like in arch/{x86|ia64|powerpc}/dma-mapping.h:
>>
>> static inline int is_buffer_dma_capable(struct device *dev, 
>> dma_addr_t addr, size_t size)
>>
>> then we don't need two checking functions, address_needs_mapping and
>> range_needs_mapping.
>
> It's never been clear to me *why* we had both in the first place - if 
> you can explain this, I'd be grateful :)

I was about to ask the same thing.  It seems that range_needs_mapping 
should be able to do both jobs.

I think range_needs_mapping came from the Xen swiotlb changes, and 
address_needs_mapping came from your powerpc changes.   Many of the 
changes were exact overlaps; I think this was one of the few instances 
where there was a difference.

We need a range check in Xen (rather than iterating over individual 
pages) because we want to check that the underlying pages are machine 
contiguous, but I think that's also sufficient to do whatever checks you 
need to do.

The other difference is that is_buffer_dma_capable operates on a 
dma_addr_t, which presumes that you can generate a dma address and then 
test for its validity.  For Xen, it doesn't make much sense to talk 
about the dma_addr_t for memory which isn't actually dma-capable; we 
need the test to be in terms of phys_addr_t.  Given that the two 
functions are always called from the same place, that doesn't seem to 
pose a problem.

So I think the unified function would be something like:

    int range_needs_mapping(struct device *hwdev, phys_addr_t addr,
    size_t size);

which would be defined somewhere under asm/*.h.  Would that work for 
powerpc?

    J

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 17:43       ` Jeremy Fitzhardinge
@ 2009-05-21 18:27         ` Becky Bruce
  2009-05-21 19:01           ` Ian Campbell
                             ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-21 18:27 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: FUJITA Tomonori, linuxppc-dev, linux-kernel, Ian Campbell


On May 21, 2009, at 12:43 PM, Jeremy Fitzhardinge wrote:

> Becky Bruce wrote:
>>>
>>>
>>> If we have something like in arch/{x86|ia64|powerpc}/dma-mapping.h:
>>>
>>> static inline int is_buffer_dma_capable(struct device *dev,  
>>> dma_addr_t addr, size_t size)
>>>
>>> then we don't need two checking functions, address_needs_mapping and
>>> range_needs_mapping.
>>
>> It's never been clear to me *why* we had both in the first place -  
>> if you can explain this, I'd be grateful :)
>
> I was about to ask the same thing.  It seems that  
> range_needs_mapping should be able to do both jobs.
>
> I think range_needs_mapping came from the Xen swiotlb changes, and  
> address_needs_mapping came from your powerpc changes.   Many of the  
> changes were exact overlaps; I think this was one of the few  
> instances where there was a difference.

I think address_needs_mapping was already there and I added the  
ability for an arch to provide its own version.  Ian added  
range_needs_mapping in commit b81ea27b2329bf44b.   At the time, it  
took a virtual address as its argument, so we couldn't use it for  
highmem.  That's since been changed to phys_addr_t, so I think we  
should be able to merge the two.

>
> We need a range check in Xen (rather than iterating over individual  
> pages) because we want to check that the underlying pages are  
> machine contiguous, but I think that's also sufficient to do  
> whatever checks you need to do.

Yes.

>
> The other difference is that is_buffer_dma_capable operates on a  
> dma_addr_t, which presumes that you can generate a dma address and  
> then test for its validity.  For Xen, it doesn't make much sense to  
> talk about the dma_addr_t for memory which isn't actually dma- 
> capable; we need the test to be in terms of phys_addr_t.  Given that  
> the two functions are always called from the same place, that  
> doesn't seem to pose a problem.
>
> So I think the unified function would be something like:
>
>   int range_needs_mapping(struct device *hwdev, phys_addr_t addr,
>   size_t size);
>
> which would be defined somewhere under asm/*.h.  Would that work for  
> powerpc?

I can work with that, but it's going to be a bit inefficient, as I  
actually need the dma_addr_t, not the phys_addr_t, so I'll have to  
convert.  In every case, this is a conversion I've already done and  
that I need in the calling code as well.  Can we pass in both the  
phys_addr_t and the dma_addr_t?  We have both in every case but one,  
which is in swiotlb_map_page where we call address_needs_mapping()  
without calling range_needs_mapping.  It's not actually clear to me  
that we need that check, though.  Can someone explain what case that  
was designed to catch?

Cheers,
Becky

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 18:27         ` Becky Bruce
@ 2009-05-21 19:01           ` Ian Campbell
  2009-05-22 10:51             ` FUJITA Tomonori
  2009-05-21 20:18           ` Jeremy Fitzhardinge
  2009-05-22 11:11           ` Ian Campbell
  2 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2009-05-21 19:01 UTC (permalink / raw)
  To: Becky Bruce
  Cc: FUJITA Tomonori, linuxppc-dev, Jeremy Fitzhardinge, linux-kernel

On Thu, 2009-05-21 at 14:27 -0400, Becky Bruce wrote:
> We have both in every case but one,  
> which is in swiotlb_map_page where we call address_needs_mapping()  
> without calling range_needs_mapping.

The reason it calls address_needs_mapping without range_needs_mapping is
that in the swiotlb_force=1 case it would trigger every time. If
address_needs_mapping and range_needs_mapping are merged as proposed and
they do not subsume the swiotlb_force check (and I don't think they
would) then I think this will work fine.

> It's not actually clear to me that we need that check, though.  Can 
> someone explain what case that was designed to catch?

If map_single fails and returns NULL then we try to use
io_tlb_overflow_buffer, if that is somehow not DMA-able (for the
particular device) then the check will trigger. I would have thought we
could arrange that the overflow buffer is always OK and really if
map_page is failing we must be close the edge already.

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 18:27         ` Becky Bruce
  2009-05-21 19:01           ` Ian Campbell
@ 2009-05-21 20:18           ` Jeremy Fitzhardinge
  2009-05-21 22:08             ` Ian Campbell
  2009-05-22 10:51             ` FUJITA Tomonori
  2009-05-22 11:11           ` Ian Campbell
  2 siblings, 2 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-05-21 20:18 UTC (permalink / raw)
  To: Becky Bruce; +Cc: FUJITA Tomonori, linuxppc-dev, linux-kernel, Ian Campbell

Becky Bruce wrote:
> I can work with that, but it's going to be a bit inefficient, as I 
> actually need the dma_addr_t, not the phys_addr_t, so I'll have to 
> convert.  In every case, this is a conversion I've already done and 
> that I need in the calling code as well.  Can we pass in both the 
> phys_addr_t and the dma_addr_t?

The Xen implementation would need to do the phys to bus conversion page 
by page anyway, so it wouldn't help much.  But it also wouldn't hurt.

How expensive is the phys-to-bus conversion on power?  Is it worth 
making the interface more complex for?  Would passing phys+bus mean that 
we wouldn't also need to pass dev?

> We have both in every case but one, which is in swiotlb_map_page where 
> we call address_needs_mapping() without calling range_needs_mapping.  
> It's not actually clear to me that we need that check, though.  Can 
> someone explain what case that was designed to catch?

It called them together a little earlier in the function, and the phys 
address is still available.

I guess the test is checking for a bad implementation of map_single().  
I found a single instance of someone reporting the message (in 2.6.11, 
when swiotlb was ia64-specific: http://lkml.org/lkml/2008/3/3/249).  
panic() is an odd way to handle an error (even an one which is an 
implementation bug), but I guess it's better than scribbling on the 
wrong memory.

    J

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 20:18           ` Jeremy Fitzhardinge
@ 2009-05-21 22:08             ` Ian Campbell
  2009-05-22 10:51             ` FUJITA Tomonori
  1 sibling, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2009-05-21 22:08 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: FUJITA Tomonori, linuxppc-dev, linux-kernel

On Thu, 2009-05-21 at 16:18 -0400, Jeremy Fitzhardinge wrote:
> I guess the test is checking for a bad implementation of map_single().

More importantly the io_tlb_overflow_buffer is basically a second chance
if you exhaust the swiotlb pool. The check seems to be there to ensure
that the second chance memory is suitable for the device (it's hard to
imagine, but possible I suppose, that it wouldn't be), a bad
implementation of map_single() is a secondary concern I suspect.

If all the callers of map_page did proper error handling this would all
be unnecessary. I guess someone was worried, at least at one point, that
they didn't. The failure case could possibly be scribbling into a random
memory location or more worryingly sprinkling random memory locations
onto your disk or whatever. That said I'd imagine that map_page
returning NULL would cause an oops long before anything tried to DMA
anything and the second chance probably doesn't buy us much in practice.

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 19:01           ` Ian Campbell
@ 2009-05-22 10:51             ` FUJITA Tomonori
  0 siblings, 0 replies; 30+ messages in thread
From: FUJITA Tomonori @ 2009-05-22 10:51 UTC (permalink / raw)
  To: Ian.Campbell; +Cc: fujita.tomonori, linuxppc-dev, jeremy, linux-kernel

On Thu, 21 May 2009 20:01:37 +0100
Ian Campbell <Ian.Campbell@eu.citrix.com> wrote:

> > It's not actually clear to me that we need that check, though.  Can 
> > someone explain what case that was designed to catch?
> 
> If map_single fails and returns NULL then we try to use
> io_tlb_overflow_buffer, if that is somehow not DMA-able (for the
> particular device) then the check will trigger. I would have thought we
> could arrange that the overflow buffer is always OK and really if
> map_page is failing we must be close the edge already.

And iotlb buffers are not guaranteed to be DMA-capable; it's possible
that some drivers (with poor dma address restrictions) can't handle
some part of iotlb buffers.

So after allocating some iotlb buffer, we need to check if the buffers
are DMA-capable for a driver. Well, it's unlikely though.

Well, it would be better to move the check to map_single().

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 20:18           ` Jeremy Fitzhardinge
  2009-05-21 22:08             ` Ian Campbell
@ 2009-05-22 10:51             ` FUJITA Tomonori
  2009-05-27 19:05               ` Becky Bruce
  1 sibling, 1 reply; 30+ messages in thread
From: FUJITA Tomonori @ 2009-05-22 10:51 UTC (permalink / raw)
  To: jeremy; +Cc: fujita.tomonori, linuxppc-dev, linux-kernel, Ian.Campbell

On Thu, 21 May 2009 13:18:54 -0700
Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Becky Bruce wrote:
> > I can work with that, but it's going to be a bit inefficient, as I 
> > actually need the dma_addr_t, not the phys_addr_t, so I'll have to 
> > convert.  In every case, this is a conversion I've already done and 
> > that I need in the calling code as well.  Can we pass in both the 
> > phys_addr_t and the dma_addr_t?
> 
> The Xen implementation would need to do the phys to bus conversion page 
> by page anyway, so it wouldn't help much.  But it also wouldn't hurt.
> 
> How expensive is the phys-to-bus conversion on power?  Is it worth 
> making the interface more complex for?  Would passing phys+bus mean that 
> we wouldn't also need to pass dev?

I don't think so. POWERPC needs it.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-21 18:27         ` Becky Bruce
  2009-05-21 19:01           ` Ian Campbell
  2009-05-21 20:18           ` Jeremy Fitzhardinge
@ 2009-05-22 11:11           ` Ian Campbell
  2009-05-22 23:55             ` Jeremy Fitzhardinge
  2009-05-27 19:05             ` Becky Bruce
  2 siblings, 2 replies; 30+ messages in thread
From: Ian Campbell @ 2009-05-22 11:11 UTC (permalink / raw)
  To: Becky Bruce
  Cc: FUJITA Tomonori, linuxppc-dev, Jeremy Fitzhardinge, linux-kernel

On Thu, 2009-05-21 at 14:27 -0400, Becky Bruce wrote:
> I can work with that, but it's going to be a bit inefficient, as I  
> actually need the dma_addr_t, not the phys_addr_t, so I'll have to  
> convert.  In every case, this is a conversion I've already done and  
> that I need in the calling code as well. 

Does

    dma_addr_t dma_map_range(struct device *hwdev, phys_addr_t addr,
    size_t size);

work for you?

If the range does not need mapping then it returns the dma address, if
you needed to calculate the dma address anyway to figure out if mapping
is required then this is fine. If the range does need mapping then it
returns NULL.

This subsumes many of the phys->dma address conversions which would
otherwise be done in the callers. IMHO this is an architecture specific
detail which should be pushed down into the arch code as much as
possible. The only ones which I don't think can be pushed down are the
ones in swiotlb_virt_to_bus/swiotlb_bus_to_virt.

BTW do you need swiotlb_bus_to_virt to be __weak or is the fact that it
is implemented in terms of swiotlb_bus_to_phys sufficient?

I have an appointment all afternoon and it's a bank holiday on Monday
but here is my WIP patch as an example of what I'm thinking. Obviously
some cleanups are still very much required:
      * The Xen specific stuff in arch/x86/include/asm/dma-mapping.h
        clearly needs to be properly abstracted away (I just stuck it
        there for testing).
      * swiotlb_phys_to_bus and swiotlb_bus_to_phys need to move to
        arch/*/include/asm/dma-mapping.h instead of being __weak
      * Maybe swiotlb_bus_to_virt can become static again.
      * Need to finish auditing the handful of non-swiotlb users of
        is_buffer_dma_capable.
      * It might be possible to implement swiotlb_dma_mapping_error
        and/or swiotlb_dma_supported in terms of dma_map_range. 
      * Minor detail: it doesn't actually work at the moment ;-)

Ian.

diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index 36c0009..026667f 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -174,4 +174,12 @@ dma_cache_sync (struct device *dev, void *vaddr, size_t size,
 
 #define dma_is_consistent(d, h)	(1)	/* all we do is coherent memory... */
 
+static inline dma_addr_t dma_map_range(struct device *dev, u64 mask,
+				       phys_addr_t addr, size_t size)
+{
+	if (addr + size <= mask)
+		return addr;
+	return 0;
+}
+
 #endif /* _ASM_IA64_DMA_MAPPING_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 916cbb6..b2813ab 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -309,4 +309,20 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
 		ops->free_coherent(dev, size, vaddr, bus);
 }
 
+static inline dma_addr_t dma_map_range(struct device *dev, u64 mask,
+				       phys_addr_t addr, size_t size)
+{
+	dma_addr_t dma_addr;
+#ifdef CONFIG_XEN
+	extern int xen_range_needs_mapping(phys_addr_t paddr, size_t size);
+	if (xen_pv_domain() && xen_range_needs_mapping(addr, size))
+		return 0;
+#endif
+
+	dma_addr = swiotlb_phys_to_bus(dev, addr);
+	if (dma_addr + size <= mask)
+		return 0;
+	return dma_addr;
+}
+
 #endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 2fffc22..c2b4196 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -145,8 +145,8 @@ again:
 	if (!page)
 		return NULL;
 
-	addr = page_to_phys(page);
-	if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+	addr = dma_map_range(dev, dma_mask, page_to_phys(page), size);
+	if (!(addr == 0)) {
 		__free_pages(page, get_order(size));
 
 		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 1e8920d..6a55770 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -191,13 +191,13 @@ static inline int
 need_iommu(struct device *dev, unsigned long addr, size_t size)
 {
 	return force_iommu ||
-		!is_buffer_dma_capable(*dev->dma_mask, addr, size);
+		dma_map_range(dev, *dev->dma_mask, addr, size) == 0;
 }
 
 static inline int
 nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
 {
-	return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
+	return dma_map_range(dev, *dev->dma_mask, addr, size) == 0;
 }
 
 /* Map a single continuous physical area into the IOMMU.
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 71d412a..712ce59 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -12,13 +12,13 @@
 #include <asm/dma.h>
 
 static int
-check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
+check_addr(char *name, struct device *hwdev, phys_addr_t phys, size_t size)
 {
-	if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
+	if (hwdev && dma_map_range(hwdev, *hwdev->dma_mask, phys, size) == 0) {
 		if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
 			printk(KERN_ERR
 			    "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
-				name, (long long)bus, size,
+				name, (long long)phys, size,
 				(long long)*hwdev->dma_mask);
 		return 0;
 	}
@@ -30,12 +30,12 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 				 enum dma_data_direction dir,
 				 struct dma_attrs *attrs)
 {
-	dma_addr_t bus = page_to_phys(page) + offset;
+	phys_addr_t phys = page_to_phys(page) + offset;
 	WARN_ON(size == 0);
-	if (!check_addr("map_single", dev, bus, size))
+	if (!check_addr("map_single", dev, phys, size))
 		return bad_dma_address;
 	flush_write_buffers();
-	return bus;
+	return phys;
 }
 
 /* Map a set of buffers described by scatterlist in streaming
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8083b6a..85dafa1 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -96,11 +96,6 @@ static inline int is_device_dma_capable(struct device *dev)
 	return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
 }
 
-static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
-{
-	return addr + size <= mask;
-}
-
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
 #else
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index cec5f62..4fd5185 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -147,17 +147,6 @@ void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
 }
 
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
-					       dma_addr_t addr, size_t size)
-{
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return 0;
-}
-
 static void swiotlb_print_info(unsigned long bytes)
 {
 	phys_addr_t pstart, pend;
@@ -318,17 +307,6 @@ cleanup1:
 	return -ENOMEM;
 }
 
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
-{
-	return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
-}
-
-static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size);
-}
-
 static int is_swiotlb_buffer(char *addr)
 {
 	return addr >= io_tlb_start && addr < io_tlb_end;
@@ -555,18 +533,17 @@ void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		       dma_addr_t *dma_handle, gfp_t flags)
 {
-	dma_addr_t dev_addr;
+	phys_addr_t phys;
 	void *ret;
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
+	dma_addr_t dma_addr;
 
 	if (hwdev && hwdev->coherent_dma_mask)
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret &&
-	    !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret),
-				   size)) {
+	if (ret && dma_map_range(hwdev, dma_mask, virt_to_phys(ret), size) == 0) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 */
@@ -585,19 +562,20 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	}
 
 	memset(ret, 0, size);
-	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+	phys = virt_to_phys(ret);
 
 	/* Confirm address can be DMA'd by device */
-	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
-		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+	dma_addr = dma_map_range(hwdev, dma_mask, phys, size);
+	if (dma_addr == 0) {
+		printk("hwdev DMA mask = 0x%016Lx, physical addr = 0x%016Lx\n",
 		       (unsigned long long)dma_mask,
-		       (unsigned long long)dev_addr);
+		       (unsigned long long)phys);
 
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
 		do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
 		return NULL;
 	}
-	*dma_handle = dev_addr;
+	*dma_handle = dma_addr;
 	return ret;
 }
 EXPORT_SYMBOL(swiotlb_alloc_coherent);
@@ -649,7 +627,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    struct dma_attrs *attrs)
 {
 	phys_addr_t phys = page_to_phys(page) + offset;
-	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
+	dma_addr_t dma_addr;
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -658,9 +636,9 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(dev, dev_addr, size) &&
-	    !range_needs_mapping(phys, size))
-		return dev_addr;
+	dma_addr = dma_map_range(dev, dma_get_mask(dev), phys, size);
+	if (dma_addr && !swiotlb_force)
+		return dma_addr;
 
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
@@ -671,15 +649,14 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 		map = io_tlb_overflow_buffer;
 	}
 
-	dev_addr = swiotlb_virt_to_bus(dev, map);
-
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(dev, dev_addr, size))
+	dma_addr = dma_map_range(dev, dma_get_mask(dev), virt_to_phys(map), size);
+	if (dma_addr == 0)
 		panic("map_single: bounce buffer is not DMA'ble");
 
-	return dev_addr;
+	return dma_addr;
 }
 EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
@@ -820,10 +797,8 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr);
-
-		if (range_needs_mapping(paddr, sg->length) ||
-		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
+		dma_addr_t dma_addr = dma_map_range(hwdev, dma_get_mask(hwdev), paddr, sg->length);
+		if (dma_addr == 0 || swiotlb_force) {
 			void *map = map_single(hwdev, sg_phys(sg),
 					       sg->length, dir);
 			if (!map) {
@@ -835,9 +810,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
-		} else
-			sg->dma_address = dev_addr;
+			dma_addr = dma_map_range(hwdev, dma_get_mask(hwdev), virt_to_phys(map), sg->length);
+		}
+		sg->dma_address = dma_addr;
 		sg->dma_length = sg->length;
 	}
 	return nelems;

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-22 11:11           ` Ian Campbell
@ 2009-05-22 23:55             ` Jeremy Fitzhardinge
  2009-05-23 22:59               ` Leon Woestenberg
  2009-05-26 12:51               ` Ian Campbell
  2009-05-27 19:05             ` Becky Bruce
  1 sibling, 2 replies; 30+ messages in thread
From: Jeremy Fitzhardinge @ 2009-05-22 23:55 UTC (permalink / raw)
  To: Ian Campbell; +Cc: FUJITA Tomonori, linuxppc-dev, linux-kernel

Ian Campbell wrote:
> On Thu, 2009-05-21 at 14:27 -0400, Becky Bruce wrote:
>   
>> I can work with that, but it's going to be a bit inefficient, as I  
>> actually need the dma_addr_t, not the phys_addr_t, so I'll have to  
>> convert.  In every case, this is a conversion I've already done and  
>> that I need in the calling code as well. 
>>     
>
> Does
>
>     dma_addr_t dma_map_range(struct device *hwdev, phys_addr_t addr,
>     size_t size);
>
> work for you?
>
> If the range does not need mapping then it returns the dma address, if
> you needed to calculate the dma address anyway to figure out if mapping
> is required then this is fine. If the range does need mapping then it
> returns NULL.
>   

My only concern is whether dma_addr_t == 0 is actually equivalent to 
NULL.  That is, can we be sure that address 0 will never be used?

Taking dma_alloc_coherent as a model, we could have something like:

    int dma_map_range(struct device *hwdev, phys_addr_t addr, size_t size, dma_addr_t *dma_addrp);
      

where *dma_addrp is set if the function returns success (bool return 
type might be clearer).

    J

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-22 23:55             ` Jeremy Fitzhardinge
@ 2009-05-23 22:59               ` Leon Woestenberg
  2009-05-26 12:51               ` Ian Campbell
  1 sibling, 0 replies; 30+ messages in thread
From: Leon Woestenberg @ 2009-05-23 22:59 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ian Campbell, FUJITA Tomonori, linux-kernel, linuxppc-dev

Hello,

On Sat, May 23, 2009 at 1:55 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Ian Campbell wrote:
>>
>> On Thu, 2009-05-21 at 14:27 -0400, Becky Bruce wrote:
>>
>>>
>>> I can work with that, but it's going to be a bit inefficient, as I
>>> actually need the dma_addr_t, not the phys_addr_t, so I'll have to
>>> convert. In every case, this is a conversion I've already done and that I
>>> need in the calling code as well.
>>
>> Does
>>
>>     dma_addr_t dma_map_range(struct device *hwdev, phys_addr_t addr,
>>     size_t size);
>>
>> work for you?
>>
>> If the range does not need mapping then it returns the dma address, if
>> you needed to calculate the dma address anyway to figure out if mapping
>> is required then this is fine. If the range does need mapping then it
>> returns NULL.
>>
>
> My only concern is whether dma_addr_t == 0 is actually equivalent to NULL.
> That is, can we be sure that address 0 will never be used?
>
Indeed, I remember seeing 0 returned on pci_alloc_coherent() as an
address (cookie).

Regards,
-- 
Leon

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-22 23:55             ` Jeremy Fitzhardinge
  2009-05-23 22:59               ` Leon Woestenberg
@ 2009-05-26 12:51               ` Ian Campbell
  2009-05-27 19:11                 ` Becky Bruce
  1 sibling, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2009-05-26 12:51 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: FUJITA Tomonori, linuxppc-dev, linux-kernel

On Fri, 2009-05-22 at 19:55 -0400, Jeremy Fitzhardinge wrote:
> Ian Campbell wrote:
> > On Thu, 2009-05-21 at 14:27 -0400, Becky Bruce wrote:
> >   
> >> I can work with that, but it's going to be a bit inefficient, as I  
> >> actually need the dma_addr_t, not the phys_addr_t, so I'll have to  
> >> convert.  In every case, this is a conversion I've already done and  
> >> that I need in the calling code as well. 
> >>     
> >
> > Does
> >
> >     dma_addr_t dma_map_range(struct device *hwdev, phys_addr_t addr,
> >     size_t size);
> >
> > work for you?
> >
> > If the range does not need mapping then it returns the dma address, if
> > you needed to calculate the dma address anyway to figure out if mapping
> > is required then this is fine. If the range does need mapping then it
> > returns NULL.
> >   
> 
> My only concern is whether dma_addr_t == 0 is actually equivalent to 
> NULL.  That is, can we be sure that address 0 will never be used?

It seems not, ~0UL might have been an option, but...

> Taking dma_alloc_coherent as a model, we could have something like:
> 
>     int dma_map_range(struct device *hwdev, phys_addr_t addr, size_t size, dma_addr_t *dma_addrp);
>       
> 
> where *dma_addrp is set if the function returns success (bool return 
> type might be clearer).

... this sounds like a good idea to me.

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-22 11:11           ` Ian Campbell
  2009-05-22 23:55             ` Jeremy Fitzhardinge
@ 2009-05-27 19:05             ` Becky Bruce
  2009-05-27 20:29               ` Ian Campbell
  1 sibling, 1 reply; 30+ messages in thread
From: Becky Bruce @ 2009-05-27 19:05 UTC (permalink / raw)
  To: Ian Campbell
  Cc: FUJITA Tomonori, linuxppc-dev, Jeremy Fitzhardinge, linux-kernel


On May 22, 2009, at 6:11 AM, Ian Campbell wrote:
>
>
> BTW do you need swiotlb_bus_to_virt to be __weak or is the fact that  
> it
> is implemented in terms of swiotlb_bus_to_phys sufficient?

The default one in swiotlb calls phys_to_virt on the result of  
swiotlb_bus_to_phys, which only works on lowmem.

>
>
> I have an appointment all afternoon and it's a bank holiday on Monday
> but here is my WIP patch as an example of what I'm thinking. Obviously
> some cleanups are still very much required:
>      * The Xen specific stuff in arch/x86/include/asm/dma-mapping.h
>        clearly needs to be properly abstracted away (I just stuck it
>        there for testing).
>      * swiotlb_phys_to_bus and swiotlb_bus_to_phys need to move to
>        arch/*/include/asm/dma-mapping.h instead of being __weak
>      * Maybe swiotlb_bus_to_virt can become static again.
>      * Need to finish auditing the handful of non-swiotlb users of
>        is_buffer_dma_capable.
>      * It might be possible to implement swiolb_dma_mapping_error
>        and/or swiotlb_dma_supported in terms of dma_map_range.
>      * Minor detail: it doesn't actually work at the moment ;-)

Minor detail :)

I'm in the same boat- been offline most of the week and will continue  
to be offline (I'm sneaking in an hour here to read emails!!!!) until  
sometime next week.   I'm in the middle of a big lab construction and  
move project.  I'll take a look as soon as I can - thanks!

-Becky

>
>
> Ian.
>
> diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/ 
> asm/dma-mapping.h
> index 36c0009..026667f 100644
> --- a/arch/ia64/include/asm/dma-mapping.h
> +++ b/arch/ia64/include/asm/dma-mapping.h
> @@ -174,4 +174,12 @@ dma_cache_sync (struct device *dev, void  
> *vaddr, size_t size,
>
> #define dma_is_consistent(d, h)	(1)	/* all we do is coherent  
> memory... */
>
> +static inline dma_addr_t dma_map_range(struct device *dev, u64 mask,
> +				       phys_addr_t addr, size_t size)
> +{
> +	if (addr + size <= mask)
> +		return addr;
> +	return 0;
> +}
> +
> #endif /* _ASM_IA64_DMA_MAPPING_H */
> diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/ 
> asm/dma-mapping.h
> index 916cbb6..b2813ab 100644
> --- a/arch/x86/include/asm/dma-mapping.h
> +++ b/arch/x86/include/asm/dma-mapping.h
> @@ -309,4 +309,20 @@ static inline void dma_free_coherent(struct  
> device *dev, size_t size,
> 		ops->free_coherent(dev, size, vaddr, bus);
> }
>
> +static inline dma_addr_t dma_map_range(struct device *dev, u64 mask,
> +				       phys_addr_t addr, size_t size)
> +{
> +	dma_addr_t dma_addr;
> +#ifdef CONFIG_XEN
> +	extern int xen_range_needs_mapping(phys_addr_t paddr, size_t size);
> +	if (xen_pv_domain() && xen_range_needs_mapping(addr, size))
> +		return 0;
> +#endif
> +
> +	dma_addr = swiotlb_phys_to_bus(dev, addr);
> +	if (dma_addr + size <= mask)
> +		return 0;
> +	return dma_addr;
> +}
> +
> #endif
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index 2fffc22..c2b4196 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -145,8 +145,8 @@ again:
> 	if (!page)
> 		return NULL;
>
> -	addr = page_to_phys(page);
> -	if (!is_buffer_dma_capable(dma_mask, addr, size)) {
> +	addr = dma_map_range(dev, dma_mask, page_to_phys(page), size);
> +	if (!(addr == 0)) {
> 		__free_pages(page, get_order(size));
>
> 		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
> diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci- 
> gart_64.c
> index 1e8920d..6a55770 100644
> --- a/arch/x86/kernel/pci-gart_64.c
> +++ b/arch/x86/kernel/pci-gart_64.c
> @@ -191,13 +191,13 @@ static inline int
> need_iommu(struct device *dev, unsigned long addr, size_t size)
> {
> 	return force_iommu ||
> -		!is_buffer_dma_capable(*dev->dma_mask, addr, size);
> +		dma_map_range(dev, *dev->dma_mask, addr, size) == 0;
> }
>
> static inline int
> nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
> {
> -	return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
> +	return dma_map_range(dev, *dev->dma_mask, addr, size) == 0;
> }
>
> /* Map a single continuous physical area into the IOMMU.
> diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
> index 71d412a..712ce59 100644
> --- a/arch/x86/kernel/pci-nommu.c
> +++ b/arch/x86/kernel/pci-nommu.c
> @@ -12,13 +12,13 @@
> #include <asm/dma.h>
>
> static int
> -check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t  
> size)
> +check_addr(char *name, struct device *hwdev, phys_addr_t phys,  
> size_t size)
> {
> -	if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
> +	if (hwdev && dma_map_range(hwdev, *hwdev->dma_mask, phys, size) ==  
> 0) {
> 		if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
> 			printk(KERN_ERR
> 			    "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
> -				name, (long long)bus, size,
> +				name, (long long)phys, size,
> 				(long long)*hwdev->dma_mask);
> 		return 0;
> 	}
> @@ -30,12 +30,12 @@ static dma_addr_t nommu_map_page(struct device  
> *dev, struct page *page,
> 				 enum dma_data_direction dir,
> 				 struct dma_attrs *attrs)
> {
> -	dma_addr_t bus = page_to_phys(page) + offset;
> +	phys_addr_t phys = page_to_phys(page) + offset;
> 	WARN_ON(size == 0);
> -	if (!check_addr("map_single", dev, bus, size))
> +	if (!check_addr("map_single", dev, phys, size))
> 		return bad_dma_address;
> 	flush_write_buffers();
> -	return bus;
> +	return phys;
> }
>
> /* Map a set of buffers described by scatterlist in streaming
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 8083b6a..85dafa1 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -96,11 +96,6 @@ static inline int is_device_dma_capable(struct  
> device *dev)
> 	return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
> }
>
> -static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr,  
> size_t size)
> -{
> -	return addr + size <= mask;
> -}
> -
> #ifdef CONFIG_HAS_DMA
> #include <asm/dma-mapping.h>
> #else
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index cec5f62..4fd5185 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -147,17 +147,6 @@ void * __weak swiotlb_bus_to_virt(struct device  
> *hwdev, dma_addr_t address)
> 	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
> }
>
> -int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
> -					       dma_addr_t addr, size_t size)
> -{
> -	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
> -}
> -
> -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr,  
> size_t size)
> -{
> -	return 0;
> -}
> -
> static void swiotlb_print_info(unsigned long bytes)
> {
> 	phys_addr_t pstart, pend;
> @@ -318,17 +307,6 @@ cleanup1:
> 	return -ENOMEM;
> }
>
> -static inline int
> -address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t  
> size)
> -{
> -	return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
> -}
> -
> -static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
> -{
> -	return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr,  
> size);
> -}
> -
> static int is_swiotlb_buffer(char *addr)
> {
> 	return addr >= io_tlb_start && addr < io_tlb_end;
> @@ -555,18 +533,17 @@ void *
> swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> 		       dma_addr_t *dma_handle, gfp_t flags)
> {
> -	dma_addr_t dev_addr;
> +	phys_addr_t phys;
> 	void *ret;
> 	int order = get_order(size);
> 	u64 dma_mask = DMA_BIT_MASK(32);
> +	dma_addr_t dma_addr;
>
> 	if (hwdev && hwdev->coherent_dma_mask)
> 		dma_mask = hwdev->coherent_dma_mask;
>
> 	ret = (void *)__get_free_pages(flags, order);
> -	if (ret &&
> -	    !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev,  
> ret),
> -				   size)) {
> +	if (ret && dma_map_range(hwdev, dma_mask, virt_to_phys(ret), size)  
> == 0) {
> 		/*
> 		 * The allocated memory isn't reachable by the device.
> 		 */
> @@ -585,19 +562,20 @@ swiotlb_alloc_coherent(struct device *hwdev,  
> size_t size,
> 	}
>
> 	memset(ret, 0, size);
> -	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
> +	phys = virt_to_phys(ret);
>
> 	/* Confirm address can be DMA'd by device */
> -	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
> -		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
> +	dma_addr = dma_map_range(hwdev, dma_mask, phys, size);
> +	if (dma_addr == 0) {
> +		printk("hwdev DMA mask = 0x%016Lx, physical addr = 0x%016Lx\n",
> 		       (unsigned long long)dma_mask,
> -		       (unsigned long long)dev_addr);
> +		       (unsigned long long)phys);
>
> 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
> 		do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
> 		return NULL;
> 	}
> -	*dma_handle = dev_addr;
> +	*dma_handle = dma_addr;
> 	return ret;
> }
> EXPORT_SYMBOL(swiotlb_alloc_coherent);
> @@ -649,7 +627,7 @@ dma_addr_t swiotlb_map_page(struct device *dev,  
> struct page *page,
> 			    struct dma_attrs *attrs)
> {
> 	phys_addr_t phys = page_to_phys(page) + offset;
> -	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
> +	dma_addr_t dma_addr;
> 	void *map;
>
> 	BUG_ON(dir == DMA_NONE);
> @@ -658,9 +636,9 @@ dma_addr_t swiotlb_map_page(struct device *dev,  
> struct page *page,
> 	 * we can safely return the device addr and not worry about bounce
> 	 * buffering it.
> 	 */
> -	if (!address_needs_mapping(dev, dev_addr, size) &&
> -	    !range_needs_mapping(phys, size))
> -		return dev_addr;
> +	dma_addr = dma_map_range(dev, dma_get_mask(dev), phys, size);
> +	if (dma_addr && !swiotlb_force)
> +		return dma_addr;
>
> 	/*
> 	 * Oh well, have to allocate and map a bounce buffer.
> @@ -671,15 +649,14 @@ dma_addr_t swiotlb_map_page(struct device  
> *dev, struct page *page,
> 		map = io_tlb_overflow_buffer;
> 	}
>
> -	dev_addr = swiotlb_virt_to_bus(dev, map);
> -
> 	/*
> 	 * Ensure that the address returned is DMA'ble
> 	 */
> -	if (address_needs_mapping(dev, dev_addr, size))
> +	dma_addr = dma_map_range(dev, dma_get_mask(dev),  
> virt_to_phys(map), size);
> +	if (dma_addr == 0)
> 		panic("map_single: bounce buffer is not DMA'ble");
>
> -	return dev_addr;
> +	return dma_addr;
> }
> EXPORT_SYMBOL_GPL(swiotlb_map_page);
>
> @@ -820,10 +797,8 @@ swiotlb_map_sg_attrs(struct device *hwdev,  
> struct scatterlist *sgl, int nelems,
>
> 	for_each_sg(sgl, sg, nelems, i) {
> 		phys_addr_t paddr = sg_phys(sg);
> -		dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr);
> -
> -		if (range_needs_mapping(paddr, sg->length) ||
> -		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
> +		dma_addr_t dma_addr = dma_map_range(hwdev, dma_get_mask(hwdev),  
> paddr, sg->length);
> +		if (dma_addr == 0 || swiotlb_force) {
> 			void *map = map_single(hwdev, sg_phys(sg),
> 					       sg->length, dir);
> 			if (!map) {
> @@ -835,9 +810,9 @@ swiotlb_map_sg_attrs(struct device *hwdev,  
> struct scatterlist *sgl, int nelems,
> 				sgl[0].dma_length = 0;
> 				return 0;
> 			}
> -			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
> -		} else
> -			sg->dma_address = dev_addr;
> +			dma_addr = dma_map_range(hwdev, dma_get_mask(hwdev),  
> virt_to_phys(map), sg->length);
> +		}
> +		sg->dma_address = dma_addr;
> 		sg->dma_length = sg->length;
> 	}
> 	return nelems;
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-22 10:51             ` FUJITA Tomonori
@ 2009-05-27 19:05               ` Becky Bruce
  0 siblings, 0 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-27 19:05 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linuxppc-dev, jeremy, linux-kernel, Ian.Campbell


On May 22, 2009, at 5:51 AM, FUJITA Tomonori wrote:

> On Thu, 21 May 2009 13:18:54 -0700
> Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>> Becky Bruce wrote:
>>> I can work with that, but it's going to be a bit inefficient, as I
>>> actually need the dma_addr_t, not the phys_addr_t, so I'll have to
>>> convert.  In every case, this is a conversion I've already done and
>>> that I need in the calling code as well.  Can we pass in both the
>>> phys_addr_t and the dma_addr_t?
>>
>> The Xen implementation would needs to do the phys to bus conversion  
>> page
>> by page anyway, so it wouldn't help much.  But it also wouldn't hurt.
>>
>> How expensive is the phys-to-bus conversion on power?  Is it worth
>> making the interface more complex for?  Would passing phys+bus mean  
>> that
>> we wouldn't also need to pass dev?
>
> I don't think so. POWERPC needs it.

Hrm, it looks like my response to this got dropped.  Fujita is correct  
- we still need the dev on powerpc because we use it to get device- 
specific information that is used to determining when we need to  
actually bounce.

Cheers,
B

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-26 12:51               ` Ian Campbell
@ 2009-05-27 19:11                 ` Becky Bruce
  0 siblings, 0 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-27 19:11 UTC (permalink / raw)
  To: Ian Campbell
  Cc: FUJITA Tomonori, linuxppc-dev, Jeremy Fitzhardinge, linux-kernel


On May 26, 2009, at 7:51 AM, Ian Campbell wrote:

> On Fri, 2009-05-22 at 19:55 -0400, Jeremy Fitzhardinge wrote:
>> Ian Campbell wrote:
>>> On Thu, 2009-05-21 at 14:27 -0400, Becky Bruce wrote:
>>>
>>>> I can work with that, but it's going to be a bit inefficient, as I
>>>> actually need the dma_addr_t, not the phys_addr_t, so I'll have to
>>>> convert.  In every case, this is a conversion I've already done and
>>>> that I need in the calling code as well.
>>>>
>>>
>>> Does
>>>
>>>    dma_addr_t dma_map_range(struct device *hwdev, phys_addr_t addr,
>>>    size_t size);
>>>
>>> work for you?
>>>
>>> If the range does not need mapping then it returns the dma  
>>> address, if
>>> you needed to calculate the dma address anyway to figure out if  
>>> mapping
>>> is required then this is fine. If the range does need mapping then  
>>> it
>>> returns NULL.
>>>
>>
>> My only concern is whether dma_addr_t == 0 is actually equivalent to
>> NULL.  That is, can we be sure that address 0 will never be used?
>
> It seems not, ~0UL might have been an option, but...
>
>> Taking dma_alloc_coherent as a model, we could have something like:
>>
>>    int dma_map_range(struct device *hwdev, phys_addr_t addr, size_t  
>> size, dma_addr_t *dma_addrp);
>>
>>
>> where *dma_addrp is set if the function returns success (bool return
>> type might be clearer).
>
> ... this sounds like a good idea to me.

I agree.  This will work for me as well.

-becky

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-27 19:05             ` Becky Bruce
@ 2009-05-27 20:29               ` Ian Campbell
  2009-05-27 22:11                 ` Becky Bruce
  0 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2009-05-27 20:29 UTC (permalink / raw)
  To: Becky Bruce
  Cc: FUJITA Tomonori, linuxppc-dev, Jeremy Fitzhardinge, linux-kernel

On Wed, 2009-05-27 at 15:05 -0400, Becky Bruce wrote:
> On May 22, 2009, at 6:11 AM, Ian Campbell wrote:
> >
> >
> > BTW do you need swiotlb_bus_to_virt to be __weak or is the fact that  
> > it
> > is implemented in terms of swiotlb_bus_to_phys sufficient?
> 
> The default one in swiotlb calls phys_to_virt on the result of  
> swiotlb_bus_to_phys, which only works on lowmem.

Perhaps we can simply enhance the default one to handle highmem instead
of making it arch specific?

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-27 20:29               ` Ian Campbell
@ 2009-05-27 22:11                 ` Becky Bruce
  0 siblings, 0 replies; 30+ messages in thread
From: Becky Bruce @ 2009-05-27 22:11 UTC (permalink / raw)
  To: Ian Campbell
  Cc: FUJITA Tomonori, linuxppc-dev, Jeremy Fitzhardinge, linux-kernel


On May 27, 2009, at 3:29 PM, Ian Campbell wrote:

> On Wed, 2009-05-27 at 15:05 -0400, Becky Bruce wrote:
>> On May 22, 2009, at 6:11 AM, Ian Campbell wrote:
>>>
>>>
>>> BTW do you need swiotlb_bus_to_virt to be __weak or is the fact that
>>> it
>>> is implemented in terms of swiotlb_bus_to_phys sufficient?
>>
>> The default one in swiotlb calls phys_to_virt on the result of
>> swiotlb_bus_to_phys, which only works on lowmem.
>
> Perhaps we can simply enhance the default one to handle highmem  
> instead
> of making it arch specific?

That's fine by me - as long as what I've got will work for you guys  
I'm happy to stick it in the standard one (inside a nice pretty #ifdef  
CONFIG_HIGHMEM, of course).

Cheers,
B

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-19 13:04         ` Kumar Gala
  2009-05-19 20:06           ` Becky Bruce
@ 2009-05-28  4:42           ` Kumar Gala
  2009-05-28  6:11             ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 30+ messages in thread
From: Kumar Gala @ 2009-05-28  4:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: FUJITA Tomonori, linuxppc-dev list

Ben,

Any comments on this.. need a decision so we can have patches ready  
for .31.

- k

On May 19, 2009, at 8:04 AM, Kumar Gala wrote:

>
> On May 18, 2009, at 4:49 PM, Benjamin Herrenschmidt wrote:
>
>> On Mon, 2009-05-18 at 08:25 -0500, Kumar Gala wrote:
>>>
>>> Part of this is how the generic swiotlb code works and part of it
>>> was
>>> our desire not to bloat dev archdata by adding such info that as you
>>> say is either bus specific or conveyed in the dma addr mask.
>>
>> Right but perf sucks :-)
>>
>> Maybe an option is to clamp the DMA mask when it's set by the  
>> driver to
>> limit it to the available inbound window ?
>
> Clamping the DMA mask is even worse than the additional indirection  
> for us.  We have valid scenarios in which we'd have 512M of outbound  
> PCI address space and 4G of mem and thus 3.5G of inbound PCI address  
> space.  With the DMA mask we'd be limited to 2G and bouncing from  
> 2..3.5G when we don't need to.
>
> I think our options are to change archdata as follows:
>
> Option 1 - just add a new data member to dev_archdata
>
> struct dev_archdata {
>        /* Optional pointer to an OF device node */
>        struct device_node      *of_node;
>
>        /* DMA operations on that device */
>        struct dma_mapping_ops  *dma_ops;
>        void 		        *dma_data;
> 	dma_addr_t		direct_dma_addr;
> };
>
> Option 2 - introduce a proper container for how we use dma_data.   
> This may just be moving the indirection from an indirection function  
> call to an indirection data reference:
>
> struct dma_data {
>        dma_addr_t      offset;
>        dma_addr_t      direct_dma_addr;
>        struct          iommu_table *iommu_table;
> };
>
> struct dev_archdata {
>        /* Optional pointer to an OF device node */
>        struct device_node      *of_node;
>
>        /* DMA operations on that device */
>        struct dma_mapping_ops  *dma_ops;
>        struct dma_data         *dma_data;
> };
>
> Option 3 - use dma_data to keep the addr at which we need to bounce  
> vs not for SWIOTLB - this has potential issues w/conflicting with  
> dma_data being used as the dma_offset. (need to think on that a bit  
> more).  Additionally this has the benefit in that we need dma_data  
> to be a 64-bit quantity on ppc32 w/>32-bit phys addr.
>
> struct dev_archdata {
>        /* Optional pointer to an OF device node */
>        struct device_node      *of_node;
>
>        /* DMA operations on that device */
>        struct dma_mapping_ops  *dma_ops;
>        dma_addr_t 	        dma_data;
> };
>
> others??
>
> - k
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-28  4:42           ` Kumar Gala
@ 2009-05-28  6:11             ` Benjamin Herrenschmidt
  2009-05-28 13:06               ` Kumar Gala
  0 siblings, 1 reply; 30+ messages in thread
From: Benjamin Herrenschmidt @ 2009-05-28  6:11 UTC (permalink / raw)
  To: Kumar Gala; +Cc: FUJITA Tomonori, linuxppc-dev list

On Wed, 2009-05-27 at 23:42 -0500, Kumar Gala wrote:
> Ben,
> 
> Any comments on this.. need a decision so we can have patches ready  
> for .31.

> > Clamping the DMA mask is even worse than the additional indirection  
> > for us.  We have valid scenarios in which we'd have 512M of outbound  
> > PCI address space and 4G of mem and thus 3.5G of inbound PCI address  
> > space.  With the DMA mask we'd be limited to 2G and bouncing from  
> > 2..3.5G when we don't need to.

Ok and agreed.

> > I think our options are to change archdata as follows:
> >
> > Option 1 - just add a new data member to dev_archdata
> >
> > struct dev_archdata {
> >        /* Optional pointer to an OF device node */
> >        struct device_node      *of_node;
> >
> >        /* DMA operations on that device */
> >        struct dma_mapping_ops  *dma_ops;
> >        void 		        *dma_data;
> > 	dma_addr_t		direct_dma_addr;
> > };

That sounds like the "simple" option, might want for now to make
it conditional on SWIOTLB but the bloat is reasonably small I would
expect.

> > Option 2 - introduce a proper container for how we use dma_data.   
> > This may just be moving the indirection from an indirection function  
> > call to an indirection data reference:

Right, it somewhat defeats the purpose though an indirect data reference
tends to hit the pipeline less hard than an indirect function call...
 
> > Option 3 - use dma_data to keep the addr at which we need to bounce  
> > vs not for SWIOTLB - this has potential issues w/conflicting with  
> > dma_data being used as the dma_offset. (need to think on that a bit  
> > more).  Additionally this has the benefit in that we need dma_data  
> > to be a 64-bit quantity on ppc32 w/>32-bit phys addr.

Well, that means that swiotlb can not be used with a !0 offset. That
-might- be an issue if the PCI is setup "backward", ie with 0..N being
the outbound MMIO and N..4G the DMA region, remapped to 0. There are
reasons to do it this way, it's not invalid, for example it allow easy
access to ISA/VGA holes.

At this stage I have no firm opinion. I'm thinking that either we try
to limit the overhead and option 1 is probably the simplest, at the
expense of a little bit of memory, or we think the overhead is going
to be minimum and we may as well stick to 2 functions since that's
going to be more flexible.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit
  2009-05-28  6:11             ` Benjamin Herrenschmidt
@ 2009-05-28 13:06               ` Kumar Gala
  0 siblings, 0 replies; 30+ messages in thread
From: Kumar Gala @ 2009-05-28 13:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: FUJITA Tomonori, linuxppc-dev list


On May 28, 2009, at 1:11 AM, Benjamin Herrenschmidt wrote:

> On Wed, 2009-05-27 at 23:42 -0500, Kumar Gala wrote:
>> Ben,
>>
>> Any comments on this.. need a decision so we can have patches ready
>> for .31.
>
>>> Clamping the DMA mask is even worse than the additional indirection
>>> for us.  We have valid scenarios in which we'd have 512M of outbound
>>> PCI address space and 4G of mem and thus 3.5G of inbound PCI address
>>> space.  With the DMA mask we'd be limited to 2G and bouncing from
>>> 2..3.5G when we don't need to.
>
> Ok and agreed.
>
>>> I think our options are to change archdata as follows:
>>>
>>> Option 1 - just add a new data member to dev_archdata
>>>
>>> struct dev_archdata {
>>>       /* Optional pointer to an OF device node */
>>>       struct device_node      *of_node;
>>>
>>>       /* DMA operations on that device */
>>>       struct dma_mapping_ops  *dma_ops;
>>>       void 		        *dma_data;
>>> 	dma_addr_t		direct_dma_addr;
>>> };
>
> That sounds like the "simple" option, might want for now to make
> it conditional on SWIOTLB but the bloat is reasonably small I would
> expect.
>
>>> Option 2 - introduce a proper container for how we use dma_data.
>>> This may just be moving the indirection from an indirection function
>>> call to an indirection data reference:
>
> Right, it somewhat defeats the purpose though an indirect data  
> reference
> tends to hit the pipeline less hard than an indirect function call...
>
>>> Option 3 - use dma_data to keep the addr at which we need to bounce
>>> vs not for SWIOTLB - this has potential issues w/conflicting with
>>> dma_data being used as the dma_offset. (need to think on that a bit
>>> more).  Additionally this has the benefit in that we need dma_data
>>> to be a 64-bit quantity on ppc32 w/>32-bit phys addr.
>
> Well, that means that swiotlb can not be used with a !0 offset. That
> -might- be an issue if the PCI is setup "backward", ie with 0..N being
> the outbound MMIO and N..4G the DMA region, remapped to 0. There are
> reasons to do it this way, it's not invalid, for example it allows easy
> access to ISA/VGA holes.

Yeah, realized that after thinking about it a bit more.

> At this stage I have no firm opinion. I'm thinking that either we try
> to limit the overhead and option 1 is probably the simplest, at the
> expense of a little bit of memory, or we think the overhead is going
> to be minimum and we may as well stick to 2 functions since that's
> going to be more flexible.

If you don't have a firm opinion I would ask you pull in this patch as  
it is in your next tree.  If we decide to move away from the 2  
function method and use a little bit more memory per dev_archdata we  
at least have the history in the tree of the 2 function method.

- k

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2009-05-28 13:06 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-14 22:42 [PATCH V2 1/3] powerpc: Use sg->dma_length in sg_dma_len() macro on 32-bit Becky Bruce
2009-05-14 22:42 ` [PATCH V2 2/3] powerpc: Add support for swiotlb " Becky Bruce
2009-05-14 22:42   ` [PATCH V2 3/3] powerpc: Add 86xx support for SWIOTLB Becky Bruce
2009-05-15  4:49   ` [PATCH V2 2/3] powerpc: Add support for swiotlb on 32-bit Kumar Gala
2009-05-18  4:49   ` Benjamin Herrenschmidt
2009-05-18 13:25     ` Kumar Gala
2009-05-18 21:49       ` Benjamin Herrenschmidt
2009-05-19 13:04         ` Kumar Gala
2009-05-19 20:06           ` Becky Bruce
2009-05-28  4:42           ` Kumar Gala
2009-05-28  6:11             ` Benjamin Herrenschmidt
2009-05-28 13:06               ` Kumar Gala
2009-05-19  5:27   ` FUJITA Tomonori
2009-05-19 20:57     ` Becky Bruce
2009-05-21 17:43       ` Jeremy Fitzhardinge
2009-05-21 18:27         ` Becky Bruce
2009-05-21 19:01           ` Ian Campbell
2009-05-22 10:51             ` FUJITA Tomonori
2009-05-21 20:18           ` Jeremy Fitzhardinge
2009-05-21 22:08             ` Ian Campbell
2009-05-22 10:51             ` FUJITA Tomonori
2009-05-27 19:05               ` Becky Bruce
2009-05-22 11:11           ` Ian Campbell
2009-05-22 23:55             ` Jeremy Fitzhardinge
2009-05-23 22:59               ` Leon Woestenberg
2009-05-26 12:51               ` Ian Campbell
2009-05-27 19:11                 ` Becky Bruce
2009-05-27 19:05             ` Becky Bruce
2009-05-27 20:29               ` Ian Campbell
2009-05-27 22:11                 ` Becky Bruce

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).