Hi Xen maintainers and friends, please take a look at this series that cleans up the parts of swiotlb-xen that deal with non-coherent caches.
Reuse the arm64 code that uses the dma-direct/swiotlb helpers for DMA non-coherent devices. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/Kconfig | 4 + arch/arm/include/asm/device.h | 3 - arch/arm/include/asm/xen/page-coherent.h | 93 ---------------------- arch/arm/mm/Kconfig | 4 - arch/arm/mm/dma-mapping.c | 8 +- arch/arm64/include/asm/xen/page-coherent.h | 75 ----------------- drivers/xen/swiotlb-xen.c | 49 +----------- include/xen/arm/page-coherent.h | 71 +++++++++++++++++ 8 files changed, 78 insertions(+), 229 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 33b00579beff..24360211534a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,6 +7,8 @@ config ARM select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB + select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KEEPINITRD @@ -18,6 +20,8 @@ config ARM select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB + select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB select ARCH_HAS_TEARDOWN_DMA_OPS if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index f6955b55c544..c675bc0d5aa8 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -14,9 +14,6 @@ struct dev_archdata { #endif #ifdef CONFIG_ARM_DMA_USE_IOMMU struct dma_iommu_mapping *mapping; -#endif -#ifdef CONFIG_XEN - const struct dma_map_ops *dev_dma_ops; #endif unsigned int dma_coherent:1; unsigned int dma_ops_setup:1; diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index 2c403e7c782d..27e984977402 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -1,95 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H -#define _ASM_ARM_XEN_PAGE_COHERENT_H - -#include <linux/dma-mapping.h> -#include <asm/page.h> #include <xen/arm/page-coherent.h> - -static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) -{ - if (dev && dev->archdata.dev_dma_ops) - return dev->archdata.dev_dma_ops; - return get_arch_dma_ops(NULL); -} - -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; - bool local = (page_pfn <= dev_pfn) && - (dev_pfn - page_pfn < compound_pages); - - /* - * Dom0 is mapped 1:1, while the Linux page can span across - * multiple Xen pages, it's not possible for it to contain a - * mix of local and foreign Xen pages. So if the first xen_pfn - * == mfn the page is local otherwise it's a foreign page - * grant-mapped in dom0. If the page is local we can safely - * call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (local) - xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); - else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -} - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->unmap_page) - xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); - } else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); -} - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) - xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_device) - xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c54cd7ed90ba..c1222c0e9fd3 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -664,10 +664,6 @@ config ARM_LPAE !CPU_32v4 && !CPU_32v3 select PHYS_ADDR_T_64BIT select SWIOTLB - select ARCH_HAS_DMA_COHERENT_TO_PFN - select ARCH_HAS_DMA_MMAP_PGPROT - select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_SYNC_DMA_FOR_CPU help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index d42557ee69c2..738097396445 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1132,10 +1132,6 @@ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) * 32-bit DMA. * Use the generic dma-direct / swiotlb ops code in that case, as that * handles bounce buffering for us. - * - * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the - * latter is also selected by the Xen code, but that code for now relies - * on non-NULL dev_dma_ops. To be cleaned up later. */ if (IS_ENABLED(CONFIG_ARM_LPAE)) return NULL; @@ -2363,10 +2359,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, set_dma_ops(dev, dma_ops); #ifdef CONFIG_XEN - if (xen_initial_domain()) { - dev->archdata.dev_dma_ops = dev->dma_ops; + if (xen_initial_domain()) dev->dma_ops = xen_dma_ops; - } #endif dev->archdata.dma_ops_setup = true; } diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index d88e56b90b93..27e984977402 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -1,77 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H -#define _ASM_ARM64_XEN_PAGE_COHERENT_H - -#include <linux/dma-mapping.h> -#include <asm/page.h> #include <xen/arm/page-coherent.h> - -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); -} - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - - if (pfn_valid(pfn)) - dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); - else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) - dma_direct_sync_single_for_device(hwdev, handle, size, dir); - else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; - bool local = (page_pfn <= dev_pfn) && - (dev_pfn - page_pfn < compound_pages); - - if (local) - dma_direct_map_page(hwdev, page, offset, size, dir, attrs); - else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -} - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) - dma_direct_unmap_page(hwdev, handle, size, dir, attrs); - else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); -} - -#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index ae1df496bf38..b8808677ae1d 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -547,51 +547,6 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask; } -/* - * Create userspace mapping for the DMA-coherent memory. - * This function should be called with the pages from the current domain only, - * passing pages mapped from other domains would lead to memory corruption. - */ -static int -xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_ARM - if (xen_get_dma_ops(dev)->mmap) - return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, - dma_addr, size, attrs); -#endif - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -} - -/* - * This function should be called with the pages from the current domain only, - * passing pages mapped from other domains would lead to memory corruption. - */ -static int -xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_ARM - if (xen_get_dma_ops(dev)->get_sgtable) { -#if 0 - /* - * This check verifies that the page belongs to the current domain and - * is not one mapped from another domain. - * This check is for debug only, and should not go to production build - */ - unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); - BUG_ON (!page_is_ram(bfn)); -#endif - return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, - handle, size, attrs); - } -#endif - return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); -} - const struct dma_map_ops xen_swiotlb_dma_ops = { .alloc = xen_swiotlb_alloc_coherent, .free = xen_swiotlb_free_coherent, @@ -604,6 +559,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .map_page = xen_swiotlb_map_page, .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, - .mmap = xen_swiotlb_dma_mmap, - .get_sgtable = xen_swiotlb_get_sgtable, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index 2ca9164a79bf..da2cc09c8eda 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -2,6 +2,9 @@ #ifndef _XEN_ARM_PAGE_COHERENT_H #define _XEN_ARM_PAGE_COHERENT_H +#include <linux/dma-mapping.h> +#include <asm/page.h> + void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -13,4 +16,72 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev, void __xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + + if (pfn_valid(pfn)) + dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) + dma_direct_sync_single_for_device(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_device(hwdev, handle, size, dir); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<<compound_order(page)) * XEN_PFN_PER_PAGE; + bool local = (page_pfn <= dev_pfn) && + (dev_pfn - page_pfn < compound_pages); + + if (local) + dma_direct_map_page(hwdev, page, offset, size, dir, attrs); + else + __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); +} + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long pfn = PFN_DOWN(handle); + /* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross + * multiple Xen page, it's not possible to have a mix of local and + * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a + * foreign mfn will always return false. If the page is local we can + * safely call the native dma_ops function, otherwise we call the xen + * specific function. + */ + if (pfn_valid(pfn)) + dma_direct_unmap_page(hwdev, handle, size, dir, attrs); + else + __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); +} + #endif /* _XEN_ARM_PAGE_COHERENT_H */ -- 2.20.1
Use the dma-noncoherent dev_is_dma_coherent helper instead of the home grown variant. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/include/asm/dma-mapping.h | 6 ------ arch/arm/xen/mm.c | 12 ++++++------ arch/arm64/include/asm/dma-mapping.h | 9 --------- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index dba9355e2484..bdd80ddbca34 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) } #endif -/* do not use this function in a driver */ -static inline bool is_device_dma_coherent(struct device *dev) -{ - return dev->archdata.dma_coherent; -} - /** * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d33b77e9add3..90574d89d0d4 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/cpu.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/export.h> @@ -99,7 +99,7 @@ void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (is_device_dma_coherent(hwdev)) + if (dev_is_dma_coherent(hwdev)) return; if (attrs & DMA_ATTR_SKIP_CPU_SYNC) return; @@ -112,7 +112,7 @@ void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, unsigned long attrs) { - if (is_device_dma_coherent(hwdev)) + if (dev_is_dma_coherent(hwdev)) return; if (attrs & DMA_ATTR_SKIP_CPU_SYNC) return; @@ -123,7 +123,7 @@ void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, void __xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - if (is_device_dma_coherent(hwdev)) + if (dev_is_dma_coherent(hwdev)) return; __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); } @@ -131,7 +131,7 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev, void __xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - if (is_device_dma_coherent(hwdev)) + if (dev_is_dma_coherent(hwdev)) return; __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir); } @@ -159,7 +159,7 @@ bool xen_arch_need_swiotlb(struct device *dev, * memory and we are not able to flush the cache. */ return (!hypercall_cflush && (xen_pfn != bfn) && - !is_device_dma_coherent(dev)); + !dev_is_dma_coherent(dev)); } int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index bdcb0922a40c..67243255a858 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -18,14 +18,5 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -/* - * Do not use this function in a driver, it is only provided for - * arch/arm/mm/xen.c, which is used by arm64 as well. - */ -static inline bool is_device_dma_coherent(struct device *dev) -{ - return dev->dma_coherent; -} - #endif /* __KERNEL__ */ #endif /* __ASM_DMA_MAPPING_H */ -- 2.20.1
Instead of taking apart the dma address in both callers do it inside dma_cache_maint itself. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/xen/mm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 90574d89d0d4..d9da24fda2f7 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -43,13 +43,15 @@ static bool hypercall_cflush = false; /* functions called by SWIOTLB */ -static void dma_cache_maint(dma_addr_t handle, unsigned long offset, - size_t size, enum dma_data_direction dir, enum dma_cache_op op) +static void dma_cache_maint(dma_addr_t handle, size_t size, + enum dma_data_direction dir, enum dma_cache_op op) { struct gnttab_cache_flush cflush; unsigned long xen_pfn; + unsigned long offset = handle & ~PAGE_MASK; size_t left = size; + offset &= PAGE_MASK; xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE; offset %= XEN_PAGE_SIZE; @@ -86,13 +88,13 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP); + dma_cache_maint(handle, size, dir, DMA_UNMAP); } static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP); + dma_cache_maint(handle, size, dir, DMA_MAP); } void __xen_dma_map_page(struct device *hwdev, struct page *page, -- 2.20.1
arm and arm64 can just use xen_swiotlb_dma_ops directly like x86, no need for a pointer indirection. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/mm/dma-mapping.c | 3 ++- arch/arm/xen/mm.c | 4 ---- arch/arm64/mm/dma-mapping.c | 3 ++- include/xen/arm/hypervisor.h | 2 -- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 738097396445..2661cad36359 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -35,6 +35,7 @@ #include <asm/mach/map.h> #include <asm/system_info.h> #include <asm/dma-contiguous.h> +#include <xen/swiotlb-xen.h> #include "dma.h" #include "mm.h" @@ -2360,7 +2361,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, #ifdef CONFIG_XEN if (xen_initial_domain()) - dev->dma_ops = xen_dma_ops; + dev->dma_ops = &xen_swiotlb_dma_ops; #endif dev->archdata.dma_ops_setup = true; } diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d9da24fda2f7..388a45002bad 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -183,16 +183,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) } EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); -const struct dma_map_ops *xen_dma_ops; -EXPORT_SYMBOL(xen_dma_ops); - int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; if (!xen_initial_domain()) return 0; xen_swiotlb_init(1, false); - xen_dma_ops = &xen_swiotlb_dma_ops; cflush.op = 0; cflush.a.dev_bus_addr = 0; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index bd2b039f43a6..4b244a037349 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -8,6 +8,7 @@ #include <linux/cache.h> #include <linux/dma-noncoherent.h> #include <linux/dma-iommu.h> +#include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> @@ -64,6 +65,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, #ifdef CONFIG_XEN if (xen_initial_domain()) - dev->dma_ops = xen_dma_ops; + dev->dma_ops = &xen_swiotlb_dma_ops; #endif } diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h index 2982571f7cc1..43ef24dd030e 100644 --- a/include/xen/arm/hypervisor.h +++ b/include/xen/arm/hypervisor.h @@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return PARAVIRT_LAZY_NONE; } -extern const struct dma_map_ops *xen_dma_ops; - #ifdef CONFIG_XEN void __init xen_early_init(void); #else -- 2.20.1
These routines are only used by swiotlb-xen, which cannot be modular. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/xen/mm.c | 2 -- arch/x86/xen/mmu_pv.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 388a45002bad..a59980f1aa54 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -175,13 +175,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, *dma_handle = pstart; return 0; } -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { return; } -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); int __init xen_mm_init(void) { diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 26e8b326966d..c8dbee62ec2a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, *dma_handle = virt_to_machine(vstart).maddr; return success ? 0 : -ENOMEM; } -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { @@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_unlock_irqrestore(&xen_reservation_lock, flags); } -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); static noinline void xen_flush_tlb_all(void) { -- 2.20.1
x86 currently calls alloc_pages, but using dma-direct works as well there, with the added benefit of using the CMA pool if available. The biggest advantage is of course to remove a pointless bit of architecture specific code. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/x86/include/asm/xen/page-coherent.h | 16 ---------------- drivers/xen/swiotlb-xen.c | 7 +++---- include/xen/arm/page-coherent.h | 12 ------------ 3 files changed, 3 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 116777e7f387..8ee33c5edded 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -5,22 +5,6 @@ #include <asm/page.h> #include <linux/dma-mapping.h> -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) -{ - void *vstart = (void*)__get_free_pages(flags, get_order(size)); - *dma_handle = virt_to_phys(vstart); - return vstart; -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - unsigned long attrs) -{ - free_pages((unsigned long) cpu_addr, get_order(size)); -} - static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index b8808677ae1d..f9dd4cb6e4b3 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -299,8 +299,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, * address. In fact on ARM virt_to_phys only works for kernel direct * mapped RAM memory. Also see comment below. */ - ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs); - + ret = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); if (!ret) return ret; @@ -319,7 +318,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, else { if (xen_create_contiguous_region(phys, order, fls64(dma_mask), dma_handle) != 0) { - xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs); + dma_direct_free(hwdev, size, ret, (dma_addr_t)phys, attrs); return NULL; } SetPageXenRemapped(virt_to_page(ret)); @@ -351,7 +350,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, TestClearPageXenRemapped(virt_to_page(vaddr))) xen_destroy_contiguous_region(phys, order); - xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs); + dma_direct_free(hwdev, size, vaddr, (dma_addr_t)phys, attrs); } /* diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index da2cc09c8eda..4294a31305ca 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -16,18 +16,6 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev, void __xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); -} - static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { -- 2.20.1
Merge the various page-coherent.h files into a single one that either provides prototypes or stubs depending on the need for cache maintainance. For extra benefits alo include <xen/page-coherent.h> in the file actually implementing the interfaces provided. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/include/asm/xen/page-coherent.h | 2 -- arch/arm/xen/mm.c | 1 + arch/arm64/include/asm/xen/page-coherent.h | 2 -- arch/x86/include/asm/xen/page-coherent.h | 22 ------------------ drivers/xen/swiotlb-xen.c | 4 +--- include/Kbuild | 2 +- include/xen/{arm => }/page-coherent.h | 27 +++++++++++++++++++--- 7 files changed, 27 insertions(+), 33 deletions(-) delete mode 100644 arch/arm/include/asm/xen/page-coherent.h delete mode 100644 arch/arm64/include/asm/xen/page-coherent.h delete mode 100644 arch/x86/include/asm/xen/page-coherent.h rename include/xen/{arm => }/page-coherent.h (76%) diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h deleted file mode 100644 index 27e984977402..000000000000 --- a/arch/arm/include/asm/xen/page-coherent.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <xen/arm/page-coherent.h> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index a59980f1aa54..85482cdda1e5 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -15,6 +15,7 @@ #include <xen/interface/grant_table.h> #include <xen/interface/memory.h> #include <xen/page.h> +#include <xen/page-coherent.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h deleted file mode 100644 index 27e984977402..000000000000 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <xen/arm/page-coherent.h> diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h deleted file mode 100644 index 8ee33c5edded..000000000000 --- a/arch/x86/include/asm/xen/page-coherent.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_XEN_PAGE_COHERENT_H -#define _ASM_X86_XEN_PAGE_COHERENT_H - -#include <asm/page.h> -#include <linux/dma-mapping.h> - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) { } - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) { } - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f9dd4cb6e4b3..7b23929854e7 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -31,12 +31,10 @@ #include <linux/export.h> #include <xen/swiotlb-xen.h> #include <xen/page.h> +#include <xen/page-coherent.h> #include <xen/xen-ops.h> #include <xen/hvc-console.h> -#include <asm/dma-mapping.h> -#include <asm/xen/page-coherent.h> - #include <trace/events/swiotlb.h> /* * Used to do a quick range check in swiotlb_tbl_unmap_single and diff --git a/include/Kbuild b/include/Kbuild index c38f0d46b267..e2ae52ef9e1e 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1189,7 +1189,6 @@ header-test- += video/vga.h header-test- += video/w100fb.h header-test- += xen/acpi.h header-test- += xen/arm/hypercall.h -header-test- += xen/arm/page-coherent.h header-test- += xen/arm/page.h header-test- += xen/balloon.h header-test- += xen/events.h @@ -1231,6 +1230,7 @@ header-test- += xen/interface/xen.h header-test- += xen/interface/xenpmu.h header-test- += xen/mem-reservation.h header-test- += xen/page.h +header-test- += xen/page-coherent.h header-test- += xen/platform_pci.h header-test- += xen/swiotlb-xen.h header-test- += xen/xen-front-pgdir-shbuf.h diff --git a/include/xen/arm/page-coherent.h b/include/xen/page-coherent.h similarity index 76% rename from include/xen/arm/page-coherent.h rename to include/xen/page-coherent.h index 4294a31305ca..7c32944de051 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/page-coherent.h @@ -1,10 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _XEN_ARM_PAGE_COHERENT_H -#define _XEN_ARM_PAGE_COHERENT_H +#ifndef _XEN_PAGE_COHERENT_H +#define _XEN_PAGE_COHERENT_H #include <linux/dma-mapping.h> #include <asm/page.h> +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -71,5 +73,24 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, else __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); } +#else +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ +} +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ +} +#endif -#endif /* _XEN_ARM_PAGE_COHERENT_H */ +#endif /* _XEN_PAGE_COHERENT_H */ -- 2.20.1
xen_dma_map_page uses a different and more complicated check for foreign pages than the other three cache maintainance helpers. Switch it to the simpler pfn_vali method a well. Signed-off-by: Christoph Hellwig <hch@lst.de> --- include/xen/page-coherent.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/include/xen/page-coherent.h b/include/xen/page-coherent.h index 7c32944de051..0f4d468e7a89 100644 --- a/include/xen/page-coherent.h +++ b/include/xen/page-coherent.h @@ -43,14 +43,9 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; - bool local = (page_pfn <= dev_pfn) && - (dev_pfn - page_pfn < compound_pages); + unsigned long pfn = PFN_DOWN(dev_addr); - if (local) + if (pfn_valid(pfn)) dma_direct_map_page(hwdev, page, offset, size, dir, attrs); else __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -- 2.20.1
Now that we know we always have the dma-noncoherent.h helpers available if we are on an architecture with support for non-coherent devices, we can just call them directly, and remove the calls to the dma-direct routines, including the fact that we call the dma_direct_map_page routines but ignore the value returned from it. Instead we now have Xen wrappers for the arch_sync_dma_for_{device,cpu} helpers that call the special Xen versions of those routines for foreign pages. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/xen/mm.c | 47 ++--------------- drivers/xen/swiotlb-xen.c | 19 ++++--- include/xen/page-coherent.h | 100 +++++++++++------------------------- 3 files changed, 42 insertions(+), 124 deletions(-) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 85482cdda1e5..0eb88f1355c2 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -86,59 +86,18 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, } while (left); } -static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) +void __xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir) { dma_cache_maint(handle, size, dir, DMA_UNMAP); } -static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, +void __xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { dma_cache_maint(handle, size, dir, DMA_MAP); } -void __xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - if (dev_is_dma_coherent(hwdev)) - return; - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - return; - - __xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir); -} - -void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) - -{ - if (dev_is_dma_coherent(hwdev)) - return; - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - return; - - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); -} - -void __xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - if (dev_is_dma_coherent(hwdev)) - return; - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); -} - -void __xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - if (dev_is_dma_coherent(hwdev)) - return; - __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir); -} - bool xen_arch_need_swiotlb(struct device *dev, phys_addr_t phys, dma_addr_t dev_addr) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 7b23929854e7..c3c383033ae4 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -388,6 +388,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (map == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; + phys = map; dev_addr = xen_phys_to_bus(map); /* @@ -399,14 +400,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, return DMA_MAPPING_ERROR; } - page = pfn_to_page(map >> PAGE_SHIFT); - offset = map & ~PAGE_MASK; done: - /* - * we are not interested in the dma_addr returned by xen_dma_map_page, - * only in the potential cache flushes executed by the function. - */ - xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs); + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + xen_dma_sync_for_device(dev, dev_addr, phys, size, dir); return dev_addr; } @@ -426,7 +422,8 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); - xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs); + if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir); /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) @@ -446,7 +443,8 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, { phys_addr_t paddr = xen_bus_to_phys(dma_addr); - xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir); + if (!dev_is_dma_coherent(dev)) + xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir); if (is_xen_swiotlb_buffer(dma_addr)) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); @@ -461,7 +459,8 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, if (is_xen_swiotlb_buffer(dma_addr)) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); - xen_dma_sync_single_for_device(dev, dma_addr, size, dir); + if (!dev_is_dma_coherent(dev)) + xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir); } /* diff --git a/include/xen/page-coherent.h b/include/xen/page-coherent.h index 0f4d468e7a89..38b572ed0879 100644 --- a/include/xen/page-coherent.h +++ b/include/xen/page-coherent.h @@ -2,88 +2,48 @@ #ifndef _XEN_PAGE_COHERENT_H #define _XEN_PAGE_COHERENT_H -#include <linux/dma-mapping.h> -#include <asm/page.h> +#include <linux/dma-noncoherent.h> #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) -void __xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs); -void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); -void __xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir); -void __xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir); - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - - if (pfn_valid(pfn)) - dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); - else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) - dma_direct_sync_single_for_device(hwdev, handle, size, dir); - else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(dev_addr); - - if (pfn_valid(pfn)) - dma_direct_map_page(hwdev, page, offset, size, dir, attrs); +/* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross multiple Xen + * pages, it's not possible to have a mix of local and foreign Xen page. Dom0 + * is mapped 1:1, so calling pfn_valid on a foreign mfn will always return + * false. If the page is local we can safely use the native routines for cache + * maintainance, otherwise we call the Xen specific function. + */ +void __xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir); +void __xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +static inline void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, + phys_addr_t paddr, size_t size, enum dma_data_direction dir) +{ + if (pfn_valid(PFN_DOWN(handle))) + arch_sync_dma_for_cpu(dev, paddr, size, dir); else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); + __xen_dma_sync_for_cpu(dev, handle, size, dir); } -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) +static inline void xen_dma_sync_for_device(struct device *dev, + dma_addr_t handle, phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) - dma_direct_unmap_page(hwdev, handle, size, dir, attrs); + if (pfn_valid(PFN_DOWN(handle))) + arch_sync_dma_for_device(dev, paddr, size, dir); else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); + __xen_dma_sync_for_device(dev, handle, size, dir); } #else -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ -} -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ -} -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) +static inline void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, + phys_addr_t paddr, size_t size, enum dma_data_direction dir) { } -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) +static inline void xen_dma_sync_for_device(struct device *dev, + dma_addr_t handle, phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { } #endif -- 2.20.1
No need for a no-op wrapper. Signed-off-by: Christoph Hellwig <hch@lst.de> --- drivers/xen/swiotlb-xen.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c3c383033ae4..b6b9c4c1b397 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -414,9 +414,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ -static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t paddr = xen_bus_to_phys(dev_addr); @@ -430,13 +429,6 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); } -static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - xen_unmap_single(hwdev, dev_addr, size, dir, attrs); -} - static void xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) @@ -477,7 +469,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); + xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), + dir, attrs); } -- 2.20.1
Now that the Xen special cases are gone nothing worth mentioning is left in the arm64 <asm/dma-mapping.h> file, so switch to use the asm-generic version instead. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm64/include/asm/Kbuild | 1 + arch/arm64/include/asm/dma-mapping.h | 22 ---------------------- arch/arm64/mm/dma-mapping.c | 1 + 3 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 arch/arm64/include/asm/dma-mapping.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index c52e151afab0..98a5405c8558 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += delay.h generic-y += div64.h generic-y += dma.h generic-y += dma-contiguous.h +generic-y += dma-mapping.h generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += hw_irq.h diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h deleted file mode 100644 index 67243255a858..000000000000 --- a/arch/arm64/include/asm/dma-mapping.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ -#ifndef __ASM_DMA_MAPPING_H -#define __ASM_DMA_MAPPING_H - -#ifdef __KERNEL__ - -#include <linux/types.h> -#include <linux/vmalloc.h> - -#include <xen/xen.h> -#include <asm/xen/hypervisor.h> - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return NULL; -} - -#endif /* __KERNEL__ */ -#endif /* __ASM_DMA_MAPPING_H */ diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4b244a037349..6578abcfbbc7 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -8,6 +8,7 @@ #include <linux/cache.h> #include <linux/dma-noncoherent.h> #include <linux/dma-iommu.h> +#include <xen/xen.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -- 2.20.1
On 16/08/2019 14:00, Christoph Hellwig wrote: > Instead of taking apart the dma address in both callers do it inside > dma_cache_maint itself. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > arch/arm/xen/mm.c | 10 ++++++---- > 1 file changed, 6 insertions(+), 4 deletions(-) > > diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c > index 90574d89d0d4..d9da24fda2f7 100644 > --- a/arch/arm/xen/mm.c > +++ b/arch/arm/xen/mm.c > @@ -43,13 +43,15 @@ static bool hypercall_cflush = false; > > /* functions called by SWIOTLB */ > > -static void dma_cache_maint(dma_addr_t handle, unsigned long offset, > - size_t size, enum dma_data_direction dir, enum dma_cache_op op) > +static void dma_cache_maint(dma_addr_t handle, size_t size, > + enum dma_data_direction dir, enum dma_cache_op op) > { > struct gnttab_cache_flush cflush; > unsigned long xen_pfn; > + unsigned long offset = handle & ~PAGE_MASK; > size_t left = size; > > + offset &= PAGE_MASK; Ahem... presumably that should be handle, not offset. Robin. > xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE; > offset %= XEN_PAGE_SIZE; > > @@ -86,13 +88,13 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, > static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle, > size_t size, enum dma_data_direction dir) > { > - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP); > + dma_cache_maint(handle, size, dir, DMA_UNMAP); > } > > static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, > size_t size, enum dma_data_direction dir) > { > - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP); > + dma_cache_maint(handle, size, dir, DMA_MAP); > } > > void __xen_dma_map_page(struct device *hwdev, struct page *page, >
On Fri, Aug 16, 2019 at 02:37:58PM +0100, Robin Murphy wrote:
> On 16/08/2019 14:00, Christoph Hellwig wrote:
>> Instead of taking apart the dma address in both callers do it inside
>> dma_cache_maint itself.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>> arch/arm/xen/mm.c | 10 ++++++----
>> 1 file changed, 6 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
>> index 90574d89d0d4..d9da24fda2f7 100644
>> --- a/arch/arm/xen/mm.c
>> +++ b/arch/arm/xen/mm.c
>> @@ -43,13 +43,15 @@ static bool hypercall_cflush = false;
>> /* functions called by SWIOTLB */
>> -static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
>> - size_t size, enum dma_data_direction dir, enum dma_cache_op op)
>> +static void dma_cache_maint(dma_addr_t handle, size_t size,
>> + enum dma_data_direction dir, enum dma_cache_op op)
>> {
>> struct gnttab_cache_flush cflush;
>> unsigned long xen_pfn;
>> + unsigned long offset = handle & ~PAGE_MASK;
>> size_t left = size;
>> + offset &= PAGE_MASK;
>
> Ahem... presumably that should be handle, not offset.
Ooops, yes.
Hi,
On 8/16/19 2:00 PM, Christoph Hellwig wrote:
> Merge the various page-coherent.h files into a single one that either
> provides prototypes or stubs depending on the need for cache
> maintainance.
>
> For extra benefits alo include <xen/page-coherent.h> in the file
> actually implementing the interfaces provided.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> arch/arm/include/asm/xen/page-coherent.h | 2 --
> arch/arm/xen/mm.c | 1 +
> arch/arm64/include/asm/xen/page-coherent.h | 2 --
> arch/x86/include/asm/xen/page-coherent.h | 22 ------------------
> drivers/xen/swiotlb-xen.c | 4 +---
> include/Kbuild | 2 +-
> include/xen/{arm => }/page-coherent.h | 27 +++++++++++++++++++---
I am not sure I agree with this rename. The implementation of the
helpers are very Arm specific as this is assuming Dom0 is 1:1 mapped.
This was necessary due to the lack of IOMMU on Arm platforms back then.
But this is now a pain to get rid of it on newer platform...
Cheers,
--
Julien Grall
On Fri, Aug 16, 2019 at 11:40:43PM +0100, Julien Grall wrote:
> I am not sure I agree with this rename. The implementation of the helpers
> are very Arm specific as this is assuming Dom0 is 1:1 mapped.
>
> This was necessary due to the lack of IOMMU on Arm platforms back then.
> But this is now a pain to get rid of it on newer platform...
So if you look at the final version of the header after the whole
series, what assumes a 1:1 mapping? It all just is
if (pfn_valid())
local cache sync;
else
call into the arch code;
are you concerned that the local cache sync might have to be split
up more for a non-1:1 map in that case? We could just move
the xen_dma_* routines into the arch instead of __xen_dma, but it
really helps to have a common interface header.
Hi Christoph, On 8/17/19 7:50 AM, Christoph Hellwig wrote: > On Fri, Aug 16, 2019 at 11:40:43PM +0100, Julien Grall wrote: >> I am not sure I agree with this rename. The implementation of the helpers >> are very Arm specific as this is assuming Dom0 is 1:1 mapped. >> >> This was necessary due to the lack of IOMMU on Arm platforms back then. >> But this is now a pain to get rid of it on newer platform... > > So if you look at the final version of the header after the whole > series, what assumes a 1:1 mapping? It all just is > > if (pfn_valid()) > local cache sync; > else > call into the arch code; In the context of Xen Arm, the dev_addr is a host physical address. From my understanding pfn_valid() is dealing with a guest physical frame. Therefore by passing PFN_DOWN(dev_addr) in argument you assume that the host and guest address spaces are the same. > > are you concerned that the local cache sync might have to be split > up more for a non-1:1 map in that case? We could just movea > the xen_dma_* routines into the arch instead of __xen_dma, but it > really helps to have a common interface header. Moving xen_dma_* routines into the arch would be a good option. Although, I would still consider a stub version for arch not requiring specific DMA. Cheers, -- Julien Grall
On Fri, Aug 16, 2019 at 03:00:13PM +0200, Christoph Hellwig wrote:
> Now that the Xen special cases are gone nothing worth mentioning is
> left in the arm64 <asm/dma-mapping.h> file, so switch to use the
> asm-generic version instead.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> arch/arm64/include/asm/Kbuild | 1 +
> arch/arm64/include/asm/dma-mapping.h | 22 ----------------------
> arch/arm64/mm/dma-mapping.c | 1 +
> 3 files changed, 2 insertions(+), 22 deletions(-)
> delete mode 100644 arch/arm64/include/asm/dma-mapping.h
Acked-by: Will Deacon <will@kernel.org>
Thanks for cleaning this up.
Will
Hi Christoph, On 8/16/19 2:00 PM, Christoph Hellwig wrote: > Use the dma-noncoherent dev_is_dma_coherent helper instead of the home > grown variant. It took me a bit of time to understand that dev->archdata.dma_coherent and dev->dma_coherent will always contain the same value. Would you mind it mention it in the commit message? Other than that: Reviewed-by: Julien Grall <julien.grall@arm.com> > > Signed-off-by: Christoph Hellwig <hch@lst.de> Cheers, -- Julien Grall
Hi Christoph,
On 8/16/19 2:00 PM, Christoph Hellwig wrote:
> arm and arm64 can just use xen_swiotlb_dma_ops directly like x86, no
> need for a pointer indirection.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Julien Grall <julien.grall@arm.com>
Cheers,
--
Julien Grall
Hi Christoph, On 8/16/19 2:00 PM, Christoph Hellwig wrote: > +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, > + dma_addr_t dev_addr, unsigned long offset, size_t size, > + enum dma_data_direction dir, unsigned long attrs) > +{ > + unsigned long page_pfn = page_to_xen_pfn(page); > + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); > + unsigned long compound_pages = > + (1<<compound_order(page)) * XEN_PFN_PER_PAGE; > + bool local = (page_pfn <= dev_pfn) && > + (dev_pfn - page_pfn < compound_pages); > + The Arm version as a comment here. Could we retain it? > + if (local) > + dma_direct_map_page(hwdev, page, offset, size, dir, attrs); > + else > + __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); > +} > + Cheers, -- Julien Grall
Hi Christoph, On 8/16/19 2:00 PM, Christoph Hellwig wrote: > xen_dma_map_page uses a different and more complicated check for > foreign pages than the other three cache maintainance helpers. > Switch it to the simpler pfn_vali method a well. NIT: s/pfn_vali/pfn_valid/ > > Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Julien Grall <julien.grall@arm.com> Cheers, -- Julien Grall
On Mon, Aug 19, 2019 at 12:45:17PM +0100, Julien Grall wrote:
> On 8/16/19 2:00 PM, Christoph Hellwig wrote:
>> +static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
>> + dma_addr_t dev_addr, unsigned long offset, size_t size,
>> + enum dma_data_direction dir, unsigned long attrs)
>> +{
>> + unsigned long page_pfn = page_to_xen_pfn(page);
>> + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
>> + unsigned long compound_pages =
>> + (1<<compound_order(page)) * XEN_PFN_PER_PAGE;
>> + bool local = (page_pfn <= dev_pfn) &&
>> + (dev_pfn - page_pfn < compound_pages);
>> +
>
> The Arm version as a comment here. Could we retain it?
I've added it in this patch, altough the rewrites later on mean it will
go away in favour of a new comment elsewhere anyway.
On Fri, 16 Aug 2019, Christoph Hellwig wrote:
> Hi Xen maintainers and friends,
>
> please take a look at this series that cleans up the parts of swiotlb-xen
> that deal with non-coherent caches.
Hi Christoph,
I just wanted to let you know that your series is on my radar, but I
have been swamped the last few days. I hope to get to it by the end of
the week.
Cheers,
Stefano
On Mon, Aug 26, 2019 at 07:00:44PM -0700, Stefano Stabellini wrote:
> On Fri, 16 Aug 2019, Christoph Hellwig wrote:
> > Hi Xen maintainers and friends,
> >
> > please take a look at this series that cleans up the parts of swiotlb-xen
> > that deal with non-coherent caches.
>
> Hi Christoph,
>
> I just wanted to let you know that your series is on my radar, but I
> have been swamped the last few days. I hope to get to it by the end of
> the week.
Thanks, and no rush. Note that I posted a v2 with a few significant
changes yesterday.