All of lore.kernel.org
 help / color / mirror / Atom feed
* use generic dma-direct and swiotlb code for x86 V2
@ 2018-03-14 17:51 ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:51 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

Hi all,

this series switches the x86 code the the dma-direct implementation
for direct (non-iommu) dma and the generic swiotlb ops.  This includes
getting rid of the special ops for the AMD memory encryption case and
the STA2x11 SOC.  The generic implementations are based on the x86
code, so they provide the same functionality.

Changes since V1:
 - fix the length in the set_memory_decrypted call
 - fix the SEV case

^ permalink raw reply	[flat|nested] 60+ messages in thread

* use generic dma-direct and swiotlb code for x86 V2
@ 2018-03-14 17:51 ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:51 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

Hi all,

this series switches the x86 code the the dma-direct implementation
for direct (non-iommu) dma and the generic swiotlb ops.  This includes
getting rid of the special ops for the AMD memory encryption case and
the STA2x11 SOC.  The generic implementations are based on the x86
code, so they provide the same functionality.

Changes since V1:
 - fix the length in the set_memory_decrypted call
 - fix the SEV case

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 01/14] x86: remove X86_PPRO_FENCE
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

There were only a few Pentium Pro multiprocessors systems where this
errata applied. They are more than 20 years old now, and we've slowly
dropped places where put the workarounds in and discouraged anyone
from enabling the workaround.

Get rid of it for good.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/Kconfig.cpu                        | 13 -------------
 arch/x86/entry/vdso/vdso32/vclock_gettime.c |  2 --
 arch/x86/include/asm/barrier.h              | 30 -----------------------------
 arch/x86/include/asm/io.h                   | 15 ---------------
 arch/x86/kernel/pci-nommu.c                 | 19 ------------------
 arch/x86/um/asm/barrier.h                   |  4 ----
 6 files changed, 83 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8b8d2297d486..638411f22267 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
 	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
-config X86_PPRO_FENCE
-	bool "PentiumPro memory ordering errata workaround"
-	depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
-	---help---
-	  Old PentiumPro multiprocessor systems had errata that could cause
-	  memory operations to violate the x86 ordering standard in rare cases.
-	  Enabling this option will attempt to work around some (but not all)
-	  occurrences of this problem, at the cost of much heavier spinlock and
-	  memory barrier operations.
-
-	  If unsure, say n here. Even distro kernels should think twice before
-	  enabling this: there are few systems, and an unlikely bug.
-
 config X86_F00F_BUG
 	def_bool y
 	depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef..9242b28418d5 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
 #undef CONFIG_OPTIMIZE_INLINING
 #endif
 
-#undef CONFIG_X86_PPRO_FENCE
-
 #ifdef CONFIG_X86_64
 
 /*
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f043ae9..042b5e892ed1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
 					   "lfence", X86_FEATURE_LFENCE_RDTSC)
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()	rmb()
-#else
 #define dma_rmb()	barrier()
-#endif
 #define dma_wmb()	barrier()
 
 #ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define __smp_wmb()	barrier()
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v)					\
-do {									\
-	compiletime_assert_atomic_type(*p);				\
-	__smp_mb();							\
-	WRITE_ONCE(*p, v);						\
-} while (0)
-
-#define __smp_load_acquire(p)						\
-({									\
-	typeof(*p) ___p1 = READ_ONCE(*p);				\
-	compiletime_assert_atomic_type(*p);				\
-	__smp_mb();							\
-	___p1;								\
-})
-
-#else /* regular x86 TSO memory ordering */
-
 #define __smp_store_release(p, v)					\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
@@ -107,8 +79,6 @@ do {									\
 	___p1;								\
 })
 
-#endif
-
 /* Atomic operations are already serializing on x86 */
 #define __smp_mb__before_atomic()	barrier()
 #define __smp_mb__after_atomic()	barrier()
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e948627fd0..f6e5b9375d8c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);
  */
 #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
 
-/*
- *	Cache management
- *
- *	This needed for two cases
- *	1. Out of order aware processors
- *	2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
-	asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 618285e475c6..ac7ea3a8242f 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 	WARN_ON(size == 0);
 	if (!check_addr("map_single", dev, bus, size))
 		return NOMMU_MAPPING_ERROR;
-	flush_write_buffers();
 	return bus;
 }
 
@@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 			return 0;
 		s->dma_length = s->length;
 	}
-	flush_write_buffers();
 	return nents;
 }
 
-static void nommu_sync_single_for_device(struct device *dev,
-			dma_addr_t addr, size_t size,
-			enum dma_data_direction dir)
-{
-	flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
-			struct scatterlist *sg, int nelems,
-			enum dma_data_direction dir)
-{
-	flush_write_buffers();
-}
-
 static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return dma_addr == NOMMU_MAPPING_ERROR;
@@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {
 	.free			= dma_generic_free_coherent,
 	.map_sg			= nommu_map_sg,
 	.map_page		= nommu_map_page,
-	.sync_single_for_device = nommu_sync_single_for_device,
-	.sync_sg_for_device	= nommu_sync_sg_for_device,
 	.is_phys		= 1,
 	.mapping_error		= nommu_mapping_error,
 	.dma_supported		= x86_dma_supported,
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index b7d73400ea29..f31e5d903161 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -30,11 +30,7 @@
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()	rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
 #define dma_rmb()	barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
 #define dma_wmb()	barrier()
 
 #include <asm-generic/barrier.h>
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 01/14] x86: remove X86_PPRO_FENCE
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

There were only a few Pentium Pro multiprocessors systems where this
errata applied. They are more than 20 years old now, and we've slowly
dropped places where put the workarounds in and discouraged anyone
from enabling the workaround.

Get rid of it for good.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 arch/x86/Kconfig.cpu                        | 13 -------------
 arch/x86/entry/vdso/vdso32/vclock_gettime.c |  2 --
 arch/x86/include/asm/barrier.h              | 30 -----------------------------
 arch/x86/include/asm/io.h                   | 15 ---------------
 arch/x86/kernel/pci-nommu.c                 | 19 ------------------
 arch/x86/um/asm/barrier.h                   |  4 ----
 6 files changed, 83 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8b8d2297d486..638411f22267 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
 	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
-config X86_PPRO_FENCE
-	bool "PentiumPro memory ordering errata workaround"
-	depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
-	---help---
-	  Old PentiumPro multiprocessor systems had errata that could cause
-	  memory operations to violate the x86 ordering standard in rare cases.
-	  Enabling this option will attempt to work around some (but not all)
-	  occurrences of this problem, at the cost of much heavier spinlock and
-	  memory barrier operations.
-
-	  If unsure, say n here. Even distro kernels should think twice before
-	  enabling this: there are few systems, and an unlikely bug.
-
 config X86_F00F_BUG
 	def_bool y
 	depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef..9242b28418d5 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
 #undef CONFIG_OPTIMIZE_INLINING
 #endif
 
-#undef CONFIG_X86_PPRO_FENCE
-
 #ifdef CONFIG_X86_64
 
 /*
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f043ae9..042b5e892ed1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
 					   "lfence", X86_FEATURE_LFENCE_RDTSC)
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()	rmb()
-#else
 #define dma_rmb()	barrier()
-#endif
 #define dma_wmb()	barrier()
 
 #ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define __smp_wmb()	barrier()
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v)					\
-do {									\
-	compiletime_assert_atomic_type(*p);				\
-	__smp_mb();							\
-	WRITE_ONCE(*p, v);						\
-} while (0)
-
-#define __smp_load_acquire(p)						\
-({									\
-	typeof(*p) ___p1 = READ_ONCE(*p);				\
-	compiletime_assert_atomic_type(*p);				\
-	__smp_mb();							\
-	___p1;								\
-})
-
-#else /* regular x86 TSO memory ordering */
-
 #define __smp_store_release(p, v)					\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
@@ -107,8 +79,6 @@ do {									\
 	___p1;								\
 })
 
-#endif
-
 /* Atomic operations are already serializing on x86 */
 #define __smp_mb__before_atomic()	barrier()
 #define __smp_mb__after_atomic()	barrier()
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e948627fd0..f6e5b9375d8c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);
  */
 #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
 
-/*
- *	Cache management
- *
- *	This needed for two cases
- *	1. Out of order aware processors
- *	2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
-	asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 618285e475c6..ac7ea3a8242f 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 	WARN_ON(size == 0);
 	if (!check_addr("map_single", dev, bus, size))
 		return NOMMU_MAPPING_ERROR;
-	flush_write_buffers();
 	return bus;
 }
 
@@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 			return 0;
 		s->dma_length = s->length;
 	}
-	flush_write_buffers();
 	return nents;
 }
 
-static void nommu_sync_single_for_device(struct device *dev,
-			dma_addr_t addr, size_t size,
-			enum dma_data_direction dir)
-{
-	flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
-			struct scatterlist *sg, int nelems,
-			enum dma_data_direction dir)
-{
-	flush_write_buffers();
-}
-
 static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return dma_addr == NOMMU_MAPPING_ERROR;
@@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {
 	.free			= dma_generic_free_coherent,
 	.map_sg			= nommu_map_sg,
 	.map_page		= nommu_map_page,
-	.sync_single_for_device = nommu_sync_single_for_device,
-	.sync_sg_for_device	= nommu_sync_sg_for_device,
 	.is_phys		= 1,
 	.mapping_error		= nommu_mapping_error,
 	.dma_supported		= x86_dma_supported,
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index b7d73400ea29..f31e5d903161 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -30,11 +30,7 @@
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()	rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
 #define dma_rmb()	barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
 #define dma_wmb()	barrier()
 
 #include <asm-generic/barrier.h>
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 02/14] x86: remove dma_alloc_coherent_mask
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

These days all devices (including the ISA fallback device) have a coherent
DMA mask set, so remove the workaround.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/dma-mapping.h | 18 ++----------------
 arch/x86/kernel/pci-dma.c          | 10 ++++------
 arch/x86/mm/mem_encrypt.c          |  4 +---
 drivers/xen/swiotlb-xen.c          | 16 +---------------
 4 files changed, 8 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6277c83c0eb1..545bf3721bc0 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -44,26 +44,12 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size,
 				      void *vaddr, dma_addr_t dma_addr,
 				      unsigned long attrs);
 
-static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
-						    gfp_t gfp)
-{
-	unsigned long dma_mask = 0;
-
-	dma_mask = dev->coherent_dma_mask;
-	if (!dma_mask)
-		dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
-	return dma_mask;
-}
-
 static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 {
-	unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
-
-	if (dma_mask <= DMA_BIT_MASK(24))
+	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
 		gfp |= GFP_DMA;
 #ifdef CONFIG_X86_64
-	if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
+	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
 		gfp |= GFP_DMA32;
 #endif
        return gfp;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index df7ab02f959f..b59820872ec7 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -80,13 +80,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag,
 				 unsigned long attrs)
 {
-	unsigned long dma_mask;
 	struct page *page;
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t addr;
 
-	dma_mask = dma_alloc_coherent_mask(dev, flag);
-
 again:
 	page = NULL;
 	/* CMA can be used only in the context which permits sleeping */
@@ -95,7 +92,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 						 flag);
 		if (page) {
 			addr = phys_to_dma(dev, page_to_phys(page));
-			if (addr + size > dma_mask) {
+			if (addr + size > dev->coherent_dma_mask) {
 				dma_release_from_contiguous(dev, page, count);
 				page = NULL;
 			}
@@ -108,10 +105,11 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 		return NULL;
 
 	addr = phys_to_dma(dev, page_to_phys(page));
-	if (addr + size > dma_mask) {
+	if (addr + size > dev->coherent_dma_mask) {
 		__free_pages(page, get_order(size));
 
-		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
+		if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+		    !(flag & GFP_DMA)) {
 			flag = (flag & ~GFP_DMA32) | GFP_DMA;
 			goto again;
 		}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..75dc8b525c12 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -203,12 +203,10 @@ void __init sme_early_init(void)
 static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		       gfp_t gfp, unsigned long attrs)
 {
-	unsigned long dma_mask;
 	unsigned int order;
 	struct page *page;
 	void *vaddr = NULL;
 
-	dma_mask = dma_alloc_coherent_mask(dev, gfp);
 	order = get_order(size);
 
 	/*
@@ -226,7 +224,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		 * mask with it already cleared.
 		 */
 		addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
-		if ((addr + size) > dma_mask) {
+		if ((addr + size) > dev->coherent_dma_mask) {
 			__free_pages(page, get_order(size));
 		} else {
 			vaddr = page_address(page);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 5bb72d3f8337..e1c60899fdbc 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -53,20 +53,6 @@
  * API.
  */
 
-#ifndef CONFIG_X86
-static unsigned long dma_alloc_coherent_mask(struct device *dev,
-					    gfp_t gfp)
-{
-	unsigned long dma_mask = 0;
-
-	dma_mask = dev->coherent_dma_mask;
-	if (!dma_mask)
-		dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
-	return dma_mask;
-}
-#endif
-
 #define XEN_SWIOTLB_ERROR_CODE	(~(dma_addr_t)0x0)
 
 static char *xen_io_tlb_start, *xen_io_tlb_end;
@@ -328,7 +314,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		return ret;
 
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+		dma_mask = hwdev->coherent_dma_mask;
 
 	/* At this point dma_handle is the physical address, next we are
 	 * going to set it to the machine address.
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 02/14] x86: remove dma_alloc_coherent_mask
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

These days all devices (including the ISA fallback device) have a coherent
DMA mask set, so remove the workaround.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 arch/x86/include/asm/dma-mapping.h | 18 ++----------------
 arch/x86/kernel/pci-dma.c          | 10 ++++------
 arch/x86/mm/mem_encrypt.c          |  4 +---
 drivers/xen/swiotlb-xen.c          | 16 +---------------
 4 files changed, 8 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6277c83c0eb1..545bf3721bc0 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -44,26 +44,12 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size,
 				      void *vaddr, dma_addr_t dma_addr,
 				      unsigned long attrs);
 
-static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
-						    gfp_t gfp)
-{
-	unsigned long dma_mask = 0;
-
-	dma_mask = dev->coherent_dma_mask;
-	if (!dma_mask)
-		dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
-	return dma_mask;
-}
-
 static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 {
-	unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
-
-	if (dma_mask <= DMA_BIT_MASK(24))
+	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
 		gfp |= GFP_DMA;
 #ifdef CONFIG_X86_64
-	if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
+	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
 		gfp |= GFP_DMA32;
 #endif
        return gfp;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index df7ab02f959f..b59820872ec7 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -80,13 +80,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag,
 				 unsigned long attrs)
 {
-	unsigned long dma_mask;
 	struct page *page;
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t addr;
 
-	dma_mask = dma_alloc_coherent_mask(dev, flag);
-
 again:
 	page = NULL;
 	/* CMA can be used only in the context which permits sleeping */
@@ -95,7 +92,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 						 flag);
 		if (page) {
 			addr = phys_to_dma(dev, page_to_phys(page));
-			if (addr + size > dma_mask) {
+			if (addr + size > dev->coherent_dma_mask) {
 				dma_release_from_contiguous(dev, page, count);
 				page = NULL;
 			}
@@ -108,10 +105,11 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 		return NULL;
 
 	addr = phys_to_dma(dev, page_to_phys(page));
-	if (addr + size > dma_mask) {
+	if (addr + size > dev->coherent_dma_mask) {
 		__free_pages(page, get_order(size));
 
-		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
+		if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+		    !(flag & GFP_DMA)) {
 			flag = (flag & ~GFP_DMA32) | GFP_DMA;
 			goto again;
 		}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..75dc8b525c12 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -203,12 +203,10 @@ void __init sme_early_init(void)
 static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		       gfp_t gfp, unsigned long attrs)
 {
-	unsigned long dma_mask;
 	unsigned int order;
 	struct page *page;
 	void *vaddr = NULL;
 
-	dma_mask = dma_alloc_coherent_mask(dev, gfp);
 	order = get_order(size);
 
 	/*
@@ -226,7 +224,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		 * mask with it already cleared.
 		 */
 		addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
-		if ((addr + size) > dma_mask) {
+		if ((addr + size) > dev->coherent_dma_mask) {
 			__free_pages(page, get_order(size));
 		} else {
 			vaddr = page_address(page);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 5bb72d3f8337..e1c60899fdbc 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -53,20 +53,6 @@
  * API.
  */
 
-#ifndef CONFIG_X86
-static unsigned long dma_alloc_coherent_mask(struct device *dev,
-					    gfp_t gfp)
-{
-	unsigned long dma_mask = 0;
-
-	dma_mask = dev->coherent_dma_mask;
-	if (!dma_mask)
-		dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
-	return dma_mask;
-}
-#endif
-
 #define XEN_SWIOTLB_ERROR_CODE	(~(dma_addr_t)0x0)
 
 static char *xen_io_tlb_start, *xen_io_tlb_end;
@@ -328,7 +314,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		return ret;
 
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+		dma_mask = hwdev->coherent_dma_mask;
 
 	/* At this point dma_handle is the physical address, next we are
 	 * going to set it to the machine address.
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 03/14] x86: use dma-direct
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

The generic dma-direct implementation is now functionally equivalent to
the x86 nommu dma_map implementation, so switch over to using it.

Note that the various iommu drivers are switched from x86_dma_supported
to dma_direct_supported to provide identical functionality, although the
checks looks fairly questionable for at least some of them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/dma-mapping.h |  8 -----
 arch/x86/include/asm/iommu.h       |  3 --
 arch/x86/kernel/Makefile           |  2 +-
 arch/x86/kernel/amd_gart_64.c      |  7 ++--
 arch/x86/kernel/pci-calgary_64.c   |  3 +-
 arch/x86/kernel/pci-dma.c          | 66 +-------------------------------------
 arch/x86/kernel/pci-swiotlb.c      |  5 ++-
 arch/x86/pci/sta2x11-fixup.c       |  2 +-
 drivers/iommu/amd_iommu.c          |  7 ++--
 drivers/iommu/intel-iommu.c        |  3 +-
 11 files changed, 17 insertions(+), 90 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fa71a78ec99..10f482beda1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,7 @@ config X86
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
 	select CLOCKSOURCE_WATCHDOG
 	select DCACHE_WORD_ACCESS
+	select DMA_DIRECT_OPS
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 545bf3721bc0..df9816b385eb 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask);
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-					dma_addr_t *dma_addr, gfp_t flag,
-					unsigned long attrs);
-
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_addr,
-				      unsigned long attrs);
-
 static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 {
 	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 1e5d5d92eb40..baedab8ac538 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,13 +2,10 @@
 #ifndef _ASM_X86_IOMMU_H
 #define _ASM_X86_IOMMU_H
 
-extern const struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_pass_through;
 
-int x86_dma_supported(struct device *dev, u64 mask);
-
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c87e864..2e8c8a09ecab 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+obj-y			+= alternative.o i8253.o hw_breakpoint.o
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index ecd486cb06ab..52e3abcf3e70 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 		}
 		__free_pages(page, get_order(size));
 	} else
-		return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
-						  attrs);
+		return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
 
 	return NULL;
 }
@@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
 		   dma_addr_t dma_addr, unsigned long attrs)
 {
 	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
-	dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+	dma_direct_free(dev, size, vaddr, dma_addr, attrs);
 }
 
 static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = {
 	.alloc				= gart_alloc_coherent,
 	.free				= gart_free_coherent,
 	.mapping_error			= gart_mapping_error,
-	.dma_supported			= x86_dma_supported,
+	.dma_supported			= dma_direct_supported,
 };
 
 static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f21815..5647853053bd 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/crash_dump.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/bitmap.h>
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
@@ -493,7 +494,7 @@ static const struct dma_map_ops calgary_dma_ops = {
 	.map_page = calgary_map_page,
 	.unmap_page = calgary_unmap_page,
 	.mapping_error = calgary_mapping_error,
-	.dma_supported = x86_dma_supported,
+	.dma_supported = dma_direct_supported,
 };
 
 static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b59820872ec7..db0b88ea8d1b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -18,7 +18,7 @@
 
 static int forbid_dac __read_mostly;
 
-const struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &dma_direct_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static int iommu_sac_force __read_mostly;
@@ -76,60 +76,6 @@ void __init pci_iommu_alloc(void)
 		}
 	}
 }
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-				 dma_addr_t *dma_addr, gfp_t flag,
-				 unsigned long attrs)
-{
-	struct page *page;
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	dma_addr_t addr;
-
-again:
-	page = NULL;
-	/* CMA can be used only in the context which permits sleeping */
-	if (gfpflags_allow_blocking(flag)) {
-		page = dma_alloc_from_contiguous(dev, count, get_order(size),
-						 flag);
-		if (page) {
-			addr = phys_to_dma(dev, page_to_phys(page));
-			if (addr + size > dev->coherent_dma_mask) {
-				dma_release_from_contiguous(dev, page, count);
-				page = NULL;
-			}
-		}
-	}
-	/* fallback */
-	if (!page)
-		page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
-	if (!page)
-		return NULL;
-
-	addr = phys_to_dma(dev, page_to_phys(page));
-	if (addr + size > dev->coherent_dma_mask) {
-		__free_pages(page, get_order(size));
-
-		if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
-		    !(flag & GFP_DMA)) {
-			flag = (flag & ~GFP_DMA32) | GFP_DMA;
-			goto again;
-		}
-
-		return NULL;
-	}
-	memset(page_address(page), 0, size);
-	*dma_addr = addr;
-	return page_address(page);
-}
-
-void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
-			       dma_addr_t dma_addr, unsigned long attrs)
-{
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	struct page *page = virt_to_page(vaddr);
-
-	if (!dma_release_from_contiguous(dev, page, count))
-		free_pages((unsigned long)vaddr, get_order(size));
-}
 
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
 {
@@ -243,16 +189,6 @@ int arch_dma_supported(struct device *dev, u64 mask)
 }
 EXPORT_SYMBOL(arch_dma_supported);
 
-int x86_dma_supported(struct device *dev, u64 mask)
-{
-	/* Copied from i386. Doesn't make much sense, because it will
-	   only work for pci_alloc_coherent.
-	   The caller just has to use GFP_DMA in this case. */
-	if (mask < DMA_BIT_MASK(24))
-		return 0;
-	return 1;
-}
-
 static int __init pci_iommu_init(void)
 {
 	struct iommu_table_entry *p;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 0ee0f8f34251..bcb6a9bf64ad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -30,8 +30,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	 */
 	flags |= __GFP_NOWARN;
 
-	vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
-					   attrs);
+	vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
 	if (vaddr)
 		return vaddr;
 
@@ -45,7 +44,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
 	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
 		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 	else
-		dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
 }
 
 static const struct dma_map_ops x86_swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 75577c1490c4..6c712fe11bdc 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -193,7 +193,7 @@ static const struct dma_map_ops sta2x11_dma_ops = {
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
 	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = x86_dma_supported,
+	.dma_supported = dma_direct_supported,
 };
 
 /* At setup time, we use our own ops if the device is a ConneXt one */
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 74788fdeb773..0bf19423b588 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -28,6 +28,7 @@
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/iommu-helper.h>
 #include <linux/iommu.h>
 #include <linux/delay.h>
@@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev)
 				dev_name(dev));
 
 		iommu_ignore_device(dev);
-		dev->dma_ops = &nommu_dma_ops;
+		dev->dma_ops = &dma_direct_ops;
 		goto out;
 	}
 	init_iommu_group(dev);
@@ -2680,7 +2681,7 @@ static void free_coherent(struct device *dev, size_t size,
  */
 static int amd_iommu_dma_supported(struct device *dev, u64 mask)
 {
-	if (!x86_dma_supported(dev, mask))
+	if (!dma_direct_supported(dev, mask))
 		return 0;
 	return check_device(dev);
 }
@@ -2794,7 +2795,7 @@ int __init amd_iommu_init_dma_ops(void)
 	 * continue to be SWIOTLB.
 	 */
 	if (!swiotlb)
-		dma_ops = &nommu_dma_ops;
+		dma_ops = &dma_direct_ops;
 
 	if (amd_iommu_unmap_flush)
 		pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 582fd01cb7d1..fd899b2a12bb 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -45,6 +45,7 @@
 #include <linux/pci-ats.h>
 #include <linux/memblock.h>
 #include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
 #include <linux/crash_dump.h>
 #include <asm/irq_remapping.h>
 #include <asm/cacheflush.h>
@@ -3871,7 +3872,7 @@ const struct dma_map_ops intel_dma_ops = {
 	.unmap_page = intel_unmap_page,
 	.mapping_error = intel_mapping_error,
 #ifdef CONFIG_X86
-	.dma_supported = x86_dma_supported,
+	.dma_supported = dma_direct_supported,
 #endif
 };
 
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 03/14] x86: use dma-direct
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

The generic dma-direct implementation is now functionally equivalent to
the x86 nommu dma_map implementation, so switch over to using it.

Note that the various iommu drivers are switched from x86_dma_supported
to dma_direct_supported to provide identical functionality, although the
checks looks fairly questionable for at least some of them.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/dma-mapping.h |  8 -----
 arch/x86/include/asm/iommu.h       |  3 --
 arch/x86/kernel/Makefile           |  2 +-
 arch/x86/kernel/amd_gart_64.c      |  7 ++--
 arch/x86/kernel/pci-calgary_64.c   |  3 +-
 arch/x86/kernel/pci-dma.c          | 66 +-------------------------------------
 arch/x86/kernel/pci-swiotlb.c      |  5 ++-
 arch/x86/pci/sta2x11-fixup.c       |  2 +-
 drivers/iommu/amd_iommu.c          |  7 ++--
 drivers/iommu/intel-iommu.c        |  3 +-
 11 files changed, 17 insertions(+), 90 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fa71a78ec99..10f482beda1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,7 @@ config X86
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
 	select CLOCKSOURCE_WATCHDOG
 	select DCACHE_WORD_ACCESS
+	select DMA_DIRECT_OPS
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 545bf3721bc0..df9816b385eb 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask);
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-					dma_addr_t *dma_addr, gfp_t flag,
-					unsigned long attrs);
-
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_addr,
-				      unsigned long attrs);
-
 static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 {
 	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 1e5d5d92eb40..baedab8ac538 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,13 +2,10 @@
 #ifndef _ASM_X86_IOMMU_H
 #define _ASM_X86_IOMMU_H
 
-extern const struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_pass_through;
 
-int x86_dma_supported(struct device *dev, u64 mask);
-
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c87e864..2e8c8a09ecab 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+obj-y			+= alternative.o i8253.o hw_breakpoint.o
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index ecd486cb06ab..52e3abcf3e70 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 		}
 		__free_pages(page, get_order(size));
 	} else
-		return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
-						  attrs);
+		return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
 
 	return NULL;
 }
@@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
 		   dma_addr_t dma_addr, unsigned long attrs)
 {
 	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
-	dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+	dma_direct_free(dev, size, vaddr, dma_addr, attrs);
 }
 
 static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = {
 	.alloc				= gart_alloc_coherent,
 	.free				= gart_free_coherent,
 	.mapping_error			= gart_mapping_error,
-	.dma_supported			= x86_dma_supported,
+	.dma_supported			= dma_direct_supported,
 };
 
 static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f21815..5647853053bd 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/crash_dump.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/bitmap.h>
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
@@ -493,7 +494,7 @@ static const struct dma_map_ops calgary_dma_ops = {
 	.map_page = calgary_map_page,
 	.unmap_page = calgary_unmap_page,
 	.mapping_error = calgary_mapping_error,
-	.dma_supported = x86_dma_supported,
+	.dma_supported = dma_direct_supported,
 };
 
 static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b59820872ec7..db0b88ea8d1b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -18,7 +18,7 @@
 
 static int forbid_dac __read_mostly;
 
-const struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &dma_direct_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static int iommu_sac_force __read_mostly;
@@ -76,60 +76,6 @@ void __init pci_iommu_alloc(void)
 		}
 	}
 }
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-				 dma_addr_t *dma_addr, gfp_t flag,
-				 unsigned long attrs)
-{
-	struct page *page;
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	dma_addr_t addr;
-
-again:
-	page = NULL;
-	/* CMA can be used only in the context which permits sleeping */
-	if (gfpflags_allow_blocking(flag)) {
-		page = dma_alloc_from_contiguous(dev, count, get_order(size),
-						 flag);
-		if (page) {
-			addr = phys_to_dma(dev, page_to_phys(page));
-			if (addr + size > dev->coherent_dma_mask) {
-				dma_release_from_contiguous(dev, page, count);
-				page = NULL;
-			}
-		}
-	}
-	/* fallback */
-	if (!page)
-		page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
-	if (!page)
-		return NULL;
-
-	addr = phys_to_dma(dev, page_to_phys(page));
-	if (addr + size > dev->coherent_dma_mask) {
-		__free_pages(page, get_order(size));
-
-		if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
-		    !(flag & GFP_DMA)) {
-			flag = (flag & ~GFP_DMA32) | GFP_DMA;
-			goto again;
-		}
-
-		return NULL;
-	}
-	memset(page_address(page), 0, size);
-	*dma_addr = addr;
-	return page_address(page);
-}
-
-void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
-			       dma_addr_t dma_addr, unsigned long attrs)
-{
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	struct page *page = virt_to_page(vaddr);
-
-	if (!dma_release_from_contiguous(dev, page, count))
-		free_pages((unsigned long)vaddr, get_order(size));
-}
 
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
 {
@@ -243,16 +189,6 @@ int arch_dma_supported(struct device *dev, u64 mask)
 }
 EXPORT_SYMBOL(arch_dma_supported);
 
-int x86_dma_supported(struct device *dev, u64 mask)
-{
-	/* Copied from i386. Doesn't make much sense, because it will
-	   only work for pci_alloc_coherent.
-	   The caller just has to use GFP_DMA in this case. */
-	if (mask < DMA_BIT_MASK(24))
-		return 0;
-	return 1;
-}
-
 static int __init pci_iommu_init(void)
 {
 	struct iommu_table_entry *p;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 0ee0f8f34251..bcb6a9bf64ad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -30,8 +30,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	 */
 	flags |= __GFP_NOWARN;
 
-	vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
-					   attrs);
+	vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
 	if (vaddr)
 		return vaddr;
 
@@ -45,7 +44,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
 	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
 		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 	else
-		dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
 }
 
 static const struct dma_map_ops x86_swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 75577c1490c4..6c712fe11bdc 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -193,7 +193,7 @@ static const struct dma_map_ops sta2x11_dma_ops = {
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
 	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = x86_dma_supported,
+	.dma_supported = dma_direct_supported,
 };
 
 /* At setup time, we use our own ops if the device is a ConneXt one */
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 74788fdeb773..0bf19423b588 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -28,6 +28,7 @@
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/iommu-helper.h>
 #include <linux/iommu.h>
 #include <linux/delay.h>
@@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev)
 				dev_name(dev));
 
 		iommu_ignore_device(dev);
-		dev->dma_ops = &nommu_dma_ops;
+		dev->dma_ops = &dma_direct_ops;
 		goto out;
 	}
 	init_iommu_group(dev);
@@ -2680,7 +2681,7 @@ static void free_coherent(struct device *dev, size_t size,
  */
 static int amd_iommu_dma_supported(struct device *dev, u64 mask)
 {
-	if (!x86_dma_supported(dev, mask))
+	if (!dma_direct_supported(dev, mask))
 		return 0;
 	return check_device(dev);
 }
@@ -2794,7 +2795,7 @@ int __init amd_iommu_init_dma_ops(void)
 	 * continue to be SWIOTLB.
 	 */
 	if (!swiotlb)
-		dma_ops = &nommu_dma_ops;
+		dma_ops = &dma_direct_ops;
 
 	if (amd_iommu_unmap_flush)
 		pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 582fd01cb7d1..fd899b2a12bb 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -45,6 +45,7 @@
 #include <linux/pci-ats.h>
 #include <linux/memblock.h>
 #include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
 #include <linux/crash_dump.h>
 #include <asm/irq_remapping.h>
 #include <asm/cacheflush.h>
@@ -3871,7 +3872,7 @@ const struct dma_map_ops intel_dma_ops = {
 	.unmap_page = intel_unmap_page,
 	.mapping_error = intel_mapping_error,
 #ifdef CONFIG_X86
-	.dma_supported = x86_dma_supported,
+	.dma_supported = dma_direct_supported,
 #endif
 };
 
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

The generic swiotlb dma ops were based on the x86 ones and provide
equivalent functionality, so use them.

Also fix the sta2x11 case.  For that SOC the dma map ops need an
additional physical to dma address translations.  For swiotlb buffers
that is done throught the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that functions
returns a direct allocation and not a swiotlb buffer.  With the
generic swiotlb and dma-direct code phys_to_dma is not always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to dma address translation
is needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/include/asm/device.h  |  3 +++
 arch/x86/include/asm/swiotlb.h |  8 -------
 arch/x86/kernel/pci-swiotlb.c  | 47 +-----------------------------------------
 arch/x86/pci/sta2x11-fixup.c   | 46 +++++------------------------------------
 4 files changed, 9 insertions(+), 95 deletions(-)

diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..812bd6c5d602 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
+#ifdef CONFIG_STA2X11
+	bool is_sta2x11 : 1;
+#endif
 };
 
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
 {
 }
 #endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-					void *vaddr, dma_addr_t dma_addr,
-					unsigned long attrs);
-
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
 
 int swiotlb __read_mostly;
 
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs)
-{
-	void *vaddr;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA
-	 * memory allocation ultimately failed.
-	 */
-	flags |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-	if (vaddr)
-		return vaddr;
-
-	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_addr,
-				      unsigned long attrs)
-{
-	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
-		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
-	else
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
-	.mapping_error = swiotlb_dma_mapping_error,
-	.alloc = x86_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.dma_supported = NULL,
-};
-
 /*
  * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
  *
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
 {
 	if (swiotlb) {
 		swiotlb_init(0);
-		dma_ops = &x86_swiotlb_dma_ops;
+		dma_ops = &swiotlb_dma_ops;
 	}
 }
 
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
 	return p;
 }
 
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- *     returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
-					    size_t size,
-					    dma_addr_t *dma_handle,
-					    gfp_t flags,
-					    unsigned long attrs)
-{
-	void *vaddr;
-
-	vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
-	*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
-	return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
-	.alloc = sta2x11_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = dma_direct_supported,
-};
-
 /* At setup time, we use our own ops if the device is a ConneXt one */
 static void sta2x11_setup_pdev(struct pci_dev *pdev)
 {
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
 		return;
 	pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
 	pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-	pdev->dev.dma_ops = &sta2x11_dma_ops;
+	pdev->dev.dma_ops = &swiotlb_dma_ops;
+	pdev->dev.archdata.is_sta2x11 = true;
 
 	/* We must enable all devices as master, for audio DMA to work */
 	pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	struct sta2x11_mapping *map;
 
-	if (dev->dma_ops != &sta2x11_dma_ops) {
+	if (!dev->archdata.is_sta2x11) {
 		if (!dev->dma_mask)
 			return false;
 		return addr + size - 1 <= *dev->dma_mask;
@@ -249,7 +213,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
  */
 dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return paddr;
 	return p2a(paddr, to_pci_dev(dev));
 }
@@ -261,7 +225,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  */
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return daddr;
 	return a2p(daddr, to_pci_dev(dev));
 }
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

The generic swiotlb dma ops were based on the x86 ones and provide
equivalent functionality, so use them.

Also fix the sta2x11 case.  For that SOC the dma map ops need an
additional physical to dma address translations.  For swiotlb buffers
that is done throught the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that functions
returns a direct allocation and not a swiotlb buffer.  With the
generic swiotlb and dma-direct code phys_to_dma is not always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to dma address translation
is needed.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/include/asm/device.h  |  3 +++
 arch/x86/include/asm/swiotlb.h |  8 -------
 arch/x86/kernel/pci-swiotlb.c  | 47 +-----------------------------------------
 arch/x86/pci/sta2x11-fixup.c   | 46 +++++------------------------------------
 4 files changed, 9 insertions(+), 95 deletions(-)

diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..812bd6c5d602 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
+#ifdef CONFIG_STA2X11
+	bool is_sta2x11 : 1;
+#endif
 };
 
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
 {
 }
 #endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-					void *vaddr, dma_addr_t dma_addr,
-					unsigned long attrs);
-
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
 
 int swiotlb __read_mostly;
 
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs)
-{
-	void *vaddr;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA
-	 * memory allocation ultimately failed.
-	 */
-	flags |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-	if (vaddr)
-		return vaddr;
-
-	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_addr,
-				      unsigned long attrs)
-{
-	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
-		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
-	else
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
-	.mapping_error = swiotlb_dma_mapping_error,
-	.alloc = x86_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.dma_supported = NULL,
-};
-
 /*
  * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
  *
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
 {
 	if (swiotlb) {
 		swiotlb_init(0);
-		dma_ops = &x86_swiotlb_dma_ops;
+		dma_ops = &swiotlb_dma_ops;
 	}
 }
 
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
 	return p;
 }
 
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- *     returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
-					    size_t size,
-					    dma_addr_t *dma_handle,
-					    gfp_t flags,
-					    unsigned long attrs)
-{
-	void *vaddr;
-
-	vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
-	*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
-	return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
-	.alloc = sta2x11_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = dma_direct_supported,
-};
-
 /* At setup time, we use our own ops if the device is a ConneXt one */
 static void sta2x11_setup_pdev(struct pci_dev *pdev)
 {
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
 		return;
 	pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
 	pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-	pdev->dev.dma_ops = &sta2x11_dma_ops;
+	pdev->dev.dma_ops = &swiotlb_dma_ops;
+	pdev->dev.archdata.is_sta2x11 = true;
 
 	/* We must enable all devices as master, for audio DMA to work */
 	pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	struct sta2x11_mapping *map;
 
-	if (dev->dma_ops != &sta2x11_dma_ops) {
+	if (!dev->archdata.is_sta2x11) {
 		if (!dev->dma_mask)
 			return false;
 		return addr + size - 1 <= *dev->dma_mask;
@@ -249,7 +213,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
  */
 dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return paddr;
 	return p2a(paddr, to_pci_dev(dev));
 }
@@ -261,7 +225,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  */
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return daddr;
 	return a2p(daddr, to_pci_dev(dev));
 }
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 05/14] x86/amd_gart: look at coherent_dma_mask instead of GFP_DMA
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

We want to phase out looking at the magic GFP_DMA flag in the dma mapping
routines, so switch the gart driver to use the coherent_dma_mask instead,
which is used to select the GFP_DMA flag in the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kernel/amd_gart_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 52e3abcf3e70..79ac6caaaabb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 	unsigned long align_mask;
 	struct page *page;
 
-	if (force_iommu && !(flag & GFP_DMA)) {
+	if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
 		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
 		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
 		if (!page)
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 05/14] x86/amd_gart: look at coherent_dma_mask instead of GFP_DMA
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

We want to phase out looking at the magic GFP_DMA flag in the dma mapping
routines, so switch the gart driver to use the coherent_dma_mask instead,
which is used to select the GFP_DMA flag in the caller.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/kernel/amd_gart_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 52e3abcf3e70..79ac6caaaabb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 	unsigned long align_mask;
 	struct page *page;
 
-	if (force_iommu && !(flag & GFP_DMA)) {
+	if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
 		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
 		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
 		if (!page)
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 06/14] x86/amd_gart: use dma_direct_{alloc,free}
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

This gains support for CMA allocations for the force_iommu case, and
cleans up the code a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kernel/amd_gart_64.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 79ac6caaaabb..f299d8a479bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -480,29 +480,21 @@ static void *
 gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 		    gfp_t flag, unsigned long attrs)
 {
-	dma_addr_t paddr;
-	unsigned long align_mask;
-	struct page *page;
-
-	if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
-		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
-		if (!page)
-			return NULL;
-
-		align_mask = (1UL << get_order(size)) - 1;
-		paddr = dma_map_area(dev, page_to_phys(page), size,
-				     DMA_BIDIRECTIONAL, align_mask);
-
-		flush_gart();
-		if (paddr != bad_dma_addr) {
-			*dma_addr = paddr;
-			return page_address(page);
-		}
-		__free_pages(page, get_order(size));
-	} else
-		return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	void *vaddr;
+
+	vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	if (!vaddr ||
+	    !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+		return vaddr;
 
+	*dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
+			DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
+	flush_gart();
+	if (unlikely(*dma_addr == bad_dma_addr))
+		goto out_free;
+	return vaddr;
+out_free:
+	dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
 	return NULL;
 }
 
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 06/14] x86/amd_gart: use dma_direct_{alloc,free}
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

This gains support for CMA allocations for the force_iommu case, and
cleans up the code a bit.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/kernel/amd_gart_64.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 79ac6caaaabb..f299d8a479bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -480,29 +480,21 @@ static void *
 gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 		    gfp_t flag, unsigned long attrs)
 {
-	dma_addr_t paddr;
-	unsigned long align_mask;
-	struct page *page;
-
-	if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
-		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
-		if (!page)
-			return NULL;
-
-		align_mask = (1UL << get_order(size)) - 1;
-		paddr = dma_map_area(dev, page_to_phys(page), size,
-				     DMA_BIDIRECTIONAL, align_mask);
-
-		flush_gart();
-		if (paddr != bad_dma_addr) {
-			*dma_addr = paddr;
-			return page_address(page);
-		}
-		__free_pages(page, get_order(size));
-	} else
-		return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	void *vaddr;
+
+	vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	if (!vaddr ||
+	    !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+		return vaddr;
 
+	*dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
+			DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
+	flush_gart();
+	if (unlikely(*dma_addr == bad_dma_addr))
+		goto out_free;
+	return vaddr;
+out_free:
+	dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
 	return NULL;
 }
 
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 07/14] iommu/amd_iommu: use dma_direct_{alloc,free}
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

This cleans up the code a lot by removing duplicate logic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/Kconfig     |  1 +
 drivers/iommu/amd_iommu.c | 68 +++++++++++++++--------------------------------
 2 files changed, 22 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..dc7c1914645d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -107,6 +107,7 @@ config IOMMU_PGTABLES_L2
 # AMD IOMMU support
 config AMD_IOMMU
 	bool "AMD IOMMU support"
+	select DMA_DIRECT_OPS
 	select SWIOTLB
 	select PCI_MSI
 	select PCI_ATS
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0bf19423b588..83819d0cbf90 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2600,51 +2600,32 @@ static void *alloc_coherent(struct device *dev, size_t size,
 			    unsigned long attrs)
 {
 	u64 dma_mask = dev->coherent_dma_mask;
-	struct protection_domain *domain;
-	struct dma_ops_domain *dma_dom;
-	struct page *page;
-
-	domain = get_domain(dev);
-	if (PTR_ERR(domain) == -EINVAL) {
-		page = alloc_pages(flag, get_order(size));
-		*dma_addr = page_to_phys(page);
-		return page_address(page);
-	} else if (IS_ERR(domain))
-		return NULL;
+	struct protection_domain *domain = get_domain(dev);
+	bool is_direct = false;
+	void *virt_addr;
 
-	dma_dom   = to_dma_ops_domain(domain);
-	size	  = PAGE_ALIGN(size);
-	dma_mask  = dev->coherent_dma_mask;
-	flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-	flag     |= __GFP_ZERO;
-
-	page = alloc_pages(flag | __GFP_NOWARN,  get_order(size));
-	if (!page) {
-		if (!gfpflags_allow_blocking(flag))
-			return NULL;
-
-		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-						 get_order(size), flag);
-		if (!page)
+	if (IS_ERR(domain)) {
+		if (PTR_ERR(domain) != -EINVAL)
 			return NULL;
+		is_direct = true;
 	}
 
+	virt_addr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	if (!virt_addr || is_direct)
+		return virt_addr;
+
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
-	*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
-				 size, DMA_BIDIRECTIONAL, dma_mask);
-
+	*dma_addr = __map_single(dev, to_dma_ops_domain(domain),
+			virt_to_phys(virt_addr), PAGE_ALIGN(size),
+			DMA_BIDIRECTIONAL, dma_mask);
 	if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
 		goto out_free;
-
-	return page_address(page);
+	return virt_addr;
 
 out_free:
-
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, get_order(size));
-
+	dma_direct_free(dev, size, virt_addr, *dma_addr, attrs);
 	return NULL;
 }
 
@@ -2655,24 +2636,17 @@ static void free_coherent(struct device *dev, size_t size,
 			  void *virt_addr, dma_addr_t dma_addr,
 			  unsigned long attrs)
 {
-	struct protection_domain *domain;
-	struct dma_ops_domain *dma_dom;
-	struct page *page;
+	struct protection_domain *domain = get_domain(dev);
 
-	page = virt_to_page(virt_addr);
 	size = PAGE_ALIGN(size);
 
-	domain = get_domain(dev);
-	if (IS_ERR(domain))
-		goto free_mem;
+	if (!IS_ERR(domain)) {
+		struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain);
 
-	dma_dom = to_dma_ops_domain(domain);
-
-	__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+		__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+	}
 
-free_mem:
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, get_order(size));
+	dma_direct_free(dev, size, virt_addr, dma_addr, attrs);
 }
 
 /*
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 07/14] iommu/amd_iommu: use dma_direct_{alloc,free}
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

This cleans up the code a lot by removing duplicate logic.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 drivers/iommu/Kconfig     |  1 +
 drivers/iommu/amd_iommu.c | 68 +++++++++++++++--------------------------------
 2 files changed, 22 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..dc7c1914645d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -107,6 +107,7 @@ config IOMMU_PGTABLES_L2
 # AMD IOMMU support
 config AMD_IOMMU
 	bool "AMD IOMMU support"
+	select DMA_DIRECT_OPS
 	select SWIOTLB
 	select PCI_MSI
 	select PCI_ATS
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0bf19423b588..83819d0cbf90 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2600,51 +2600,32 @@ static void *alloc_coherent(struct device *dev, size_t size,
 			    unsigned long attrs)
 {
 	u64 dma_mask = dev->coherent_dma_mask;
-	struct protection_domain *domain;
-	struct dma_ops_domain *dma_dom;
-	struct page *page;
-
-	domain = get_domain(dev);
-	if (PTR_ERR(domain) == -EINVAL) {
-		page = alloc_pages(flag, get_order(size));
-		*dma_addr = page_to_phys(page);
-		return page_address(page);
-	} else if (IS_ERR(domain))
-		return NULL;
+	struct protection_domain *domain = get_domain(dev);
+	bool is_direct = false;
+	void *virt_addr;
 
-	dma_dom   = to_dma_ops_domain(domain);
-	size	  = PAGE_ALIGN(size);
-	dma_mask  = dev->coherent_dma_mask;
-	flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-	flag     |= __GFP_ZERO;
-
-	page = alloc_pages(flag | __GFP_NOWARN,  get_order(size));
-	if (!page) {
-		if (!gfpflags_allow_blocking(flag))
-			return NULL;
-
-		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-						 get_order(size), flag);
-		if (!page)
+	if (IS_ERR(domain)) {
+		if (PTR_ERR(domain) != -EINVAL)
 			return NULL;
+		is_direct = true;
 	}
 
+	virt_addr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	if (!virt_addr || is_direct)
+		return virt_addr;
+
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
-	*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
-				 size, DMA_BIDIRECTIONAL, dma_mask);
-
+	*dma_addr = __map_single(dev, to_dma_ops_domain(domain),
+			virt_to_phys(virt_addr), PAGE_ALIGN(size),
+			DMA_BIDIRECTIONAL, dma_mask);
 	if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
 		goto out_free;
-
-	return page_address(page);
+	return virt_addr;
 
 out_free:
-
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, get_order(size));
-
+	dma_direct_free(dev, size, virt_addr, *dma_addr, attrs);
 	return NULL;
 }
 
@@ -2655,24 +2636,17 @@ static void free_coherent(struct device *dev, size_t size,
 			  void *virt_addr, dma_addr_t dma_addr,
 			  unsigned long attrs)
 {
-	struct protection_domain *domain;
-	struct dma_ops_domain *dma_dom;
-	struct page *page;
+	struct protection_domain *domain = get_domain(dev);
 
-	page = virt_to_page(virt_addr);
 	size = PAGE_ALIGN(size);
 
-	domain = get_domain(dev);
-	if (IS_ERR(domain))
-		goto free_mem;
+	if (!IS_ERR(domain)) {
+		struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain);
 
-	dma_dom = to_dma_ops_domain(domain);
-
-	__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+		__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
+	}
 
-free_mem:
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, get_order(size));
+	dma_direct_free(dev, size, virt_addr, dma_addr, attrs);
 }
 
 /*
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 08/14] iommu/intel-iommu: cleanup intel_{alloc,free}_coherent
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

Use the dma_direct_* helpers and cleanup the code flow.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/Kconfig       |  1 +
 drivers/iommu/intel-iommu.c | 62 ++++++++++++---------------------------------
 2 files changed, 17 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dc7c1914645d..df171cb85822 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -143,6 +143,7 @@ config DMAR_TABLE
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
+	select DMA_DIRECT_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
 	select DMAR_TABLE
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fd899b2a12bb..24d1b1b42013 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/mempool.h>
 #include <linux/memory.h>
 #include <linux/cpu.h>
@@ -3708,61 +3709,30 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
 				  unsigned long attrs)
 {
-	struct page *page = NULL;
-	int order;
-
-	size = PAGE_ALIGN(size);
-	order = get_order(size);
-
-	if (!iommu_no_mapping(dev))
-		flags &= ~(GFP_DMA | GFP_DMA32);
-	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
-		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
-			flags |= GFP_DMA;
-		else
-			flags |= GFP_DMA32;
-	}
+	void *vaddr;
 
-	if (gfpflags_allow_blocking(flags)) {
-		unsigned int count = size >> PAGE_SHIFT;
+	vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+	if (iommu_no_mapping(dev) || !vaddr)
+		return vaddr;
 
-		page = dma_alloc_from_contiguous(dev, count, order, flags);
-		if (page && iommu_no_mapping(dev) &&
-		    page_to_phys(page) + size > dev->coherent_dma_mask) {
-			dma_release_from_contiguous(dev, page, count);
-			page = NULL;
-		}
-	}
-
-	if (!page)
-		page = alloc_pages(flags, order);
-	if (!page)
-		return NULL;
-	memset(page_address(page), 0, size);
-
-	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
-					 DMA_BIDIRECTIONAL,
-					 dev->coherent_dma_mask);
-	if (*dma_handle)
-		return page_address(page);
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, order);
+	*dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
+			PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
+			dev->coherent_dma_mask);
+	if (!*dma_handle)
+		goto out_free_pages;
+	return vaddr;
 
+out_free_pages:
+	dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
 	return NULL;
 }
 
 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
 				dma_addr_t dma_handle, unsigned long attrs)
 {
-	int order;
-	struct page *page = virt_to_page(vaddr);
-
-	size = PAGE_ALIGN(size);
-	order = get_order(size);
-
-	intel_unmap(dev, dma_handle, size);
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, order);
+	if (!iommu_no_mapping(dev))
+		intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
+	dma_direct_free(dev, size, vaddr, dma_handle, attrs);
 }
 
 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 08/14] iommu/intel-iommu: cleanup intel_{alloc,free}_coherent
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

Use the dma_direct_* helpers and cleanup the code flow.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 drivers/iommu/Kconfig       |  1 +
 drivers/iommu/intel-iommu.c | 62 ++++++++++++---------------------------------
 2 files changed, 17 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dc7c1914645d..df171cb85822 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -143,6 +143,7 @@ config DMAR_TABLE
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
+	select DMA_DIRECT_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
 	select DMAR_TABLE
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fd899b2a12bb..24d1b1b42013 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/mempool.h>
 #include <linux/memory.h>
 #include <linux/cpu.h>
@@ -3708,61 +3709,30 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
 				  unsigned long attrs)
 {
-	struct page *page = NULL;
-	int order;
-
-	size = PAGE_ALIGN(size);
-	order = get_order(size);
-
-	if (!iommu_no_mapping(dev))
-		flags &= ~(GFP_DMA | GFP_DMA32);
-	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
-		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
-			flags |= GFP_DMA;
-		else
-			flags |= GFP_DMA32;
-	}
+	void *vaddr;
 
-	if (gfpflags_allow_blocking(flags)) {
-		unsigned int count = size >> PAGE_SHIFT;
+	vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+	if (iommu_no_mapping(dev) || !vaddr)
+		return vaddr;
 
-		page = dma_alloc_from_contiguous(dev, count, order, flags);
-		if (page && iommu_no_mapping(dev) &&
-		    page_to_phys(page) + size > dev->coherent_dma_mask) {
-			dma_release_from_contiguous(dev, page, count);
-			page = NULL;
-		}
-	}
-
-	if (!page)
-		page = alloc_pages(flags, order);
-	if (!page)
-		return NULL;
-	memset(page_address(page), 0, size);
-
-	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
-					 DMA_BIDIRECTIONAL,
-					 dev->coherent_dma_mask);
-	if (*dma_handle)
-		return page_address(page);
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, order);
+	*dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
+			PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
+			dev->coherent_dma_mask);
+	if (!*dma_handle)
+		goto out_free_pages;
+	return vaddr;
 
+out_free_pages:
+	dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
 	return NULL;
 }
 
 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
 				dma_addr_t dma_handle, unsigned long attrs)
 {
-	int order;
-	struct page *page = virt_to_page(vaddr);
-
-	size = PAGE_ALIGN(size);
-	order = get_order(size);
-
-	intel_unmap(dev, dma_handle, size);
-	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-		__free_pages(page, order);
+	if (!iommu_no_mapping(dev))
+		intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
+	dma_direct_free(dev, size, vaddr, dma_handle, attrs);
 }
 
 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 09/14] x86: remove dma_alloc_coherent_gfp_flags
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

All dma_ops implementations used on x86 now take care of setting their own
required GFP_ masks for the allocation.  And given that the common code
now clears harmful flags itself that means we can stop the flags in all
the iommu implementations as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/include/asm/dma-mapping.h | 11 -----------
 arch/x86/kernel/pci-calgary_64.c   |  2 --
 arch/x86/kernel/pci-dma.c          |  2 --
 arch/x86/mm/mem_encrypt.c          |  7 -------
 4 files changed, 22 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index df9816b385eb..89ce4bfd241f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,15 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask);
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
-static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
-{
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
-		gfp |= GFP_DMA;
-#ifdef CONFIG_X86_64
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-		gfp |= GFP_DMA32;
-#endif
-       return gfp;
-}
-
 #endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5647853053bd..bbfc8b1e9104 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -446,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
 	npages = size >> PAGE_SHIFT;
 	order = get_order(size);
 
-	flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
 	/* alloc enough pages (and possibly more) */
 	ret = (void *)__get_free_pages(flag, order);
 	if (!ret)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index db0b88ea8d1b..14437116ffea 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,8 +82,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
 	if (!*dev)
 		*dev = &x86_dma_fallback_dev;
 
-	*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
-
 	if (!is_device_dma_capable(*dev))
 		return false;
 	return true;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 75dc8b525c12..66beedc8fe3d 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -208,13 +208,6 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	void *vaddr = NULL;
 
 	order = get_order(size);
-
-	/*
-	 * Memory will be memset to zero after marking decrypted, so don't
-	 * bother clearing it before.
-	 */
-	gfp &= ~__GFP_ZERO;
-
 	page = alloc_pages_node(dev_to_node(dev), gfp, order);
 	if (page) {
 		dma_addr_t addr;
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 09/14] x86: remove dma_alloc_coherent_gfp_flags
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

All dma_ops implementations used on x86 now take care of setting their own
required GFP_ masks for the allocation.  And given that the common code
now clears harmful flags itself that means we can stop the flags in all
the iommu implementations as well.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/include/asm/dma-mapping.h | 11 -----------
 arch/x86/kernel/pci-calgary_64.c   |  2 --
 arch/x86/kernel/pci-dma.c          |  2 --
 arch/x86/mm/mem_encrypt.c          |  7 -------
 4 files changed, 22 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index df9816b385eb..89ce4bfd241f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,15 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask);
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
-static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
-{
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
-		gfp |= GFP_DMA;
-#ifdef CONFIG_X86_64
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-		gfp |= GFP_DMA32;
-#endif
-       return gfp;
-}
-
 #endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5647853053bd..bbfc8b1e9104 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -446,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
 	npages = size >> PAGE_SHIFT;
 	order = get_order(size);
 
-	flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
 	/* alloc enough pages (and possibly more) */
 	ret = (void *)__get_free_pages(flag, order);
 	if (!ret)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index db0b88ea8d1b..14437116ffea 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,8 +82,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
 	if (!*dev)
 		*dev = &x86_dma_fallback_dev;
 
-	*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
-
 	if (!is_device_dma_capable(*dev))
 		return false;
 	return true;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 75dc8b525c12..66beedc8fe3d 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -208,13 +208,6 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	void *vaddr = NULL;
 
 	order = get_order(size);
-
-	/*
-	 * Memory will be memset to zero after marking decrypted, so don't
-	 * bother clearing it before.
-	 */
-	gfp &= ~__GFP_ZERO;
-
 	page = alloc_pages_node(dev_to_node(dev), gfp, order);
 	if (page) {
 		dma_addr_t addr;
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 10/14] set_memory.h: provide set_memory_{en,de}crypted stubs
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/set_memory.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index e5140648f638..da5178216da5 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
+static inline int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	return 0;
+}
+
+static inline int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	return 0;
+}
+#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
 #endif /* _LINUX_SET_MEMORY_H_ */
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 10/14] set_memory.h: provide set_memory_{en,de}crypted stubs
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 include/linux/set_memory.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index e5140648f638..da5178216da5 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
+static inline int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	return 0;
+}
+
+static inline int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	return 0;
+}
+#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
 #endif /* _LINUX_SET_MEMORY_H_ */
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

Now that set_memory_decrypted is always available we can just call it
directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/include/asm/mem_encrypt.h |  2 --
 arch/x86/mm/mem_encrypt.c          |  9 ---------
 lib/swiotlb.c                      | 12 ++++++------
 3 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..9da0b63c8fc7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,8 +48,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
 
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
-
 bool sme_active(void);
 bool sev_active(void);
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 66beedc8fe3d..d3b80d5f9828 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -446,15 +446,6 @@ void __init mem_encrypt_init(void)
 			     : "Secure Memory Encryption (SME)");
 }
 
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
-{
-	WARN(PAGE_ALIGN(size) != size,
-	     "size is not page-aligned (%#lx)\n", size);
-
-	/* Make the SWIOTLB buffer area decrypted */
-	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
-}
-
 struct sme_populate_pgd_data {
 	void	*pgtable_area;
 	pgd_t	*pgd;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..005d1d87bb2e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
 #include <linux/gfp.h>
 #include <linux/scatterlist.h>
 #include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
 /* For swiotlb, clear memory encryption mask from dma addresses */
 static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
 				      phys_addr_t address)
@@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
 
 	vaddr = phys_to_virt(io_tlb_start);
 	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-	swiotlb_set_mem_attributes(vaddr, bytes);
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 
 	vaddr = phys_to_virt(io_tlb_overflow_buffer);
 	bytes = PAGE_ALIGN(io_tlb_overflow);
-	swiotlb_set_mem_attributes(vaddr, bytes);
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 }
 
@@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	io_tlb_start = virt_to_phys(tlb);
 	io_tlb_end = io_tlb_start + bytes;
 
-	swiotlb_set_mem_attributes(tlb, bytes);
+	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
 
 	/*
@@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!v_overflow_buffer)
 		goto cleanup2;
 
-	swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+	set_memory_decrypted((unsigned long)v_overflow_buffer,
+			io_tlb_overflow >> PAGE_SHIFT);
 	memset(v_overflow_buffer, 0, io_tlb_overflow);
 	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
 
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

Now that set_memory_decrypted is always available we can just call it
directly.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/include/asm/mem_encrypt.h |  2 --
 arch/x86/mm/mem_encrypt.c          |  9 ---------
 lib/swiotlb.c                      | 12 ++++++------
 3 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..9da0b63c8fc7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,8 +48,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
 
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
-
 bool sme_active(void);
 bool sev_active(void);
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 66beedc8fe3d..d3b80d5f9828 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -446,15 +446,6 @@ void __init mem_encrypt_init(void)
 			     : "Secure Memory Encryption (SME)");
 }
 
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
-{
-	WARN(PAGE_ALIGN(size) != size,
-	     "size is not page-aligned (%#lx)\n", size);
-
-	/* Make the SWIOTLB buffer area decrypted */
-	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
-}
-
 struct sme_populate_pgd_data {
 	void	*pgtable_area;
 	pgd_t	*pgd;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..005d1d87bb2e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
 #include <linux/gfp.h>
 #include <linux/scatterlist.h>
 #include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
 /* For swiotlb, clear memory encryption mask from dma addresses */
 static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
 				      phys_addr_t address)
@@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
 
 	vaddr = phys_to_virt(io_tlb_start);
 	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-	swiotlb_set_mem_attributes(vaddr, bytes);
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 
 	vaddr = phys_to_virt(io_tlb_overflow_buffer);
 	bytes = PAGE_ALIGN(io_tlb_overflow);
-	swiotlb_set_mem_attributes(vaddr, bytes);
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 }
 
@@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	io_tlb_start = virt_to_phys(tlb);
 	io_tlb_end = io_tlb_start + bytes;
 
-	swiotlb_set_mem_attributes(tlb, bytes);
+	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
 
 	/*
@@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!v_overflow_buffer)
 		goto cleanup2;
 
-	swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+	set_memory_decrypted((unsigned long)v_overflow_buffer,
+			io_tlb_overflow >> PAGE_SHIFT);
 	memset(v_overflow_buffer, 0, io_tlb_overflow);
 	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
 
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 12/14] dma-direct: handle the memory encryption bit in common code
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

Give the basic phys_to_dma and dma_to_phys helpers a __-prefix and add
the memory encryption mask to the non-prefixed versions.  Use the
__-prefixed versions directly instead of clearing the mask again in
various places.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/include/asm/dma-direct.h                  |  4 ++--
 arch/mips/cavium-octeon/dma-octeon.c               | 10 ++++-----
 .../include/asm/mach-cavium-octeon/dma-coherence.h |  4 ++--
 .../include/asm/mach-loongson64/dma-coherence.h    | 10 ++++-----
 arch/mips/loongson64/common/dma-swiotlb.c          |  4 ++--
 arch/powerpc/include/asm/dma-direct.h              |  4 ++--
 arch/x86/Kconfig                                   |  2 +-
 arch/x86/include/asm/dma-direct.h                  | 25 ++--------------------
 arch/x86/mm/mem_encrypt.c                          |  2 +-
 arch/x86/pci/sta2x11-fixup.c                       |  6 +++---
 include/linux/dma-direct.h                         | 21 ++++++++++++++++--
 lib/swiotlb.c                                      | 25 ++++++++--------------
 12 files changed, 53 insertions(+), 64 deletions(-)

diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h
index 5b0a8a421894..b67e5fc1fe43 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -2,13 +2,13 @@
 #ifndef ASM_ARM_DMA_DIRECT_H
 #define ASM_ARM_DMA_DIRECT_H 1
 
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	unsigned int offset = paddr & ~PAGE_MASK;
 	return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset;
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 {
 	unsigned int offset = dev_addr & ~PAGE_MASK;
 	return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c7bb8a407041..7b335ab21697 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -10,7 +10,7 @@
  * IP32 changes by Ilya.
  * Copyright (C) 2010 Cavium Networks, Inc.
  */
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/scatterlist.h>
 #include <linux/bootmem.h>
 #include <linux/export.h>
@@ -182,7 +182,7 @@ struct octeon_dma_map_ops {
 	phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
 };
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
 						      struct octeon_dma_map_ops,
@@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 
 	return ops->phys_to_dma(dev, paddr);
 }
-EXPORT_SYMBOL(phys_to_dma);
+EXPORT_SYMBOL(__phys_to_dma);
 
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
 						      struct octeon_dma_map_ops,
@@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 
 	return ops->dma_to_phys(dev, daddr);
 }
-EXPORT_SYMBOL(dma_to_phys);
+EXPORT_SYMBOL(__dma_to_phys);
 
 static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
 	.dma_map_ops = {
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index 138edf6b5b48..6eb1ee548b11 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 
 struct dma_map_ops;
 extern const struct dma_map_ops *octeon_pci_dma_map_ops;
diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
index b1b575f5c6c1..64fc44dec0a8 100644
--- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h
+++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
@@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
 					  size_t size)
 {
 #ifdef CONFIG_CPU_LOONGSON3
-	return phys_to_dma(dev, virt_to_phys(addr));
+	return __phys_to_dma(dev, virt_to_phys(addr));
 #else
 	return virt_to_phys(addr) | 0x80000000;
 #endif
@@ -41,7 +41,7 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
 					       struct page *page)
 {
 #ifdef CONFIG_CPU_LOONGSON3
-	return phys_to_dma(dev, page_to_phys(page));
+	return __phys_to_dma(dev, page_to_phys(page));
 #else
 	return page_to_phys(page) | 0x80000000;
 #endif
@@ -51,7 +51,7 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
 	dma_addr_t dma_addr)
 {
 #if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT)
-	return dma_to_phys(dev, dma_addr);
+	return __dma_to_phys(dev, dma_addr);
 #elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT)
 	return (dma_addr > 0x8fffffff) ? dma_addr : (dma_addr & 0x0fffffff);
 #else
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 7bbcf89475f3..6a739f8ae110 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -63,7 +63,7 @@ static int loongson_dma_supported(struct device *dev, u64 mask)
 	return swiotlb_dma_supported(dev, mask);
 }
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	long nid;
 #ifdef CONFIG_PHYS48_TO_HT40
@@ -75,7 +75,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 	return paddr;
 }
 
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	long nid;
 #ifdef CONFIG_PHYS48_TO_HT40
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index a5b59c765426..7702875aabb7 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -17,12 +17,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	return paddr + get_dma_offset(dev);
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	return daddr - get_dma_offset(dev);
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 10f482beda1b..1ca4f0874517 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,7 +54,6 @@ config X86
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
-	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_REFCOUNT
@@ -692,6 +691,7 @@ config X86_SUPPORTS_MEMORY_FAILURE
 config STA2X11
 	bool "STA2X11 Companion Chip Support"
 	depends on X86_32_NON_STANDARD && PCI
+	select ARCH_HAS_PHYS_TO_DMA
 	select X86_DEV_DMA_OPS
 	select X86_DMA_REMAP
 	select SWIOTLB
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
index 1295bc622ebe..1a19251eaac9 100644
--- a/arch/x86/include/asm/dma-direct.h
+++ b/arch/x86/include/asm/dma-direct.h
@@ -2,29 +2,8 @@
 #ifndef ASM_X86_DMA_DIRECT_H
 #define ASM_X86_DMA_DIRECT_H 1
 
-#include <linux/mem_encrypt.h>
-
-#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
 bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
-#else
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-	if (!dev->dma_mask)
-		return 0;
-
-	return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
-	return __sme_set(paddr);
-}
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
-	return __sme_clr(daddr);
-}
-#endif /* CONFIG_X86_DMA_REMAP */
 #endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d3b80d5f9828..1a05bea831a8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -216,7 +216,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		 * Since we will be clearing the encryption bit, check the
 		 * mask with it already cleared.
 		 */
-		addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
+		addr = __phys_to_dma(dev, page_to_phys(page));
 		if ((addr + size) > dev->coherent_dma_mask) {
 			__free_pages(page, get_order(size));
 		} else {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index eac58e03f43c..7a5bafb76d77 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -207,11 +207,11 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 }
 
 /**
- * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
+ * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
  * @dev: device for a PCI device
  * @paddr: Physical address
  */
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	if (!dev->archdata.is_sta2x11)
 		return paddr;
@@ -223,7 +223,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  * @dev: device for a PCI device
  * @daddr: STA2x11 AMBA DMA address
  */
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	if (!dev->archdata.is_sta2x11)
 		return daddr;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bcdb1a3e4b1f..53ad6a47f513 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -3,18 +3,19 @@
 #define _LINUX_DMA_DIRECT_H 1
 
 #include <linux/dma-mapping.h>
+#include <linux/mem_encrypt.h>
 
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
 #else
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	dma_addr_t dev_addr = (dma_addr_t)paddr;
 
 	return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 {
 	phys_addr_t paddr = (phys_addr_t)dev_addr;
 
@@ -30,6 +31,22 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 }
 #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
 
+/*
+ * If memory encryption is supported, phys_to_dma will set the memory encryption
+ * bit in the DMA address, and dma_to_phys will clear it.  The raw __phys_to_dma
+ * and __dma_to_phys versions should only be used on non-encrypted memory for
+ * special occasions like DMA coherent buffers.
+ */
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return __sme_set(__phys_to_dma(dev, paddr));
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return __sme_clr(__dma_to_phys(dev, daddr));
+}
+
 #ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN
 void dma_mark_clean(void *addr, size_t size);
 #else
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 005d1d87bb2e..8b06b4485e65 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-/* For swiotlb, clear memory encryption mask from dma addresses */
-static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
-				      phys_addr_t address)
-{
-	return __sme_clr(phys_to_dma(hwdev, address));
-}
-
 /* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
@@ -622,7 +615,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 		return SWIOTLB_MAP_ERROR;
 	}
 
-	start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
+	start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
 	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
 				      dir, attrs);
 }
@@ -726,12 +719,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		goto out_warn;
 
 	phys_addr = swiotlb_tbl_map_single(dev,
-			swiotlb_phys_to_dma(dev, io_tlb_start),
+			__phys_to_dma(dev, io_tlb_start),
 			0, size, DMA_FROM_DEVICE, 0);
 	if (phys_addr == SWIOTLB_MAP_ERROR)
 		goto out_warn;
 
-	*dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
+	*dma_handle = __phys_to_dma(dev, phys_addr);
 	if (dma_coherent_ok(dev, *dma_handle, size))
 		goto out_unmap;
 
@@ -867,10 +860,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	map = map_single(dev, phys, size, dir, attrs);
 	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
-		return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+		return __phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
-	dev_addr = swiotlb_phys_to_dma(dev, map);
+	dev_addr = __phys_to_dma(dev, map);
 
 	/* Ensure that the address returned is DMA'ble */
 	if (dma_capable(dev, dev_addr, size))
@@ -879,7 +872,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
 	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
 
-	return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+	return __phys_to_dma(dev, io_tlb_overflow_buffer);
 }
 
 /*
@@ -1009,7 +1002,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sg_dma_len(sgl) = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
+			sg->dma_address = __phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg_dma_len(sg) = sg->length;
@@ -1073,7 +1066,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
+	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
 }
 
 /*
@@ -1085,7 +1078,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
 #ifdef CONFIG_DMA_DIRECT_OPS
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 12/14] dma-direct: handle the memory encryption bit in common code
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

Give the basic phys_to_dma and dma_to_phys helpers a __-prefix and add
the memory encryption mask to the non-prefixed versions.  Use the
__-prefixed versions directly instead of clearing the mask again in
various places.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/arm/include/asm/dma-direct.h                  |  4 ++--
 arch/mips/cavium-octeon/dma-octeon.c               | 10 ++++-----
 .../include/asm/mach-cavium-octeon/dma-coherence.h |  4 ++--
 .../include/asm/mach-loongson64/dma-coherence.h    | 10 ++++-----
 arch/mips/loongson64/common/dma-swiotlb.c          |  4 ++--
 arch/powerpc/include/asm/dma-direct.h              |  4 ++--
 arch/x86/Kconfig                                   |  2 +-
 arch/x86/include/asm/dma-direct.h                  | 25 ++--------------------
 arch/x86/mm/mem_encrypt.c                          |  2 +-
 arch/x86/pci/sta2x11-fixup.c                       |  6 +++---
 include/linux/dma-direct.h                         | 21 ++++++++++++++++--
 lib/swiotlb.c                                      | 25 ++++++++--------------
 12 files changed, 53 insertions(+), 64 deletions(-)

diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h
index 5b0a8a421894..b67e5fc1fe43 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -2,13 +2,13 @@
 #ifndef ASM_ARM_DMA_DIRECT_H
 #define ASM_ARM_DMA_DIRECT_H 1
 
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	unsigned int offset = paddr & ~PAGE_MASK;
 	return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset;
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 {
 	unsigned int offset = dev_addr & ~PAGE_MASK;
 	return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c7bb8a407041..7b335ab21697 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -10,7 +10,7 @@
  * IP32 changes by Ilya.
  * Copyright (C) 2010 Cavium Networks, Inc.
  */
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/scatterlist.h>
 #include <linux/bootmem.h>
 #include <linux/export.h>
@@ -182,7 +182,7 @@ struct octeon_dma_map_ops {
 	phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
 };
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
 						      struct octeon_dma_map_ops,
@@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 
 	return ops->phys_to_dma(dev, paddr);
 }
-EXPORT_SYMBOL(phys_to_dma);
+EXPORT_SYMBOL(__phys_to_dma);
 
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
 						      struct octeon_dma_map_ops,
@@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 
 	return ops->dma_to_phys(dev, daddr);
 }
-EXPORT_SYMBOL(dma_to_phys);
+EXPORT_SYMBOL(__dma_to_phys);
 
 static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
 	.dma_map_ops = {
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index 138edf6b5b48..6eb1ee548b11 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 
 struct dma_map_ops;
 extern const struct dma_map_ops *octeon_pci_dma_map_ops;
diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
index b1b575f5c6c1..64fc44dec0a8 100644
--- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h
+++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
@@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
 					  size_t size)
 {
 #ifdef CONFIG_CPU_LOONGSON3
-	return phys_to_dma(dev, virt_to_phys(addr));
+	return __phys_to_dma(dev, virt_to_phys(addr));
 #else
 	return virt_to_phys(addr) | 0x80000000;
 #endif
@@ -41,7 +41,7 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
 					       struct page *page)
 {
 #ifdef CONFIG_CPU_LOONGSON3
-	return phys_to_dma(dev, page_to_phys(page));
+	return __phys_to_dma(dev, page_to_phys(page));
 #else
 	return page_to_phys(page) | 0x80000000;
 #endif
@@ -51,7 +51,7 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
 	dma_addr_t dma_addr)
 {
 #if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT)
-	return dma_to_phys(dev, dma_addr);
+	return __dma_to_phys(dev, dma_addr);
 #elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT)
 	return (dma_addr > 0x8fffffff) ? dma_addr : (dma_addr & 0x0fffffff);
 #else
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 7bbcf89475f3..6a739f8ae110 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -63,7 +63,7 @@ static int loongson_dma_supported(struct device *dev, u64 mask)
 	return swiotlb_dma_supported(dev, mask);
 }
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	long nid;
 #ifdef CONFIG_PHYS48_TO_HT40
@@ -75,7 +75,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 	return paddr;
 }
 
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	long nid;
 #ifdef CONFIG_PHYS48_TO_HT40
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index a5b59c765426..7702875aabb7 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -17,12 +17,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	return addr + size - 1 <= *dev->dma_mask;
 }
 
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	return paddr + get_dma_offset(dev);
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	return daddr - get_dma_offset(dev);
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 10f482beda1b..1ca4f0874517 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,7 +54,6 @@ config X86
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
-	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_REFCOUNT
@@ -692,6 +691,7 @@ config X86_SUPPORTS_MEMORY_FAILURE
 config STA2X11
 	bool "STA2X11 Companion Chip Support"
 	depends on X86_32_NON_STANDARD && PCI
+	select ARCH_HAS_PHYS_TO_DMA
 	select X86_DEV_DMA_OPS
 	select X86_DMA_REMAP
 	select SWIOTLB
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
index 1295bc622ebe..1a19251eaac9 100644
--- a/arch/x86/include/asm/dma-direct.h
+++ b/arch/x86/include/asm/dma-direct.h
@@ -2,29 +2,8 @@
 #ifndef ASM_X86_DMA_DIRECT_H
 #define ASM_X86_DMA_DIRECT_H 1
 
-#include <linux/mem_encrypt.h>
-
-#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
 bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
-#else
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-	if (!dev->dma_mask)
-		return 0;
-
-	return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
-	return __sme_set(paddr);
-}
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
-	return __sme_clr(daddr);
-}
-#endif /* CONFIG_X86_DMA_REMAP */
 #endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d3b80d5f9828..1a05bea831a8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -216,7 +216,7 @@ static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		 * Since we will be clearing the encryption bit, check the
 		 * mask with it already cleared.
 		 */
-		addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
+		addr = __phys_to_dma(dev, page_to_phys(page));
 		if ((addr + size) > dev->coherent_dma_mask) {
 			__free_pages(page, get_order(size));
 		} else {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index eac58e03f43c..7a5bafb76d77 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -207,11 +207,11 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 }
 
 /**
- * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
+ * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
  * @dev: device for a PCI device
  * @paddr: Physical address
  */
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	if (!dev->archdata.is_sta2x11)
 		return paddr;
@@ -223,7 +223,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  * @dev: device for a PCI device
  * @daddr: STA2x11 AMBA DMA address
  */
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	if (!dev->archdata.is_sta2x11)
 		return daddr;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bcdb1a3e4b1f..53ad6a47f513 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -3,18 +3,19 @@
 #define _LINUX_DMA_DIRECT_H 1
 
 #include <linux/dma-mapping.h>
+#include <linux/mem_encrypt.h>
 
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
 #else
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
 	dma_addr_t dev_addr = (dma_addr_t)paddr;
 
 	return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 {
 	phys_addr_t paddr = (phys_addr_t)dev_addr;
 
@@ -30,6 +31,22 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 }
 #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
 
+/*
+ * If memory encryption is supported, phys_to_dma will set the memory encryption
+ * bit in the DMA address, and dma_to_phys will clear it.  The raw __phys_to_dma
+ * and __dma_to_phys versions should only be used on non-encrypted memory for
+ * special occasions like DMA coherent buffers.
+ */
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return __sme_set(__phys_to_dma(dev, paddr));
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return __sme_clr(__dma_to_phys(dev, daddr));
+}
+
 #ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN
 void dma_mark_clean(void *addr, size_t size);
 #else
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 005d1d87bb2e..8b06b4485e65 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-/* For swiotlb, clear memory encryption mask from dma addresses */
-static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
-				      phys_addr_t address)
-{
-	return __sme_clr(phys_to_dma(hwdev, address));
-}
-
 /* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
@@ -622,7 +615,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 		return SWIOTLB_MAP_ERROR;
 	}
 
-	start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
+	start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
 	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
 				      dir, attrs);
 }
@@ -726,12 +719,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		goto out_warn;
 
 	phys_addr = swiotlb_tbl_map_single(dev,
-			swiotlb_phys_to_dma(dev, io_tlb_start),
+			__phys_to_dma(dev, io_tlb_start),
 			0, size, DMA_FROM_DEVICE, 0);
 	if (phys_addr == SWIOTLB_MAP_ERROR)
 		goto out_warn;
 
-	*dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
+	*dma_handle = __phys_to_dma(dev, phys_addr);
 	if (dma_coherent_ok(dev, *dma_handle, size))
 		goto out_unmap;
 
@@ -867,10 +860,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	map = map_single(dev, phys, size, dir, attrs);
 	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
-		return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+		return __phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
-	dev_addr = swiotlb_phys_to_dma(dev, map);
+	dev_addr = __phys_to_dma(dev, map);
 
 	/* Ensure that the address returned is DMA'ble */
 	if (dma_capable(dev, dev_addr, size))
@@ -879,7 +872,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
 	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
 
-	return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+	return __phys_to_dma(dev, io_tlb_overflow_buffer);
 }
 
 /*
@@ -1009,7 +1002,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sg_dma_len(sgl) = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
+			sg->dma_address = __phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg_dma_len(sg) = sg->length;
@@ -1073,7 +1066,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
+	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
 }
 
 /*
@@ -1085,7 +1078,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
 #ifdef CONFIG_DMA_DIRECT_OPS
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 13/14] dma-direct: handle force decryption for dma coherent buffers in common code
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

With that in place the generic dma-direct routines can be used to
allocate non-encrypted bounce buffers, and the x86 SEV case can use
the generic swiotlb ops including nice features such as using CMA
allocations.

Note that I'm not too happy about using sev_active() in dma-direct, but
I couldn't come up with a good enough name for a wrapper to make it
worth adding.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/mm/mem_encrypt.c | 73 ++---------------------------------------------
 lib/dma-direct.c          | 32 +++++++++++++++++----
 2 files changed, 29 insertions(+), 76 deletions(-)

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a05bea831a8..65f45e0ef496 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -200,58 +200,6 @@ void __init sme_early_init(void)
 		swiotlb_force = SWIOTLB_FORCE;
 }
 
-static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		       gfp_t gfp, unsigned long attrs)
-{
-	unsigned int order;
-	struct page *page;
-	void *vaddr = NULL;
-
-	order = get_order(size);
-	page = alloc_pages_node(dev_to_node(dev), gfp, order);
-	if (page) {
-		dma_addr_t addr;
-
-		/*
-		 * Since we will be clearing the encryption bit, check the
-		 * mask with it already cleared.
-		 */
-		addr = __phys_to_dma(dev, page_to_phys(page));
-		if ((addr + size) > dev->coherent_dma_mask) {
-			__free_pages(page, get_order(size));
-		} else {
-			vaddr = page_address(page);
-			*dma_handle = addr;
-		}
-	}
-
-	if (!vaddr)
-		vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
-
-	if (!vaddr)
-		return NULL;
-
-	/* Clear the SME encryption bit for DMA use if not swiotlb area */
-	if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
-		set_memory_decrypted((unsigned long)vaddr, 1 << order);
-		memset(vaddr, 0, PAGE_SIZE << order);
-		*dma_handle = __sme_clr(*dma_handle);
-	}
-
-	return vaddr;
-}
-
-static void sev_free(struct device *dev, size_t size, void *vaddr,
-		     dma_addr_t dma_handle, unsigned long attrs)
-{
-	/* Set the SME encryption bit for re-use if not swiotlb area */
-	if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
-		set_memory_encrypted((unsigned long)vaddr,
-				     1 << get_order(size));
-
-	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-}
-
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
 	pgprot_t old_prot, new_prot;
@@ -404,20 +352,6 @@ bool sev_active(void)
 }
 EXPORT_SYMBOL(sev_active);
 
-static const struct dma_map_ops sev_dma_ops = {
-	.alloc                  = sev_alloc,
-	.free                   = sev_free,
-	.map_page               = swiotlb_map_page,
-	.unmap_page             = swiotlb_unmap_page,
-	.map_sg                 = swiotlb_map_sg_attrs,
-	.unmap_sg               = swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device     = swiotlb_sync_sg_for_device,
-	.mapping_error          = swiotlb_dma_mapping_error,
-};
-
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void)
 {
@@ -428,12 +362,11 @@ void __init mem_encrypt_init(void)
 	swiotlb_update_mem_attributes();
 
 	/*
-	 * With SEV, DMA operations cannot use encryption. New DMA ops
-	 * are required in order to mark the DMA areas as decrypted or
-	 * to use bounce buffers.
+	 * With SEV, DMA operations cannot use encryption, we need to use
+	 * SWIOTLB to bounce buffer DMA operation.
 	 */
 	if (sev_active())
-		dma_ops = &sev_dma_ops;
+		dma_ops = &swiotlb_dma_ops;
 
 	/*
 	 * With SEV, we need to unroll the rep string I/O instructions.
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index c9e8e21cb334..1277d293d4da 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -9,6 +9,7 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-contiguous.h>
 #include <linux/pfn.h>
+#include <linux/set_memory.h>
 
 #define DIRECT_MAPPING_ERROR		0
 
@@ -20,6 +21,14 @@
 #define ARCH_ZONE_DMA_BITS 24
 #endif
 
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+	return sev_active();
+}
+
 static bool
 check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 		const char *caller)
@@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
-	return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
+	dma_addr_t addr = force_dma_unencrypted() ?
+		__phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+	return addr + size - 1 <= dev->coherent_dma_mask;
 }
 
 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
@@ -46,6 +57,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	int page_order = get_order(size);
 	struct page *page = NULL;
+	void *ret;
 
 	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
 	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
@@ -78,10 +90,15 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	if (!page)
 		return NULL;
-
-	*dma_handle = phys_to_dma(dev, page_to_phys(page));
-	memset(page_address(page), 0, size);
-	return page_address(page);
+	ret = page_address(page);
+	if (force_dma_unencrypted()) {
+		set_memory_decrypted((unsigned long)ret, 1 << page_order);
+		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
+	} else {
+		*dma_handle = phys_to_dma(dev, page_to_phys(page));
+	}
+	memset(ret, 0, size);
+	return ret;
 }
 
 /*
@@ -92,9 +109,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int page_order = get_order(size);
 
+	if (force_dma_unencrypted())
+		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
 	if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-		free_pages((unsigned long)cpu_addr, get_order(size));
+		free_pages((unsigned long)cpu_addr, page_order);
 }
 
 static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 13/14] dma-direct: handle force decryption for dma coherent buffers in common code
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

With that in place the generic dma-direct routines can be used to
allocate non-encrypted bounce buffers, and the x86 SEV case can use
the generic swiotlb ops including nice features such as using CMA
allocations.

Note that I'm not too happy about using sev_active() in dma-direct, but
I couldn't come up with a good enough name for a wrapper to make it
worth adding.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/mm/mem_encrypt.c | 73 ++---------------------------------------------
 lib/dma-direct.c          | 32 +++++++++++++++++----
 2 files changed, 29 insertions(+), 76 deletions(-)

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a05bea831a8..65f45e0ef496 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -200,58 +200,6 @@ void __init sme_early_init(void)
 		swiotlb_force = SWIOTLB_FORCE;
 }
 
-static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		       gfp_t gfp, unsigned long attrs)
-{
-	unsigned int order;
-	struct page *page;
-	void *vaddr = NULL;
-
-	order = get_order(size);
-	page = alloc_pages_node(dev_to_node(dev), gfp, order);
-	if (page) {
-		dma_addr_t addr;
-
-		/*
-		 * Since we will be clearing the encryption bit, check the
-		 * mask with it already cleared.
-		 */
-		addr = __phys_to_dma(dev, page_to_phys(page));
-		if ((addr + size) > dev->coherent_dma_mask) {
-			__free_pages(page, get_order(size));
-		} else {
-			vaddr = page_address(page);
-			*dma_handle = addr;
-		}
-	}
-
-	if (!vaddr)
-		vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
-
-	if (!vaddr)
-		return NULL;
-
-	/* Clear the SME encryption bit for DMA use if not swiotlb area */
-	if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
-		set_memory_decrypted((unsigned long)vaddr, 1 << order);
-		memset(vaddr, 0, PAGE_SIZE << order);
-		*dma_handle = __sme_clr(*dma_handle);
-	}
-
-	return vaddr;
-}
-
-static void sev_free(struct device *dev, size_t size, void *vaddr,
-		     dma_addr_t dma_handle, unsigned long attrs)
-{
-	/* Set the SME encryption bit for re-use if not swiotlb area */
-	if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
-		set_memory_encrypted((unsigned long)vaddr,
-				     1 << get_order(size));
-
-	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-}
-
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
 	pgprot_t old_prot, new_prot;
@@ -404,20 +352,6 @@ bool sev_active(void)
 }
 EXPORT_SYMBOL(sev_active);
 
-static const struct dma_map_ops sev_dma_ops = {
-	.alloc                  = sev_alloc,
-	.free                   = sev_free,
-	.map_page               = swiotlb_map_page,
-	.unmap_page             = swiotlb_unmap_page,
-	.map_sg                 = swiotlb_map_sg_attrs,
-	.unmap_sg               = swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device     = swiotlb_sync_sg_for_device,
-	.mapping_error          = swiotlb_dma_mapping_error,
-};
-
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void)
 {
@@ -428,12 +362,11 @@ void __init mem_encrypt_init(void)
 	swiotlb_update_mem_attributes();
 
 	/*
-	 * With SEV, DMA operations cannot use encryption. New DMA ops
-	 * are required in order to mark the DMA areas as decrypted or
-	 * to use bounce buffers.
+	 * With SEV, DMA operations cannot use encryption, we need to use
+	 * SWIOTLB to bounce buffer DMA operation.
 	 */
 	if (sev_active())
-		dma_ops = &sev_dma_ops;
+		dma_ops = &swiotlb_dma_ops;
 
 	/*
 	 * With SEV, we need to unroll the rep string I/O instructions.
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index c9e8e21cb334..1277d293d4da 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -9,6 +9,7 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-contiguous.h>
 #include <linux/pfn.h>
+#include <linux/set_memory.h>
 
 #define DIRECT_MAPPING_ERROR		0
 
@@ -20,6 +21,14 @@
 #define ARCH_ZONE_DMA_BITS 24
 #endif
 
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+	return sev_active();
+}
+
 static bool
 check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 		const char *caller)
@@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
-	return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
+	dma_addr_t addr = force_dma_unencrypted() ?
+		__phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+	return addr + size - 1 <= dev->coherent_dma_mask;
 }
 
 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
@@ -46,6 +57,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	int page_order = get_order(size);
 	struct page *page = NULL;
+	void *ret;
 
 	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
 	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
@@ -78,10 +90,15 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	if (!page)
 		return NULL;
-
-	*dma_handle = phys_to_dma(dev, page_to_phys(page));
-	memset(page_address(page), 0, size);
-	return page_address(page);
+	ret = page_address(page);
+	if (force_dma_unencrypted()) {
+		set_memory_decrypted((unsigned long)ret, 1 << page_order);
+		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
+	} else {
+		*dma_handle = phys_to_dma(dev, page_to_phys(page));
+	}
+	memset(ret, 0, size);
+	return ret;
 }
 
 /*
@@ -92,9 +109,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int page_order = get_order(size);
 
+	if (force_dma_unencrypted())
+		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
 	if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-		free_pages((unsigned long)cpu_addr, get_order(size));
+		free_pages((unsigned long)cpu_addr, page_order);
 }
 
 static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 14/14] swiotlb: remove swiotlb_{alloc,free}_coherent
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

Unused now that everyone uses swiotlb_{alloc,free}.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/swiotlb.h |  8 --------
 lib/swiotlb.c           | 38 --------------------------------------
 2 files changed, 46 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5b1f2a00491c..965be92c33b5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
 void swiotlb_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_addr, unsigned long attrs);
 
-extern void
-*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-			dma_addr_t *dma_handle, gfp_t flags);
-
-extern void
-swiotlb_free_coherent(struct device *hwdev, size_t size,
-		      void *vaddr, dma_addr_t dma_handle);
-
 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8b06b4485e65..15954b86f09e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
-				      volatile void *address)
-{
-	return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
 static bool no_iotlb_memory;
 
 void swiotlb_print_info(void)
@@ -752,28 +745,6 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	return NULL;
 }
 
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, gfp_t flags)
-{
-	int order = get_order(size);
-	unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
-	void *ret;
-
-	ret = (void *)__get_free_pages(flags, order);
-	if (ret) {
-		*dma_handle = swiotlb_virt_to_bus(hwdev, ret);
-		if (dma_coherent_ok(hwdev, *dma_handle, size)) {
-			memset(ret, 0, size);
-			return ret;
-		}
-		free_pages((unsigned long)ret, order);
-	}
-
-	return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
 static bool swiotlb_free_buffer(struct device *dev, size_t size,
 		dma_addr_t dma_addr)
 {
@@ -793,15 +764,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
 	return true;
 }
 
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dev_addr)
-{
-	if (!swiotlb_free_buffer(hwdev, size, dev_addr))
-		free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
-
 static void
 swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
 	     int do_panic)
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 14/14] swiotlb: remove swiotlb_{alloc,free}_coherent
@ 2018-03-14 17:52   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-14 17:52 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

Unused now that everyone uses swiotlb_{alloc,free}.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 include/linux/swiotlb.h |  8 --------
 lib/swiotlb.c           | 38 --------------------------------------
 2 files changed, 46 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5b1f2a00491c..965be92c33b5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
 void swiotlb_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_addr, unsigned long attrs);
 
-extern void
-*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-			dma_addr_t *dma_handle, gfp_t flags);
-
-extern void
-swiotlb_free_coherent(struct device *hwdev, size_t size,
-		      void *vaddr, dma_addr_t dma_handle);
-
 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8b06b4485e65..15954b86f09e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
-				      volatile void *address)
-{
-	return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
 static bool no_iotlb_memory;
 
 void swiotlb_print_info(void)
@@ -752,28 +745,6 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	return NULL;
 }
 
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, gfp_t flags)
-{
-	int order = get_order(size);
-	unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
-	void *ret;
-
-	ret = (void *)__get_free_pages(flags, order);
-	if (ret) {
-		*dma_handle = swiotlb_virt_to_bus(hwdev, ret);
-		if (dma_coherent_ok(hwdev, *dma_handle, size)) {
-			memset(ret, 0, size);
-			return ret;
-		}
-		free_pages((unsigned long)ret, order);
-	}
-
-	return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
 static bool swiotlb_free_buffer(struct device *dev, size_t size,
 		dma_addr_t dma_addr)
 {
@@ -793,15 +764,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
 	return true;
 }
 
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dev_addr)
-{
-	if (!swiotlb_free_buffer(hwdev, size, dev_addr))
-		free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
-
 static void
 swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
 	     int do_panic)
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
@ 2018-03-15  8:56     ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15  8:56 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

On Wed, 14 Mar 2018, Christoph Hellwig wrote:

> The generic dma-direct implementation is now functionally equivalent to
> the x86 nommu dma_map implementation, so switch over to using it.

Can you please convert the various drivers first and then remove the
unused code?

> Note that the various iommu drivers are switched from x86_dma_supported
> to dma_direct_supported to provide identical functionality, although the
> checks looks fairly questionable for at least some of them.

Can you please elaborate? From the above it's not clear which checks you
are referring to. If you convert these drivers seperately then explicit
information about your concerns wants to be in the changelogs.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
@ 2018-03-15  8:56     ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15  8:56 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Wed, 14 Mar 2018, Christoph Hellwig wrote:

> The generic dma-direct implementation is now functionally equivalent to
> the x86 nommu dma_map implementation, so switch over to using it.

Can you please convert the various drivers first and then remove the
unused code?

> Note that the various iommu drivers are switched from x86_dma_supported
> to dma_direct_supported to provide identical functionality, although the
> checks looks fairly questionable for at least some of them.

Can you please elaborate? From the above it's not clear which checks you
are referring to. If you convert these drivers seperately then explicit
information about your concerns wants to be in the changelogs.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15  9:00     ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15  9:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

On Wed, 14 Mar 2018, Christoph Hellwig wrote:
>  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
>  	void *iommu; /* hook for IOMMU specific extension */
>  #endif
> +#ifdef CONFIG_STA2X11
> +	bool is_sta2x11 : 1;

Huch? Please use either bool or an unsigned int based bitfield. A boolean
bitfield doesn't make sense.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15  9:00     ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15  9:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Wed, 14 Mar 2018, Christoph Hellwig wrote:
>  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
>  	void *iommu; /* hook for IOMMU specific extension */
>  #endif
> +#ifdef CONFIG_STA2X11
> +	bool is_sta2x11 : 1;

Huch? Please use either bool or an unsigned int based bitfield. A boolean
bitfield doesn't make sense.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
  2018-03-15  8:56     ` Thomas Gleixner
  (?)
@ 2018-03-15 11:53     ` Christoph Hellwig
  2018-03-15 12:53       ` Thomas Gleixner
  -1 siblings, 1 reply; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 11:53 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Christoph Hellwig, x86, Konrad Rzeszutek Wilk, Tom Lendacky,
	David Woodhouse, Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu,
	linux-kernel

On Thu, Mar 15, 2018 at 09:56:13AM +0100, Thomas Gleixner wrote:
> On Wed, 14 Mar 2018, Christoph Hellwig wrote:
> 
> > The generic dma-direct implementation is now functionally equivalent to
> > the x86 nommu dma_map implementation, so switch over to using it.
> 
> Can you please convert the various drivers first and then remove the
> unused code?

Which various drivers?

> > Note that the various iommu drivers are switched from x86_dma_supported
> > to dma_direct_supported to provide identical functionality, although the
> > checks looks fairly questionable for at least some of them.
> 
> Can you please elaborate? From the above it's not clear which checks you
> are referring to. If you convert these drivers seperately then explicit
> information about your concerns wants to be in the changelogs.

This bit:

	/* Copied from i386. Doesn't make much sense, because it will
	   only work for pci_alloc_coherent.
	   The caller just has to use GFP_DMA in this case. */
	if (mask < DMA_BIT_MASK(24))
		return 0;

in x86_dma_supported, or the equivalent bit in dma_direct_supported.
Kept for bug to bug compatibility, but I guess I should reword or
just drop the changelog bit іf it causes confusion.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 11:54       ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 11:54 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Christoph Hellwig, x86, Konrad Rzeszutek Wilk, Tom Lendacky,
	David Woodhouse, Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu,
	linux-kernel

On Thu, Mar 15, 2018 at 10:00:57AM +0100, Thomas Gleixner wrote:
> On Wed, 14 Mar 2018, Christoph Hellwig wrote:
> >  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
> >  	void *iommu; /* hook for IOMMU specific extension */
> >  #endif
> > +#ifdef CONFIG_STA2X11
> > +	bool is_sta2x11 : 1;
> 
> Huch? Please use either bool or an unsigned int based bitfield. A boolean
> bitfield doesn't make sense.

bool bitfields are perfectly valid in C99 and used in various places in
the kernel. But if you want either bool or an unsigned bitfield let me
know and I'll switch it over.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 11:54       ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 11:54 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse, Christoph Hellwig

On Thu, Mar 15, 2018 at 10:00:57AM +0100, Thomas Gleixner wrote:
> On Wed, 14 Mar 2018, Christoph Hellwig wrote:
> >  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
> >  	void *iommu; /* hook for IOMMU specific extension */
> >  #endif
> > +#ifdef CONFIG_STA2X11
> > +	bool is_sta2x11 : 1;
> 
> Huch? Please use either bool or an unsigned int based bitfield. A boolean
> bitfield doesn't make sense.

bool bitfields are perfectly valid in C99 and used in various places in
the kernel. But if you want either bool or an unsigned bitfield let me
know and I'll switch it over.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 12:52         ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 12:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

On Thu, 15 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 15, 2018 at 10:00:57AM +0100, Thomas Gleixner wrote:
> > On Wed, 14 Mar 2018, Christoph Hellwig wrote:
> > >  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
> > >  	void *iommu; /* hook for IOMMU specific extension */
> > >  #endif
> > > +#ifdef CONFIG_STA2X11
> > > +	bool is_sta2x11 : 1;
> > 
> > Huch? Please use either bool or an unsigned int based bitfield. A boolean
> > bitfield doesn't make sense.
> 
> bool bitfields are perfectly valid in C99 and used in various places in
> the kernel. But if you want either bool or an unsigned bitfield let me
> know and I'll switch it over.

Yeah, I know that the standard defines it, but that doesn't mean it makes
sense. At least not to me.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 12:52         ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 12:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Thu, 15 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 15, 2018 at 10:00:57AM +0100, Thomas Gleixner wrote:
> > On Wed, 14 Mar 2018, Christoph Hellwig wrote:
> > >  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
> > >  	void *iommu; /* hook for IOMMU specific extension */
> > >  #endif
> > > +#ifdef CONFIG_STA2X11
> > > +	bool is_sta2x11 : 1;
> > 
> > Huch? Please use either bool or an unsigned int based bitfield. A boolean
> > bitfield doesn't make sense.
> 
> bool bitfields are perfectly valid in C99 and used in various places in
> the kernel. But if you want either bool or an unsigned bitfield let me
> know and I'll switch it over.

Yeah, I know that the standard defines it, but that doesn't mean it makes
sense. At least not to me.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
  2018-03-15 11:53     ` Christoph Hellwig
@ 2018-03-15 12:53       ` Thomas Gleixner
  2018-03-15 13:40           ` Christoph Hellwig
  0 siblings, 1 reply; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 12:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1382 bytes --]

On Thu, 15 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 15, 2018 at 09:56:13AM +0100, Thomas Gleixner wrote:
> > On Wed, 14 Mar 2018, Christoph Hellwig wrote:
> > 
> > > The generic dma-direct implementation is now functionally equivalent to
> > > the x86 nommu dma_map implementation, so switch over to using it.
> > 
> > Can you please convert the various drivers first and then remove the
> > unused code?
> 
> Which various drivers?
> 
> > > Note that the various iommu drivers are switched from x86_dma_supported

The various iommu drivers ....

> > > to dma_direct_supported to provide identical functionality, although the
> > > checks looks fairly questionable for at least some of them.
> > 
> > Can you please elaborate? From the above it's not clear which checks you
> > are referring to. If you convert these drivers seperately then explicit
> > information about your concerns wants to be in the changelogs.
> 
> This bit:
> 
> 	/* Copied from i386. Doesn't make much sense, because it will
> 	   only work for pci_alloc_coherent.
> 	   The caller just has to use GFP_DMA in this case. */
> 	if (mask < DMA_BIT_MASK(24))
> 		return 0;
> 
> in x86_dma_supported, or the equivalent bit in dma_direct_supported.
> Kept for bug to bug compatibility, but I guess I should reword or
> just drop the changelog bit іf it causes confusion.

Reword please.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 13:35           ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 13:35 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Christoph Hellwig, x86, Konrad Rzeszutek Wilk, Tom Lendacky,
	David Woodhouse, Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu,
	linux-kernel

On Thu, Mar 15, 2018 at 01:52:14PM +0100, Thomas Gleixner wrote:
> Yeah, I know that the standard defines it, but that doesn't mean it makes
> sense. At least not to me.

It makes sense in that it logically is a boolean but we only want
to allocate 1 bit for it, unlike the normal ABI allocations of at
least a byte.

Either way, tell me what you want it changed to, and I'll do it.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 13:35           ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 13:35 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse, Christoph Hellwig

On Thu, Mar 15, 2018 at 01:52:14PM +0100, Thomas Gleixner wrote:
> Yeah, I know that the standard defines it, but that doesn't mean it makes
> sense. At least not to me.

It makes sense in that it logically is a boolean but we only want
to allocate 1 bit for it, unlike the normal ABI allocations of at
least a byte.

Either way, tell me what you want it changed to, and I'll do it.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
@ 2018-03-15 13:40           ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 13:40 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Christoph Hellwig, x86, Konrad Rzeszutek Wilk, Tom Lendacky,
	David Woodhouse, Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu,
	linux-kernel

On Thu, Mar 15, 2018 at 01:53:52PM +0100, Thomas Gleixner wrote:
> > > > The generic dma-direct implementation is now functionally equivalent to
> > > > the x86 nommu dma_map implementation, so switch over to using it.
> > > 
> > > Can you please convert the various drivers first and then remove the
> > > unused code?
> > 
> > Which various drivers?
> > 
> > > > Note that the various iommu drivers are switched from x86_dma_supported
> 
> The various iommu drivers ....

It doesn't really make any sense to switch them separately.  They've
inherited the ops from x86 which used to override it on an arch level,
and we've now generalized the exactly same implementation to common code.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
@ 2018-03-15 13:40           ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-15 13:40 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse, Christoph Hellwig

On Thu, Mar 15, 2018 at 01:53:52PM +0100, Thomas Gleixner wrote:
> > > > The generic dma-direct implementation is now functionally equivalent to
> > > > the x86 nommu dma_map implementation, so switch over to using it.
> > > 
> > > Can you please convert the various drivers first and then remove the
> > > unused code?
> > 
> > Which various drivers?
> > 
> > > > Note that the various iommu drivers are switched from x86_dma_supported
> 
> The various iommu drivers ....

It doesn't really make any sense to switch them separately.  They've
inherited the ops from x86 which used to override it on an arch level,
and we've now generalized the exactly same implementation to common code.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
@ 2018-03-15 13:45             ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 13:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

On Thu, 15 Mar 2018, Christoph Hellwig wrote:
> On Thu, Mar 15, 2018 at 01:53:52PM +0100, Thomas Gleixner wrote:
> > > > > The generic dma-direct implementation is now functionally equivalent to
> > > > > the x86 nommu dma_map implementation, so switch over to using it.
> > > > 
> > > > Can you please convert the various drivers first and then remove the
> > > > unused code?
> > > 
> > > Which various drivers?
> > > 
> > > > > Note that the various iommu drivers are switched from x86_dma_supported
> > 
> > The various iommu drivers ....
> 
> It doesn't really make any sense to switch them separately.  They've
> inherited the ops from x86 which used to override it on an arch level,
> and we've now generalized the exactly same implementation to common code.

Fair enough.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 03/14] x86: use dma-direct
@ 2018-03-15 13:45             ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 13:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Thu, 15 Mar 2018, Christoph Hellwig wrote:
> On Thu, Mar 15, 2018 at 01:53:52PM +0100, Thomas Gleixner wrote:
> > > > > The generic dma-direct implementation is now functionally equivalent to
> > > > > the x86 nommu dma_map implementation, so switch over to using it.
> > > > 
> > > > Can you please convert the various drivers first and then remove the
> > > > unused code?
> > > 
> > > Which various drivers?
> > > 
> > > > > Note that the various iommu drivers are switched from x86_dma_supported
> > 
> > The various iommu drivers ....
> 
> It doesn't really make any sense to switch them separately.  They've
> inherited the ops from x86 which used to override it on an arch level,
> and we've now generalized the exactly same implementation to common code.

Fair enough.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 13:48             ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 13:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

On Thu, 15 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 15, 2018 at 01:52:14PM +0100, Thomas Gleixner wrote:
> > Yeah, I know that the standard defines it, but that doesn't mean it makes
> > sense. At least not to me.
> 
> It makes sense in that it logically is a boolean but we only want
> to allocate 1 bit for it, unlike the normal ABI allocations of at
> least a byte.
>
> Either way, tell me what you want it changed to, and I'll do it.

I'd prefer either bool or a regular bitfield, but I can live with the
boolean bitfield as well.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-15 13:48             ` Thomas Gleixner
  0 siblings, 0 replies; 60+ messages in thread
From: Thomas Gleixner @ 2018-03-15 13:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Thu, 15 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 15, 2018 at 01:52:14PM +0100, Thomas Gleixner wrote:
> > Yeah, I know that the standard defines it, but that doesn't mean it makes
> > sense. At least not to me.
> 
> It makes sense in that it logically is a boolean but we only want
> to allocate 1 bit for it, unlike the normal ABI allocations of at
> least a byte.
>
> Either way, tell me what you want it changed to, and I'll do it.

I'd prefer either bool or a regular bitfield, but I can live with the
boolean bitfield as well.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 07/14] iommu/amd_iommu: use dma_direct_{alloc,free}
@ 2018-03-15 15:30     ` Joerg Roedel
  0 siblings, 0 replies; 60+ messages in thread
From: Joerg Roedel @ 2018-03-15 15:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, iommu, linux-kernel

On Wed, Mar 14, 2018 at 06:52:06PM +0100, Christoph Hellwig wrote:
> This cleans up the code a lot by removing duplicate logic.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/iommu/Kconfig     |  1 +
>  drivers/iommu/amd_iommu.c | 68 +++++++++++++++--------------------------------
>  2 files changed, 22 insertions(+), 47 deletions(-)

Tested-by: Joerg Roedel <jroedel@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 07/14] iommu/amd_iommu: use dma_direct_{alloc,free}
@ 2018-03-15 15:30     ` Joerg Roedel
  0 siblings, 0 replies; 60+ messages in thread
From: Joerg Roedel @ 2018-03-15 15:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Wed, Mar 14, 2018 at 06:52:06PM +0100, Christoph Hellwig wrote:
> This cleans up the code a lot by removing duplicate logic.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
>  drivers/iommu/Kconfig     |  1 +
>  drivers/iommu/amd_iommu.c | 68 +++++++++++++++--------------------------------
>  2 files changed, 22 insertions(+), 47 deletions(-)

Tested-by: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
Acked-by: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 05/14] x86/amd_gart: look at coherent_dma_mask instead of GFP_DMA
@ 2018-03-15 17:47     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:47 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Tom Lendacky, David Woodhouse, Muli Ben-Yehuda, Jon Mason,
	Joerg Roedel, iommu, linux-kernel

On Wed, Mar 14, 2018 at 06:52:04PM +0100, Christoph Hellwig wrote:
> We want to phase out looking at the magic GFP_DMA flag in the dma mapping
> routines, so switch the gart driver to use the coherent_dma_mask instead,
> which is used to select the GFP_DMA flag in the caller.
> 
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  arch/x86/kernel/amd_gart_64.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
> index 52e3abcf3e70..79ac6caaaabb 100644
> --- a/arch/x86/kernel/amd_gart_64.c
> +++ b/arch/x86/kernel/amd_gart_64.c
> @@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
>  	unsigned long align_mask;
>  	struct page *page;
>  
> -	if (force_iommu && !(flag & GFP_DMA)) {
> +	if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
>  		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
>  		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
>  		if (!page)
> -- 
> 2.14.2
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 05/14] x86/amd_gart: look at coherent_dma_mask instead of GFP_DMA
@ 2018-03-15 17:47     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:47 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Wed, Mar 14, 2018 at 06:52:04PM +0100, Christoph Hellwig wrote:
> We want to phase out looking at the magic GFP_DMA flag in the dma mapping
> routines, so switch the gart driver to use the coherent_dma_mask instead,
> which is used to select the GFP_DMA flag in the caller.
> 
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
> ---
>  arch/x86/kernel/amd_gart_64.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
> index 52e3abcf3e70..79ac6caaaabb 100644
> --- a/arch/x86/kernel/amd_gart_64.c
> +++ b/arch/x86/kernel/amd_gart_64.c
> @@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
>  	unsigned long align_mask;
>  	struct page *page;
>  
> -	if (force_iommu && !(flag & GFP_DMA)) {
> +	if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
>  		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
>  		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
>  		if (!page)
> -- 
> 2.14.2
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 10/14] set_memory.h: provide set_memory_{en,de}crypted stubs
@ 2018-03-15 17:50     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:50 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Tom Lendacky, David Woodhouse, Muli Ben-Yehuda, Jon Mason,
	Joerg Roedel, iommu, linux-kernel

On Wed, Mar 14, 2018 at 06:52:09PM +0100, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

Thank you!
> ---
>  include/linux/set_memory.h | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
> index e5140648f638..da5178216da5 100644
> --- a/include/linux/set_memory.h
> +++ b/include/linux/set_memory.h
> @@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
>  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
>  #endif
>  
> +#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
> +static inline int set_memory_encrypted(unsigned long addr, int numpages)
> +{
> +	return 0;
> +}
> +
> +static inline int set_memory_decrypted(unsigned long addr, int numpages)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
> +
>  #endif /* _LINUX_SET_MEMORY_H_ */
> -- 
> 2.14.2
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 10/14] set_memory.h: provide set_memory_{en,de}crypted stubs
@ 2018-03-15 17:50     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:50 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Wed, Mar 14, 2018 at 06:52:09PM +0100, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

Thank you!
> ---
>  include/linux/set_memory.h | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
> index e5140648f638..da5178216da5 100644
> --- a/include/linux/set_memory.h
> +++ b/include/linux/set_memory.h
> @@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
>  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
>  #endif
>  
> +#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
> +static inline int set_memory_encrypted(unsigned long addr, int numpages)
> +{
> +	return 0;
> +}
> +
> +static inline int set_memory_decrypted(unsigned long addr, int numpages)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
> +
>  #endif /* _LINUX_SET_MEMORY_H_ */
> -- 
> 2.14.2
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes
@ 2018-03-15 17:51     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:51 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Tom Lendacky, David Woodhouse, Muli Ben-Yehuda, Jon Mason,
	Joerg Roedel, iommu, linux-kernel

On Wed, Mar 14, 2018 at 06:52:10PM +0100, Christoph Hellwig wrote:
> Now that set_memory_decrypted is always available we can just call it
> directly.
> 

Won't this break ARM build?

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes
@ 2018-03-15 17:51     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:51 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Wed, Mar 14, 2018 at 06:52:10PM +0100, Christoph Hellwig wrote:
> Now that set_memory_decrypted is always available we can just call it
> directly.
> 

Won't this break ARM build?

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes
@ 2018-03-15 17:54       ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: x86, Tom Lendacky, David Woodhouse, Muli Ben-Yehuda, Jon Mason,
	Joerg Roedel, iommu, linux-kernel

On Thu, Mar 15, 2018 at 01:51:59PM -0400, Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 14, 2018 at 06:52:10PM +0100, Christoph Hellwig wrote:
> > Now that set_memory_decrypted is always available we can just call it
> > directly.
> > 
> 
> Won't this break ARM build?

<brainfart> And of course right after asking that question I went back to
the previous patch as I had that in my mind it was arch/x86/include, not the global
one.

So pls ignore my query and instead:

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes
@ 2018-03-15 17:54       ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 60+ messages in thread
From: Konrad Rzeszutek Wilk @ 2018-03-15 17:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tom Lendacky, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

On Thu, Mar 15, 2018 at 01:51:59PM -0400, Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 14, 2018 at 06:52:10PM +0100, Christoph Hellwig wrote:
> > Now that set_memory_decrypted is always available we can just call it
> > directly.
> > 
> 
> Won't this break ARM build?

<brainfart> And of course right after asking that question I went back to
the previous patch as I had that in my mind it was arch/x86/include, not the global
one.

So pls ignore my query and instead:

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-19 10:38   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-19 10:38 UTC (permalink / raw)
  To: x86
  Cc: Konrad Rzeszutek Wilk, Tom Lendacky, David Woodhouse,
	Muli Ben-Yehuda, Jon Mason, Joerg Roedel, iommu, linux-kernel

The generic swiotlb dma ops were based on the x86 ones and provide
equivalent functionality, so use them.

Also fix the sta2x11 case.  For that SOC the dma map ops need an
additional physical to dma address translations.  For swiotlb buffers
that is done throught the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that functions
returns a direct allocation and not a swiotlb buffer.  With the
generic swiotlb and dma-direct code phys_to_dma is not always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to dma address translation
is needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/include/asm/device.h  |  3 +++
 arch/x86/include/asm/swiotlb.h |  8 -------
 arch/x86/kernel/pci-swiotlb.c  | 47 +-----------------------------------------
 arch/x86/pci/sta2x11-fixup.c   | 46 +++++------------------------------------
 4 files changed, 9 insertions(+), 95 deletions(-)

diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..a8f6c809d9b1 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
+#ifdef CONFIG_STA2X11
+	bool is_sta2x11;
+#endif
 };
 
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
 {
 }
 #endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-					void *vaddr, dma_addr_t dma_addr,
-					unsigned long attrs);
-
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
 
 int swiotlb __read_mostly;
 
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs)
-{
-	void *vaddr;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA
-	 * memory allocation ultimately failed.
-	 */
-	flags |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-	if (vaddr)
-		return vaddr;
-
-	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_addr,
-				      unsigned long attrs)
-{
-	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
-		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
-	else
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
-	.mapping_error = swiotlb_dma_mapping_error,
-	.alloc = x86_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.dma_supported = NULL,
-};
-
 /*
  * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
  *
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
 {
 	if (swiotlb) {
 		swiotlb_init(0);
-		dma_ops = &x86_swiotlb_dma_ops;
+		dma_ops = &swiotlb_dma_ops;
 	}
 }
 
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
 	return p;
 }
 
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- *     returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
-					    size_t size,
-					    dma_addr_t *dma_handle,
-					    gfp_t flags,
-					    unsigned long attrs)
-{
-	void *vaddr;
-
-	vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
-	*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
-	return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
-	.alloc = sta2x11_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = dma_direct_supported,
-};
-
 /* At setup time, we use our own ops if the device is a ConneXt one */
 static void sta2x11_setup_pdev(struct pci_dev *pdev)
 {
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
 		return;
 	pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
 	pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-	pdev->dev.dma_ops = &sta2x11_dma_ops;
+	pdev->dev.dma_ops = &swiotlb_dma_ops;
+	pdev->dev.archdata.is_sta2x11 = true;
 
 	/* We must enable all devices as master, for audio DMA to work */
 	pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	struct sta2x11_mapping *map;
 
-	if (dev->dma_ops != &sta2x11_dma_ops) {
+	if (!dev->archdata.is_sta2x11) {
 		if (!dev->dma_mask)
 			return false;
 		return addr + size - 1 <= *dev->dma_mask;
@@ -249,7 +213,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
  */
 dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return paddr;
 	return p2a(paddr, to_pci_dev(dev));
 }
@@ -261,7 +225,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  */
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return daddr;
 	return a2p(daddr, to_pci_dev(dev));
 }
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 04/14] x86: use generic swiotlb_ops
@ 2018-03-19 10:38   ` Christoph Hellwig
  0 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2018-03-19 10:38 UTC (permalink / raw)
  To: x86-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Tom Lendacky, Konrad Rzeszutek Wilk,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Muli Ben-Yehuda,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	David Woodhouse

The generic swiotlb dma ops were based on the x86 ones and provide
equivalent functionality, so use them.

Also fix the sta2x11 case.  For that SOC the dma map ops need an
additional physical to dma address translations.  For swiotlb buffers
that is done throught the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that functions
returns a direct allocation and not a swiotlb buffer.  With the
generic swiotlb and dma-direct code phys_to_dma is not always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to dma address translation
is needed.

Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
 arch/x86/include/asm/device.h  |  3 +++
 arch/x86/include/asm/swiotlb.h |  8 -------
 arch/x86/kernel/pci-swiotlb.c  | 47 +-----------------------------------------
 arch/x86/pci/sta2x11-fixup.c   | 46 +++++------------------------------------
 4 files changed, 9 insertions(+), 95 deletions(-)

diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..a8f6c809d9b1 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
+#ifdef CONFIG_STA2X11
+	bool is_sta2x11;
+#endif
 };
 
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
 {
 }
 #endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-					void *vaddr, dma_addr_t dma_addr,
-					unsigned long attrs);
-
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
 
 int swiotlb __read_mostly;
 
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-					dma_addr_t *dma_handle, gfp_t flags,
-					unsigned long attrs)
-{
-	void *vaddr;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA
-	 * memory allocation ultimately failed.
-	 */
-	flags |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-	if (vaddr)
-		return vaddr;
-
-	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_addr,
-				      unsigned long attrs)
-{
-	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
-		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
-	else
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
-	.mapping_error = swiotlb_dma_mapping_error,
-	.alloc = x86_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.dma_supported = NULL,
-};
-
 /*
  * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
  *
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
 {
 	if (swiotlb) {
 		swiotlb_init(0);
-		dma_ops = &x86_swiotlb_dma_ops;
+		dma_ops = &swiotlb_dma_ops;
 	}
 }
 
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
 	return p;
 }
 
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- *     returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
-					    size_t size,
-					    dma_addr_t *dma_handle,
-					    gfp_t flags,
-					    unsigned long attrs)
-{
-	void *vaddr;
-
-	vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
-	*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
-	return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
-	.alloc = sta2x11_swiotlb_alloc_coherent,
-	.free = x86_swiotlb_free_coherent,
-	.map_page = swiotlb_map_page,
-	.unmap_page = swiotlb_unmap_page,
-	.map_sg = swiotlb_map_sg_attrs,
-	.unmap_sg = swiotlb_unmap_sg_attrs,
-	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = dma_direct_supported,
-};
-
 /* At setup time, we use our own ops if the device is a ConneXt one */
 static void sta2x11_setup_pdev(struct pci_dev *pdev)
 {
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
 		return;
 	pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
 	pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
-	pdev->dev.dma_ops = &sta2x11_dma_ops;
+	pdev->dev.dma_ops = &swiotlb_dma_ops;
+	pdev->dev.archdata.is_sta2x11 = true;
 
 	/* We must enable all devices as master, for audio DMA to work */
 	pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	struct sta2x11_mapping *map;
 
-	if (dev->dma_ops != &sta2x11_dma_ops) {
+	if (!dev->archdata.is_sta2x11) {
 		if (!dev->dma_mask)
 			return false;
 		return addr + size - 1 <= *dev->dma_mask;
@@ -249,7 +213,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
  */
 dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return paddr;
 	return p2a(paddr, to_pci_dev(dev));
 }
@@ -261,7 +225,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
  */
 phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-	if (dev->dma_ops != &sta2x11_dma_ops)
+	if (!dev->archdata.is_sta2x11)
 		return daddr;
 	return a2p(daddr, to_pci_dev(dev));
 }
-- 
2.14.2

^ permalink raw reply related	[flat|nested] 60+ messages in thread

end of thread, other threads:[~2018-03-19 10:39 UTC | newest]

Thread overview: 60+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-14 17:51 use generic dma-direct and swiotlb code for x86 V2 Christoph Hellwig
2018-03-14 17:51 ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 01/14] x86: remove X86_PPRO_FENCE Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 02/14] x86: remove dma_alloc_coherent_mask Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 03/14] x86: use dma-direct Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-15  8:56   ` Thomas Gleixner
2018-03-15  8:56     ` Thomas Gleixner
2018-03-15 11:53     ` Christoph Hellwig
2018-03-15 12:53       ` Thomas Gleixner
2018-03-15 13:40         ` Christoph Hellwig
2018-03-15 13:40           ` Christoph Hellwig
2018-03-15 13:45           ` Thomas Gleixner
2018-03-15 13:45             ` Thomas Gleixner
2018-03-14 17:52 ` [PATCH 04/14] x86: use generic swiotlb_ops Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-15  9:00   ` Thomas Gleixner
2018-03-15  9:00     ` Thomas Gleixner
2018-03-15 11:54     ` Christoph Hellwig
2018-03-15 11:54       ` Christoph Hellwig
2018-03-15 12:52       ` Thomas Gleixner
2018-03-15 12:52         ` Thomas Gleixner
2018-03-15 13:35         ` Christoph Hellwig
2018-03-15 13:35           ` Christoph Hellwig
2018-03-15 13:48           ` Thomas Gleixner
2018-03-15 13:48             ` Thomas Gleixner
2018-03-14 17:52 ` [PATCH 05/14] x86/amd_gart: look at coherent_dma_mask instead of GFP_DMA Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-15 17:47   ` Konrad Rzeszutek Wilk
2018-03-15 17:47     ` Konrad Rzeszutek Wilk
2018-03-14 17:52 ` [PATCH 06/14] x86/amd_gart: use dma_direct_{alloc,free} Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 07/14] iommu/amd_iommu: " Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-15 15:30   ` Joerg Roedel
2018-03-15 15:30     ` Joerg Roedel
2018-03-14 17:52 ` [PATCH 08/14] iommu/intel-iommu: cleanup intel_{alloc,free}_coherent Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 09/14] x86: remove dma_alloc_coherent_gfp_flags Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 10/14] set_memory.h: provide set_memory_{en,de}crypted stubs Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-15 17:50   ` Konrad Rzeszutek Wilk
2018-03-15 17:50     ` Konrad Rzeszutek Wilk
2018-03-14 17:52 ` [PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-15 17:51   ` Konrad Rzeszutek Wilk
2018-03-15 17:51     ` Konrad Rzeszutek Wilk
2018-03-15 17:54     ` Konrad Rzeszutek Wilk
2018-03-15 17:54       ` Konrad Rzeszutek Wilk
2018-03-14 17:52 ` [PATCH 12/14] dma-direct: handle the memory encryption bit in common code Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 13/14] dma-direct: handle force decryption for dma coherent buffers " Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-14 17:52 ` [PATCH 14/14] swiotlb: remove swiotlb_{alloc,free}_coherent Christoph Hellwig
2018-03-14 17:52   ` Christoph Hellwig
2018-03-19 10:38 use generic dma-direct and swiotlb code for x86 V3 Christoph Hellwig
2018-03-19 10:38 ` [PATCH 04/14] x86: use generic swiotlb_ops Christoph Hellwig
2018-03-19 10:38   ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.