iommu.lists.linux-foundation.org archive mirror
* dma-pool fixes
@ 2020-07-28 10:47 Christoph Hellwig
  2020-07-28 10:47 ` [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 10:47 UTC (permalink / raw)
  To: Amit Pundir, Nicolas Saenz Julienne
  Cc: iommu, Robin Murphy, linux-rpi-kernel, jeremy.linton, David Rientjes

Hi Amit,

can you try these two patches?  The first one makes sure we don't apply
physical address based checks for IOMMU allocations, and the second one
is a slightly tweaked version of the patch from Nicolas to allow dipping
into the CMA areas for allocations to expand the atomic pools.

* [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings
  2020-07-28 10:47 dma-pool fixes Christoph Hellwig
@ 2020-07-28 10:47 ` Christoph Hellwig
  2020-07-28 14:56   ` Nicolas Saenz Julienne
  2020-07-28 10:47 ` [PATCH 2/2] dma-pool: Only allocate from CMA when in same memory zone Christoph Hellwig
  2020-07-28 12:02 ` dma-pool fixes Amit Pundir
  2 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 10:47 UTC (permalink / raw)
  To: Amit Pundir, Nicolas Saenz Julienne
  Cc: iommu, Robin Murphy, linux-rpi-kernel, jeremy.linton, David Rientjes

When allocating coherent pool memory for an IOMMU mapping we don't care
about the DMA mask.  Move the guess for the initial GFP mask into
dma_direct_alloc_pages and pass dma_coherent_ok as a function pointer
argument so that it doesn't get applied to the IOMMU case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/iommu/dma-iommu.c   |   4 +-
 include/linux/dma-direct.h  |   3 -
 include/linux/dma-mapping.h |   5 +-
 kernel/dma/direct.c         |  13 ++--
 kernel/dma/pool.c           | 114 +++++++++++++++---------------------
 5 files changed, 62 insertions(+), 77 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4959f5df21bd07..5141d49a046baa 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1035,8 +1035,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
 
 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
 	    !gfpflags_allow_blocking(gfp) && !coherent)
-		cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
-					       gfp);
+		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
+					       gfp, NULL);
 	else
 		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
 	if (!cpu_addr)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index ab2e20cba9514b..ba22952c24e244 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -67,9 +67,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size,
 }
 
 u64 dma_direct_get_required_mask(struct device *dev);
-gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
-				  u64 *phys_mask);
-bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size);
 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index a33ed3954ed465..0dc08701d7b7ee 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -715,8 +715,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
 			pgprot_t prot, const void *caller);
 void dma_common_free_remap(void *cpu_addr, size_t size);
 
-void *dma_alloc_from_pool(struct device *dev, size_t size,
-			  struct page **ret_page, gfp_t flags);
+struct page *dma_alloc_from_pool(struct device *dev, size_t size,
+		void **cpu_addr, gfp_t flags,
+		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t));
 bool dma_free_from_pool(struct device *dev, void *start, size_t size);
 
 int
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 67f060b86a73fa..f17aec9d01f0c0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -45,7 +45,7 @@ u64 dma_direct_get_required_mask(struct device *dev)
 	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
 }
 
-gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
+static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
 				  u64 *phys_limit)
 {
 	u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
@@ -70,7 +70,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
 	return 0;
 }
 
-bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
+static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
 	return phys_to_dma_direct(dev, phys) + size - 1 <=
 			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
@@ -163,8 +163,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
 	size = PAGE_ALIGN(size);
 
 	if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
-		ret = dma_alloc_from_pool(dev, size, &page, gfp);
-		if (!ret)
+		u64 phys_mask;
+
+		gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+				&phys_mask);
+		page = dma_alloc_from_pool(dev, size, &ret, gfp,
+				dma_coherent_ok);
+		if (!page)
 			return NULL;
 		goto done;
 	}
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 6bc74a2d51273e..1ddcd48f271fff 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -196,93 +196,75 @@ static int __init dma_atomic_pool_init(void)
 }
 postcore_initcall(dma_atomic_pool_init);
 
-static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev)
+static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
 {
-	u64 phys_mask;
-	gfp_t gfp;
-
-	gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
-					  &phys_mask);
-	if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
+	if (prev == NULL) {
+		if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+			return atomic_pool_dma32;
+		if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+			return atomic_pool_dma;
+		return atomic_pool_kernel;
+	}
+	if (prev == atomic_pool_kernel)
+		return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma;
+	if (prev == atomic_pool_dma32)
 		return atomic_pool_dma;
-	if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
-		return atomic_pool_dma32;
-	return atomic_pool_kernel;
+	return NULL;
 }
 
-static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool)
+static struct page *__dma_alloc_from_pool(struct device *dev, size_t size,
+		struct gen_pool *pool, void **cpu_addr,
+		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
 {
-	if (bad_pool == atomic_pool_kernel)
-		return atomic_pool_dma32 ? : atomic_pool_dma;
+	unsigned long addr;
+	phys_addr_t phys;
 
-	if (bad_pool == atomic_pool_dma32)
-		return atomic_pool_dma;
+	addr = gen_pool_alloc(pool, size);
+	if (!addr)
+		return NULL;
 
-	return NULL;
-}
+	phys = gen_pool_virt_to_phys(pool, addr);
+	if (!phys_addr_ok(dev, phys, size)) {
+		gen_pool_free(pool, addr, size);
+		return NULL;
+	}
 
-static inline struct gen_pool *dma_guess_pool(struct device *dev,
-					      struct gen_pool *bad_pool)
-{
-	if (bad_pool)
-		return dma_get_safer_pool(bad_pool);
+	if (gen_pool_avail(pool) < atomic_pool_size)
+		schedule_work(&atomic_pool_work);
 
-	return dma_guess_pool_from_device(dev);
+	*cpu_addr = (void *)addr;
+	memset(*cpu_addr, 0, size);
+	return pfn_to_page(__phys_to_pfn(phys));
 }
 
-void *dma_alloc_from_pool(struct device *dev, size_t size,
-			  struct page **ret_page, gfp_t flags)
+struct page *dma_alloc_from_pool(struct device *dev, size_t size,
+		void **cpu_addr, gfp_t gfp,
+		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
 {
 	struct gen_pool *pool = NULL;
-	unsigned long val = 0;
-	void *ptr = NULL;
-	phys_addr_t phys;
-
-	while (1) {
-		pool = dma_guess_pool(dev, pool);
-		if (!pool) {
-			WARN(1, "Failed to get suitable pool for %s\n",
-			     dev_name(dev));
-			break;
-		}
-
-		val = gen_pool_alloc(pool, size);
-		if (!val)
-			continue;
-
-		phys = gen_pool_virt_to_phys(pool, val);
-		if (dma_coherent_ok(dev, phys, size))
-			break;
-
-		gen_pool_free(pool, val, size);
-		val = 0;
-	}
-
-
-	if (val) {
-		*ret_page = pfn_to_page(__phys_to_pfn(phys));
-		ptr = (void *)val;
-		memset(ptr, 0, size);
+	struct page *page;
 
-		if (gen_pool_avail(pool) < atomic_pool_size)
-			schedule_work(&atomic_pool_work);
+	while ((pool = dma_guess_pool(pool, gfp))) {
+		page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
+					     phys_addr_ok);
+		if (page)
+			return page;
 	}
 
-	return ptr;
+	WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
+	return NULL;
 }
 
 bool dma_free_from_pool(struct device *dev, void *start, size_t size)
 {
 	struct gen_pool *pool = NULL;
 
-	while (1) {
-		pool = dma_guess_pool(dev, pool);
-		if (!pool)
-			return false;
-
-		if (gen_pool_has_addr(pool, (unsigned long)start, size)) {
-			gen_pool_free(pool, (unsigned long)start, size);
-			return true;
-		}
+	while ((pool = dma_guess_pool(pool, 0))) {
+		if (!gen_pool_has_addr(pool, (unsigned long)start, size))
+			continue;
+		gen_pool_free(pool, (unsigned long)start, size);
+		return true;
 	}
+
+	return false;
 }
-- 
2.27.0


* [PATCH 2/2] dma-pool: Only allocate from CMA when in same memory zone
  2020-07-28 10:47 dma-pool fixes Christoph Hellwig
  2020-07-28 10:47 ` [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings Christoph Hellwig
@ 2020-07-28 10:47 ` Christoph Hellwig
  2020-07-28 12:02 ` dma-pool fixes Amit Pundir
  2 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 10:47 UTC (permalink / raw)
  To: Amit Pundir, Nicolas Saenz Julienne
  Cc: iommu, Robin Murphy, linux-rpi-kernel, jeremy.linton, David Rientjes

From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>

There is no guarantee about CMA's placement, so allocating a zone-specific
atomic pool from CMA might return memory from a completely different
memory zone. To get around this, double-check CMA's placement before
allocating from it.

Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
[hch: rebased, added a fallback to the page allocator, allow dipping into
      lower CMA pools]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/pool.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 1ddcd48f271fff..83fda10394937b 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -3,7 +3,9 @@
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2020 Google LLC
  */
+#include <linux/cma.h>
 #include <linux/debugfs.h>
+#include <linux/dma-contiguous.h>
 #include <linux/dma-direct.h>
 #include <linux/dma-noncoherent.h>
 #include <linux/init.h>
@@ -55,6 +57,29 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
 		pool_size_kernel += size;
 }
 
+static bool cma_in_zone(gfp_t gfp)
+{
+	unsigned long size;
+	phys_addr_t end;
+	struct cma *cma;
+
+	cma = dev_get_cma_area(NULL);
+	if (!cma)
+		return false;
+
+	size = cma_get_size(cma);
+	if (!size)
+		return false;
+
+	/* CMA can't cross zone boundaries, see cma_activate_area() */
+	end = cma_get_base(cma) - memblock_start_of_DRAM() + size - 1;
+	if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+		return end <= DMA_BIT_MASK(zone_dma_bits);
+	if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+		return end <= DMA_BIT_MASK(32);
+	return true;
+}
+
 static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
 			      gfp_t gfp)
 {
@@ -68,7 +93,11 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
 
 	do {
 		pool_size = 1 << (PAGE_SHIFT + order);
-		page = alloc_pages(gfp, order);
+		if (cma_in_zone(gfp))
+ 			page = dma_alloc_from_contiguous(NULL, 1 << order,
+ 							 order, false);
+		if (!page)
+			page = alloc_pages(gfp, order);
 	} while (!page && order-- > 0);
 	if (!page)
 		goto out;
-- 
2.27.0


* Re: dma-pool fixes
  2020-07-28 10:47 dma-pool fixes Christoph Hellwig
  2020-07-28 10:47 ` [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings Christoph Hellwig
  2020-07-28 10:47 ` [PATCH 2/2] dma-pool: Only allocate from CMA when in same memory zone Christoph Hellwig
@ 2020-07-28 12:02 ` Amit Pundir
  2020-07-28 12:07   ` Christoph Hellwig
  2 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-07-28 12:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes, Robin Murphy

Hi Christoph,

On Tue, 28 Jul 2020 at 16:17, Christoph Hellwig <hch@lst.de> wrote:
>
> Hi Amit,
>
> can you try these two patches?  The first one makes sure we don't apply
> physical address based checks for IOMMU allocations, and the second one
> is a slightly tweaked version of the patch from Nicolas to allow dipping
> into the CMA areas for allocations to expand the atomic pools.

Sorry, verified a couple of times but these two patches are not working
for me. I'm stuck at the bootloader splash screen on my phone.

Regards,
Amit Pundir

* Re: dma-pool fixes
  2020-07-28 12:02 ` dma-pool fixes Amit Pundir
@ 2020-07-28 12:07   ` Christoph Hellwig
  2020-07-28 12:25     ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 12:07 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Tue, Jul 28, 2020 at 05:32:56PM +0530, Amit Pundir wrote:
> > can you try these two patches?  The first one makes sure we don't apply
> > physical address based checks for IOMMU allocations, and the second one
> > is a slightly tweaked version of the patch from Nicolas to allow dipping
> > into the CMA areas for allocations to expand the atomic pools.
> 
> Sorry, verified a couple of times but these two patches are not working
> for me. I'm stuck at the bootloader splash screen on my phone.

Thanks for testing.  The only intended functional change compared to
Friday's patch was the issue Nicolas pointed out.  Can you try this hack
on top?


diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 83fda10394937b..88e40a022b6bfd 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -70,13 +70,14 @@ static bool cma_in_zone(gfp_t gfp)
 	size = cma_get_size(cma);
 	if (!size)
 		return false;
-
+#if 0
 	/* CMA can't cross zone boundaries, see cma_activate_area() */
 	end = cma_get_base(cma) - memblock_start_of_DRAM() + size - 1;
 	if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
 		return end <= DMA_BIT_MASK(zone_dma_bits);
 	if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
 		return end <= DMA_BIT_MASK(32);
+#endif
 	return true;
 }
 

* Re: dma-pool fixes
  2020-07-28 12:07   ` Christoph Hellwig
@ 2020-07-28 12:25     ` Amit Pundir
  2020-07-28 12:41       ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-07-28 12:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes, Robin Murphy

On Tue, 28 Jul 2020 at 17:37, Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, Jul 28, 2020 at 05:32:56PM +0530, Amit Pundir wrote:
> > > can you try these two patches?  The first one makes sure we don't apply
> > > physical address based checks for IOMMU allocations, and the second one
> > > is a slightly tweaked version of the patch from Nicolas to allow dipping
> > > into the CMA areas for allocations to expand the atomic pools.
> >
> > Sorry, verified a couple of times but these two patches are not working
> > for me. I'm stuck at the bootloader splash screen on my phone.
>
> Thanks for testing.  The only intended functional change compared to
> Fridays patch was the issue Nicolas pointed out.  Can you try this hack
> on top?

Yes, that worked.

>
>
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index 83fda10394937b..88e40a022b6bfd 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -70,13 +70,14 @@ static bool cma_in_zone(gfp_t gfp)
>         size = cma_get_size(cma);
>         if (!size)
>                 return false;
> -
> +#if 0
>         /* CMA can't cross zone boundaries, see cma_activate_area() */
>         end = cma_get_base(cma) - memblock_start_of_DRAM() + size - 1;
>         if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
>                 return end <= DMA_BIT_MASK(zone_dma_bits);
>         if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
>                 return end <= DMA_BIT_MASK(32);
> +#endif
>         return true;
>  }
>

* Re: dma-pool fixes
  2020-07-28 12:25     ` Amit Pundir
@ 2020-07-28 12:41       ` Christoph Hellwig
  2020-07-28 12:48         ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 12:41 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Tue, Jul 28, 2020 at 05:55:30PM +0530, Amit Pundir wrote:
> On Tue, 28 Jul 2020 at 17:37, Christoph Hellwig <hch@lst.de> wrote:
> >
> > On Tue, Jul 28, 2020 at 05:32:56PM +0530, Amit Pundir wrote:
> > > > can you try these two patches?  The first one makes sure we don't apply
> > > > physical address based checks for IOMMU allocations, and the second one
> > > > is a slightly tweaked version of the patch from Nicolas to allow dipping
> > > > into the CMA areas for allocations to expand the atomic pools.
> > >
> > > Sorry, verified a couple of times but these two patches are not working
> > > for me. I'm stuck at the bootloader splash screen on my phone.
> >
> > Thanks for testing.  The only intended functional change compared to
> > Fridays patch was the issue Nicolas pointed out.  Can you try this hack
> > on top?
> 
> Yes, that worked.

Oh well, this leaves me confused again.  It looks like your setup
really needs a CMA in zone normal for the dma or dma32 pool.

* Re: dma-pool fixes
  2020-07-28 12:41       ` Christoph Hellwig
@ 2020-07-28 12:48         ` Amit Pundir
  2020-07-28 15:30           ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-07-28 12:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes, Robin Murphy

On Tue, 28 Jul 2020 at 18:11, Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, Jul 28, 2020 at 05:55:30PM +0530, Amit Pundir wrote:
> > On Tue, 28 Jul 2020 at 17:37, Christoph Hellwig <hch@lst.de> wrote:
> > >
> > > On Tue, Jul 28, 2020 at 05:32:56PM +0530, Amit Pundir wrote:
> > > > > can you try these two patches?  The first one makes sure we don't apply
> > > > > physical address based checks for IOMMU allocations, and the second one
> > > > > is a slightly tweaked version of the patch from Nicolas to allow dipping
> > > > > into the CMA areas for allocations to expand the atomic pools.
> > > >
> > > > Sorry, verified a couple of times but these two patches are not working
> > > > for me. I'm stuck at the bootloader splash screen on my phone.
> > >
> > > Thanks for testing.  The only intended functional change compared to
> > > Fridays patch was the issue Nicolas pointed out.  Can you try this hack
> > > on top?
> >
> > Yes, that worked.
>
> Oh well, this leaves me confused again.  It looks like your setup
> really needs a CMA in zone normal for the dma or dma32 pool.

Anything I should look up in the downstream kernel/dts?

Regards,
Amit Pundir

* Re: [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings
  2020-07-28 10:47 ` [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings Christoph Hellwig
@ 2020-07-28 14:56   ` Nicolas Saenz Julienne
  2020-07-28 15:28     ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Nicolas Saenz Julienne @ 2020-07-28 14:56 UTC (permalink / raw)
  To: Christoph Hellwig, Amit Pundir
  Cc: iommu, Robin Murphy, linux-rpi-kernel, jeremy.linton, David Rientjes


On Tue, 2020-07-28 at 12:47 +0200, Christoph Hellwig wrote:
> When allocating coherent pool memory for an IOMMU mapping we don't care
> about the DMA mask.  Move the guess for the initial GFP mask into the
> dma_direct_alloc_pages and pass dma_coherent_ok as a function pointer
> argument so that it doesn't get applied to the IOMMU case.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/iommu/dma-iommu.c   |   4 +-
>  include/linux/dma-direct.h  |   3 -
>  include/linux/dma-mapping.h |   5 +-
>  kernel/dma/direct.c         |  13 ++--
>  kernel/dma/pool.c           | 114 +++++++++++++++---------------------
>  5 files changed, 62 insertions(+), 77 deletions(-)
> 

[...]

> -static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool)
> +static struct page *__dma_alloc_from_pool(struct device *dev, size_t size,
> +		struct gen_pool *pool, void **cpu_addr,
> +		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
>  {
> -	if (bad_pool == atomic_pool_kernel)
> -		return atomic_pool_dma32 ? : atomic_pool_dma;
> +	unsigned long addr;
> +	phys_addr_t phys;
>  
> -	if (bad_pool == atomic_pool_dma32)
> -		return atomic_pool_dma;
> +	addr = gen_pool_alloc(pool, size);
> +	if (!addr)
> +		return NULL;
>  
> -	return NULL;
> -}
> +	phys = gen_pool_virt_to_phys(pool, addr);
> +	if (!phys_addr_ok(dev, phys, size)) {

Shouldn't we check if phys_addr_ok() != NULL?

Regards,
Nicolas




* Re: [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings
  2020-07-28 14:56   ` Nicolas Saenz Julienne
@ 2020-07-28 15:28     ` Christoph Hellwig
  0 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 15:28 UTC (permalink / raw)
  To: Nicolas Saenz Julienne
  Cc: Amit Pundir, jeremy.linton, iommu, linux-rpi-kernel,
	David Rientjes, Robin Murphy, Christoph Hellwig

On Tue, Jul 28, 2020 at 04:56:29PM +0200, Nicolas Saenz Julienne wrote:
> > +	phys = gen_pool_virt_to_phys(pool, addr);
> > +	if (!phys_addr_ok(dev, phys, size)) {
> 
> Shoudn't we check if phys_addr_ok() != NULL?

Yes, we should.  This also means the system that causes problems for
Amit isn't using the iommu.
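
For reference, a minimal sketch of __dma_alloc_from_pool() from the patch
above with that NULL check folded in (an illustration of the suggestion,
not necessarily the fix that was ultimately committed):

static struct page *__dma_alloc_from_pool(struct device *dev, size_t size,
		struct gen_pool *pool, void **cpu_addr,
		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
{
	unsigned long addr;
	phys_addr_t phys;

	addr = gen_pool_alloc(pool, size);
	if (!addr)
		return NULL;

	phys = gen_pool_virt_to_phys(pool, addr);
	/* treat a NULL phys_addr_ok as "no physical address constraint",
	 * which is what the IOMMU caller passes */
	if (phys_addr_ok && !phys_addr_ok(dev, phys, size)) {
		gen_pool_free(pool, addr, size);
		return NULL;
	}

	if (gen_pool_avail(pool) < atomic_pool_size)
		schedule_work(&atomic_pool_work);

	*cpu_addr = (void *)addr;
	memset(*cpu_addr, 0, size);
	return pfn_to_page(__phys_to_pfn(phys));
}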

* Re: dma-pool fixes
  2020-07-28 12:48         ` Amit Pundir
@ 2020-07-28 15:30           ` Christoph Hellwig
  2020-07-29 10:45             ` Nicolas Saenz Julienne
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-28 15:30 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Tue, Jul 28, 2020 at 06:18:41PM +0530, Amit Pundir wrote:
> > Oh well, this leaves me confused again.  It looks like your setup
> > really needs a CMA in zone normal for the dma or dma32 pool.
> 
> Anything I should look up in the downstream kernel/dts?

I don't have a good idea right now.  Nicolas, can you think of something
else?

* Re: dma-pool fixes
  2020-07-28 15:30           ` Christoph Hellwig
@ 2020-07-29 10:45             ` Nicolas Saenz Julienne
  2020-07-29 12:22               ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Nicolas Saenz Julienne @ 2020-07-29 10:45 UTC (permalink / raw)
  To: Christoph Hellwig, Amit Pundir
  Cc: iommu, Robin Murphy, linux-rpi-kernel, jeremy.linton, David Rientjes

On Tue, 2020-07-28 at 17:30 +0200, Christoph Hellwig wrote:
> On Tue, Jul 28, 2020 at 06:18:41PM +0530, Amit Pundir wrote:
> > > Oh well, this leaves me confused again.  It looks like your setup
> > > really needs a CMA in zone normal for the dma or dma32 pool.
> > 
> > Anything I should look up in the downstream kernel/dts?
> 
> I don't have a good idea right now.  Nicolas, can you think of something
> else?

To summarise, the device is:
 - Using the dma-direct code path.
 - Requesting ZONE_DMA memory, yet failing when the memory provided falls in
   ZONE_DMA. In practice the only acceptable memory comes from CMA, which is
   located at the top of the 4GB boundary.

My wild guess is that we may be abusing an iommu identity mapping setup by
firmware.

That said, what would be helpful to me is to find out the troublesome device.
Amit, could you try adding this patch along with Christoph's modified series
(so the board boots). Ultimately DMA atomic allocations are not that common, so
we should get only a few hits:

diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 83fda1039493..de93fce6d5d2 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -276,8 +276,11 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size,
        while ((pool = dma_guess_pool(pool, gfp))) {
                page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
                                             phys_addr_ok);
-               if (page)
+               if (page) {
+                       dev_err(dev, "%s: phys addr 0x%llx, size %lu, dev->coherent_dma_mask 0x%llx, dev->bus_dma_limit 0x%llx\n",
+                               __func__, (phys_addr_t)*cpu_addr, size, dev->coherent_dma_mask, dev->bus_dma_limit);
                        return page;
+               }
        }
 
        WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));



* Re: dma-pool fixes
  2020-07-29 10:45             ` Nicolas Saenz Julienne
@ 2020-07-29 12:22               ` Amit Pundir
  2020-07-31  7:46                 ` Amit Pundir
  2020-07-31 10:47                 ` Nicolas Saenz Julienne
  0 siblings, 2 replies; 35+ messages in thread
From: Amit Pundir @ 2020-07-29 12:22 UTC (permalink / raw)
  To: Nicolas Saenz Julienne
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Wed, 29 Jul 2020 at 16:15, Nicolas Saenz Julienne
<nsaenzjulienne@suse.de> wrote:
>
> On Tue, 2020-07-28 at 17:30 +0200, Christoph Hellwig wrote:
> > On Tue, Jul 28, 2020 at 06:18:41PM +0530, Amit Pundir wrote:
> > > > Oh well, this leaves me confused again.  It looks like your setup
> > > > really needs a CMA in zone normal for the dma or dma32 pool.
> > >
> > > Anything I should look up in the downstream kernel/dts?
> >
> > I don't have a good idea right now.  Nicolas, can you think of something
> > else?
>
> To summarise, the device is:
>  - Using the dma-direct code path.
>  - Requesting ZONE_DMA memory to then fail when provided memory falls in
>    ZONE_DMA. Actually, the only acceptable memory comes from CMA, which is
>    located topmost of the 4GB boundary.
>
> My wild guess is that we may be abusing an iommu identity mapping setup by
> firmware.
>
> That said, what would be helpful to me is to find out the troublesome device.
> Amit, could you try adding this patch along with Christoph's modified series
> (so the board boots). Ultimately DMA atomic allocations are not that common, so
> we should get only a few hits:

Hi, still not hitting dma_alloc_from_pool().

I hit the following direct alloc path only once, at startup:

dma_alloc_coherent ()
-> dma_alloc_attrs()
   -> dma_is_direct() -> dma_direct_alloc()
      -> dma_direct_alloc_pages()
         -> dma_should_alloc_from_pool() #returns FALSE from here

After that I'm hitting the following iommu dma alloc path all the time:

dma_alloc_coherent()
-> dma_alloc_attrs()
   -> (ops->alloc) -> iommu_dma_alloc()
      -> iommu_dma_alloc_remap() #always returns from here

So dma_alloc_from_pool() is not getting called at all in either of the
above cases.
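
This looks consistent with patch 1, where the atomic pool is only consulted
on the IOMMU path for non-blocking, non-coherent allocations; quoting the
patched iommu_dma_alloc() hunk for reference:

	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    !gfpflags_allow_blocking(gfp) && !coherent)
		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
					       gfp, NULL);
	else
		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);

so, if that reading is right, allocations that are allowed to block end up in
iommu_dma_alloc_remap() without ever touching the pools, which matches the
trace above.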

>
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index 83fda1039493..de93fce6d5d2 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -276,8 +276,11 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size,
>         while ((pool = dma_guess_pool(pool, gfp))) {
>                 page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
>                                              phys_addr_ok);
> -               if (page)
> +               if (page) {
> +                       dev_err(dev, "%s: phys addr 0x%llx, size %lu, dev->coherent_dma_mask 0x%llx, dev->bus_dma_limit 0x%llx\n",
> +                               __func__, (phys_addr_t)*cpu_addr, size, dev->coherent_dma_mask, dev->bus_dma_limit);
>                         return page;
> +               }
>         }
>
>         WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
>
>

* Re: dma-pool fixes
  2020-07-29 12:22               ` Amit Pundir
@ 2020-07-31  7:46                 ` Amit Pundir
  2020-07-31 13:09                   ` Christoph Hellwig
  2020-07-31 10:47                 ` Nicolas Saenz Julienne
  1 sibling, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-07-31  7:46 UTC (permalink / raw)
  To: Nicolas Saenz Julienne
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Wed, 29 Jul 2020 at 17:52, Amit Pundir <amit.pundir@linaro.org> wrote:
>
> On Wed, 29 Jul 2020 at 16:15, Nicolas Saenz Julienne
> <nsaenzjulienne@suse.de> wrote:
> >
> > On Tue, 2020-07-28 at 17:30 +0200, Christoph Hellwig wrote:
> > > On Tue, Jul 28, 2020 at 06:18:41PM +0530, Amit Pundir wrote:
> > > > > Oh well, this leaves me confused again.  It looks like your setup
> > > > > really needs a CMA in zone normal for the dma or dma32 pool.
> > > >
> > > > Anything I should look up in the downstream kernel/dts?
> > >
> > > I don't have a good idea right now.  Nicolas, can you think of something
> > > else?
> >
> > To summarise, the device is:
> >  - Using the dma-direct code path.
> >  - Requesting ZONE_DMA memory to then fail when provided memory falls in
> >    ZONE_DMA. Actually, the only acceptable memory comes from CMA, which is
> >    located topmost of the 4GB boundary.
> >
> > My wild guess is that we may be abusing an iommu identity mapping setup by
> > firmware.
> >

Hi Nicolas, Christoph,

Just out of curiosity, I'm wondering if we can restore the earlier
behaviour and make DMA atomic allocation configurable through the
platform-specific device tree instead?

Or, if you can allow a more hackish approach, restore the earlier
logic using of_machine_is_compatible() just for my device for the
time being. Meanwhile I'm checking with other developers running the
mainline kernel on sdm845 phones, like the OnePlus 6/6T, to see
whether they hit this issue too.
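
As a rough illustration of the kind of hack being suggested (a sketch only;
the helper name and the compatible string are placeholders, not taken from
any posted patch):

/* hypothetical quirk: gate the old pool behaviour on a single machine */
static bool dma_pool_force_legacy(void)
{
	/* "xiaomi,beryllium" is a placeholder compatible string */
	return IS_ENABLED(CONFIG_OF) &&
	       of_machine_is_compatible("xiaomi,beryllium");
}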

Regards,
Amit Pundir

> > That said, what would be helpful to me is to find out the troublesome device.
> > Amit, could you try adding this patch along with Christoph's modified series
> > (so the board boots). Ultimately DMA atomic allocations are not that common, so
> > we should get only a few hits:
>
> Hi, still not hitting dma_alloc_from_pool().
>
> I hit the following direct alloc path only once, at starting:
>
> dma_alloc_coherent ()
> -> dma_alloc_attrs()
>    -> dma_is_direct() -> dma_direct_alloc()
>       -> dma_direct_alloc_pages()
>          -> dma_should_alloc_from_pool() #returns FALSE from here
>
> After that I'm hitting following iommu dma alloc path all the time:
>
> dma_alloc_coherent()
> -> dma_alloc_attrs()
>    -> (ops->alloc) -> iommu_dma_alloc()
>       -> iommu_dma_alloc_remap() #always returns from here
>
> So dma_alloc_from_pool() is not getting called at all in either of the
> above cases.
>
> >
> > diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> > index 83fda1039493..de93fce6d5d2 100644
> > --- a/kernel/dma/pool.c
> > +++ b/kernel/dma/pool.c
> > @@ -276,8 +276,11 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size,
> >         while ((pool = dma_guess_pool(pool, gfp))) {
> >                 page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
> >                                              phys_addr_ok);
> > -               if (page)
> > +               if (page) {
> > +                       dev_err(dev, "%s: phys addr 0x%llx, size %lu, dev->coherent_dma_mask 0x%llx, dev->bus_dma_limit 0x%llx\n",
> > +                               __func__, (phys_addr_t)*cpu_addr, size, dev->coherent_dma_mask, dev->bus_dma_limit);
> >                         return page;
> > +               }
> >         }
> >
> >         WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
> >
> >

* Re: dma-pool fixes
  2020-07-29 12:22               ` Amit Pundir
  2020-07-31  7:46                 ` Amit Pundir
@ 2020-07-31 10:47                 ` Nicolas Saenz Julienne
  2020-07-31 11:17                   ` Amit Pundir
  1 sibling, 1 reply; 35+ messages in thread
From: Nicolas Saenz Julienne @ 2020-07-31 10:47 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

Hi Amit,

On Wed, 2020-07-29 at 17:52 +0530, Amit Pundir wrote:
> On Wed, 29 Jul 2020 at 16:15, Nicolas Saenz Julienne
> <nsaenzjulienne@suse.de> wrote:
> > On Tue, 2020-07-28 at 17:30 +0200, Christoph Hellwig wrote:
> > > On Tue, Jul 28, 2020 at 06:18:41PM +0530, Amit Pundir wrote:
> > > > > Oh well, this leaves me confused again.  It looks like your setup
> > > > > really needs a CMA in zone normal for the dma or dma32 pool.
> > > > 
> > > > Anything I should look up in the downstream kernel/dts?
> > > 
> > > I don't have a good idea right now.  Nicolas, can you think of something
> > > else?
> > 
> > To summarise, the device is:
> >  - Using the dma-direct code path.
> >  - Requesting ZONE_DMA memory to then fail when provided memory falls in
> >    ZONE_DMA. Actually, the only acceptable memory comes from CMA, which is
> >    located topmost of the 4GB boundary.
> > 
> > My wild guess is that we may be abusing an iommu identity mapping setup by
> > firmware.
> > 
> > That said, what would be helpful to me is to find out the troublesome device.
> > Amit, could you try adding this patch along with Christoph's modified series
> > (so the board boots). Ultimately DMA atomic allocations are not that common, so
> > we should get only a few hits:
> 
> Hi, still not hitting dma_alloc_from_pool().

Sorry I insisted, but not hitting the atomic path makes the issue even harder
to understand.

> I hit the following direct alloc path only once, at starting:
> 
> dma_alloc_coherent ()
> -> dma_alloc_attrs()
>    -> dma_is_direct() -> dma_direct_alloc()
>       -> dma_direct_alloc_pages()
>          -> dma_should_alloc_from_pool() #returns FALSE from here
> 
> After that I'm hitting following iommu dma alloc path all the time:
> 
> dma_alloc_coherent()
> -> dma_alloc_attrs()
>    -> (ops->alloc) -> iommu_dma_alloc()
>       -> iommu_dma_alloc_remap() #always returns from here
> 
> So dma_alloc_from_pool() is not getting called at all in either of the
> above cases.

OK, so let's see who's doing what and with what constraints:

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4959f5df21bd..d28b3e4b91d3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -594,6 +594,9 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
        dma_addr_t iova;
        void *vaddr;
 
+       dev_info(dev, "%s, bus_dma_limit %llx, dma_mask %llx, coherent_dma_mask %llx, in irq %lu, size %lu, gfp %x, attrs %lx\n",
+                __func__, dev->bus_dma_limit, *dev->dma_mask, dev->coherent_dma_mask, in_interrupt(), size, gfp, attrs);
+
        *dma_handle = DMA_MAPPING_ERROR;
 
        if (unlikely(iommu_dma_deferred_attach(dev, domain)))
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index bb0041e99659..e5474e709e7b 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -160,6 +160,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
 
        size = PAGE_ALIGN(size);
 
+       dev_info(dev, "%s, bus_dma_limit %llx, dma_mask %llx, coherent_dma_mask %llx, in irq %lu, size %lu, gfp %x, attrs %lx\n",
+                __func__, dev->bus_dma_limit, *dev->dma_mask, dev->coherent_dma_mask, in_interrupt(), size, gfp, attrs);
+
        if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
                ret = dma_alloc_from_pool(dev, size, &page, gfp);
                if (!ret)


* Re: dma-pool fixes
  2020-07-31 10:47                 ` Nicolas Saenz Julienne
@ 2020-07-31 11:17                   ` Amit Pundir
  2020-07-31 14:15                     ` Nicolas Saenz Julienne
  0 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-07-31 11:17 UTC (permalink / raw)
  To: Nicolas Saenz Julienne
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Fri, 31 Jul 2020 at 16:17, Nicolas Saenz Julienne
<nsaenzjulienne@suse.de> wrote:
>
> Hi Amit,
>
> On Wed, 2020-07-29 at 17:52 +0530, Amit Pundir wrote:
> > On Wed, 29 Jul 2020 at 16:15, Nicolas Saenz Julienne
> > <nsaenzjulienne@suse.de> wrote:
> > > On Tue, 2020-07-28 at 17:30 +0200, Christoph Hellwig wrote:
> > > > On Tue, Jul 28, 2020 at 06:18:41PM +0530, Amit Pundir wrote:
> > > > > > Oh well, this leaves me confused again.  It looks like your setup
> > > > > > really needs a CMA in zone normal for the dma or dma32 pool.
> > > > >
> > > > > Anything I should look up in the downstream kernel/dts?
> > > >
> > > > I don't have a good idea right now.  Nicolas, can you think of something
> > > > else?
> > >
> > > To summarise, the device is:
> > >  - Using the dma-direct code path.
> > >  - Requesting ZONE_DMA memory to then fail when provided memory falls in
> > >    ZONE_DMA. Actually, the only acceptable memory comes from CMA, which is
> > >    located topmost of the 4GB boundary.
> > >
> > > My wild guess is that we may be abusing an iommu identity mapping setup by
> > > firmware.
> > >
> > > That said, what would be helpful to me is to find out the troublesome device.
> > > Amit, could you try adding this patch along with Christoph's modified series
> > > (so the board boots). Ultimately DMA atomic allocations are not that common, so
> > > we should get only a few hits:
> >
> > Hi, still not hitting dma_alloc_from_pool().
>
> Sorry I insisted, but not hitting the atomic path makes the issue even harder
> to understand.

No worries. I was more concerned about not following the instructions
correctly. Thank you for looking into this issue.

>
> > I hit the following direct alloc path only once, at starting:
> >
> > dma_alloc_coherent ()
> > -> dma_alloc_attrs()
> >    -> dma_is_direct() -> dma_direct_alloc()
> >       -> dma_direct_alloc_pages()
> >          -> dma_should_alloc_from_pool() #returns FALSE from here
> >
> > After that I'm hitting following iommu dma alloc path all the time:
> >
> > dma_alloc_coherent()
> > -> dma_alloc_attrs()
> >    -> (ops->alloc) -> iommu_dma_alloc()
> >       -> iommu_dma_alloc_remap() #always returns from here
> >
> > So dma_alloc_from_pool() is not getting called at all in either of the
> > above cases.
>
> Ok, so lets see who's doing what and with what constraints:

Here is the relevant dmesg log: https://pastebin.ubuntu.com/p/dh3pPnxS2v/

Regards,
Amit Pundir

>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 4959f5df21bd..d28b3e4b91d3 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -594,6 +594,9 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
>         dma_addr_t iova;
>         void *vaddr;
>
> +       dev_info(dev, "%s, bus_dma_limit %llx, dma_mask %llx, coherent_dma_mask %llx, in irq %lu, size %lu, gfp %x, attrs %lx\n",
> +                __func__, dev->bus_dma_limit, *dev->dma_mask, dev->coherent_dma_mask, in_interrupt(), size, gfp, attrs);
> +
>         *dma_handle = DMA_MAPPING_ERROR;
>
>         if (unlikely(iommu_dma_deferred_attach(dev, domain)))
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index bb0041e99659..e5474e709e7b 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -160,6 +160,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
>
>         size = PAGE_ALIGN(size);
>
> +       dev_info(dev, "%s, bus_dma_limit %llx, dma_mask %llx, coherent_dma_mask %llx, in irq %lu, size %lu, gfp %x, attrs %lx\n",
> +                __func__, dev->bus_dma_limit, *dev->dma_mask, dev->coherent_dma_mask, in_interrupt(), size, gfp, attrs);
> +
>         if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
>                 ret = dma_alloc_from_pool(dev, size, &page, gfp);
>                 if (!ret)
>

* Re: dma-pool fixes
  2020-07-31  7:46                 ` Amit Pundir
@ 2020-07-31 13:09                   ` Christoph Hellwig
  2020-07-31 14:15                     ` Amit Pundir
  2020-07-31 19:04                     ` David Rientjes via iommu
  0 siblings, 2 replies; 35+ messages in thread
From: Christoph Hellwig @ 2020-07-31 13:09 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Fri, Jul 31, 2020 at 01:16:34PM +0530, Amit Pundir wrote:
> Hi Nicolas, Christoph,
> 
> Just out of curiosity, I'm wondering if we can restore the earlier
> behaviour and make DMA atomic allocation configured thru platform
> specific device tree instead?
> 
> Or if you can allow a more hackish approach to restore the earlier
> logic, using of_machine_is_compatible() just for my device for the
> time being. Meanwhile I'm checking with other developers running the
> mainline kernel on sdm845 phones like OnePlus 6/6T, if they see this
> issue too.

If we don't find a fix for your platform I'm going to send Linus a
last minute revert this weekend, to stick to the no regressions policy.
I still hope we can fix the issue for real.

* Re: dma-pool fixes
  2020-07-31 11:17                   ` Amit Pundir
@ 2020-07-31 14:15                     ` Nicolas Saenz Julienne
  2020-07-31 14:20                       ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Nicolas Saenz Julienne @ 2020-07-31 14:15 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Fri, 2020-07-31 at 16:47 +0530, Amit Pundir wrote:
> On Fri, 31 Jul 2020 at 16:17, Nicolas Saenz Julienne

[...]

> > Ok, so lets see who's doing what and with what constraints:
> 
> Here is the relevant dmesg log: https://pastebin.ubuntu.com/p/dh3pPnxS2v/

Sadly nothing out of the ordinary, looks reasonable.

I have an idea. I've been going over the downstream device tree, and it seems
the reserved-memory entries, especially the ones marked with 'no-map', don't
fully match what we have upstream. On top of that, all these reserved areas
seem to fall into ZONE_DMA.

So what could be happening is that, while allocating pages for the ZONE_DMA
atomic pool, something in the page allocator is writing to or mapping a
reserved area, triggering some kind of fault.

Amit, could you go over the no-map reserved-memory entries in the downstream
device tree, both in 'beryllium-*.dtsi' (I think those are the relevant ones)
and 'sdm845.dtsi'[1], and make sure they match what you are using? If not,
just edit them in and see if it helps. If you need any help with that, I'll
be happy to give you a hand.

Regards,
Nicolas

[1] You could also extract the device tree from a device running with the
    downstream kernel, whatever is easier for you.


* Re: dma-pool fixes
  2020-07-31 13:09                   ` Christoph Hellwig
@ 2020-07-31 14:15                     ` Amit Pundir
  2020-07-31 19:04                     ` David Rientjes via iommu
  1 sibling, 0 replies; 35+ messages in thread
From: Amit Pundir @ 2020-07-31 14:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: iommu, jeremy.linton, Caleb Connolly, linux-rpi-kernel,
	David Rientjes, Robin Murphy

On Fri, 31 Jul 2020 at 18:39, Christoph Hellwig <hch@lst.de> wrote:
>
> On Fri, Jul 31, 2020 at 01:16:34PM +0530, Amit Pundir wrote:
> > Hi Nicolas, Christoph,
> >
> > Just out of curiosity, I'm wondering if we can restore the earlier
> > behaviour and make DMA atomic allocation configured thru platform
> > specific device tree instead?
> >
> > Or if you can allow a more hackish approach to restore the earlier
> > logic, using of_machine_is_compatible() just for my device for the
> > time being. Meanwhile I'm checking with other developers running the
> > mainline kernel on sdm845 phones like OnePlus 6/6T, if they see this
> > issue too.
>
> If we don't find a fix for your platform I'm going to send Linus a
> last minute revert this weekend, to stick to the no regressions policy.
> I still hope we can fix the issue for real.

Thank you. I really appreciate that.

FWIW, I got confirmation from Caleb (CCed) that he sees the same
boot regression on OnePlus 6/6T family phones as well.

Regards,
Amit Pundir

* Re: dma-pool fixes
  2020-07-31 14:15                     ` Nicolas Saenz Julienne
@ 2020-07-31 14:20                       ` Amit Pundir
  2020-08-01  7:34                         ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-07-31 14:20 UTC (permalink / raw)
  To: Nicolas Saenz Julienne
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Fri, 31 Jul 2020 at 19:45, Nicolas Saenz Julienne
<nsaenzjulienne@suse.de> wrote:
>
> On Fri, 2020-07-31 at 16:47 +0530, Amit Pundir wrote:
> > On Fri, 31 Jul 2020 at 16:17, Nicolas Saenz Julienne
>
> [...]
>
> > > Ok, so lets see who's doing what and with what constraints:
> >
> > Here is the relevant dmesg log: https://pastebin.ubuntu.com/p/dh3pPnxS2v/
>
> Sadly nothing out of the ordinary, looks reasonable.
>
> I have an idea, I've been going over the downstream device tree and it seems
> the reserved-memory entries, specially the ones marked with 'no-map' don't
> fully match what we have upstream. On top of that all these reserved areas seem
> to fall into ZONE_DMA.
>
> So, what could be happening is that, while allocating pages for the ZONE_DMA
> atomic pool, something in the page allocator is either writing/mapping into a
> reserved area triggering some kind of fault.
>
> Amir, could you go over the no-map reserved-memory entries in the downstream
> device-tree, both in 'beryllium-*.dtsi' (I think those are the relevant ones)
> and 'sdm845.dtsi'[1], and make sure they match what you are using. If not just
> edit them in and see if it helps. If you need any help with that I'll be happy
> to give you a hand.

Thank you for the pointers. I'll try to match my dts' reserved-memory
entries with the downstream dts. I'll let you know how it goes.

Regards,
Amit Pundir

>
> Regards,
> Nicolas
>
> [1] You could also extract the device tree from a device running with the
>     downstream kernel, whatever is easier for you.
>

* Re: dma-pool fixes
  2020-07-31 13:09                   ` Christoph Hellwig
  2020-07-31 14:15                     ` Amit Pundir
@ 2020-07-31 19:04                     ` David Rientjes via iommu
  2020-08-01  8:20                       ` David Rientjes via iommu
  2020-08-01  8:29                       ` Christoph Hellwig
  1 sibling, 2 replies; 35+ messages in thread
From: David Rientjes via iommu @ 2020-07-31 19:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Amit Pundir, jeremy.linton, iommu, linux-rpi-kernel, Robin Murphy

On Fri, 31 Jul 2020, Christoph Hellwig wrote:

> > Hi Nicolas, Christoph,
> > 
> > Just out of curiosity, I'm wondering if we can restore the earlier
> > behaviour and make DMA atomic allocation configured thru platform
> > specific device tree instead?
> > 
> > Or if you can allow a more hackish approach to restore the earlier
> > logic, using of_machine_is_compatible() just for my device for the
> > time being. Meanwhile I'm checking with other developers running the
> > mainline kernel on sdm845 phones like OnePlus 6/6T, if they see this
> > issue too.
> 
> If we don't find a fix for your platform I'm going to send Linus a
> last minute revert this weekend, to stick to the no regressions policy.
> I still hope we can fix the issue for real.
> 

What would be the scope of this potential revert?

* Re: dma-pool fixes
  2020-07-31 14:20                       ` Amit Pundir
@ 2020-08-01  7:34                         ` Amit Pundir
  0 siblings, 0 replies; 35+ messages in thread
From: Amit Pundir @ 2020-08-01  7:34 UTC (permalink / raw)
  To: Nicolas Saenz Julienne
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Fri, 31 Jul 2020 at 19:50, Amit Pundir <amit.pundir@linaro.org> wrote:
>
> On Fri, 31 Jul 2020 at 19:45, Nicolas Saenz Julienne
> <nsaenzjulienne@suse.de> wrote:
> >
> > On Fri, 2020-07-31 at 16:47 +0530, Amit Pundir wrote:
> > > On Fri, 31 Jul 2020 at 16:17, Nicolas Saenz Julienne
> >
> > [...]
> >
> > > > Ok, so lets see who's doing what and with what constraints:
> > >
> > > Here is the relevant dmesg log: https://pastebin.ubuntu.com/p/dh3pPnxS2v/
> >
> > Sadly nothing out of the ordinary, looks reasonable.
> >
> > I have an idea, I've been going over the downstream device tree and it seems
> > the reserved-memory entries, specially the ones marked with 'no-map' don't
> > fully match what we have upstream. On top of that all these reserved areas seem
> > to fall into ZONE_DMA.
> >
> > So, what could be happening is that, while allocating pages for the ZONE_DMA
> > atomic pool, something in the page allocator is either writing/mapping into a
> > reserved area triggering some kind of fault.
> >
> > Amir, could you go over the no-map reserved-memory entries in the downstream
> > device-tree, both in 'beryllium-*.dtsi' (I think those are the relevant ones)
> > and 'sdm845.dtsi'[1], and make sure they match what you are using. If not just
> > edit them in and see if it helps. If you need any help with that I'll be happy
> > to give you a hand.
>
> Thank you for the pointers. I'll try to match my dts' reserved-memory
> entries with the downstream dts. I'll let you know how it goes.
>

I matched my dts's reserved-memory nodes with the downstream ones, but it didn't help.

Most of the no-map reserved-memory regions in the downstream kernel
are accompanied by the "removed-dma-pool" compatible, "to indicate a
region of memory which is meant to be carved out and not exposed to
kernel." [1][2]. Is this something that might be tripping my device
up? I tried to cherry-pick removed-dma-pool from the msm kernel[3] to
see if that makes any difference, but I might have missed a few
dependencies and my device didn't boot.

[1] https://android.googlesource.com/kernel/msm/+/e9171c1c69c31ec2226f0871fb5535b6f2044ef3%5E%21/#F0
[2] https://lore.kernel.org/patchwork/patch/670515/#857952
[3] https://github.com/OnePlusOSS/android_kernel_oneplus_sm8250/commit/a478a8bf78ade799a5626cee45c2b247071b325f

> Regards,
> Amit Pundir
>
> >
> > Regards,
> > Nicolas
> >
> > [1] You could also extract the device tree from a device running with the
> >     downstream kernel, whatever is easier for you.
> >

* Re: dma-pool fixes
  2020-07-31 19:04                     ` David Rientjes via iommu
@ 2020-08-01  8:20                       ` David Rientjes via iommu
  2020-08-01  8:57                         ` revert scope for 5.8, was " Christoph Hellwig
  2020-08-01  8:29                       ` Christoph Hellwig
  1 sibling, 1 reply; 35+ messages in thread
From: David Rientjes via iommu @ 2020-08-01  8:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Amit Pundir, jeremy.linton, iommu, linux-rpi-kernel, Robin Murphy

On Fri, 31 Jul 2020, David Rientjes wrote:

> > > Hi Nicolas, Christoph,
> > > 
> > > Just out of curiosity, I'm wondering if we can restore the earlier
> > > behaviour and make DMA atomic allocation configured thru platform
> > > specific device tree instead?
> > > 
> > > Or if you can allow a more hackish approach to restore the earlier
> > > logic, using of_machine_is_compatible() just for my device for the
> > > time being. Meanwhile I'm checking with other developers running the
> > > mainline kernel on sdm845 phones like OnePlus 6/6T, if they see this
> > > issue too.
> > 
> > If we don't find a fix for your platform I'm going to send Linus a
> > last minute revert this weekend, to stick to the no regressions policy.
> > I still hope we can fix the issue for real.
> > 
> 
> What would be the scope of this potential revert?
> 

To follow up on this, the introduction of the DMA atomic pools in 5.8
fixes an issue for any AMD SEV-enabled guest that has a driver that
requires atomic DMA allocations (for us, nvme), because runtime decryption
of memory allocated through the DMA API may block.  This manifests itself
as "sleeping in invalid context" BUGs for any confidential VM user in
the cloud.

I unfortunately don't have Amit's device to be able to independently debug 
this issue and certainly could not have done a better job at working the 
bug than Nicolas and Christoph have done so far.  I'm as baffled by the 
results as anybody else.

I fully understand the no regressions policy.  I'd also ask that we 
consider that *all* SEV guests are currently broken if they use nvme or 
any other driver that does atomic DMA allocations.  It's an extremely 
serious issue for cloud.  If there is *anything* that I can do to make 
forward progress on this issue for 5.8, including some of the workarounds 
above that Amit requested, I'd be very happy to help.  Christoph will make 
the right decision for DMA in 5.8, but I simply wanted to state how 
critical working SEV guests are to users.

* Re: dma-pool fixes
  2020-07-31 19:04                     ` David Rientjes via iommu
  2020-08-01  8:20                       ` David Rientjes via iommu
@ 2020-08-01  8:29                       ` Christoph Hellwig
  1 sibling, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2020-08-01  8:29 UTC (permalink / raw)
  To: David Rientjes
  Cc: Amit Pundir, jeremy.linton, iommu, linux-rpi-kernel,
	Robin Murphy, Christoph Hellwig

On Fri, Jul 31, 2020 at 12:04:28PM -0700, David Rientjes wrote:
> On Fri, 31 Jul 2020, Christoph Hellwig wrote:
> 
> > > Hi Nicolas, Christoph,
> > > 
> > > Just out of curiosity, I'm wondering if we can restore the earlier
> > > behaviour and make DMA atomic allocation configured thru platform
> > > specific device tree instead?
> > > 
> > > Or if you can allow a more hackish approach to restore the earlier
> > > logic, using of_machine_is_compatible() just for my device for the
> > > time being. Meanwhile I'm checking with other developers running the
> > > mainline kernel on sdm845 phones like OnePlus 6/6T, if they see this
> > > issue too.
> > 
> > If we don't find a fix for your platform I'm going to send Linus a
> > last minute revert this weekend, to stick to the no regressions policy.
> > I still hope we can fix the issue for real.
> > 
> 
> What would be the scope of this potential revert?

I've just looked into that and it seems like we need to revert everything
pool-related back to "dma-pool: add additional coherent pools to map to gfp
mask".

* revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01  8:20                       ` David Rientjes via iommu
@ 2020-08-01  8:57                         ` Christoph Hellwig
  2020-08-01 11:57                           ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-08-01  8:57 UTC (permalink / raw)
  To: David Rientjes, Linus Torvalds
  Cc: Amit Pundir, jeremy.linton, iommu, linux-rpi-kernel,
	Robin Murphy, Christoph Hellwig

On Sat, Aug 01, 2020 at 01:20:07AM -0700, David Rientjes wrote:
> To follow up on this, the introduction of the DMA atomic pools in 5.8
> fixes an issue for any AMD SEV-enabled guest that has a driver that
> requires atomic DMA allocations (for us, nvme), because runtime decryption
> of memory allocated through the DMA API may block.  This manifests itself
> as "sleeping in invalid context" BUGs for any confidential VM user in
> the cloud.
> 
> I unfortunately don't have Amit's device to be able to independently debug 
> this issue and certainly could not have done a better job at working the 
> bug than Nicolas and Christoph have done so far.  I'm as baffled by the 
> results as anybody else.
> 
> I fully understand the no regressions policy.  I'd also ask that we 
> consider that *all* SEV guests are currently broken if they use nvme or 
> any other driver that does atomic DMA allocations.  It's an extremely 
> serious issue for cloud.  If there is *anything* that I can do to make 
> forward progress on this issue for 5.8, including some of the workarounds 
> above that Amit requested, I'd be very happy to help.  Christoph will make 
> the right decision for DMA in 5.8, but I simply wanted to state how 
> critical working SEV guests are to users.

I'm between a rock and a hard place here.  If we simply want to revert
commits as-is to make sure both the Raspberry Pi 4 and the phone do
not regress, we'll have to go all the way back and revert the whole SEV
pool support.  I could try a manual revert of the multiple pool
support, but it is very late for that.

Or maybe Linus has decided to cut a -rc8 which would give us a little
more time.
-

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01  8:57                         ` revert scope for 5.8, was " Christoph Hellwig
@ 2020-08-01 11:57                           ` Amit Pundir
  2020-08-01 16:59                             ` Nicolas Saenz Julienne
                                               ` (2 more replies)
  0 siblings, 3 replies; 35+ messages in thread
From: Amit Pundir @ 2020-08-01 11:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Linus Torvalds, jeremy.linton, iommu, linux-rpi-kernel,
	David Rientjes, Robin Murphy

On Sat, 1 Aug 2020 at 14:27, Christoph Hellwig <hch@lst.de> wrote:
>
> On Sat, Aug 01, 2020 at 01:20:07AM -0700, David Rientjes wrote:
> > To follow up on this, the introduction of the DMA atomic pools in 5.8
> > fixes an issue for any AMD SEV-enabled guest that has a driver that
> > requires atomic DMA allocations (for us, nvme), because runtime decryption
> > of memory allocated through the DMA API may block.  This manifests itself
> > as "sleeping in invalid context" BUGs for any confidential VM user in
> > the cloud.
> >
> > I unfortunately don't have Amit's device to be able to independently debug
> > this issue and certainly could not have done a better job at working the
> > bug than Nicolas and Christoph have done so far.  I'm as baffled by the
> > results as anybody else.
> >
> > I fully understand the no regressions policy.  I'd also ask that we
> > consider that *all* SEV guests are currently broken if they use nvme or
> > any other driver that does atomic DMA allocations.  It's an extremely
> > serious issue for cloud.  If there is *anything* that I can do to make
> > forward progress on this issue for 5.8, including some of the workarounds
> > above that Amit requested, I'd be very happy to help.  Christoph will make
> > the right decision for DMA in 5.8, but I simply wanted to state how
> > critical working SEV guests are to users.
>
> I'm between a rock and a hard place here.  If we simply want to revert
> commits as-is to make sure both the Raspberry Pi 4 and the phone do
> not regress, we'll have to go all the way back and revert the whole SEV
> pool support.  I could try a manual revert of the multiple pool
> support, but it is very late for that.

Hi, I found the problematic memory region. It was a memory
chunk reserved/removed in the downstream tree but was
seemingly reserved upstream for different drivers. I failed to
calculate the length of the total region reserved downstream
correctly. And there was still a portion of memory left unmarked,
which I should have marked as reserved in my testing earlier
today.
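
For completeness, the change on my side is just making sure the
reserved-memory carve-out in my (not yet upstream) dts covers the whole
region that the downstream tree removes. Roughly like below, with a
placeholder label, address and size rather than the real ones:

	/* added under the existing reserved-memory node; values are placeholders */
	removed_region: removed-region@96000000 {
		no-map;
		reg = <0x0 0x96000000 0x0 0x01400000>;
	};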

Sorry for all the noise and thanks Nicolas, Christoph and David
for your patience.

Regards,
Amit Pundir


>
> Or maybe Linus has decided to cut a -rc8 which would give us a little
> more time.
> -

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01 11:57                           ` Amit Pundir
@ 2020-08-01 16:59                             ` Nicolas Saenz Julienne
  2020-08-01 17:39                             ` Christoph Hellwig
  2020-08-01 18:28                             ` Linus Torvalds
  2 siblings, 0 replies; 35+ messages in thread
From: Nicolas Saenz Julienne @ 2020-08-01 16:59 UTC (permalink / raw)
  To: Amit Pundir, Christoph Hellwig
  Cc: Linus Torvalds, jeremy.linton, iommu, linux-rpi-kernel,
	David Rientjes, Robin Murphy

On Sat, 2020-08-01 at 17:27 +0530, Amit Pundir wrote:

[...]

> > I'm between a rock and a hard place here.  If we simply want to revert
> > commits as-is to make sure both the Raspberry Pi 4 and the phone do
> > not regress, we'll have to go all the way back and revert the whole SEV
> > pool support.  I could try a manual revert of the multiple pool
> > support, but it is very late for that.
> 
> Hi, I found the problematic memory region. It was a memory
> chunk reserved/removed in the downstream tree but was
> seemingly reserved upstream for different drivers. I failed to
> calculate the length of the total region reserved downstream
> correctly. And there was still a portion of memory left unmarked,
> which I should have marked as reserved in my testing earlier
> today.
> 
> Sorry for all the noise and thanks Nicolas, Christoph and David
> for your patience.

That's great news, thanks for persevering!

Regards,
Nicolas


* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01 11:57                           ` Amit Pundir
  2020-08-01 16:59                             ` Nicolas Saenz Julienne
@ 2020-08-01 17:39                             ` Christoph Hellwig
  2020-08-02  4:46                               ` Amit Pundir
  2020-08-01 18:28                             ` Linus Torvalds
  2 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-08-01 17:39 UTC (permalink / raw)
  To: Amit Pundir
  Cc: Linus Torvalds, jeremy.linton, iommu, linux-rpi-kernel,
	David Rientjes, Robin Murphy, Christoph Hellwig

On Sat, Aug 01, 2020 at 05:27:04PM +0530, Amit Pundir wrote:
> Hi, I found the problematic memory region. It was a memory
> chunk reserved/removed in the downstream tree but was
> seemingly reserved upstream for different drivers. I failed to
> calculate the length of the total region reserved downstream
> correctly. And there was still a portion of memory left unmarked,
> which I should have marked as reserved in my testing earlier
> today.
> 
> Sorry for all the noise and thanks Nicolas, Christoph and David
> for your patience.

So you'll need to patch the upstream DTS to fix this up?  Do you also
need my two fixes?  What about the Oneplus phones?  Can you send a
mail with a summary of the status?

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01 11:57                           ` Amit Pundir
  2020-08-01 16:59                             ` Nicolas Saenz Julienne
  2020-08-01 17:39                             ` Christoph Hellwig
@ 2020-08-01 18:28                             ` Linus Torvalds
  2020-08-02  4:35                               ` Amit Pundir
  2 siblings, 1 reply; 35+ messages in thread
From: Linus Torvalds @ 2020-08-01 18:28 UTC (permalink / raw)
  To: Amit Pundir
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Sat, Aug 1, 2020 at 4:57 AM Amit Pundir <amit.pundir@linaro.org> wrote:
>
> Hi, I found the problematic memory region. It was a memory
> chunk reserved/removed in the downstream tree but was
> seemingly reserved upstream for different drivers.

Is this happening with a clean tree, or are there external drivers
involved that trigger the problem?

Because if it's a clean tree, I guess I need to do an rc8 anyway, just
to get whatever workaround you then added to devicetree and/or some
driver to make it work again.

Or is there a quick fix that I can get today or early tomorrow? We had
some confusion this week due to a nasty include header mess, but
despite that there hasn't been anything else that has come up (so
far), so..

                       Linus

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01 18:28                             ` Linus Torvalds
@ 2020-08-02  4:35                               ` Amit Pundir
  0 siblings, 0 replies; 35+ messages in thread
From: Amit Pundir @ 2020-08-02  4:35 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jeremy.linton, iommu, linux-rpi-kernel, David Rientjes,
	Robin Murphy, Christoph Hellwig

On Sat, 1 Aug 2020 at 23:58, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> On Sat, Aug 1, 2020 at 4:57 AM Amit Pundir <amit.pundir@linaro.org> wrote:
> >
> > Hi, I found the problematic memory region. It was a memory
> > chunk reserved/removed in the downstream tree but was
> > seemingly reserved upstream for different drivers.
>
> Is this happening with a clean tree, or are there external drivers
> involved that trigger the problem?
>
> Because if it's a clean tree, I guess I need to do an rc8 anyway, just
> to get whatever workaround you then added to devicetree and/or some
> driver to make it work again.
>

No, this is not on a clean tree. The phone's device-tree is not
upstreamed yet. That is the only change I carry. I have updated
the device-tree for this fix and sent out a newer version for review.
https://lkml.org/lkml/2020/8/1/184

Regards,
Amit Pundir


> Or is there a quick fix that I can get today or early tomorrow? We had
> some confusion this week due to a nasty include header mess, but
> despite that there hasn't been anything else that has come up (so
> far), so..
>
>                        Linus

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-01 17:39                             ` Christoph Hellwig
@ 2020-08-02  4:46                               ` Amit Pundir
  2020-08-02 15:04                                 ` Amit Pundir
  0 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-08-02  4:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Linus Torvalds, jeremy.linton, iommu, linux-rpi-kernel,
	David Rientjes, Robin Murphy

On Sat, 1 Aug 2020 at 23:09, Christoph Hellwig <hch@lst.de> wrote:
>
> On Sat, Aug 01, 2020 at 05:27:04PM +0530, Amit Pundir wrote:
> > Hi, I found the problematic memory region. It was a memory
> > chunk reserved/removed in the downstream tree but was
> > seemingly reserved upstream for different drivers. I failed to
> > calculate the length of the total region reserved downstream
> > correctly. And there was still a portion of memory left unmarked,
> > which I should have marked as reserved in my testing earlier
> > today.
> >
> > Sorry for all the noise and thanks Nicolas, Christoph and David
> > for your patience.
>
> So you'll need to patch the upstream DTS to fix this up?  Do you also
> need my two fixes?  What about the Oneplus phones?  Can you send a
> mail with a summary of the status?

Poco's DTS is not upstreamed yet. I have updated it for this fix
and sent out a newer version for review.
https://lkml.org/lkml/2020/8/1/184

I didn't need to try your two add-on fixes. I'll give them a spin
later today.

I'm sure One Plus 6 and 6T will be running into a similar problem.
I'll check with Caleb and send out a status mail with the summary.

Regards,
Amit Pundir

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-02  4:46                               ` Amit Pundir
@ 2020-08-02 15:04                                 ` Amit Pundir
  2020-08-03  4:14                                   ` David Rientjes via iommu
  0 siblings, 1 reply; 35+ messages in thread
From: Amit Pundir @ 2020-08-02 15:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Linus Torvalds, iommu, jeremy.linton, Caleb Connolly,
	linux-rpi-kernel, David Rientjes, Robin Murphy

On Sun, 2 Aug 2020 at 10:16, Amit Pundir <amit.pundir@linaro.org> wrote:
>
> On Sat, 1 Aug 2020 at 23:09, Christoph Hellwig <hch@lst.de> wrote:
> >
> > On Sat, Aug 01, 2020 at 05:27:04PM +0530, Amit Pundir wrote:
> > > Hi, I found the problematic memory region. It was a memory
> > > chunk reserved/removed in the downstream tree but was
> > > seemingly reserved upstream for different drivers. I failed to
> > > calculate the length of the total region reserved downstream
> > > correctly. And there was still a portion of memory left unmarked,
> > > which I should have marked as reserved in my testing earlier
> > > today.
> > >
> > > Sorry for all the noise and thanks Nicolas, Christoph and David
> > > for your patience.
> >
> > So you'll need to patch the upstream DTS to fix this up?  Do you also
> > need my two fixes?  What about the Oneplus phones?  Can you send a
> > mail with a summary of the status?
>
> Poco's DTS is not upstreamed yet. I have updated it for this fix
> and sent out a newer version for review.
> https://lkml.org/lkml/2020/8/1/184
>
> I didn't need to try your two add-on fixes. I'll give them a spin
> later today.

Hi Christoph,

I see no obvious regressions with your twin dma-pool fixes on my
PocoF1 phone.

Caleb also confirmed that a similar reserved-memory region fix in his
One Plus 6 device-tree worked for him too.

>
> I'm sure One Plus 6 and 6T will be running into a similar problem.
> I'll check with Caleb and send out a status mail with the summary.
>
> Regards,
> Amit Pundir

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-02 15:04                                 ` Amit Pundir
@ 2020-08-03  4:14                                   ` David Rientjes via iommu
  2020-08-03  6:44                                     ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: David Rientjes via iommu @ 2020-08-03  4:14 UTC (permalink / raw)
  To: Amit Pundir
  Cc: Linus Torvalds, iommu, jeremy.linton, Caleb Connolly,
	linux-rpi-kernel, Robin Murphy, Christoph Hellwig

On Sun, 2 Aug 2020, Amit Pundir wrote:

> > > > Hi, I found the problematic memory region. It was a memory
> > > > chunk reserved/removed in the downstream tree but was
> > > > seemingly reserved upstream for different drivers. I failed to
> > > > calculate the length of the total region reserved downstream
> > > > correctly. And there was still a portion of memory left unmarked,
> > > > which I should have marked as reserved in my testing earlier
> > > > today.
> > > >
> > > > Sorry for all the noise and thanks Nicolas, Christoph and David
> > > > for your patience.
> > >
> > > So you'll need to patch the upstream DTS to fix this up?  Do you also
> > > need my two fixes?  What about the Oneplus phones?  Can you send a
> > > mail with a summary of the status?
> >
> > Poco's DTS is not upstreamed yet. I have updated it for this fix
> > and sent out a newer version for review.
> > https://lkml.org/lkml/2020/8/1/184
> >
> > I didn't need to try your two add-on fixes. I'll give them a spin
> > later today.
> 
> Hi Christoph,
> 
> I see no obvious regressions with your twin dma-pool fixes on my
> PocoF1 phone.
> 
> Caleb also confirmed that a similar reserved-memory region fix in his
> One Plus 6 device-tree worked for him too.
> 

Thanks very much for the update, Amit.

Christoph: since we have atomic DMA coherent pools in 5.8 now, recall our 
previous discussions, including Greg KH, regarding backports to stable 
trees (we are interested in 5.4 and 5.6) of this support for the purposes 
of confidential VM users who wish to run their own SEV-enabled guests in 
cloud.

Would you have any objections to backports being offered for this support 
now?  We can prepare them immediately.  Or, if you would prefer we hold 
off for a while, please let me know and we can delay it until you are more 
comfortable.  (For confidential VM users, the ability to run your own 
unmodified stable kernel is a much different security story than a guest 
modified by the cloud provider.)

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-03  4:14                                   ` David Rientjes via iommu
@ 2020-08-03  6:44                                     ` Christoph Hellwig
  2020-08-03 18:30                                       ` David Rientjes via iommu
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2020-08-03  6:44 UTC (permalink / raw)
  To: David Rientjes
  Cc: Amit Pundir, Linus Torvalds, iommu, jeremy.linton,
	Caleb Connolly, linux-rpi-kernel, Robin Murphy,
	Christoph Hellwig

On Sun, Aug 02, 2020 at 09:14:41PM -0700, David Rientjes wrote:
> Christoph: since we have atomic DMA coherent pools in 5.8 now, recall our 
> previous discussions, including Greg KH, regarding backports to stable 
> trees (we are interested in 5.4 and 5.6) of this support for the purposes 
> of confidential VM users who wish to run their own SEV-enabled guests in 
> cloud.
> 
> Would you have any objections to backports being offered for this support 
> now?  We can prepare them immediately.  Or, if you would prefer we hold 
> off for a while, please let me know and we can delay it until you are more 
> comfortable.  (For confidential VM users, the ability to run your own 
> unmodified stable kernel is a much different security story than a guest 
> modified by the cloud provider.)

Before we start backporting I think we need the two fixes from
the "dma pool fixes" series.  Can you start reviewing them?  I also
think we should probably wait at least until -rc2 for any fallout
before starting the backport.

* Re: revert scope for 5.8, was Re: dma-pool fixes
  2020-08-03  6:44                                     ` Christoph Hellwig
@ 2020-08-03 18:30                                       ` David Rientjes via iommu
  0 siblings, 0 replies; 35+ messages in thread
From: David Rientjes via iommu @ 2020-08-03 18:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Amit Pundir, Linus Torvalds, iommu, jeremy.linton,
	Caleb Connolly, linux-rpi-kernel, Robin Murphy

On Mon, 3 Aug 2020, Christoph Hellwig wrote:

> On Sun, Aug 02, 2020 at 09:14:41PM -0700, David Rientjes wrote:
> > Christoph: since we have atomic DMA coherent pools in 5.8 now, recall our 
> > previous discussions, including Greg KH, regarding backports to stable 
> > trees (we are interested in 5.4 and 5.6) of this support for the purposes 
> > of confidential VM users who wish to run their own SEV-enabled guests in 
> > cloud.
> > 
> > Would you have any objections to backports being offered for this support 
> > now?  We can prepare them immediately.  Or, if you would prefer we hold 
> > off for a while, please let me know and we can delay it until you are more 
> > comfortable.  (For confidential VM users, the ability to run your own 
> > unmodified stable kernel is a much different security story than a guest 
> > modified by the cloud provider.)
> 
> Before we start backporting I think we need the two fixes from
> the "dma pool fixes" series.  Can you start reviewing them?  I also
> think we should probably wait at least until -rc2 for any fallout
> before starting the backport.
> 

Thanks Christoph, this makes perfect sense.  I see Nicolas has refreshed 
the two patches:

dma-pool: fix coherent pool allocations for IOMMU mappings
dma-pool: Only allocate from CMA when in same memory zone

and posted them again today on LKML for review.  I'll review those and 
we'll send a full series of backports for stable consideration for 5.4 LTS
and 5.6 around the 5.9-rc2 timeframe.

Thanks!

end of thread, other threads:[~2020-08-03 18:30 UTC | newest]

Thread overview: 35+ messages
2020-07-28 10:47 dma-pool fixes Christoph Hellwig
2020-07-28 10:47 ` [PATCH 1/2] dma-pool: fix coherent pool allocations for IOMMU mappings Christoph Hellwig
2020-07-28 14:56   ` Nicolas Saenz Julienne
2020-07-28 15:28     ` Christoph Hellwig
2020-07-28 10:47 ` [PATCH 2/2] dma-pool: Only allocate from CMA when in same memory zone Christoph Hellwig
2020-07-28 12:02 ` dma-pool fixes Amit Pundir
2020-07-28 12:07   ` Christoph Hellwig
2020-07-28 12:25     ` Amit Pundir
2020-07-28 12:41       ` Christoph Hellwig
2020-07-28 12:48         ` Amit Pundir
2020-07-28 15:30           ` Christoph Hellwig
2020-07-29 10:45             ` Nicolas Saenz Julienne
2020-07-29 12:22               ` Amit Pundir
2020-07-31  7:46                 ` Amit Pundir
2020-07-31 13:09                   ` Christoph Hellwig
2020-07-31 14:15                     ` Amit Pundir
2020-07-31 19:04                     ` David Rientjes via iommu
2020-08-01  8:20                       ` David Rientjes via iommu
2020-08-01  8:57                         ` revert scope for 5.8, was " Christoph Hellwig
2020-08-01 11:57                           ` Amit Pundir
2020-08-01 16:59                             ` Nicolas Saenz Julienne
2020-08-01 17:39                             ` Christoph Hellwig
2020-08-02  4:46                               ` Amit Pundir
2020-08-02 15:04                                 ` Amit Pundir
2020-08-03  4:14                                   ` David Rientjes via iommu
2020-08-03  6:44                                     ` Christoph Hellwig
2020-08-03 18:30                                       ` David Rientjes via iommu
2020-08-01 18:28                             ` Linus Torvalds
2020-08-02  4:35                               ` Amit Pundir
2020-08-01  8:29                       ` Christoph Hellwig
2020-07-31 10:47                 ` Nicolas Saenz Julienne
2020-07-31 11:17                   ` Amit Pundir
2020-07-31 14:15                     ` Nicolas Saenz Julienne
2020-07-31 14:20                       ` Amit Pundir
2020-08-01  7:34                         ` Amit Pundir
