* [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
@ 2019-06-10  3:08 Nicholas Piggin
  2019-06-10  3:08 ` [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range Nicholas Piggin
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-10  3:08 UTC
  To: linuxppc-dev; +Cc: Nicholas Piggin

__ioremap_at error handling is wonky: it requires the caller to clean up
after it. Implement a helper that does the map and error cleanup and
remove the requirement from the caller.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---

This series is a different approach to the problem, using the generic
ioremap_page_range directly, which reduces added code and moves
the radix-specific code into radix files. Thanks to Christophe for
pointing out various problems with the previous patch.

 arch/powerpc/mm/pgtable_64.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d2d976ff8a0e..6bd3660388aa 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -108,14 +108,30 @@ unsigned long ioremap_bot;
 unsigned long ioremap_bot = IOREMAP_BASE;
 #endif
 
+static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
+{
+	unsigned long i;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int err = map_kernel_page(ea + i, pa + i, prot);
+		if (err) {
+			if (slab_is_available())
+				unmap_kernel_range(ea, size);
+			else
+				WARN_ON_ONCE(1); /* Should clean up */
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * __ioremap_at - Low level function to establish the page tables
  *                for an IO mapping
  */
 void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
 {
-	unsigned long i;
-
 	/* We don't support the 4K PFN hack with ioremap */
 	if (pgprot_val(prot) & H_PAGE_4K_PFN)
 		return NULL;
@@ -129,9 +145,8 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
 	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
 	WARN_ON(size & ~PAGE_MASK);
 
-	for (i = 0; i < size; i += PAGE_SIZE)
-		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
-			return NULL;
+	if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE))
+		return NULL;
 
 	return (void __iomem *)ea;
 }
@@ -182,8 +197,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
 
 		area->phys_addr = paligned;
 		ret = __ioremap_at(paligned, area->addr, size, prot);
-		if (!ret)
-			vunmap(area->addr);
 	} else {
 		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
 		if (ret)
-- 
2.20.1



* [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-10  3:08 [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Nicholas Piggin
@ 2019-06-10  3:08 ` Nicholas Piggin
  2019-06-11  6:46   ` Christophe Leroy
  2019-06-10  3:08 ` [PATCH 3/3] powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP Nicholas Piggin
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-10  3:08 UTC
  To: linuxppc-dev; +Cc: Nicholas Piggin

Radix can use ioremap_page_range for ioremap, after slab is available.
This makes it possible to enable huge ioremap mapping support.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h |  3 +++
 arch/powerpc/mm/book3s64/pgtable.c         | 21 +++++++++++++++++++++
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 21 +++++++++++++++++++++
 arch/powerpc/mm/pgtable_64.c               |  2 +-
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 574eca33f893..e04a839cb5b9 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -266,6 +266,9 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
 extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 				 pgprot_t flags, unsigned int psz);
 
+extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa,
+				unsigned long size, pgprot_t prot, int nid);
+
 static inline unsigned long radix__get_tree_size(void)
 {
 	unsigned long rts_field;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index ff98b663c83e..953850a602f7 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -450,3 +450,24 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 
 	return true;
 }
+
+int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
+{
+	unsigned long i;
+
+	if (radix_enabled())
+		return radix__ioremap_range(ea, pa, size, prot, nid);
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int err = map_kernel_page(ea + i, pa + i, prot);
+		if (err) {
+			if (slab_is_available())
+				unmap_kernel_range(ea, size);
+			else
+				WARN_ON_ONCE(1); /* Should clean up */
+			return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index c9bcf428dd2b..db993bc1aef3 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
 
 #define pr_fmt(fmt) "radix-mmu: " fmt
 
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/memblock.h>
@@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
 
 	set_pte_at(mm, addr, ptep, pte);
 }
+
+int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
+			pgprot_t prot, int nid)
+{
+	if (likely(slab_is_available())) {
+		int err = ioremap_page_range(ea, ea + size, pa, prot);
+		if (err)
+			unmap_kernel_range(ea, size);
+		return err;
+	} else {
+		unsigned long i;
+
+		for (i = 0; i < size; i += PAGE_SIZE) {
+			int err = map_kernel_page(ea + i, pa + i, prot);
+			if (WARN_ON_ONCE(err)) /* Should clean up */
+				return err;
+		}
+		return 0;
+	}
+}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6bd3660388aa..63cd81130643 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -108,7 +108,7 @@ unsigned long ioremap_bot;
 unsigned long ioremap_bot = IOREMAP_BASE;
 #endif
 
-static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
+int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
 {
 	unsigned long i;
 
-- 
2.20.1



* [PATCH 3/3] powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP
  2019-06-10  3:08 [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Nicholas Piggin
  2019-06-10  3:08 ` [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range Nicholas Piggin
@ 2019-06-10  3:08 ` Nicholas Piggin
  2019-06-11  6:28 ` [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Christophe Leroy
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-10  3:08 UTC
  To: linuxppc-dev; +Cc: Nicholas Piggin

This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required
page table functions.

This enables huge (2MB and 1GB) ioremap mappings. I don't have a
benchmark for this change, but huge vmap will be used by a later core
kernel change to enable huge vmalloc memory mappings. This improves
cached `git diff` performance by about 5% on a 2-node POWER9 with a
32MB dentry cache hash.

  Profiling git diff dTLB misses with a vanilla kernel:

  81.75%  git      [kernel.vmlinux]    [k] __d_lookup_rcu
   7.21%  git      [kernel.vmlinux]    [k] strncpy_from_user
   1.77%  git      [kernel.vmlinux]    [k] find_get_entry
   1.59%  git      [kernel.vmlinux]    [k] kmem_cache_free

            40,168      dTLB-miss
       0.100342754 seconds time elapsed

  With powerpc huge vmalloc:

             2,987      dTLB-miss
       0.095933138 seconds time elapsed

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 .../admin-guide/kernel-parameters.txt         |   2 +-
 arch/powerpc/Kconfig                          |   1 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  |   8 ++
 arch/powerpc/mm/book3s64/radix_pgtable.c      | 100 ++++++++++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..a4c3538857e9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2927,7 +2927,7 @@
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
-	nohugeiomap	[KNL,x86] Disable kernel huge I/O mappings.
+	nohugeiomap	[KNL,x86,PPC] Disable kernel huge I/O mappings.
 
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..f0e5b38d52e8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -167,6 +167,7 @@ config PPC
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN			if PPC32
 	select HAVE_ARCH_KGDB
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ccf00a8b98c6..5faceeefd9f9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
 #define VMALLOC_START	__vmalloc_start
 #define VMALLOC_END	__vmalloc_end
 
+static inline unsigned int ioremap_max_order(void)
+{
+	if (radix_enabled())
+		return PUD_SHIFT;
+	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+}
+#define IOREMAP_MAX_ORDER ioremap_max_order()
+
 extern unsigned long __kernel_virt_start;
 extern unsigned long __kernel_virt_size;
 extern unsigned long __kernel_io_start;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index db993bc1aef3..35cf04bbd50b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1124,6 +1124,106 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
 	set_pte_at(mm, addr, ptep, pte);
 }
 
+int __init arch_ioremap_pud_supported(void)
+{
+	/* HPT does not cope with large pages in the vmalloc area */
+	return radix_enabled();
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+	return radix_enabled();
+}
+
+int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
+{
+	return 0;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+	pte_t *ptep = (pte_t *)pud;
+	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
+
+	if (!radix_enabled())
+		return 0;
+
+	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
+
+	return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+	if (pud_huge(*pud)) {
+		pud_clear(pud);
+		return 1;
+	}
+
+	return 0;
+}
+
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+	pmd_t *pmd;
+	int i;
+
+	pmd = (pmd_t *)pud_page_vaddr(*pud);
+	pud_clear(pud);
+
+	flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd[i])) {
+			pte_t *pte;
+			pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+			pte_free_kernel(&init_mm, pte);
+		}
+	}
+
+	pmd_free(&init_mm, pmd);
+
+	return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+	pte_t *ptep = (pte_t *)pmd;
+	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
+
+	if (!radix_enabled())
+		return 0;
+
+	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
+
+	return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (pmd_huge(*pmd)) {
+		pmd_clear(pmd);
+		return 1;
+	}
+
+	return 0;
+}
+
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte;
+
+	pte = (pte_t *)pmd_page_vaddr(*pmd);
+	pmd_clear(pmd);
+
+	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+
+	pte_free_kernel(&init_mm, pte);
+
+	return 1;
+}
+
 int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
 			pgprot_t prot, int nid)
 {
-- 
2.20.1
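
For context on how the pmd_set_huge()/pud_set_huge() hooks above get
used: the generic walker behind ioremap_page_range() in lib/ioremap.c
tries the largest granularity first and only falls back to 4K PTEs.
A paraphrased sketch of its PMD step (simplified, not the verbatim
kernel code):

/* Install one 2MB entry when the arch hook allows it and the range
 * is suitably sized and aligned; otherwise the caller maps PTEs. */
static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr,
				unsigned long end, phys_addr_t phys_addr,
				pgprot_t prot)
{
	if (!ioremap_pmd_enabled())	/* HAVE_ARCH_HUGE_VMAP && !nohugeiomap */
		return 0;
	if ((end - addr) != PMD_SIZE || !IS_ALIGNED(phys_addr, PMD_SIZE))
		return 0;
	return pmd_set_huge(pmd, phys_addr, prot);
}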



* Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
  2019-06-10  3:08 [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Nicholas Piggin
  2019-06-10  3:08 ` [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range Nicholas Piggin
  2019-06-10  3:08 ` [PATCH 3/3] powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP Nicholas Piggin
@ 2019-06-11  6:28 ` Christophe Leroy
  2019-06-19  4:04   ` Nicholas Piggin
  2019-06-30  8:37 ` Michael Ellerman
  2019-08-20  7:44 ` Christophe Leroy
  4 siblings, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-06-11  6:28 UTC
  To: Nicholas Piggin, linuxppc-dev



On 10/06/2019 05:08, Nicholas Piggin wrote:
> __ioremap_at error handling is wonky: it requires the caller to clean up
> after it. Implement a helper that does the map and error cleanup and
> remove the requirement from the caller.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> 
> This series is a different approach to the problem, using the generic
> ioremap_page_range directly, which reduces added code and moves
> the radix-specific code into radix files. Thanks to Christophe for
> pointing out various problems with the previous patch.
> 
>   arch/powerpc/mm/pgtable_64.c | 27 ++++++++++++++++++++-------
>   1 file changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index d2d976ff8a0e..6bd3660388aa 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -108,14 +108,30 @@ unsigned long ioremap_bot;
>   unsigned long ioremap_bot = IOREMAP_BASE;
>   #endif
>   
> +static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
> +{
> +	unsigned long i;
> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		int err = map_kernel_page(ea + i, pa + i, prot);

Missing a blank line

> +		if (err) {

I'd have done the following to reduce indentation depth

		if (!err)
			continue;

> +			if (slab_is_available())
> +				unmap_kernel_range(ea, size);

Shouldn't it be unmap_kernel_range(ea, i) ?

Christophe

> +			else
> +				WARN_ON_ONCE(1); /* Should clean up */
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * __ioremap_at - Low level function to establish the page tables
>    *                for an IO mapping
>    */
>   void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
>   {
> -	unsigned long i;
> -
>   	/* We don't support the 4K PFN hack with ioremap */
>   	if (pgprot_val(prot) & H_PAGE_4K_PFN)
>   		return NULL;
> @@ -129,9 +145,8 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
>   	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
>   	WARN_ON(size & ~PAGE_MASK);
>   
> -	for (i = 0; i < size; i += PAGE_SIZE)
> -		if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
> -			return NULL;
> +	if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE))
> +		return NULL;
>   
>   	return (void __iomem *)ea;
>   }
> @@ -182,8 +197,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
>   
>   		area->phys_addr = paligned;
>   		ret = __ioremap_at(paligned, area->addr, size, prot);
> -		if (!ret)
> -			vunmap(area->addr);
>   	} else {
>   		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
>   		if (ret)
> 


* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-10  3:08 ` [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range Nicholas Piggin
@ 2019-06-11  6:46   ` Christophe Leroy
  2019-06-19  3:59     ` Nicholas Piggin
  0 siblings, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-06-11  6:46 UTC
  To: Nicholas Piggin, linuxppc-dev



On 10/06/2019 05:08, Nicholas Piggin wrote:
> Radix can use ioremap_page_range for ioremap, after slab is available.
> This makes it possible to enable huge ioremap mapping support.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/include/asm/book3s/64/radix.h |  3 +++
>   arch/powerpc/mm/book3s64/pgtable.c         | 21 +++++++++++++++++++++
>   arch/powerpc/mm/book3s64/radix_pgtable.c   | 21 +++++++++++++++++++++
>   arch/powerpc/mm/pgtable_64.c               |  2 +-
>   4 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 574eca33f893..e04a839cb5b9 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -266,6 +266,9 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
>   extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>   				 pgprot_t flags, unsigned int psz);
>   
> +extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa,
> +				unsigned long size, pgprot_t prot, int nid);
> +

'extern' is pointless here, and checkpatch will cry.

>   static inline unsigned long radix__get_tree_size(void)
>   {
>   	unsigned long rts_field;
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
> index ff98b663c83e..953850a602f7 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -450,3 +450,24 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
>   
>   	return true;
>   }
> +
> +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
> +{
> +	unsigned long i;
> +
> +	if (radix_enabled())
> +		return radix__ioremap_range(ea, pa, size, prot, nid);

This function looks pretty similar to the one in the previous patch.
Since radix_enabled() is available and returns false for all other 
subarches, I think the above could go in the generic ioremap_range(); 
you'll only need to move the function declaration into a common file, for 
instance asm/io.h.

> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		int err = map_kernel_page(ea + i, pa + i, prot);
> +		if (err) {
> +			if (slab_is_available())
> +				unmap_kernel_range(ea, size);
> +			else
> +				WARN_ON_ONCE(1); /* Should clean up */
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index c9bcf428dd2b..db993bc1aef3 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -11,6 +11,7 @@
>   
>   #define pr_fmt(fmt) "radix-mmu: " fmt
>   
> +#include <linux/io.h>
>   #include <linux/kernel.h>
>   #include <linux/sched/mm.h>
>   #include <linux/memblock.h>
> @@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
>   
>   	set_pte_at(mm, addr, ptep, pte);
>   }
> +
> +int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
> +			pgprot_t prot, int nid)
> +{
> +	if (likely(slab_is_available())) {
> +		int err = ioremap_page_range(ea, ea + size, pa, prot);
> +		if (err)
> +			unmap_kernel_range(ea, size);
> +		return err;
> +	} else {
> +		unsigned long i;
> +
> +		for (i = 0; i < size; i += PAGE_SIZE) {
> +			int err = map_kernel_page(ea + i, pa + i, prot);
> +			if (WARN_ON_ONCE(err)) /* Should clean up */
> +				return err;
> +		}

Same loop again.

What about not doing a radix-specific function and just putting 
something like the following in the core ioremap_range() function?

	if (likely(slab_is_available()) && radix_enabled()) {
		int err = ioremap_page_range(ea, ea + size, pa, prot);

		if (err)
			unmap_kernel_range(ea, size);
		return err;
	}

Because I'm pretty sure we will use ioremap_page_range() more and more.

> +		return 0;
> +	}
> +}
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 6bd3660388aa..63cd81130643 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -108,7 +108,7 @@ unsigned long ioremap_bot;
>   unsigned long ioremap_bot = IOREMAP_BASE;
>   #endif
>   
> -static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
> +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)

Hum. Weak functions remain in vmlinux, unused, unless 
CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is selected.

Also, they are somewhat dangerous because people might change them 
without seeing that they are overridden for some particular configuration.

Christophe

>   {
>   	unsigned long i;
>   
> 
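
To illustrate the mechanism being criticised above: a __weak definition
is used only until the linker sees a strong definition of the same
symbol, at which point every caller binds to the strong one. A minimal
sketch, using the files from this series:

/* arch/powerpc/mm/pgtable_64.c: generic fallback */
int __weak ioremap_range(unsigned long ea, phys_addr_t pa,
			 unsigned long size, pgprot_t prot, int nid)
{
	/* per-page map_kernel_page() loop */
	return 0;
}

/* arch/powerpc/mm/book3s64/pgtable.c: strong override. The linker
 * resolves all calls to this one; the weak copy above stays in
 * vmlinux as dead code unless LD_DEAD_CODE_DATA_ELIMINATION is on. */
int ioremap_range(unsigned long ea, phys_addr_t pa,
		  unsigned long size, pgprot_t prot, int nid)
{
	/* radix-aware version */
	return 0;
}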


* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-11  6:46   ` Christophe Leroy
@ 2019-06-19  3:59     ` Nicholas Piggin
  2019-06-19 12:49       ` Christophe Leroy
  2019-06-19 16:25       ` Christophe Leroy
  0 siblings, 2 replies; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-19  3:59 UTC
  To: Christophe Leroy, linuxppc-dev

Christophe Leroy's on June 11, 2019 4:46 pm:
> 
> 
> On 10/06/2019 05:08, Nicholas Piggin wrote:
>> Radix can use ioremap_page_range for ioremap, after slab is available.
>> This makes it possible to enable huge ioremap mapping support.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/include/asm/book3s/64/radix.h |  3 +++
>>   arch/powerpc/mm/book3s64/pgtable.c         | 21 +++++++++++++++++++++
>>   arch/powerpc/mm/book3s64/radix_pgtable.c   | 21 +++++++++++++++++++++
>>   arch/powerpc/mm/pgtable_64.c               |  2 +-
>>   4 files changed, 46 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
>> index 574eca33f893..e04a839cb5b9 100644
>> --- a/arch/powerpc/include/asm/book3s/64/radix.h
>> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
>> @@ -266,6 +266,9 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
>>   extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>>   				 pgprot_t flags, unsigned int psz);
>>   
>> +extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa,
>> +				unsigned long size, pgprot_t prot, int nid);
>> +
> 
> 'extern' is pointless here, and checkpatch will cry.
> 
>>   static inline unsigned long radix__get_tree_size(void)
>>   {
>>   	unsigned long rts_field;
>> diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
>> index ff98b663c83e..953850a602f7 100644
>> --- a/arch/powerpc/mm/book3s64/pgtable.c
>> +++ b/arch/powerpc/mm/book3s64/pgtable.c
>> @@ -450,3 +450,24 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
>>   
>>   	return true;
>>   }
>> +
>> +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>> +{
>> +	unsigned long i;
>> +
>> +	if (radix_enabled())
>> +		return radix__ioremap_range(ea, pa, size, prot, nid);
> 
> This function looks pretty similar to the one in the previous patch.
> Since radix_enabled() is available and returns false for all other 
> subarches, I think the above could go in the generic ioremap_range(); 
> you'll only need to move the function declaration into a common file, for 
> instance asm/io.h.
> 
>> +
>> +	for (i = 0; i < size; i += PAGE_SIZE) {
>> +		int err = map_kernel_page(ea + i, pa + i, prot);
>> +		if (err) {
>> +			if (slab_is_available())
>> +				unmap_kernel_range(ea, size);
>> +			else
>> +				WARN_ON_ONCE(1); /* Should clean up */
>> +			return err;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
>> index c9bcf428dd2b..db993bc1aef3 100644
>> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
>> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
>> @@ -11,6 +11,7 @@
>>   
>>   #define pr_fmt(fmt) "radix-mmu: " fmt
>>   
>> +#include <linux/io.h>
>>   #include <linux/kernel.h>
>>   #include <linux/sched/mm.h>
>>   #include <linux/memblock.h>
>> @@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
>>   
>>   	set_pte_at(mm, addr, ptep, pte);
>>   }
>> +
>> +int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
>> +			pgprot_t prot, int nid)
>> +{
>> +	if (likely(slab_is_available())) {
>> +		int err = ioremap_page_range(ea, ea + size, pa, prot);
>> +		if (err)
>> +			unmap_kernel_range(ea, size);
>> +		return err;
>> +	} else {
>> +		unsigned long i;
>> +
>> +		for (i = 0; i < size; i += PAGE_SIZE) {
>> +			int err = map_kernel_page(ea + i, pa + i, prot);
>> +			if (WARN_ON_ONCE(err)) /* Should clean up */
>> +				return err;
>> +		}
> 
> Same loop again.
> 
> What about not doing a radix-specific function and just putting 
> something like the following in the core ioremap_range() function?
> 
> 	if (likely(slab_is_available()) && radix_enabled()) {
> 		int err = ioremap_page_range(ea, ea + size, pa, prot);
> 
> 		if (err)
> 			unmap_kernel_range(ea, size);
> 		return err;
> 	}
> 
> Because I'm pretty sure we will use ioremap_page_range() more and more.

Well I agree the duplication is not so nice, but it's convenient
to see what is going on for each MMU type.

There is a significant amount of churn that needs to be done in
this layer so I prefer to make it a bit simpler despite duplication.

I would like to remove the early ioremap or make it into its own
function. Re-implement map_kernel_page with ioremap_page_range,
allow page tables that don't use slab to avoid the early check,
unbolt the hptes mapped in early boot, etc.

I just wanted to escape out the 64s and hash/radix implementations
completely until that settles.

>> -static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>> +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
> 
> Hum. Weak functions remain in vmlinux, unused, unless 
> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is selected.
> 
> Also, they are somewhat dangerous because people might change them 
> without seeing that they are overridden for some particular configuration.

Well you shouldn't assume that when you see a weak function, but
what's the preferred alternative? A config option?

Thanks,
Nick
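
For concreteness, one shape the config-option alternative could take (a
sketch only; PPC_HAS_IOREMAP_RANGE is a made-up symbol, not something in
the series):

/* In Kconfig, a symbol that book3s64 selects when it provides its
 * own ioremap_range():
 *
 *	config PPC_HAS_IOREMAP_RANGE
 *		bool
 *
 * Then in arch/powerpc/mm/pgtable_64.c the generic copy compiles
 * out entirely instead of lingering as an unused weak symbol: */
#ifndef CONFIG_PPC_HAS_IOREMAP_RANGE
static int ioremap_range(unsigned long ea, phys_addr_t pa,
			 unsigned long size, pgprot_t prot, int nid)
{
	/* per-page map_kernel_page() loop as in patch 1 */
	return 0;
}
#endif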



* Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
  2019-06-11  6:28 ` [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Christophe Leroy
@ 2019-06-19  4:04   ` Nicholas Piggin
  2019-06-19 13:04     ` Christophe Leroy
  0 siblings, 1 reply; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-19  4:04 UTC
  To: Christophe Leroy, linuxppc-dev

Christophe Leroy's on June 11, 2019 4:28 pm:
> 
> 
>> On 10/06/2019 05:08, Nicholas Piggin wrote:
>> __ioremap_at error handling is wonky: it requires the caller to clean up
>> after it. Implement a helper that does the map and error cleanup and
>> remove the requirement from the caller.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> 
>> This series is a different approach to the problem, using the generic
>> ioremap_page_range directly, which reduces added code and moves
>> the radix-specific code into radix files. Thanks to Christophe for
>> pointing out various problems with the previous patch.
>> 
>>   arch/powerpc/mm/pgtable_64.c | 27 ++++++++++++++++++++-------
>>   1 file changed, 20 insertions(+), 7 deletions(-)
>> 
>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>> index d2d976ff8a0e..6bd3660388aa 100644
>> --- a/arch/powerpc/mm/pgtable_64.c
>> +++ b/arch/powerpc/mm/pgtable_64.c
>> @@ -108,14 +108,30 @@ unsigned long ioremap_bot;
>>   unsigned long ioremap_bot = IOREMAP_BASE;
>>   #endif
>>   
>> +static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>> +{
>> +	unsigned long i;
>> +
>> +	for (i = 0; i < size; i += PAGE_SIZE) {
>> +		int err = map_kernel_page(ea + i, pa + i, prot);
> 
> Missing a blank line
> 
>> +		if (err) {
> 
> I'd have done the following to reduce indentation depth
> 
> 		if (!err)
> 			continue

I'll consider it, line lengths were not too bad.

>> +			if (slab_is_available())
>> +				unmap_kernel_range(ea, size);
> 
> Shouldn't it be unmap_kernel_range(ea, i) ?

I guess (i - PAGE_SIZE really), although the old code effectively did
the full range. As a "clean up" it may be better to avoid a subtle
change in behaviour and do that in another patch?

Thanks,
Nick
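
A minimal sketch of the variant under discussion, unwinding only the
portion actually mapped as Christophe suggests (against the helper from
patch 1):

static int ioremap_range(unsigned long ea, phys_addr_t pa,
			 unsigned long size, pgprot_t prot, int nid)
{
	unsigned long i;

	for (i = 0; i < size; i += PAGE_SIZE) {
		int err = map_kernel_page(ea + i, pa + i, prot);

		if (err) {
			/* only [ea, ea + i) has been mapped so far */
			if (slab_is_available())
				unmap_kernel_range(ea, i);
			else
				WARN_ON_ONCE(1); /* Should clean up */
			return err;
		}
	}

	return 0;
}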


* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-19  3:59     ` Nicholas Piggin
@ 2019-06-19 12:49       ` Christophe Leroy
  2019-06-24  1:03         ` Nicholas Piggin
  2019-06-19 16:25       ` Christophe Leroy
  1 sibling, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-06-19 12:49 UTC
  To: Nicholas Piggin, linuxppc-dev



On 19/06/2019 05:59, Nicholas Piggin wrote:
> Christophe Leroy's on June 11, 2019 4:46 pm:
>>
>>
>> On 10/06/2019 05:08, Nicholas Piggin wrote:

[snip]

>>> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
>>> index c9bcf428dd2b..db993bc1aef3 100644
>>> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
>>> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
>>> @@ -11,6 +11,7 @@
>>>    
>>>    #define pr_fmt(fmt) "radix-mmu: " fmt
>>>    
>>> +#include <linux/io.h>
>>>    #include <linux/kernel.h>
>>>    #include <linux/sched/mm.h>
>>>    #include <linux/memblock.h>
>>> @@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
>>>    
>>>    	set_pte_at(mm, addr, ptep, pte);
>>>    }
>>> +
>>> +int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
>>> +			pgprot_t prot, int nid)
>>> +{
>>> +	if (likely(slab_is_available())) {
>>> +		int err = ioremap_page_range(ea, ea + size, pa, prot);
>>> +		if (err)
>>> +			unmap_kernel_range(ea, size);
>>> +		return err;
>>> +	} else {
>>> +		unsigned long i;
>>> +
>>> +		for (i = 0; i < size; i += PAGE_SIZE) {
>>> +			int err = map_kernel_page(ea + i, pa + i, prot);
>>> +			if (WARN_ON_ONCE(err)) /* Should clean up */
>>> +				return err;
>>> +		}
>>
>> Same loop again.
>>
>> What about not doing a radix-specific function and just putting
>> something like the following in the core ioremap_range() function?
>>
>> 	if (likely(slab_is_available()) && radix_enabled()) {
>> 		int err = ioremap_page_range(ea, ea + size, pa, prot);
>>
>> 		if (err)
>> 			unmap_kernel_range(ea, size);
>> 		return err;
>> 	}
>>
>> Because I'm pretty sure we will use ioremap_page_range() more and more.
> 
> Well I agree the duplication is not so nice, but it's convenient
> to see what is going on for each MMU type.
> 
> There is a significant amount of churn that needs to be done in
> this layer so I prefer to make it a bit simpler despite duplication.
> 
> I would like to remove the early ioremap or make it into its own
> function. Re-implement map_kernel_page with ioremap_page_range,
> allow page tables that don't use slab to avoid the early check,
> unbolt the hptes mapped in early boot, etc.
> 
> I just wanted to escape out the 64s and hash/radix implementations
> completely until that settles.

I can understand the benefit in some situations but here I just can't. 
And code duplication should be avoided as much as possible as it makes 
code maintenance more difficult.

Here you have:

+static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
+{
+	unsigned long i;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int err = map_kernel_page(ea + i, pa + i, prot);
+		if (err) {
+			if (slab_is_available())
+				unmap_kernel_range(ea, size);
+			else
+				WARN_ON_ONCE(1); /* Should clean up */
+			return err;
+		}
+	}
+
+	return 0;
+}

You now create a new one in another file, that is almost identical:

+int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
+{
+	unsigned long i;
+
+	if (radix_enabled())
+		return radix__ioremap_range(ea, pa, size, prot, nid);
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int err = map_kernel_page(ea + i, pa + i, prot);
+		if (err) {
+			if (slab_is_available())
+				unmap_kernel_range(ea, size);
+			else
+				WARN_ON_ONCE(1); /* Should clean up */
+			return err;
+		}
+	}
+
+	return 0;
+}

Then you have to make the original one __weak.

Sorry I'm still having difficulties understanding what the benefit is.

radix_enabled() is defined for all platforms, so you could just add the 
following on top of the existing ioremap_range() and voila.

+	if (radix_enabled())
+		return radix__ioremap_range(ea, pa, size, prot, nid);


And with that you wouldn't have the __weak stuff to handle.

> 
>>> -static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>>> +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>>
>> Hum. Weak functions remain in vmlinux, unused, unless
>> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is selected.
>>
>> Also, they are somewhat dangerous because people might change them
>> without seeing that they are overridden for some particular configuration.
> 
> Well you shouldn't assume that when you see a weak function, but
> what's the preferred alternative? A config option?

Yes you are right, nobody should assume that, but ...

But I think if the functions were really different, the preferred 
alternative would be to move the original function into a file dedicated 
to nohash64.

Christophe


* Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
  2019-06-19  4:04   ` Nicholas Piggin
@ 2019-06-19 13:04     ` Christophe Leroy
  0 siblings, 0 replies; 16+ messages in thread
From: Christophe Leroy @ 2019-06-19 13:04 UTC
  To: Nicholas Piggin, linuxppc-dev



On 19/06/2019 06:04, Nicholas Piggin wrote:
> Christophe Leroy's on June 11, 2019 4:28 pm:
>>
>>
>> On 10/06/2019 05:08, Nicholas Piggin wrote:
>>> __ioremap_at error handling is wonky: it requires the caller to clean up
>>> after it. Implement a helper that does the map and error cleanup and
>>> remove the requirement from the caller.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>
>>> This series is a different approach to the problem, using the generic
>>> ioremap_page_range directly, which reduces added code and moves
>>> the radix-specific code into radix files. Thanks to Christophe for
>>> pointing out various problems with the previous patch.
>>>
>>>    arch/powerpc/mm/pgtable_64.c | 27 ++++++++++++++++++++-------
>>>    1 file changed, 20 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>>> index d2d976ff8a0e..6bd3660388aa 100644
>>> --- a/arch/powerpc/mm/pgtable_64.c
>>> +++ b/arch/powerpc/mm/pgtable_64.c
>>> @@ -108,14 +108,30 @@ unsigned long ioremap_bot;
>>>    unsigned long ioremap_bot = IOREMAP_BASE;
>>>    #endif
>>>    
>>> +static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>>> +{
>>> +	unsigned long i;
>>> +
>>> +	for (i = 0; i < size; i += PAGE_SIZE) {
>>> +		int err = map_kernel_page(ea + i, pa + i, prot);
>>
>> Missing a blank line
>>
>>> +		if (err) {
>>
>> I'd have done the following to reduce indentation depth
>>
>> 		if (!err)
>> 			continue;
> 
> I'll consider it, line lengths were not too bad.
> 
>>> +			if (slab_is_available())
>>> +				unmap_kernel_range(ea, size);
>>
>> Shouldn't it be unmap_kernel_range(ea, i) ?
> 
> I guess (i - PAGE_SIZE really), although the old code effectively did
> the full range. As a "clean up" it may be better to avoid a subtle
> change in behaviour and do that in another patch?

Not sure we have to do it in another patch.
The previous code was doing the full range because it was done at an upper 
level, so it didn't know the boundaries. You are creating a nice brand new 
function that has all the necessary information, so why not make it right 
from the start?

Christophe

> 
> Thanks,
> Nick
> 


* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-19  3:59     ` Nicholas Piggin
  2019-06-19 12:49       ` Christophe Leroy
@ 2019-06-19 16:25       ` Christophe Leroy
  2019-06-24  3:12         ` Nicholas Piggin
  1 sibling, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-06-19 16:25 UTC
  To: Nicholas Piggin, linuxppc-dev



On 19/06/2019 05:59, Nicholas Piggin wrote:
> Christophe Leroy's on June 11, 2019 4:46 pm:
>>
>>
>> On 10/06/2019 05:08, Nicholas Piggin wrote:
> I would like to remove the early ioremap or make it into its own
> function. Re-implement map_kernel_page with ioremap_page_range,
> allow page tables that don't use slab to avoid the early check,
> unbolt the hptes mapped in early boot, etc.

Getting early ioremap out of the picture is a very good idea; it will 
help make things more common between all platform types. Today we face 
the fact that PPC32 allocates early IO from the top of memory while 
PPC64 allocates it from the bottom of memory.

Any idea on how to proceed?

Christophe


* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-19 12:49       ` Christophe Leroy
@ 2019-06-24  1:03         ` Nicholas Piggin
  0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-24  1:03 UTC
  To: Christophe Leroy, linuxppc-dev

Christophe Leroy's on June 19, 2019 10:49 pm:
> 
> 
> On 19/06/2019 05:59, Nicholas Piggin wrote:
>> Christophe Leroy's on June 11, 2019 4:46 pm:
>>>
>>>
>>> On 10/06/2019 05:08, Nicholas Piggin wrote:
> 
> [snip]
> 
>>>> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
>>>> index c9bcf428dd2b..db993bc1aef3 100644
>>>> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
>>>> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
>>>> @@ -11,6 +11,7 @@
>>>>    
>>>>    #define pr_fmt(fmt) "radix-mmu: " fmt
>>>>    
>>>> +#include <linux/io.h>
>>>>    #include <linux/kernel.h>
>>>>    #include <linux/sched/mm.h>
>>>>    #include <linux/memblock.h>
>>>> @@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
>>>>    
>>>>    	set_pte_at(mm, addr, ptep, pte);
>>>>    }
>>>> +
>>>> +int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
>>>> +			pgprot_t prot, int nid)
>>>> +{
>>>> +	if (likely(slab_is_available())) {
>>>> +		int err = ioremap_page_range(ea, ea + size, pa, prot);
>>>> +		if (err)
>>>> +			unmap_kernel_range(ea, size);
>>>> +		return err;
>>>> +	} else {
>>>> +		unsigned long i;
>>>> +
>>>> +		for (i = 0; i < size; i += PAGE_SIZE) {
>>>> +			int err = map_kernel_page(ea + i, pa + i, prot);
>>>> +			if (WARN_ON_ONCE(err)) /* Should clean up */
>>>> +				return err;
>>>> +		}
>>>
>>> Same loop again.
>>>
>>> What about not doing a radix-specific function and just putting
>>> something like the following in the core ioremap_range() function?
>>>
>>> 	if (likely(slab_is_available()) && radix_enabled()) {
>>> 		int err = ioremap_page_range(ea, ea + size, pa, prot);
>>>
>>> 		if (err)
>>> 			unmap_kernel_range(ea, size);
>>> 		return err;
>>> 	}
>>>
>>> Because I'm pretty sure we will use ioremap_page_range() more and more.
>> 
>> Well I agree the duplication is not so nice, but it's convenient
>> to see what is going on for each MMU type.
>> 
>> There is a significant amount of churn that needs to be done in
>> this layer so I prefer to make it a bit simpler despite duplication.
>> 
>> I would like to remove the early ioremap or make it into its own
>> function. Re-implement map_kernel_page with ioremap_page_range,
>> allow page tables that don't use slab to avoid the early check,
>> unbolt the hptes mapped in early boot, etc.
>> 
>> I just wanted to escape out the 64s and hash/radix implementations
>> completely until that settles.
> 
> I can understand the benefit in some situations but here I just can't. 
> And code duplication should be avoided as much as possible as it makes 
> code maintenance more difficult.
> 
> Here you have:
> 
> +static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
> +{
> +	unsigned long i;
> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		int err = map_kernel_page(ea + i, pa + i, prot);
> +		if (err) {
> +			if (slab_is_available())
> +				unmap_kernel_range(ea, size);
> +			else
> +				WARN_ON_ONCE(1); /* Should clean up */
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> 
> You now create a new one in another file, that is almost identical:
> 
> +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
> +{
> +	unsigned long i;
> +
> +	if (radix_enabled())
> +		return radix__ioremap_range(ea, pa, size, prot, nid);
> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		int err = map_kernel_page(ea + i, pa + i, prot);
> +		if (err) {
> +			if (slab_is_available())
> +				unmap_kernel_range(ea, size);
> +			else
> +				WARN_ON_ONCE(1); /* Should clean up */
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> 
> Then you have to make the original one __weak.
> 
> Sorry I'm still having difficulties understanding what the benefit is.
> 
> radix_enabled() is defined for all platforms, so you could just add the 
> following on top of the existing ioremap_range() and voila.
> 
> +	if (radix_enabled())
> +		return radix__ioremap_range(ea, pa, size, prot, nid);
> 
> 
> And with that you wouldn't have the __weak stuff to handle.

I guess so. I don't really like radix_enabled escaping from 64s too
much though. I'll try to improve the code in follow-ups if possible.

>>>> -static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>>>> +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
>>>
>>> Hum. Weak functions remain in vmlinux, unused, unless
>>> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is selected.
>>>
>>> Also, they are somewhat dangerous because people might change them
>>> without seeing that they are overridden for some particular configuration.
>> 
>> Well you shouldn't assume that when you see a weak function, but
>> what's the preferred alternative? A config option?
> 
> Yes you are right, nobody should assume that, but ...
> 
> But I think if the functions were really different, the preferred 
> alternative would be to move the original function into a file dedicated 
> to nohash64.

Possibly we could do that, but we might be able to just collapse these
back to using generic ioremap_page_range.

Thanks,
Nick



* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-19 16:25       ` Christophe Leroy
@ 2019-06-24  3:12         ` Nicholas Piggin
  2019-08-05 12:52           ` Christophe Leroy
  0 siblings, 1 reply; 16+ messages in thread
From: Nicholas Piggin @ 2019-06-24  3:12 UTC
  To: Christophe Leroy, linuxppc-dev

Christophe Leroy's on June 20, 2019 2:25 am:
> 
> 
> On 19/06/2019 05:59, Nicholas Piggin wrote:
>> Christophe Leroy's on June 11, 2019 4:46 pm:
>>>
>>>
>>> On 10/06/2019 05:08, Nicholas Piggin wrote:
>> I would like to remove the early ioremap or make it into its own
>> function. Re-implement map_kernel_page with ioremap_page_range,
>> allow page tables that don't use slab to avoid the early check,
>> unbolt the hptes mapped in early boot, etc.
> 
> Getting early ioremap out of the picture is a very good idea; it will 
> help make things more common between all platform types. Today we face 
> the fact that PPC32 allocates early IO from the top of memory while 
> PPC64 allocates it from the bottom of memory.
> 
> Any idea on how to proceed?

I have to have a bit better look at other arches and our platform 
code. Without having looked closely at all the details, I would hope 
we could use GENERIC_EARLY_IOREMAP without too much pain.

Thanks,
Nick



* Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
  2019-06-10  3:08 [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Nicholas Piggin
                   ` (2 preceding siblings ...)
  2019-06-11  6:28 ` [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Christophe Leroy
@ 2019-06-30  8:37 ` Michael Ellerman
  2019-08-20  7:44 ` Christophe Leroy
  4 siblings, 0 replies; 16+ messages in thread
From: Michael Ellerman @ 2019-06-30  8:37 UTC
  To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin

On Mon, 2019-06-10 at 03:08:16 UTC, Nicholas Piggin wrote:
> __ioremap_at error handling is wonky: it requires the caller to clean up
> after it. Implement a helper that does the map and error cleanup and
> remove the requirement from the caller.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a72808a7ec5d340417a91a81e5cabdaa50650f2e

cheers


* Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
  2019-06-24  3:12         ` Nicholas Piggin
@ 2019-08-05 12:52           ` Christophe Leroy
  0 siblings, 0 replies; 16+ messages in thread
From: Christophe Leroy @ 2019-08-05 12:52 UTC
  To: Nicholas Piggin, linuxppc-dev



On 24/06/2019 05:12, Nicholas Piggin wrote:
> Christophe Leroy's on June 20, 2019 2:25 am:
>>
>>
>> On 19/06/2019 05:59, Nicholas Piggin wrote:
>>> Christophe Leroy's on June 11, 2019 4:46 pm:
>>>>
>>>>
>>>> On 10/06/2019 05:08, Nicholas Piggin wrote:
>>> I would like to remove the early ioremap or make it into its own
>>> function. Re-implement map_kernel_page with ioremap_page_range,
>>> allow page tables that don't use slab to avoid the early check,
>>> unbolt the hptes mapped in early boot, etc.
>>
>> Getting early ioremap out of the picture is a very good idea; it will
>> help make things more common between all platform types. Today we face
>> the fact that PPC32 allocates early IO from the top of memory while
>> PPC64 allocates it from the bottom of memory.
>>
>> Any idea on how to proceed?
> 
> I have to have a bit better look at other arches and our platform
> code. Without having looked closely at all the details, I would hope
> we could use GENERIC_EARLY_IOREMAP without too much pain.
> 

Good idea.

I have looked at it and implemented it for PPC32. On its own it works 
well; now the challenge is to move all early call sites of ioremap() to 
early_ioremap().

The point however is that early_ioremap() expects all maps to be released 
by the time we do paging_init(), whereas several of the early PPC ioremap() 
users never release the mapped area. I think we have to dig into this in 
more detail.

Christophe
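
For context, the usage contract that the generic helpers in
mm/early_ioremap.c impose looks roughly like this (a sketch; dev_phys
and len are placeholders, not names from the series):

/* early boot, before slab is up: fixmap-backed temporary mapping */
void __iomem *regs = early_ioremap(dev_phys, len);

/* ... early register accesses ... */

early_iounmap(regs, len);	/* must happen before paging_init() */

Mappings that have to outlive early boot do not fit this pattern, which
is the problem with the call sites mentioned above.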


* Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
  2019-06-10  3:08 [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case Nicholas Piggin
                   ` (3 preceding siblings ...)
  2019-06-30  8:37 ` Michael Ellerman
@ 2019-08-20  7:44 ` Christophe Leroy
  2019-08-27  8:16   ` Nicholas Piggin
  4 siblings, 1 reply; 16+ messages in thread
From: Christophe Leroy @ 2019-08-20  7:44 UTC
  To: Nicholas Piggin, linuxppc-dev

Hi Nick,

On 10/06/2019 05:08, Nicholas Piggin wrote:
> __ioremap_at error handling is wonky: it requires the caller to clean up
> after it. Implement a helper that does the map and error cleanup and
> remove the requirement from the caller.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> 
> This series is a different approach to the problem, using the generic
> ioremap_page_range directly, which reduces added code and moves
> the radix-specific code into radix files. Thanks to Christophe for
> pointing out various problems with the previous patch.
> 
>   arch/powerpc/mm/pgtable_64.c | 27 ++++++++++++++++++++-------
>   1 file changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index d2d976ff8a0e..6bd3660388aa 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c

[...]

> @@ -182,8 +197,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
>   
>   		area->phys_addr = paligned;
>   		ret = __ioremap_at(paligned, area->addr, size, prot);
> -		if (!ret)
> -			vunmap(area->addr);

AFAICS, ioremap_range() calls unmap_kernel_range() in the error case,
but I can't see that that function does the vunmap(), does it? If not, 
who frees the area allocated by __get_vm_area_caller()?

Christophe


* Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
  2019-08-20  7:44 ` Christophe Leroy
@ 2019-08-27  8:16   ` Nicholas Piggin
  0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2019-08-27  8:16 UTC
  To: Christophe Leroy, linuxppc-dev

Christophe Leroy's on August 20, 2019 5:44 pm:
> Hi Nick,
> 
> On 10/06/2019 05:08, Nicholas Piggin wrote:
>> __ioremap_at error handling is wonky: it requires the caller to clean up
>> after it. Implement a helper that does the map and error cleanup and
>> remove the requirement from the caller.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> 
>> This series is a different approach to the problem, using the generic
>> ioremap_page_range directly, which reduces added code and moves
>> the radix-specific code into radix files. Thanks to Christophe for
>> pointing out various problems with the previous patch.
>> 
>>   arch/powerpc/mm/pgtable_64.c | 27 ++++++++++++++++++++-------
>>   1 file changed, 20 insertions(+), 7 deletions(-)
>> 
>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>> index d2d976ff8a0e..6bd3660388aa 100644
>> --- a/arch/powerpc/mm/pgtable_64.c
>> +++ b/arch/powerpc/mm/pgtable_64.c
> 
> [...]
> 
>> @@ -182,8 +197,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
>>   
>>   		area->phys_addr = paligned;
>>   		ret = __ioremap_at(paligned, area->addr, size, prot);
>> -		if (!ret)
>> -			vunmap(area->addr);
> 
> AFAICS, ioremap_range() calls unmap_kernel_range() in the error case,
> but I can't see that that function does the vunmap(), does it? If not, 
> who frees the area allocated by __get_vm_area_caller()?

Oh good catch, I guess we need to keep a free_vm_area here.

Thanks,
Nick
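
A sketch of the fix being agreed on here, restoring the error-path
cleanup in __ioremap_caller() (the eventual commit may differ in
detail):

		area->phys_addr = paligned;
		ret = __ioremap_at(paligned, area->addr, size, prot);
		if (!ret) {
			/* ioremap_range() already tore down any PTEs;
			 * free_vm_area() releases the vm_struct that
			 * __get_vm_area_caller() allocated. */
			free_vm_area(area);
		}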

