linux-arch.vger.kernel.org archive mirror
* [PATCH v3] riscv: Fix permissions for all mm's during mm init
@ 2022-09-02 10:13 Vladimir Isaev
  2022-09-02 16:22 ` Conor.Dooley
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Vladimir Isaev @ 2022-09-02 10:13 UTC (permalink / raw)
  To: linux-arch, linux-riscv, Conor.Dooley, atishp, paul.walmsley,
	palmer, aou, anup
  Cc: Vladimir Isaev, Andrew Jones

It is possible to have more than one mm (not only init_mm) alive while
kernel memory permissions are being fixed up. In my case this was caused
by a request_module() call from drivers/net/phy/phy_device.c and led to
the following Oops during free_initmem() on an RV32 platform:
     Unable to handle kernel paging request at virtual address c0800000
     Oops [#1]
     Modules linked in:
     CPU: 0 PID: 1 Comm: swapper Not tainted 5.15.45
     Hardware name: Syntacore SCR5 SDK board (DT)
     epc : __memset+0x58/0xf4
      ra : free_reserved_area+0xfa/0x15a
     epc : c02b26ac ra : c00eb588 sp : c1c1fed0
      gp : c1898690 tp : c1c98000 t0 : c0800000
      t1 : ffffffff t2 : 00000000 s0 : c1c1ff20
      s1 : c189a000 a0 : c0800000 a1 : cccccccc
      a2 : 00001000 a3 : c0801000 a4 : 00000000
      a5 : 00800000 a6 : fef09000 a7 : 00000000
      s2 : c0e57000 s3 : c10edcf8 s4 : 000000cc
      s5 : ffffefff s6 : c188a9f4 s7 : 00000001
      s8 : c0800000 s9 : fef1b000 s10: c10ee000
      s11: c189a000 t3 : 00000000 t4 : 00000000
      t5 : 00000000 t6 : 00000001
     status: 00000120 badaddr: c0800000 cause: 0000000f
     [<c0488658>] free_initmem+0x204/0x222
     [<c048d05a>] kernel_init+0x32/0xfc
     [<c0002f76>] ret_from_exception+0x0/0xc
     ---[ end trace 7a5e2b002350b528 ]---

This happens because request_module() attempted to modprobe a module and
thereby created a new mm with its own copy of the kernel page table. That
copy never sees later permission updates when RV32 uses 4M pages, because
the pgd entry itself is the leaf.
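
For reference, every new mm gets its own copy of the kernel half of the
page table when its pgd is allocated; roughly (paraphrased from
arch/riscv/include/asm/pgalloc.h, the exact code may differ):

     static inline pgd_t *pgd_alloc(struct mm_struct *mm)
     {
             pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);

             if (likely(pgd != NULL)) {
                     memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
                     /* copy the kernel mappings from init_mm */
                     memcpy(pgd + USER_PTRS_PER_PGD,
                            init_mm.pgd + USER_PTRS_PER_PGD,
                            (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
             }
             return pgd;
     }

With Sv32 and a 4M mapping the permission bits live directly in the copied
first-level (pgd) entry, so a later set_memory_*() call that only walks
init_mm's page table is never reflected in those copies.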

To fix this, update the protection bits in all existing mms, the same way
ARM does; see commit 08925c2f124f
("ARM: 8464/1: Update all mm structures with section adjustments").

Fixes: 19a00869028f ("RISC-V: Protect all kernel sections including init early")
Signed-off-by: Vladimir Isaev <vladimir.isaev@syntacore.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
Changes for v3:
  - Add WARN_ON(system_state != SYSTEM_FREEING_INITMEM) to
    fix_kernel_mem_early() to make sure the function is only used during
    permission fixes.
  - Add comment to fix_kernel_mem_early().

Changes for v2:
  - Fix commit message format.
  - Add 'Fixes' tag.
---
 arch/riscv/include/asm/set_memory.h | 20 ++--------
 arch/riscv/kernel/setup.c           | 11 -----
 arch/riscv/mm/init.c                | 29 +++++++++++---
 arch/riscv/mm/pageattr.c            | 62 +++++++++++++++++++++++++----
 4 files changed, 82 insertions(+), 40 deletions(-)

diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index a2c14d4b3993..bb0f6b4ed86b 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -16,28 +16,16 @@ int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 int set_memory_rw_nx(unsigned long addr, int numpages);
-static __always_inline int set_kernel_memory(char *startp, char *endp,
-					     int (*set_memory)(unsigned long start,
-							       int num_pages))
-{
-	unsigned long start = (unsigned long)startp;
-	unsigned long end = (unsigned long)endp;
-	int num_pages = PAGE_ALIGN(end - start) >> PAGE_SHIFT;
-
-	return set_memory(start, num_pages);
-}
+void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
+			  pgprot_t clear_mask);
 #else
 static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
-static inline int set_kernel_memory(char *startp, char *endp,
-				    int (*set_memory)(unsigned long start,
-						      int num_pages))
-{
-	return 0;
-}
+static inline void fix_kernel_mem_early(char *startp, char *endp,
+					pgprot_t set_mask, pgprot_t clear_mask) { }
 #endif
 
 int set_direct_map_invalid_noflush(struct page *page);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 95ef6e2bf45c..17eae1406092 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -27,7 +27,6 @@
 #include <asm/early_ioremap.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
-#include <asm/set_memory.h>
 #include <asm/sections.h>
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
@@ -318,13 +317,3 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
-
-void free_initmem(void)
-{
-	if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
-		set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
-				  IS_ENABLED(CONFIG_64BIT) ?
-					set_memory_rw : set_memory_rw_nx);
-
-	free_initmem_default(POISON_FREE_INITMEM);
-}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b56a0a75533f..978202712535 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -16,7 +16,6 @@
 #include <linux/of_fdt.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/libfdt.h>
-#include <linux/set_memory.h>
 #include <linux/dma-map-ops.h>
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
@@ -28,6 +27,7 @@
 #include <asm/io.h>
 #include <asm/ptdump.h>
 #include <asm/numa.h>
+#include <asm/set_memory.h>
 
 #include "../kernel/head.h"
 
@@ -714,10 +714,14 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 
 void mark_rodata_ro(void)
 {
-	set_kernel_memory(__start_rodata, _data, set_memory_ro);
-	if (IS_ENABLED(CONFIG_64BIT))
-		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
-				  set_memory_ro);
+	pgprot_t set_mask = __pgprot(_PAGE_READ);
+	pgprot_t clear_mask = __pgprot(_PAGE_WRITE);
+
+	fix_kernel_mem_early(__start_rodata, _data, set_mask, clear_mask);
+	if (IS_ENABLED(CONFIG_64BIT)) {
+		fix_kernel_mem_early(lm_alias(__start_rodata), lm_alias(_data),
+				     set_mask, clear_mask);
+	}
 
 	debug_checkwx();
 }
@@ -1243,3 +1247,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	return vmemmap_populate_basepages(start, end, node, NULL);
 }
 #endif
+
+void free_initmem(void)
+{
+	pgprot_t set_mask = __pgprot(_PAGE_READ | _PAGE_WRITE);
+	pgprot_t clear_mask = IS_ENABLED(CONFIG_64BIT) ?
+			      __pgprot(0) : __pgprot(_PAGE_EXEC);
+
+	if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+		fix_kernel_mem_early(lm_alias(__init_begin),
+				     lm_alias(__init_end),
+				     set_mask, clear_mask);
+	}
+
+	free_initmem_default(POISON_FREE_INITMEM);
+}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 5e49e4b4a4cc..74b8107ac743 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -5,6 +5,7 @@
 
 #include <linux/pagewalk.h>
 #include <linux/pgtable.h>
+#include <linux/sched.h>
 #include <asm/tlbflush.h>
 #include <asm/bitops.h>
 #include <asm/set_memory.h>
@@ -104,24 +105,69 @@ static const struct mm_walk_ops pageattr_ops = {
 	.pte_hole = pageattr_pte_hole,
 };
 
-static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
-			pgprot_t clear_mask)
+static int __set_memory_mm(struct mm_struct *mm, unsigned long start,
+			   unsigned long end, pgprot_t set_mask,
+			   pgprot_t clear_mask)
 {
 	int ret;
-	unsigned long start = addr;
-	unsigned long end = start + PAGE_SIZE * numpages;
 	struct pageattr_masks masks = {
 		.set_mask = set_mask,
 		.clear_mask = clear_mask
 	};
 
+	mmap_read_lock(mm);
+	ret = walk_page_range_novma(mm, start, end, &pageattr_ops, NULL,
+				    &masks);
+	mmap_read_unlock(mm);
+
+	return ret;
+}
+
+void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
+			  pgprot_t clear_mask)
+{
+	struct task_struct *t, *s;
+
+	unsigned long start = (unsigned long)startp;
+	unsigned long end = PAGE_ALIGN((unsigned long)endp);
+
+	/*
+	 * In the SYSTEM_FREEING_INITMEM state we expect that all async code
+	 * is done and no new userspace task can be created.
+	 * So rcu_read_lock() should be enough here.
+	 */
+	WARN_ON(system_state != SYSTEM_FREEING_INITMEM);
+
+	__set_memory_mm(current->active_mm, start, end, set_mask, clear_mask);
+	__set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
+
+	rcu_read_lock();
+	for_each_process(t) {
+		if (t->flags & PF_KTHREAD)
+			continue;
+		for_each_thread(t, s) {
+			if (s->mm) {
+				__set_memory_mm(s->mm, start, end, set_mask,
+						clear_mask);
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	flush_tlb_kernel_range(start, end);
+}
+
+static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
+			pgprot_t clear_mask)
+{
+	int ret;
+	unsigned long start = addr;
+	unsigned long end = start + PAGE_SIZE * numpages;
+
 	if (!numpages)
 		return 0;
 
-	mmap_read_lock(&init_mm);
-	ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
-				     &masks);
-	mmap_read_unlock(&init_mm);
+	ret = __set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
 
 	flush_tlb_kernel_range(start, end);
 
-- 
2.37.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] riscv: Fix permissions for all mm's during mm init
  2022-09-02 10:13 [PATCH v3] riscv: Fix permissions for all mm's during mm init Vladimir Isaev
@ 2022-09-02 16:22 ` Conor.Dooley
  2022-09-15 17:33 ` Palmer Dabbelt
  2022-09-17 21:16 ` Palmer Dabbelt
  2 siblings, 0 replies; 6+ messages in thread
From: Conor.Dooley @ 2022-09-02 16:22 UTC (permalink / raw)
  To: vladimir.isaev, linux-arch, linux-riscv, atishp, paul.walmsley,
	palmer, aou, anup
  Cc: ajones

On 02/09/2022 11:13, Vladimir Isaev wrote:
> [... patch trimmed ...]

Sweet, thanks.

Ran it through a 32bit build & been running with this applied on 64bit for
the afternoon & everything still seems to be intact. Therefore:
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>

Thanks for your patch,
Conor.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] riscv: Fix permissions for all mm's during mm init
  2022-09-02 10:13 [PATCH v3] riscv: Fix permissions for all mm's during mm init Vladimir Isaev
  2022-09-02 16:22 ` Conor.Dooley
@ 2022-09-15 17:33 ` Palmer Dabbelt
  2022-09-17 21:16 ` Palmer Dabbelt
  2 siblings, 0 replies; 6+ messages in thread
From: Palmer Dabbelt @ 2022-09-15 17:33 UTC (permalink / raw)
  To: vladimir.isaev
  Cc: linux-arch, linux-riscv, Conor.Dooley, atishp, Paul Walmsley,
	aou, anup, vladimir.isaev, ajones

On Fri, 02 Sep 2022 03:13:12 PDT (-0700), vladimir.isaev@syntacore.com wrote:
> [... patch trimmed ...]

Thanks, this is on fixes.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] riscv: Fix permissions for all mm's during mm init
  2022-09-02 10:13 [PATCH v3] riscv: Fix permissions for all mm's during mm init Vladimir Isaev
  2022-09-02 16:22 ` Conor.Dooley
  2022-09-15 17:33 ` Palmer Dabbelt
@ 2022-09-17 21:16 ` Palmer Dabbelt
  2022-09-18 16:26   ` Palmer Dabbelt
  2022-09-19  1:29   ` Guo Ren
  2 siblings, 2 replies; 6+ messages in thread
From: Palmer Dabbelt @ 2022-09-17 21:16 UTC (permalink / raw)
  To: vladimir.isaev
  Cc: linux-arch, linux-riscv, Conor.Dooley, atishp, Paul Walmsley,
	aou, anup, vladimir.isaev, ajones

On Fri, 02 Sep 2022 03:13:12 PDT (-0700), vladimir.isaev@syntacore.com wrote:
> [... patch trimmed, pageattr.c hunk kept below ...]
> diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> index 5e49e4b4a4cc..74b8107ac743 100644
> --- a/arch/riscv/mm/pageattr.c
> +++ b/arch/riscv/mm/pageattr.c
> @@ -5,6 +5,7 @@
>
>  #include <linux/pagewalk.h>
>  #include <linux/pgtable.h>
> +#include <linux/sched.h>
>  #include <asm/tlbflush.h>
>  #include <asm/bitops.h>
>  #include <asm/set_memory.h>
> @@ -104,24 +105,69 @@ static const struct mm_walk_ops pageattr_ops = {
>  	.pte_hole = pageattr_pte_hole,
>  };
>
> -static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> -			pgprot_t clear_mask)
> +static int __set_memory_mm(struct mm_struct *mm, unsigned long start,
> +			   unsigned long end, pgprot_t set_mask,
> +			   pgprot_t clear_mask)
>  {
>  	int ret;
> -	unsigned long start = addr;
> -	unsigned long end = start + PAGE_SIZE * numpages;
>  	struct pageattr_masks masks = {
>  		.set_mask = set_mask,
>  		.clear_mask = clear_mask
>  	};
>
> +	mmap_read_lock(mm);
> +	ret = walk_page_range_novma(mm, start, end, &pageattr_ops, NULL,
> +				    &masks);
> +	mmap_read_unlock(mm);
> +
> +	return ret;
> +}
> +
> +void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
> +			  pgprot_t clear_mask)
> +{
> +	struct task_struct *t, *s;
> +
> +	unsigned long start = (unsigned long)startp;
> +	unsigned long end = PAGE_ALIGN((unsigned long)endp);
> +
> +	/*
> +	 * In the SYSTEM_FREEING_INITMEM state we expect that all async code
> +	 * is done and no new userspace task can be created.
> +	 * So rcu_read_lock() should be enough here.
> +	 */
> +	WARN_ON(system_state != SYSTEM_FREEING_INITMEM);
> +
> +	__set_memory_mm(current->active_mm, start, end, set_mask, clear_mask);
> +	__set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
> +
> +	rcu_read_lock();

Sorry for missing this earlier, but as Linus pointed out this doesn't 
work: taking the mmap lock can sleep, which is illegal in an RCU read 
critical section.  Given that we're not really worried about the 
concurrency of this one because nothing else is really running, I don't 
see any reason we can't just take tasklist_lock for reading.
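
(i.e. the problematic nesting is roughly

    rcu_read_lock();
    for_each_process(t)
      for_each_thread(t, s)
        __set_memory_mm(s->mm, ...)
          mmap_read_lock(mm);           /* down_read(), may sleep */
          walk_page_range_novma(...);
          mmap_read_unlock(mm);
    rcu_read_unlock();

so any sleep in down_read() happens inside the RCU read-side critical
section.)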

> +	for_each_process(t) {
> +		if (t->flags & PF_KTHREAD)
> +			continue;
> +		for_each_thread(t, s) {
> +			if (s->mm) {
> +				__set_memory_mm(s->mm, start, end, set_mask,
> +						clear_mask);
> +			}
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	flush_tlb_kernel_range(start, end);
> +}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] riscv: Fix permissions for all mm's during mm init
  2022-09-17 21:16 ` Palmer Dabbelt
@ 2022-09-18 16:26   ` Palmer Dabbelt
  2022-09-19  1:29   ` Guo Ren
  1 sibling, 0 replies; 6+ messages in thread
From: Palmer Dabbelt @ 2022-09-18 16:26 UTC (permalink / raw)
  To: vladimir.isaev
  Cc: linux-arch, linux-riscv, Conor.Dooley, atishp, Paul Walmsley,
	aou, anup, vladimir.isaev, ajones

On Sat, 17 Sep 2022 14:16:28 PDT (-0700), Palmer Dabbelt wrote:
> On Fri, 02 Sep 2022 03:13:12 PDT (-0700), vladimir.isaev@syntacore.com wrote:
>> [... patch trimmed, relevant pageattr.c context kept below ...]
>> +static int __set_memory_mm(struct mm_struct *mm, unsigned long start,
>> +			   unsigned long end, pgprot_t set_mask,
>> +			   pgprot_t clear_mask)
>>  {
>>  	int ret;
>> -	unsigned long start = addr;
>> -	unsigned long end = start + PAGE_SIZE * numpages;
>>  	struct pageattr_masks masks = {
>>  		.set_mask = set_mask,
>>  		.clear_mask = clear_mask
>>  	};
>>
>> +	mmap_read_lock(mm);
>> +	ret = walk_page_range_novma(mm, start, end, &pageattr_ops, NULL,
>> +				    &masks);
>> +	mmap_read_unlock(mm);
>> +
>> +	return ret;
>> +}
>> +
>> +void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
>> +			  pgprot_t clear_mask)
>> +{
>> +	struct task_struct *t, *s;
>> +
>> +	unsigned long start = (unsigned long)startp;
>> +	unsigned long end = PAGE_ALIGN((unsigned long)endp);
>> +
>> +	/*
>> +	 * In the SYSTEM_FREEING_INITMEM state we expect that all async code
>> +	 * is done and no new userspace task can be created.
>> +	 * So rcu_read_lock() should be enough here.
>> +	 */
>> +	WARN_ON(system_state != SYSTEM_FREEING_INITMEM);
>> +
>> +	__set_memory_mm(current->active_mm, start, end, set_mask, clear_mask);
>> +	__set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
>> +
>> +	rcu_read_lock();
>
> Sorry for missing this earlier, but as Linus pointed out this doesn't
> work: taking the mmap lock can sleep, which is illegal in an RCU read
> critical section.  Given that we're not really worried about the
> concurrency of this one because nothing else is really running, I don't
> see any reason we can't just take tasklist_lock for reading.

Just to avoid any confusion because it looks like I crossed emails here: 
this is also wrong, I'll go sort it out but it might take a few days 
(my last day of conferences was today, so hopefully things return to 
sanity soon).


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] riscv: Fix permissions for all mm's during mm init
  2022-09-17 21:16 ` Palmer Dabbelt
  2022-09-18 16:26   ` Palmer Dabbelt
@ 2022-09-19  1:29   ` Guo Ren
  1 sibling, 0 replies; 6+ messages in thread
From: Guo Ren @ 2022-09-19  1:29 UTC (permalink / raw)
  To: Palmer Dabbelt
  Cc: vladimir.isaev, linux-arch, linux-riscv, Conor.Dooley, atishp,
	Paul Walmsley, aou, anup, ajones

On Sun, Sep 18, 2022 at 5:40 AM Palmer Dabbelt <palmer@dabbelt.com> wrote:
>
> On Fri, 02 Sep 2022 03:13:12 PDT (-0700), vladimir.isaev@syntacore.com wrote:
> > [... patch trimmed ...]
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index b56a0a75533f..978202712535 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -16,7 +16,6 @@
> >  #include <linux/of_fdt.h>
> >  #include <linux/of_reserved_mem.h>
> >  #include <linux/libfdt.h>
> > -#include <linux/set_memory.h>
> >  #include <linux/dma-map-ops.h>
> >  #include <linux/crash_dump.h>
> >  #include <linux/hugetlb.h>
> > @@ -28,6 +27,7 @@
> >  #include <asm/io.h>
> >  #include <asm/ptdump.h>
> >  #include <asm/numa.h>
> > +#include <asm/set_memory.h>
> >
> >  #include "../kernel/head.h"
> >
> > @@ -714,10 +714,14 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
> >
> >  void mark_rodata_ro(void)
> >  {
> > -     set_kernel_memory(__start_rodata, _data, set_memory_ro);
> > -     if (IS_ENABLED(CONFIG_64BIT))
> > -             set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
> > -                               set_memory_ro);
> > +     pgprot_t set_mask = __pgprot(_PAGE_READ);
> > +     pgprot_t clear_mask = __pgprot(_PAGE_WRITE);
> > +
> > +     fix_kernel_mem_early(__start_rodata, _data, set_mask, clear_mask);
> > +     if (IS_ENABLED(CONFIG_64BIT)) {
> > +             fix_kernel_mem_early(lm_alias(__start_rodata), lm_alias(_data),
> > +                                  set_mask, clear_mask);
> > +     }
> >
> >       debug_checkwx();
> >  }
> > @@ -1243,3 +1247,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> >       return vmemmap_populate_basepages(start, end, node, NULL);
> >  }
> >  #endif
> > +
> > +void free_initmem(void)
> > +{
> > +     pgprot_t set_mask = __pgprot(_PAGE_READ | _PAGE_WRITE);
> > +     pgprot_t clear_mask = IS_ENABLED(CONFIG_64BIT) ?
> > +                           __pgprot(0) : __pgprot(_PAGE_EXEC);
> > +
> > +     if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
> > +             fix_kernel_mem_early(lm_alias(__init_begin),
> > +                                  lm_alias(__init_end),
> > +                                  set_mask, clear_mask);
> > +     }
> > +
> > +     free_initmem_default(POISON_FREE_INITMEM);
> > +}
> > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > index 5e49e4b4a4cc..74b8107ac743 100644
> > --- a/arch/riscv/mm/pageattr.c
> > +++ b/arch/riscv/mm/pageattr.c
> > @@ -5,6 +5,7 @@
> >
> >  #include <linux/pagewalk.h>
> >  #include <linux/pgtable.h>
> > +#include <linux/sched.h>
> >  #include <asm/tlbflush.h>
> >  #include <asm/bitops.h>
> >  #include <asm/set_memory.h>
> > @@ -104,24 +105,69 @@ static const struct mm_walk_ops pageattr_ops = {
> >       .pte_hole = pageattr_pte_hole,
> >  };
> >
> > -static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > -                     pgprot_t clear_mask)
> > +static int __set_memory_mm(struct mm_struct *mm, unsigned long start,
> > +                        unsigned long end, pgprot_t set_mask,
> > +                        pgprot_t clear_mask)
> >  {
> >       int ret;
> > -     unsigned long start = addr;
> > -     unsigned long end = start + PAGE_SIZE * numpages;
> >       struct pageattr_masks masks = {
> >               .set_mask = set_mask,
> >               .clear_mask = clear_mask
> >       };
> >
> > +     mmap_read_lock(mm);
> > +     ret = walk_page_range_novma(mm, start, end, &pageattr_ops, NULL,
> > +                                 &masks);
> > +     mmap_read_unlock(mm);
> > +
> > +     return ret;
> > +}
> > +
> > +void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
> > +                       pgprot_t clear_mask)
> > +{
> > +     struct task_struct *t, *s;
> > +
> > +     unsigned long start = (unsigned long)startp;
> > +     unsigned long end = PAGE_ALIGN((unsigned long)endp);
> > +
> > +     /*
> > +      * In the SYSTEM_FREEING_INITMEM state we expect that all async code
> > +      * is done and no new userspace task can be created.
> > +      * So rcu_read_lock() should be enough here.
> > +      */
> > +     WARN_ON(system_state != SYSTEM_FREEING_INITMEM);
> > +
> > +     __set_memory_mm(current->active_mm, start, end, set_mask, clear_mask);
> > +     __set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
> > +
> > +     rcu_read_lock();
>
> Sorry for missing this earlier, but as Linus pointed out this doesn't
> work: taking the mmap lock can sleep, which is illegal in an RCU read
> critical section.  Given that we're not really worried about the
> concurrency of this one because nothing else is really running, I don't
> see any reason we can't just take tasklist_lock for reading.
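
For concreteness, the tasklist_lock shape suggested above might look roughly
like the sketch below.  This is untested; fix_user_mms_tasklist() is a made-up
name and __set_memory_mm_nolock() is a hypothetical variant of the patch's
__set_memory_mm() that skips the sleeping mmap_read_lock(), since tasklist_lock
is a spinning rwlock and sleeping under read_lock() is not allowed (at least
without PREEMPT_RT).  Skipping the mmap lock should be tolerable here because
nothing else runs at SYSTEM_FREEING_INITMEM:

/*
 * Rough sketch only; see the caveats above.  Needs <linux/sched/signal.h>
 * for the task iterators and <linux/sched/task.h> for tasklist_lock.
 */
static void fix_user_mms_tasklist(unsigned long start, unsigned long end,
				  pgprot_t set_mask, pgprot_t clear_mask)
{
	struct task_struct *t, *s;

	read_lock(&tasklist_lock);
	for_each_process(t) {
		if (t->flags & PF_KTHREAD)
			continue;
		for_each_thread(t, s) {
			if (s->mm)
				__set_memory_mm_nolock(s->mm, start, end,
						       set_mask, clear_mask);
		}
	}
	read_unlock(&tasklist_lock);
}
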
I think he wants rcu_read_lock()/rcu_read_unlock() to protect only the
for_each_process(t) & for_each_thread(t, s) list traversal, not the whole
__set_memory_mm() operation.

How about:
+       rcu_read_lock();
+       for_each_process(t) {
+               if (t->flags & PF_KTHREAD)
+                       continue;
+               for_each_thread(t, s) {
+                       if (s->mm) {
++                            rcu_read_unlock();
+                               __set_memory_mm(s->mm, start, end, set_mask,
+                                               clear_mask);
++                            rcu_read_lock();
+                       }
+               }
+       }
+       rcu_read_unlock();
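
One wrinkle with dropping the RCU lock inside the loop is that the
for_each_process()/for_each_thread() iterators are no longer protected while
the walk sleeps, so the task list could in principle change under us.  A way
around that (a rough, untested sketch only; fix_user_mms() and NR_EARLY_MM are
made-up names, and __set_memory_mm() is the helper from the patch) is to take
the mm references in one non-sleeping pass and do the walks afterwards:

/*
 * Sketch: needs <linux/sched/mm.h> for get_task_mm()/mmput() and
 * <linux/sched/signal.h> for the task iterators.
 */
#define NR_EARLY_MM	16	/* arbitrary bound; few user mms exist this early */

static void fix_user_mms(unsigned long start, unsigned long end,
			 pgprot_t set_mask, pgprot_t clear_mask)
{
	struct mm_struct *mms[NR_EARLY_MM];
	struct task_struct *t, *s;
	int i, nr = 0;

	/* Pass 1: no sleeping here, only take references. */
	rcu_read_lock();
	for_each_process(t) {
		if (t->flags & PF_KTHREAD)
			continue;
		for_each_thread(t, s) {
			struct mm_struct *mm = s->mm;

			if (!mm || nr == NR_EARLY_MM)
				continue;
			for (i = 0; i < nr; i++)
				if (mms[i] == mm)
					break;
			if (i < nr)
				continue;	/* threads share one mm */
			mm = get_task_mm(s);	/* takes task_lock, does not sleep */
			if (mm)
				mms[nr++] = mm;
		}
	}
	rcu_read_unlock();

	/* Pass 2: the references keep each mm alive across the sleeping walk. */
	for (i = 0; i < nr; i++) {
		__set_memory_mm(mms[i], start, end, set_mask, clear_mask);
		mmput(mms[i]);
	}
}
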

>
> > +     for_each_process(t) {
> > +             if (t->flags & PF_KTHREAD)
> > +                     continue;
> > +             for_each_thread(t, s) {
> > +                     if (s->mm) {
> > +                             __set_memory_mm(s->mm, start, end, set_mask,
> > +                                             clear_mask);
> > +                     }
> > +             }
> > +     }
> > +     rcu_read_unlock();
> > +
> > +     flush_tlb_kernel_range(start, end);
> > +}
> > +
> > +static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > +                     pgprot_t clear_mask)
> > +{
> > +     int ret;
> > +     unsigned long start = addr;
> > +     unsigned long end = start + PAGE_SIZE * numpages;
> > +
> >       if (!numpages)
> >               return 0;
> >
> > -     mmap_read_lock(&init_mm);
> > -     ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
> > -                                  &masks);
> > -     mmap_read_unlock(&init_mm);
> > +     ret = __set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
> >
> >       flush_tlb_kernel_range(start, end);



-- 
Best Regards
 Guo Ren

end of thread, other threads:[~2022-09-19  1:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-02 10:13 [PATCH v3] riscv: Fix permissions for all mm's during mm init Vladimir Isaev
2022-09-02 16:22 ` Conor.Dooley
2022-09-15 17:33 ` Palmer Dabbelt
2022-09-17 21:16 ` Palmer Dabbelt
2022-09-18 16:26   ` Palmer Dabbelt
2022-09-19  1:29   ` Guo Ren
