linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] riscv: Fix permissions for all mm's during mm init
@ 2022-09-17 18:47 Vladimir Isaev
  0 siblings, 0 replies; 3+ messages in thread
From: Vladimir Isaev @ 2022-09-17 18:47 UTC (permalink / raw)
  To: linux-riscv, linux-arch
  Cc: Vladimir Isaev, Andrew Jones, Conor Dooley, Palmer Dabbelt,
	Linus Torvalds

It is possible to have more than one mm (init_mm) during memory
permission fixes. In my case it was caused by request_module
from drivers/net/phy/phy_device.c and leads to following Oops
during free_initmem() on RV32 platform:
     Unable to handle kernel paging request at virtual address c0800000
     Oops [#1]
     Modules linked in:
     CPU: 0 PID: 1 Comm: swapper Not tainted 5.15.45
     Hardware name: Syntacore SCR5 SDK board (DT)
     epc : __memset+0x58/0xf4
      ra : free_reserved_area+0xfa/0x15a
     epc : c02b26ac ra : c00eb588 sp : c1c1fed0
      gp : c1898690 tp : c1c98000 t0 : c0800000
      t1 : ffffffff t2 : 00000000 s0 : c1c1ff20
      s1 : c189a000 a0 : c0800000 a1 : cccccccc
      a2 : 00001000 a3 : c0801000 a4 : 00000000
      a5 : 00800000 a6 : fef09000 a7 : 00000000
      s2 : c0e57000 s3 : c10edcf8 s4 : 000000cc
      s5 : ffffefff s6 : c188a9f4 s7 : 00000001
      s8 : c0800000 s9 : fef1b000 s10: c10ee000
      s11: c189a000 t3 : 00000000 t4 : 00000000
      t5 : 00000000 t6 : 00000001
     status: 00000120 badaddr: c0800000 cause: 0000000f
     [<c0488658>] free_initmem+0x204/0x222
     [<c048d05a>] kernel_init+0x32/0xfc
     [<c0002f76>] ret_from_exception+0x0/0xc
     ---[ end trace 7a5e2b002350b528 ]---

This is because request_module attempted to modprobe module, so it created
new mm with the copy of kernel's page table. And this copy won't be updated
in case of 4M pages and RV32 (pgd is the leaf).

To fix this we can update protection bits for all of existing mm-s, the
same as ARM does, see commit 08925c2f124f
("ARM: 8464/1: Update all mm structures with section adjustments").

Cc: Andrew Jones <ajones@ventanamicro.com>
Cc: Conor Dooley <conor.dooley@microchip.com>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 19a00869028f ("RISC-V: Protect all kernel sections including init early")
Signed-off-by: Vladimir Isaev <vladimir.isaev@syntacore.com>
---
Changes for v4:
  - Use read_lock(&tasklist_lock) instead of rcu_read_lock since
    __set_memory_mm can block. Sorry about this.
  - Drop WARN_ON.

Changes for v3:
  - Add WARN_ON(state != SYSTEM_FREEING_INITMEM) to fix_kernel_mem_early()
    to make sure that the function used only during permission fixes.
  - Add comment to fix_kernel_mem_early().

Changes for v2:
  - Fix commit message format.
  - Add 'Fixes' tag.
---
 arch/riscv/include/asm/set_memory.h | 20 +++--------
 arch/riscv/kernel/setup.c           | 11 ------
 arch/riscv/mm/init.c                | 29 ++++++++++++---
 arch/riscv/mm/pageattr.c            | 55 ++++++++++++++++++++++++-----
 4 files changed, 75 insertions(+), 40 deletions(-)

diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index a2c14d4b3993..bb0f6b4ed86b 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -16,28 +16,16 @@ int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 int set_memory_rw_nx(unsigned long addr, int numpages);
-static __always_inline int set_kernel_memory(char *startp, char *endp,
-					     int (*set_memory)(unsigned long start,
-							       int num_pages))
-{
-	unsigned long start = (unsigned long)startp;
-	unsigned long end = (unsigned long)endp;
-	int num_pages = PAGE_ALIGN(end - start) >> PAGE_SHIFT;
-
-	return set_memory(start, num_pages);
-}
+void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
+			  pgprot_t clear_mask);
 #else
 static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
-static inline int set_kernel_memory(char *startp, char *endp,
-				    int (*set_memory)(unsigned long start,
-						      int num_pages))
-{
-	return 0;
-}
+static inline void fix_kernel_mem_early(char *startp, char *endp,
+					pgprot_t set_mask, pgprot_t clear_mask) { }
 #endif
 
 int set_direct_map_invalid_noflush(struct page *page);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 95ef6e2bf45c..17eae1406092 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -27,7 +27,6 @@
 #include <asm/early_ioremap.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
-#include <asm/set_memory.h>
 #include <asm/sections.h>
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
@@ -318,13 +317,3 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
-
-void free_initmem(void)
-{
-	if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
-		set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
-				  IS_ENABLED(CONFIG_64BIT) ?
-					set_memory_rw : set_memory_rw_nx);
-
-	free_initmem_default(POISON_FREE_INITMEM);
-}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b56a0a75533f..978202712535 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -16,7 +16,6 @@
 #include <linux/of_fdt.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/libfdt.h>
-#include <linux/set_memory.h>
 #include <linux/dma-map-ops.h>
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
@@ -28,6 +27,7 @@
 #include <asm/io.h>
 #include <asm/ptdump.h>
 #include <asm/numa.h>
+#include <asm/set_memory.h>
 
 #include "../kernel/head.h"
 
@@ -714,10 +714,14 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 
 void mark_rodata_ro(void)
 {
-	set_kernel_memory(__start_rodata, _data, set_memory_ro);
-	if (IS_ENABLED(CONFIG_64BIT))
-		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
-				  set_memory_ro);
+	pgprot_t set_mask = __pgprot(_PAGE_READ);
+	pgprot_t clear_mask = __pgprot(_PAGE_WRITE);
+
+	fix_kernel_mem_early(__start_rodata, _data, set_mask, clear_mask);
+	if (IS_ENABLED(CONFIG_64BIT)) {
+		fix_kernel_mem_early(lm_alias(__start_rodata), lm_alias(_data),
+				     set_mask, clear_mask);
+	}
 
 	debug_checkwx();
 }
@@ -1243,3 +1247,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	return vmemmap_populate_basepages(start, end, node, NULL);
 }
 #endif
+
+void free_initmem(void)
+{
+	pgprot_t set_mask = __pgprot(_PAGE_READ | _PAGE_WRITE);
+	pgprot_t clear_mask = IS_ENABLED(CONFIG_64BIT) ?
+			      __pgprot(0) : __pgprot(_PAGE_EXEC);
+
+	if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+		fix_kernel_mem_early(lm_alias(__init_begin),
+				     lm_alias(__init_end),
+				     set_mask, clear_mask);
+	}
+
+	free_initmem_default(POISON_FREE_INITMEM);
+}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 5e49e4b4a4cc..9f9418c81c50 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -5,6 +5,7 @@
 
 #include <linux/pagewalk.h>
 #include <linux/pgtable.h>
+#include <linux/sched.h>
 #include <asm/tlbflush.h>
 #include <asm/bitops.h>
 #include <asm/set_memory.h>
@@ -104,24 +105,62 @@ static const struct mm_walk_ops pageattr_ops = {
 	.pte_hole = pageattr_pte_hole,
 };
 
-static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
-			pgprot_t clear_mask)
+static int __set_memory_mm(struct mm_struct *mm, unsigned long start,
+			   unsigned long end, pgprot_t set_mask,
+			   pgprot_t clear_mask)
 {
 	int ret;
-	unsigned long start = addr;
-	unsigned long end = start + PAGE_SIZE * numpages;
 	struct pageattr_masks masks = {
 		.set_mask = set_mask,
 		.clear_mask = clear_mask
 	};
 
+	mmap_read_lock(mm);
+	ret = walk_page_range_novma(mm, start, end, &pageattr_ops, NULL,
+				    &masks);
+	mmap_read_unlock(mm);
+
+	return ret;
+}
+
+void fix_kernel_mem_early(char *startp, char *endp, pgprot_t set_mask,
+			  pgprot_t clear_mask)
+{
+	struct task_struct *t, *s;
+
+	unsigned long start = (unsigned long)startp;
+	unsigned long end = PAGE_ALIGN((unsigned long)endp);
+
+	__set_memory_mm(current->active_mm, start, end, set_mask, clear_mask);
+	__set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
+
+	read_lock(&tasklist_lock);
+	for_each_process(t) {
+		if (t->flags & PF_KTHREAD)
+			continue;
+		for_each_thread(t, s) {
+			if (s->mm) {
+				__set_memory_mm(s->mm, start, end, set_mask,
+						clear_mask);
+			}
+		}
+	}
+	read_unlock(&tasklist_lock);
+
+	flush_tlb_kernel_range(start, end);
+}
+
+static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
+			pgprot_t clear_mask)
+{
+	int ret;
+	unsigned long start = addr;
+	unsigned long end = start + PAGE_SIZE * numpages;
+
 	if (!numpages)
 		return 0;
 
-	mmap_read_lock(&init_mm);
-	ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
-				     &masks);
-	mmap_read_unlock(&init_mm);
+	ret = __set_memory_mm(&init_mm, start, end, set_mask, clear_mask);
 
 	flush_tlb_kernel_range(start, end);
 
-- 
2.37.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v4] riscv: Fix permissions for all mm's during mm init
  2022-09-18 10:05 ` Palmer Dabbelt
@ 2022-09-19  6:04   ` Vladimir Isaev
  0 siblings, 0 replies; 3+ messages in thread
From: Vladimir Isaev @ 2022-09-19  6:04 UTC (permalink / raw)
  To: Palmer Dabbelt, Linus Torvalds
  Cc: linux-riscv, linux-arch, ajones, conor.dooley

18.09.2022 13:05, Palmer Dabbelt wrote:
> On Sat, 17 Sep 2022 11:58:48 PDT (-0700), Linus Torvalds wrote:
>> Sorry for html crud, I'm afk right now.
>>
>> But no, this does not help at all.
>>
>> Now you're doing a blocking semaphore operation under a spinning rwlock
>> instead.
>>
>> Which also is completely invalid.
>>
>> Your can't do blocking locks in atomic context, and it doesn't matter
>> whether the atomic context is a rcu read section or a spin lock.
> 
> OK, sorry for screwing up a second time.  I'll go try and make sure we get this right before sending anything up.  Vladimir: I'll just take this one over and send a v5, I'll find some time this week to make sure I can get it right this time.
> 
> Sorry for the mess!
Hi Palmer,

I think we can avoid using full __set_memory_mm for processes here, because all non-pgd leafs were
fixed by __set_memory_mm(&init_mm).

So all we need here is to fix mm.pgd in case if this pgd is a leaf, because we copy only pgd for userspace.

Thank you,
Vladimir Isaev
> 
>> Stop making random locking changes like this. And enable lockdep and the
>> lock verification that would have told you all this immediately.
>>
>>        Linus
>>
>>
>> On Sat, Sep 17, 2022, 11:48 Vladimir Isaev <vladimir.isaev@syntacore.com>
>> wrote:
>>
>>> );
>>> +
>>> +       read_lock(&tasklist_lock);
>>> +       for_each_process(t) {
>>> +               if (t->flags & PF_KTHREAD)
>>> +                       continue;
>>> +               for_each_thread(t, s) {
>>> +                       if (s->mm) {
>>> +                               __set_memory_mm(s->mm, start, end,
>>> set_mask,
>>> +                                               clear_mask);
>>> +                       }
>>> +               }
>>> +       }
>>> +       read_unlock(&tasklist_lock);
>>>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v4] riscv: Fix permissions for all mm's during mm init
       [not found] <CAHk-=wg0vQFgiBEuJKHzxrS8JB=CxJFNf5WgtaofRn+bqs65oA@mail.gmail.com>
@ 2022-09-18 10:05 ` Palmer Dabbelt
  2022-09-19  6:04   ` Vladimir Isaev
  0 siblings, 1 reply; 3+ messages in thread
From: Palmer Dabbelt @ 2022-09-18 10:05 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: vladimir.isaev, linux-riscv, linux-arch, ajones, conor.dooley

On Sat, 17 Sep 2022 11:58:48 PDT (-0700), Linus Torvalds wrote:
> Sorry for html crud, I'm afk right now.
>
> But no, this does not help at all.
>
> Now you're doing a blocking semaphore operation under a spinning rwlock
> instead.
>
> Which also is completely invalid.
>
> Your can't do blocking locks in atomic context, and it doesn't matter
> whether the atomic context is a rcu read section or a spin lock.

OK, sorry for screwing up a second time.  I'll go try and make sure we 
get this right before sending anything up.  Vladimir: I'll just take 
this one over and send a v5, I'll find some time this week to make sure 
I can get it right this time.

Sorry for the mess!

> Stop making random locking changes like this. And enable lockdep and the
> lock verification that would have told you all this immediately.
>
>        Linus
>
>
> On Sat, Sep 17, 2022, 11:48 Vladimir Isaev <vladimir.isaev@syntacore.com>
> wrote:
>
>> );
>> +
>> +       read_lock(&tasklist_lock);
>> +       for_each_process(t) {
>> +               if (t->flags & PF_KTHREAD)
>> +                       continue;
>> +               for_each_thread(t, s) {
>> +                       if (s->mm) {
>> +                               __set_memory_mm(s->mm, start, end,
>> set_mask,
>> +                                               clear_mask);
>> +                       }
>> +               }
>> +       }
>> +       read_unlock(&tasklist_lock);
>>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-09-19  6:05 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-17 18:47 [PATCH v4] riscv: Fix permissions for all mm's during mm init Vladimir Isaev
     [not found] <CAHk-=wg0vQFgiBEuJKHzxrS8JB=CxJFNf5WgtaofRn+bqs65oA@mail.gmail.com>
2022-09-18 10:05 ` Palmer Dabbelt
2022-09-19  6:04   ` Vladimir Isaev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).