[v2,1/1] riscv/kasan: add KASAN_VMALLOC support
diff mbox series

Message ID 20210116055836.22366-2-nylon7@andestech.com
State Accepted
Commit e178d670f251b6947d6be99c0014e9a57ad4f0e0
Headers show
Series
  • kasan: support backing vmalloc space for riscv
Related show

Commit Message

Nylon Chen Jan. 16, 2021, 5:58 a.m. UTC
This implementation follows the x86/s390 architectures.

So, it doesn't map the early shadow page to cover the VMALLOC space.

Instead, prepopulate the top-level page table for the range that would
otherwise be empty.

Lower levels are filled dynamically upon memory allocation while
booting.

Signed-off-by: Nylon Chen <nylon7@andestech.com>
Signed-off-by: Nick Hu <nickhu@andestech.com>
---
 arch/riscv/Kconfig         |  1 +
 arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

Comments

Palmer Dabbelt Jan. 23, 2021, 3:56 a.m. UTC | #1
On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
> It references to x86/s390 architecture.
>
> So, it doesn't map the early shadow page to cover VMALLOC space.
>
> Prepopulate top level page table for the range that would otherwise be
> empty.
>
> lower levels are filled dynamically upon memory allocation while
> booting.
>
> Signed-off-by: Nylon Chen <nylon7@andestech.com>
> Signed-off-by: Nick Hu <nickhu@andestech.com>
> ---
>  arch/riscv/Kconfig         |  1 +
>  arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 57 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 81b76d44725d..15a2c8088bbe 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -57,6 +57,7 @@ config RISCV
>  	select HAVE_ARCH_JUMP_LABEL
>  	select HAVE_ARCH_JUMP_LABEL_RELATIVE
>  	select HAVE_ARCH_KASAN if MMU && 64BIT
> +	select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
>  	select HAVE_ARCH_KGDB
>  	select HAVE_ARCH_KGDB_QXFER_PKT
>  	select HAVE_ARCH_MMAP_RND_BITS if MMU
> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> index 12ddd1f6bf70..4b9149f963d3 100644
> --- a/arch/riscv/mm/kasan_init.c
> +++ b/arch/riscv/mm/kasan_init.c
> @@ -9,6 +9,19 @@
>  #include <linux/pgtable.h>
>  #include <asm/tlbflush.h>
>  #include <asm/fixmap.h>
> +#include <asm/pgalloc.h>
> +
> +static __init void *early_alloc(size_t size, int node)
> +{
> +	void *ptr = memblock_alloc_try_nid(size, size,
> +		__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
> +
> +	if (!ptr)
> +		panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n",
> +			__func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
> +
> +	return ptr;
> +}
>
>  extern pgd_t early_pg_dir[PTRS_PER_PGD];
>  asmlinkage void __init kasan_early_init(void)
> @@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
>  	memset(start, 0, end - start);
>  }
>
> +void __init kasan_shallow_populate(void *start, void *end)
> +{
> +	unsigned long vaddr = (unsigned long)start & PAGE_MASK;
> +	unsigned long vend = PAGE_ALIGN((unsigned long)end);
> +	unsigned long pfn;
> +	int index;
> +	void *p;
> +	pud_t *pud_dir, *pud_k;
> +	pgd_t *pgd_dir, *pgd_k;
> +	p4d_t *p4d_dir, *p4d_k;
> +
> +	while (vaddr < vend) {
> +		index = pgd_index(vaddr);
> +		pfn = csr_read(CSR_SATP) & SATP_PPN;
> +		pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
> +		pgd_k = init_mm.pgd + index;
> +		pgd_dir = pgd_offset_k(vaddr);
> +		set_pgd(pgd_dir, *pgd_k);
> +
> +		p4d_dir = p4d_offset(pgd_dir, vaddr);
> +		p4d_k  = p4d_offset(pgd_k, vaddr);
> +
> +		vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
> +		pud_dir = pud_offset(p4d_dir, vaddr);
> +		pud_k = pud_offset(p4d_k, vaddr);
> +
> +		if (pud_present(*pud_dir)) {
> +			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
> +			pud_populate(&init_mm, pud_dir, p);
> +		}
> +		vaddr += PAGE_SIZE;
> +	}
> +}
> +
>  void __init kasan_init(void)
>  {
>  	phys_addr_t _start, _end;
> @@ -90,7 +137,15 @@ void __init kasan_init(void)
>
>  	kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
>  				    (void *)kasan_mem_to_shadow((void *)
> -								VMALLOC_END));
> +								VMEMMAP_END));
> +	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
> +		kasan_shallow_populate(
> +			(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
> +			(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
> +	else
> +		kasan_populate_early_shadow(
> +			(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
> +			(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
>
>  	for_each_mem_range(i, &_start, &_end) {
>  		void *start = (void *)_start;

Thanks, this is on for-next.
Alexandre Ghiti Feb. 8, 2021, 6:28 a.m. UTC | #2
Hi Nylon,

Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
>> It references to x86/s390 architecture.
>>
>> So, it doesn't map the early shadow page to cover VMALLOC space.
>>
>> Prepopulate top level page table for the range that would otherwise be
>> empty.
>>
>> lower levels are filled dynamically upon memory allocation while
>> booting.

I think we can improve the changelog a bit here with something like this:

"KASAN vmalloc space used to be mapped using kasan early shadow page. 
KASAN_VMALLOC requires the top-level of the kernel page table to be 
properly populated, lower levels being filled dynamically upon memory 
allocation at runtime."

>>
>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
>> Signed-off-by: Nick Hu <nickhu@andestech.com>
>> ---
>>  arch/riscv/Kconfig         |  1 +
>>  arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index 81b76d44725d..15a2c8088bbe 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -57,6 +57,7 @@ config RISCV
>>      select HAVE_ARCH_JUMP_LABEL
>>      select HAVE_ARCH_JUMP_LABEL_RELATIVE
>>      select HAVE_ARCH_KASAN if MMU && 64BIT
>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
>>      select HAVE_ARCH_KGDB
>>      select HAVE_ARCH_KGDB_QXFER_PKT
>>      select HAVE_ARCH_MMAP_RND_BITS if MMU
>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>> index 12ddd1f6bf70..4b9149f963d3 100644
>> --- a/arch/riscv/mm/kasan_init.c
>> +++ b/arch/riscv/mm/kasan_init.c
>> @@ -9,6 +9,19 @@
>>  #include <linux/pgtable.h>
>>  #include <asm/tlbflush.h>
>>  #include <asm/fixmap.h>
>> +#include <asm/pgalloc.h>
>> +
>> +static __init void *early_alloc(size_t size, int node)
>> +{
>> +    void *ptr = memblock_alloc_try_nid(size, size,
>> +        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
>> +
>> +    if (!ptr)
>> +        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d 
>> from=%llx\n",
>> +            __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
>> +
>> +    return ptr;
>> +}
>>
>>  extern pgd_t early_pg_dir[PTRS_PER_PGD];
>>  asmlinkage void __init kasan_early_init(void)
>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
>>      memset(start, 0, end - start);
>>  }
>>
>> +void __init kasan_shallow_populate(void *start, void *end)
>> +{
>> +    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>> +    unsigned long vend = PAGE_ALIGN((unsigned long)end);
>> +    unsigned long pfn;
>> +    int index;
>> +    void *p;
>> +    pud_t *pud_dir, *pud_k;
>> +    pgd_t *pgd_dir, *pgd_k;
>> +    p4d_t *p4d_dir, *p4d_k;
>> +
>> +    while (vaddr < vend) {
>> +        index = pgd_index(vaddr);
>> +        pfn = csr_read(CSR_SATP) & SATP_PPN;

At this point in the boot process, we know that we use swapper_pg_dir so 
no need to read SATP.

>> +        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;

Here, this pgd_dir assignment is overwritten 2 lines below, so no need 
for it.

>> +        pgd_k = init_mm.pgd + index;
>> +        pgd_dir = pgd_offset_k(vaddr);

pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == pgd_dir.

>> +        set_pgd(pgd_dir, *pgd_k);
>> +
>> +        p4d_dir = p4d_offset(pgd_dir, vaddr);
>> +        p4d_k  = p4d_offset(pgd_k, vaddr);
>> +
>> +        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;

Why do you increase vaddr *before* populating the first one? Also, 
pud_addr_end does that properly: it returns the next pud boundary, or 
the end address if that boundary would go beyond the range to map.

>> +        pud_dir = pud_offset(p4d_dir, vaddr);
>> +        pud_k = pud_offset(p4d_k, vaddr);
>> +
>> +        if (pud_present(*pud_dir)) {
>> +            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>> +            pud_populate(&init_mm, pud_dir, p);

init_mm is not needed here.

>> +        }
>> +        vaddr += PAGE_SIZE;

Why do you need to add PAGE_SIZE ? vaddr already points to the next pud.

It seems like this patch tries to populate the userspace page table, 
whereas at this point in the boot process only swapper_pg_dir is used. 
Or am I missing something?

Thanks,

Alex

>> +    }
>> +}
>> +
>>  void __init kasan_init(void)
>>  {
>>      phys_addr_t _start, _end;
>> @@ -90,7 +137,15 @@ void __init kasan_init(void)
>>
>>      kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
>>                      (void *)kasan_mem_to_shadow((void *)
>> -                                VMALLOC_END));
>> +                                VMEMMAP_END));
>> +    if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
>> +        kasan_shallow_populate(
>> +            (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
>> +            (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
>> +    else
>> +        kasan_populate_early_shadow(
>> +            (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
>> +            (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
>>
>>      for_each_mem_range(i, &_start, &_end) {
>>          void *start = (void *)_start;
>
> Thanks, this is on for-next.
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Alexandre Ghiti Feb. 13, 2021, 10:52 a.m. UTC | #3
Hi Nylon, Palmer,

Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
> Hi Nylon,
> 
> Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
>> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
>>> It references to x86/s390 architecture.
>>> >> So, it doesn't map the early shadow page to cover VMALLOC space.
>>>
>>> Prepopulate top level page table for the range that would otherwise be
>>> empty.
>>>
>>> lower levels are filled dynamically upon memory allocation while
>>> booting.
> 
> I think we can improve the changelog a bit here with something like that:
> 
> "KASAN vmalloc space used to be mapped using kasan early shadow page. 
> KASAN_VMALLOC requires the top-level of the kernel page table to be 
> properly populated, lower levels being filled dynamically upon memory 
> allocation at runtime."
> 
>>>
>>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
>>> Signed-off-by: Nick Hu <nickhu@andestech.com>
>>> ---
>>>  arch/riscv/Kconfig         |  1 +
>>>  arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
>>>  2 files changed, 57 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index 81b76d44725d..15a2c8088bbe 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -57,6 +57,7 @@ config RISCV
>>>      select HAVE_ARCH_JUMP_LABEL
>>>      select HAVE_ARCH_JUMP_LABEL_RELATIVE
>>>      select HAVE_ARCH_KASAN if MMU && 64BIT
>>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
>>>      select HAVE_ARCH_KGDB
>>>      select HAVE_ARCH_KGDB_QXFER_PKT
>>>      select HAVE_ARCH_MMAP_RND_BITS if MMU
>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>>> index 12ddd1f6bf70..4b9149f963d3 100644
>>> --- a/arch/riscv/mm/kasan_init.c
>>> +++ b/arch/riscv/mm/kasan_init.c
>>> @@ -9,6 +9,19 @@
>>>  #include <linux/pgtable.h>
>>>  #include <asm/tlbflush.h>
>>>  #include <asm/fixmap.h>
>>> +#include <asm/pgalloc.h>
>>> +
>>> +static __init void *early_alloc(size_t size, int node)
>>> +{
>>> +    void *ptr = memblock_alloc_try_nid(size, size,
>>> +        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
>>> +
>>> +    if (!ptr)
>>> +        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d 
>>> from=%llx\n",
>>> +            __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
>>> +
>>> +    return ptr;
>>> +}
>>>
>>>  extern pgd_t early_pg_dir[PTRS_PER_PGD];
>>>  asmlinkage void __init kasan_early_init(void)
>>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
>>>      memset(start, 0, end - start);
>>>  }
>>>
>>> +void __init kasan_shallow_populate(void *start, void *end)
>>> +{
>>> +    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>>> +    unsigned long vend = PAGE_ALIGN((unsigned long)end);
>>> +    unsigned long pfn;
>>> +    int index;
>>> +    void *p;
>>> +    pud_t *pud_dir, *pud_k;
>>> +    pgd_t *pgd_dir, *pgd_k;
>>> +    p4d_t *p4d_dir, *p4d_k;
>>> +
>>> +    while (vaddr < vend) {
>>> +        index = pgd_index(vaddr);
>>> +        pfn = csr_read(CSR_SATP) & SATP_PPN;
> 
> At this point in the boot process, we know that we use swapper_pg_dir so 
> no need to read SATP.
> 
>>> +        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
> 
> Here, this pgd_dir assignment is overwritten 2 lines below, so no need 
> for it.
> 
>>> +        pgd_k = init_mm.pgd + index;
>>> +        pgd_dir = pgd_offset_k(vaddr);
> 
> pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == pgd_dir.
> 
>>> +        set_pgd(pgd_dir, *pgd_k);
>>> +
>>> +        p4d_dir = p4d_offset(pgd_dir, vaddr);
>>> +        p4d_k  = p4d_offset(pgd_k, vaddr);
>>> +
>>> +        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
> 
> Why do you increase vaddr *before* populating the first one ? And 
> pud_addr_end does that properly: it returns the next pud address if it 
> does not go beyond end address to map.
> 
>>> +        pud_dir = pud_offset(p4d_dir, vaddr);
>>> +        pud_k = pud_offset(p4d_k, vaddr);
>>> +
>>> +        if (pud_present(*pud_dir)) {
>>> +            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>>> +            pud_populate(&init_mm, pud_dir, p);
> 
> init_mm is not needed here.
> 
>>> +        }
>>> +        vaddr += PAGE_SIZE;
> 
> Why do you need to add PAGE_SIZE ? vaddr already points to the next pud.
> 
> It seems like this patch tries to populate userspace page table whereas 
> at this point in the boot process, only swapper_pg_dir is used or am I 
> missing something ?
> 
> Thanks,
> 
> Alex

This morning I implemented a version that addresses all the comments I 
made earlier. I was able to insert test_kasan_module on both sv39 and 
sv48 without any modification: set_pgd "goes through" all the unused 
page table levels, whereas the p*d_populate functions are no-ops for 
unused levels.

If you have any comments, do not hesitate to share them.

diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index adbf94b7e68a..d643b222167c 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, void *end)
         memset(start, KASAN_SHADOW_INIT, end - start);
  }
 
+void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
+{
+       unsigned long next;
+       void *p;
+       pgd_t *pgd_k = pgd_offset_k(vaddr);
+
+       do {
+               next = pgd_addr_end(vaddr, end);
+               if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
+                       p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+                       set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
+               }
+       } while (pgd_k++, vaddr = next, vaddr != end);
+}
+
+void __init kasan_shallow_populate(void *start, void *end)
+{
+       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+       unsigned long vend = PAGE_ALIGN((unsigned long)end);
+
+       kasan_shallow_populate_pgd(vaddr, vend);
+
+       local_flush_tlb_all();
+}
+
  void __init kasan_init(void)
  {
         phys_addr_t _start, _end;
@@ -206,7 +231,15 @@ void __init kasan_init(void)
          */
         kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
                                     (void *)kasan_mem_to_shadow((void *)
-                                                               VMALLOC_END));
+                                                               VMEMMAP_END));
+       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+               kasan_shallow_populate(
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+       else
+               kasan_populate_early_shadow(
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
 
         /* Populate the linear mapping */
         for_each_mem_range(i, &_start, &_end) {
Alexandre Ghiti Feb. 21, 2021, 1:38 p.m. UTC | #4
Le 2/13/21 à 5:52 AM, Alex Ghiti a écrit :
> Hi Nylon, Palmer,
> 
> Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
>> Hi Nylon,
>>
>> Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
>>> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
>>>> It references to x86/s390 architecture.
>>>> >> So, it doesn't map the early shadow page to cover VMALLOC space.
>>>>
>>>> Prepopulate top level page table for the range that would otherwise be
>>>> empty.
>>>>
>>>> lower levels are filled dynamically upon memory allocation while
>>>> booting.
>>
>> I think we can improve the changelog a bit here with something like that:
>>
>> "KASAN vmalloc space used to be mapped using kasan early shadow page. 
>> KASAN_VMALLOC requires the top-level of the kernel page table to be 
>> properly populated, lower levels being filled dynamically upon memory 
>> allocation at runtime."
>>
>>>>
>>>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
>>>> Signed-off-by: Nick Hu <nickhu@andestech.com>
>>>> ---
>>>>  arch/riscv/Kconfig         |  1 +
>>>>  arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
>>>>  2 files changed, 57 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>> index 81b76d44725d..15a2c8088bbe 100644
>>>> --- a/arch/riscv/Kconfig
>>>> +++ b/arch/riscv/Kconfig
>>>> @@ -57,6 +57,7 @@ config RISCV
>>>>      select HAVE_ARCH_JUMP_LABEL
>>>>      select HAVE_ARCH_JUMP_LABEL_RELATIVE
>>>>      select HAVE_ARCH_KASAN if MMU && 64BIT
>>>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
>>>>      select HAVE_ARCH_KGDB
>>>>      select HAVE_ARCH_KGDB_QXFER_PKT
>>>>      select HAVE_ARCH_MMAP_RND_BITS if MMU
>>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>>>> index 12ddd1f6bf70..4b9149f963d3 100644
>>>> --- a/arch/riscv/mm/kasan_init.c
>>>> +++ b/arch/riscv/mm/kasan_init.c
>>>> @@ -9,6 +9,19 @@
>>>>  #include <linux/pgtable.h>
>>>>  #include <asm/tlbflush.h>
>>>>  #include <asm/fixmap.h>
>>>> +#include <asm/pgalloc.h>
>>>> +
>>>> +static __init void *early_alloc(size_t size, int node)
>>>> +{
>>>> +    void *ptr = memblock_alloc_try_nid(size, size,
>>>> +        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
>>>> +
>>>> +    if (!ptr)
>>>> +        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d 
>>>> from=%llx\n",
>>>> +            __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
>>>> +
>>>> +    return ptr;
>>>> +}
>>>>
>>>>  extern pgd_t early_pg_dir[PTRS_PER_PGD];
>>>>  asmlinkage void __init kasan_early_init(void)
>>>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
>>>>      memset(start, 0, end - start);
>>>>  }
>>>>
>>>> +void __init kasan_shallow_populate(void *start, void *end)
>>>> +{
>>>> +    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>>>> +    unsigned long vend = PAGE_ALIGN((unsigned long)end);
>>>> +    unsigned long pfn;
>>>> +    int index;
>>>> +    void *p;
>>>> +    pud_t *pud_dir, *pud_k;
>>>> +    pgd_t *pgd_dir, *pgd_k;
>>>> +    p4d_t *p4d_dir, *p4d_k;
>>>> +
>>>> +    while (vaddr < vend) {
>>>> +        index = pgd_index(vaddr);
>>>> +        pfn = csr_read(CSR_SATP) & SATP_PPN;
>>
>> At this point in the boot process, we know that we use swapper_pg_dir 
>> so no need to read SATP.
>>
>>>> +        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
>>
>> Here, this pgd_dir assignment is overwritten 2 lines below, so no need 
>> for it.
>>
>>>> +        pgd_k = init_mm.pgd + index;
>>>> +        pgd_dir = pgd_offset_k(vaddr);
>>
>> pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == pgd_dir.
>>
>>>> +        set_pgd(pgd_dir, *pgd_k);
>>>> +
>>>> +        p4d_dir = p4d_offset(pgd_dir, vaddr);
>>>> +        p4d_k  = p4d_offset(pgd_k, vaddr);
>>>> +
>>>> +        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
>>
>> Why do you increase vaddr *before* populating the first one ? And 
>> pud_addr_end does that properly: it returns the next pud address if it 
>> does not go beyond end address to map.
>>
>>>> +        pud_dir = pud_offset(p4d_dir, vaddr);
>>>> +        pud_k = pud_offset(p4d_k, vaddr);
>>>> +
>>>> +        if (pud_present(*pud_dir)) {
>>>> +            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>>>> +            pud_populate(&init_mm, pud_dir, p);
>>
>> init_mm is not needed here.
>>
>>>> +        }
>>>> +        vaddr += PAGE_SIZE;
>>
>> Why do you need to add PAGE_SIZE ? vaddr already points to the next pud.
>>
>> It seems like this patch tries to populate userspace page table 
>> whereas at this point in the boot process, only swapper_pg_dir is used 
>> or am I missing something ?
>>
>> Thanks,
>>
>> Alex
> 
> I implemented this morning a version that fixes all the comments I made 
> earlier. I was able to insert test_kasan_module on both sv39 and sv48 
> without any modification: set_pgd "goes through" all the unused page 
> table levels, whereas p*d_populate are noop for unused levels.
> 
> If you have any comment, do not hesitate.
> 
> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> index adbf94b7e68a..d643b222167c 100644
> --- a/arch/riscv/mm/kasan_init.c
> +++ b/arch/riscv/mm/kasan_init.c
> @@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, void 
> *end)
>          memset(start, KASAN_SHADOW_INIT, end - start);
>   }
> 
> 
> +void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned 
> long end)
> +{
> +       unsigned long next;
> +       void *p;
> +       pgd_t *pgd_k = pgd_offset_k(vaddr);
> +
> +       do {
> +               next = pgd_addr_end(vaddr, end);
> +               if (pgd_page_vaddr(*pgd_k) == (unsigned 
> long)lm_alias(kasan_early_shadow_pgd_next)) {
> +                       p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> +                       set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), 
> PAGE_TABLE));
> +               }
> +       } while (pgd_k++, vaddr = next, vaddr != end);
> +}
> +

This way of walking the page table seems to be widely used across the 
kernel (cf. the KASAN population functions of arm64/x86), so I do think 
this patch brings value to Nylon and Nick's patch.

I can propose a real patch if you agree, and I'll add a Co-developed-by 
tag for Nylon/Nick since this only 'improves' theirs.

Thanks,

Alex

> +void __init kasan_shallow_populate(void *start, void *end)
> +{
> +       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
> +       unsigned long vend = PAGE_ALIGN((unsigned long)end);
> +
> +       kasan_shallow_populate_pgd(vaddr, vend);
> +
> +       local_flush_tlb_all();
> +}
> +
>   void __init kasan_init(void)
>   {
>          phys_addr_t _start, _end;
> @@ -206,7 +231,15 @@ void __init kasan_init(void)
>           */
>          kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
>                                      (void *)kasan_mem_to_shadow((void *)
> - VMALLOC_END));
> + VMEMMAP_END));
> +       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
> +               kasan_shallow_populate(
> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
> +       else
> +               kasan_populate_early_shadow(
> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
> 
> 
>          /* Populate the linear mapping */
>          for_each_mem_range(i, &_start, &_end) {
Nylon Chen Feb. 22, 2021, 1:37 a.m. UTC | #5
Hi Alex, Palmer

Sorry I missed this message.
On Sun, Feb 21, 2021 at 09:38:04PM +0800, Alex Ghiti wrote:
> Le 2/13/21 à 5:52 AM, Alex Ghiti a écrit :
> > Hi Nylon, Palmer,
> > 
> > Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
> >> Hi Nylon,
> >>
> >> Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
> >>> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
> >>>> It references to x86/s390 architecture.
> >>>> >> So, it doesn't map the early shadow page to cover VMALLOC space.
> >>>>
> >>>> Prepopulate top level page table for the range that would otherwise be
> >>>> empty.
> >>>>
> >>>> lower levels are filled dynamically upon memory allocation while
> >>>> booting.
> >>
> >> I think we can improve the changelog a bit here with something like that:
> >>
> >> "KASAN vmalloc space used to be mapped using kasan early shadow page. 
> >> KASAN_VMALLOC requires the top-level of the kernel page table to be 
> >> properly populated, lower levels being filled dynamically upon memory 
> >> allocation at runtime."
> >>
> >>>>
> >>>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
> >>>> Signed-off-by: Nick Hu <nickhu@andestech.com>
> >>>> ---
> >>>>  arch/riscv/Kconfig         |  1 +
> >>>>  arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
> >>>>  2 files changed, 57 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>> index 81b76d44725d..15a2c8088bbe 100644
> >>>> --- a/arch/riscv/Kconfig
> >>>> +++ b/arch/riscv/Kconfig
> >>>> @@ -57,6 +57,7 @@ config RISCV
> >>>>      select HAVE_ARCH_JUMP_LABEL
> >>>>      select HAVE_ARCH_JUMP_LABEL_RELATIVE
> >>>>      select HAVE_ARCH_KASAN if MMU && 64BIT
> >>>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
> >>>>      select HAVE_ARCH_KGDB
> >>>>      select HAVE_ARCH_KGDB_QXFER_PKT
> >>>>      select HAVE_ARCH_MMAP_RND_BITS if MMU
> >>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> >>>> index 12ddd1f6bf70..4b9149f963d3 100644
> >>>> --- a/arch/riscv/mm/kasan_init.c
> >>>> +++ b/arch/riscv/mm/kasan_init.c
> >>>> @@ -9,6 +9,19 @@
> >>>>  #include <linux/pgtable.h>
> >>>>  #include <asm/tlbflush.h>
> >>>>  #include <asm/fixmap.h>
> >>>> +#include <asm/pgalloc.h>
> >>>> +
> >>>> +static __init void *early_alloc(size_t size, int node)
> >>>> +{
> >>>> +    void *ptr = memblock_alloc_try_nid(size, size,
> >>>> +        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
> >>>> +
> >>>> +    if (!ptr)
> >>>> +        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d 
> >>>> from=%llx\n",
> >>>> +            __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
> >>>> +
> >>>> +    return ptr;
> >>>> +}
> >>>>
> >>>>  extern pgd_t early_pg_dir[PTRS_PER_PGD];
> >>>>  asmlinkage void __init kasan_early_init(void)
> >>>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
> >>>>      memset(start, 0, end - start);
> >>>>  }
> >>>>
> >>>> +void __init kasan_shallow_populate(void *start, void *end)
> >>>> +{
> >>>> +    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
> >>>> +    unsigned long vend = PAGE_ALIGN((unsigned long)end);
> >>>> +    unsigned long pfn;
> >>>> +    int index;
> >>>> +    void *p;
> >>>> +    pud_t *pud_dir, *pud_k;
> >>>> +    pgd_t *pgd_dir, *pgd_k;
> >>>> +    p4d_t *p4d_dir, *p4d_k;
> >>>> +
> >>>> +    while (vaddr < vend) {
> >>>> +        index = pgd_index(vaddr);
> >>>> +        pfn = csr_read(CSR_SATP) & SATP_PPN;
> >>
> >> At this point in the boot process, we know that we use swapper_pg_dir 
> >> so no need to read SATP.
> >>
> >>>> +        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
> >>
> >> Here, this pgd_dir assignment is overwritten 2 lines below, so no need 
> >> for it.
> >>
> >>>> +        pgd_k = init_mm.pgd + index;
> >>>> +        pgd_dir = pgd_offset_k(vaddr);
> >>
> >> pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == pgd_dir.
> >>
> >>>> +        set_pgd(pgd_dir, *pgd_k);
> >>>> +
> >>>> +        p4d_dir = p4d_offset(pgd_dir, vaddr);
> >>>> +        p4d_k  = p4d_offset(pgd_k, vaddr);
> >>>> +
> >>>> +        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
> >>
> >> Why do you increase vaddr *before* populating the first one ? And 
> >> pud_addr_end does that properly: it returns the next pud address if it 
> >> does not go beyond end address to map.
> >>
> >>>> +        pud_dir = pud_offset(p4d_dir, vaddr);
> >>>> +        pud_k = pud_offset(p4d_k, vaddr);
> >>>> +
> >>>> +        if (pud_present(*pud_dir)) {
> >>>> +            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
> >>>> +            pud_populate(&init_mm, pud_dir, p);
> >>
> >> init_mm is not needed here.
> >>
> >>>> +        }
> >>>> +        vaddr += PAGE_SIZE;
> >>
> >> Why do you need to add PAGE_SIZE ? vaddr already points to the next pud.
> >>
> >> It seems like this patch tries to populate userspace page table 
> >> whereas at this point in the boot process, only swapper_pg_dir is used 
> >> or am I missing something ?
> >>
> >> Thanks,
> >>
> >> Alex
> > 
> > I implemented this morning a version that fixes all the comments I made 
> > earlier. I was able to insert test_kasan_module on both sv39 and sv48 
> > without any modification: set_pgd "goes through" all the unused page 
> > table levels, whereas p*d_populate are noop for unused levels.
> > 
> > If you have any comment, do not hesitate.
> > 
> > diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> > index adbf94b7e68a..d643b222167c 100644
> > --- a/arch/riscv/mm/kasan_init.c
> > +++ b/arch/riscv/mm/kasan_init.c
> > @@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, void 
> > *end)
> >          memset(start, KASAN_SHADOW_INIT, end - start);
> >   }
> > 
> > 
> > +void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned 
> > long end)
> > +{
> > +       unsigned long next;
> > +       void *p;
> > +       pgd_t *pgd_k = pgd_offset_k(vaddr);
> > +
> > +       do {
> > +               next = pgd_addr_end(vaddr, end);
> > +               if (pgd_page_vaddr(*pgd_k) == (unsigned 
> > long)lm_alias(kasan_early_shadow_pgd_next)) {
> > +                       p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> > +                       set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), 
> > PAGE_TABLE));
> > +               }
> > +       } while (pgd_k++, vaddr = next, vaddr != end);
> > +}
> > +
> 
> This way of going through the page table seems to be largely used across 
> the kernel (cf KASAN population functions of arm64/x86) so I do think 
> this patch brings value to Nylon and Nick's patch.
> 
> I can propose a real patch if you agree and I'll add a co-developed by 
> Nylon/Nick since this only 'improves' theirs.
> 
> Thanks,
> 
> Alex
>
I agree with your proposal, but when I tried your patch it didn't work
because `kasan_early_shadow_pgd_next` wasn't defined.

Do you have a complete patch, or did I just miss some content?
> > +void __init kasan_shallow_populate(void *start, void *end)
> > +{
> > +       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
> > +       unsigned long vend = PAGE_ALIGN((unsigned long)end);
> > +
> > +       kasan_shallow_populate_pgd(vaddr, vend);
> > +
> > +       local_flush_tlb_all();
> > +}
> > +
> >   void __init kasan_init(void)
> >   {
> >          phys_addr_t _start, _end;
> > @@ -206,7 +231,15 @@ void __init kasan_init(void)
> >           */
> >          kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
> >                                      (void *)kasan_mem_to_shadow((void *)
> > - VMALLOC_END));
> > + VMEMMAP_END));
> > +       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
> > +               kasan_shallow_populate(
> > +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
> > +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
> > +       else
> > +               kasan_populate_early_shadow(
> > +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
> > +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
> > 
> > 
> >          /* Populate the linear mapping */
> >          for_each_mem_range(i, &_start, &_end) {
Alexandre Ghiti Feb. 22, 2021, 5:13 p.m. UTC | #6
Le 2/21/21 à 8:37 PM, Nylon Chen a écrit :
> Hi Alex, Palmer
> 
> Sorry I missed this message.
> On Sun, Feb 21, 2021 at 09:38:04PM +0800, Alex Ghiti wrote:
>> Le 2/13/21 à 5:52 AM, Alex Ghiti a écrit :
>>> Hi Nylon, Palmer,
>>>
>>> Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
>>>> Hi Nylon,
>>>>
>>>> Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
>>>>> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
>>>>>> It references to x86/s390 architecture.
>>>>>>>> So, it doesn't map the early shadow page to cover VMALLOC space.
>>>>>>
>>>>>> Prepopulate top level page table for the range that would otherwise be
>>>>>> empty.
>>>>>>
>>>>>> lower levels are filled dynamically upon memory allocation while
>>>>>> booting.
>>>>
>>>> I think we can improve the changelog a bit here with something like that:
>>>>
>>>> "KASAN vmalloc space used to be mapped using kasan early shadow page.
>>>> KASAN_VMALLOC requires the top-level of the kernel page table to be
>>>> properly populated, lower levels being filled dynamically upon memory
>>>> allocation at runtime."
>>>>
>>>>>>
>>>>>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
>>>>>> Signed-off-by: Nick Hu <nickhu@andestech.com>
>>>>>> ---
>>>>>>   arch/riscv/Kconfig         |  1 +
>>>>>>   arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
>>>>>>   2 files changed, 57 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>> index 81b76d44725d..15a2c8088bbe 100644
>>>>>> --- a/arch/riscv/Kconfig
>>>>>> +++ b/arch/riscv/Kconfig
>>>>>> @@ -57,6 +57,7 @@ config RISCV
>>>>>>       select HAVE_ARCH_JUMP_LABEL
>>>>>>       select HAVE_ARCH_JUMP_LABEL_RELATIVE
>>>>>>       select HAVE_ARCH_KASAN if MMU && 64BIT
>>>>>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
>>>>>>       select HAVE_ARCH_KGDB
>>>>>>       select HAVE_ARCH_KGDB_QXFER_PKT
>>>>>>       select HAVE_ARCH_MMAP_RND_BITS if MMU
>>>>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>>>>>> index 12ddd1f6bf70..4b9149f963d3 100644
>>>>>> --- a/arch/riscv/mm/kasan_init.c
>>>>>> +++ b/arch/riscv/mm/kasan_init.c
>>>>>> @@ -9,6 +9,19 @@
>>>>>>   #include <linux/pgtable.h>
>>>>>>   #include <asm/tlbflush.h>
>>>>>>   #include <asm/fixmap.h>
>>>>>> +#include <asm/pgalloc.h>
>>>>>> +
>>>>>> +static __init void *early_alloc(size_t size, int node)
>>>>>> +{
>>>>>> +    void *ptr = memblock_alloc_try_nid(size, size,
>>>>>> +        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
>>>>>> +
>>>>>> +    if (!ptr)
>>>>>> +        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d
>>>>>> from=%llx\n",
>>>>>> +            __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
>>>>>> +
>>>>>> +    return ptr;
>>>>>> +}
>>>>>>
>>>>>>   extern pgd_t early_pg_dir[PTRS_PER_PGD];
>>>>>>   asmlinkage void __init kasan_early_init(void)
>>>>>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
>>>>>>       memset(start, 0, end - start);
>>>>>>   }
>>>>>>
>>>>>> +void __init kasan_shallow_populate(void *start, void *end)
>>>>>> +{
>>>>>> +    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>>>>>> +    unsigned long vend = PAGE_ALIGN((unsigned long)end);
>>>>>> +    unsigned long pfn;
>>>>>> +    int index;
>>>>>> +    void *p;
>>>>>> +    pud_t *pud_dir, *pud_k;
>>>>>> +    pgd_t *pgd_dir, *pgd_k;
>>>>>> +    p4d_t *p4d_dir, *p4d_k;
>>>>>> +
>>>>>> +    while (vaddr < vend) {
>>>>>> +        index = pgd_index(vaddr);
>>>>>> +        pfn = csr_read(CSR_SATP) & SATP_PPN;
>>>>
>>>> At this point in the boot process, we know that we use swapper_pg_dir
>>>> so no need to read SATP.
>>>>
>>>>>> +        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
>>>>
>>>> Here, this pgd_dir assignment is overwritten 2 lines below, so no need
>>>> for it.
>>>>
>>>>>> +        pgd_k = init_mm.pgd + index;
>>>>>> +        pgd_dir = pgd_offset_k(vaddr);
>>>>
>>>> pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == pgd_dir.
>>>>
>>>>>> +        set_pgd(pgd_dir, *pgd_k);
>>>>>> +
>>>>>> +        p4d_dir = p4d_offset(pgd_dir, vaddr);
>>>>>> +        p4d_k  = p4d_offset(pgd_k, vaddr);
>>>>>> +
>>>>>> +        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
>>>>
>>>> Why do you increase vaddr *before* populating the first one ? And
>>>> pud_addr_end does that properly: it returns the next pud address if it
>>>> does not go beyond end address to map.
>>>>
>>>>>> +        pud_dir = pud_offset(p4d_dir, vaddr);
>>>>>> +        pud_k = pud_offset(p4d_k, vaddr);
>>>>>> +
>>>>>> +        if (pud_present(*pud_dir)) {
>>>>>> +            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>>>>>> +            pud_populate(&init_mm, pud_dir, p);
>>>>
>>>> init_mm is not needed here.
>>>>
>>>>>> +        }
>>>>>> +        vaddr += PAGE_SIZE;
>>>>
>>>> Why do you need to add PAGE_SIZE ? vaddr already points to the next pud.
>>>>
>>>> It seems like this patch tries to populate userspace page table
>>>> whereas at this point in the boot process, only swapper_pg_dir is used
>>>> or am I missing something ?
>>>>
>>>> Thanks,
>>>>
>>>> Alex
>>>
>>> I implemented this morning a version that fixes all the comments I made
>>> earlier. I was able to insert test_kasan_module on both sv39 and sv48
>>> without any modification: set_pgd "goes through" all the unused page
>>> table levels, whereas p*d_populate are noop for unused levels.
>>>
>>> If you have any comment, do not hesitate.
>>>
>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>>> index adbf94b7e68a..d643b222167c 100644
>>> --- a/arch/riscv/mm/kasan_init.c
>>> +++ b/arch/riscv/mm/kasan_init.c
>>> @@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, void
>>> *end)
>>>           memset(start, KASAN_SHADOW_INIT, end - start);
>>>    }
>>>
>>>
>>> +void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned
>>> long end)
>>> +{
>>> +       unsigned long next;
>>> +       void *p;
>>> +       pgd_t *pgd_k = pgd_offset_k(vaddr);
>>> +
>>> +       do {
>>> +               next = pgd_addr_end(vaddr, end);
>>> +               if (pgd_page_vaddr(*pgd_k) == (unsigned
>>> long)lm_alias(kasan_early_shadow_pgd_next)) {
>>> +                       p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
>>> +                       set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)),
>>> PAGE_TABLE));
>>> +               }
>>> +       } while (pgd_k++, vaddr = next, vaddr != end);
>>> +}
>>> +
>>
>> This way of going through the page table seems to be largely used across
>> the kernel (cf KASAN population functions of arm64/x86) so I do think
>> this patch brings value to Nylon and Nick's patch.
>>
>> I can propose a real patch if you agree and I'll add a co-developed by
>> Nylon/Nick since this only 'improves' theirs.
>>
>> Thanks,
>>
>> Alex
>>
> I agree with your proposal, but when I try your patch that it dosen't work
> because `kasan_early_shadow_pgd_next` function wasn't define.

Oops, I messed up my rebase, please replace 
'kasan_early_shadow_pgd_next' with 'kasan_early_shadow_pmd'.

Thank you for your feedback,

Alex

> 
> Do you have complete patch? or just I missed some content?
>>> +void __init kasan_shallow_populate(void *start, void *end)
>>> +{
>>> +       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>>> +       unsigned long vend = PAGE_ALIGN((unsigned long)end);
>>> +
>>> +       kasan_shallow_populate_pgd(vaddr, vend);
>>> +
>>> +       local_flush_tlb_all();
>>> +}
>>> +
>>>    void __init kasan_init(void)
>>>    {
>>>           phys_addr_t _start, _end;
>>> @@ -206,7 +231,15 @@ void __init kasan_init(void)
>>>            */
>>>           kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
>>>                                       (void *)kasan_mem_to_shadow((void *)
>>> - VMALLOC_END));
>>> + VMEMMAP_END));
>>> +       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
>>> +               kasan_shallow_populate(
>>> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
>>> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
>>> +       else
>>> +               kasan_populate_early_shadow(
>>> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
>>> +                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
>>>
>>>
>>>           /* Populate the linear mapping */
>>>           for_each_mem_range(i, &_start, &_end) {
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
>
Alexandre Ghiti Feb. 24, 2021, 7:11 p.m. UTC | #7
Hi Nylon,

Le 2/22/21 à 12:13 PM, Alex Ghiti a écrit :
> Le 2/21/21 à 8:37 PM, Nylon Chen a écrit :
>> Hi Alex, Palmer
>>
>> Sorry I missed this message.
>> On Sun, Feb 21, 2021 at 09:38:04PM +0800, Alex Ghiti wrote:
>>> Le 2/13/21 à 5:52 AM, Alex Ghiti a écrit :
>>>> Hi Nylon, Palmer,
>>>>
>>>> Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
>>>>> Hi Nylon,
>>>>>
>>>>> Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
>>>>>> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
>>>>>>> It references to x86/s390 architecture.
>>>>>>>>> So, it doesn't map the early shadow page to cover VMALLOC space.
>>>>>>>
>>>>>>> Prepopulate top level page table for the range that would 
>>>>>>> otherwise be
>>>>>>> empty.
>>>>>>>
>>>>>>> lower levels are filled dynamically upon memory allocation while
>>>>>>> booting.
>>>>>
>>>>> I think we can improve the changelog a bit here with something like 
>>>>> that:
>>>>>
>>>>> "KASAN vmalloc space used to be mapped using kasan early shadow page.
>>>>> KASAN_VMALLOC requires the top-level of the kernel page table to be
>>>>> properly populated, lower levels being filled dynamically upon memory
>>>>> allocation at runtime."
>>>>>
>>>>>>>
>>>>>>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
>>>>>>> Signed-off-by: Nick Hu <nickhu@andestech.com>
>>>>>>> ---
>>>>>>>   arch/riscv/Kconfig         |  1 +
>>>>>>>   arch/riscv/mm/kasan_init.c | 57 
>>>>>>> +++++++++++++++++++++++++++++++++++++-
>>>>>>>   2 files changed, 57 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>>> index 81b76d44725d..15a2c8088bbe 100644
>>>>>>> --- a/arch/riscv/Kconfig
>>>>>>> +++ b/arch/riscv/Kconfig
>>>>>>> @@ -57,6 +57,7 @@ config RISCV
>>>>>>>       select HAVE_ARCH_JUMP_LABEL
>>>>>>>       select HAVE_ARCH_JUMP_LABEL_RELATIVE
>>>>>>>       select HAVE_ARCH_KASAN if MMU && 64BIT
>>>>>>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
>>>>>>>       select HAVE_ARCH_KGDB
>>>>>>>       select HAVE_ARCH_KGDB_QXFER_PKT
>>>>>>>       select HAVE_ARCH_MMAP_RND_BITS if MMU
>>>>>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>>>>>>> index 12ddd1f6bf70..4b9149f963d3 100644
>>>>>>> --- a/arch/riscv/mm/kasan_init.c
>>>>>>> +++ b/arch/riscv/mm/kasan_init.c
>>>>>>> @@ -9,6 +9,19 @@
>>>>>>>   #include <linux/pgtable.h>
>>>>>>>   #include <asm/tlbflush.h>
>>>>>>>   #include <asm/fixmap.h>
>>>>>>> +#include <asm/pgalloc.h>
>>>>>>> +
>>>>>>> +static __init void *early_alloc(size_t size, int node)
>>>>>>> +{
>>>>>>> +    void *ptr = memblock_alloc_try_nid(size, size,
>>>>>>> +        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
>>>>>>> +
>>>>>>> +    if (!ptr)
>>>>>>> +        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d
>>>>>>> from=%llx\n",
>>>>>>> +            __func__, size, size, node, 
>>>>>>> (u64)__pa(MAX_DMA_ADDRESS));
>>>>>>> +
>>>>>>> +    return ptr;
>>>>>>> +}
>>>>>>>
>>>>>>>   extern pgd_t early_pg_dir[PTRS_PER_PGD];
>>>>>>>   asmlinkage void __init kasan_early_init(void)
>>>>>>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void 
>>>>>>> *end)
>>>>>>>       memset(start, 0, end - start);
>>>>>>>   }
>>>>>>>
>>>>>>> +void __init kasan_shallow_populate(void *start, void *end)
>>>>>>> +{
>>>>>>> +    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>>>>>>> +    unsigned long vend = PAGE_ALIGN((unsigned long)end);
>>>>>>> +    unsigned long pfn;
>>>>>>> +    int index;
>>>>>>> +    void *p;
>>>>>>> +    pud_t *pud_dir, *pud_k;
>>>>>>> +    pgd_t *pgd_dir, *pgd_k;
>>>>>>> +    p4d_t *p4d_dir, *p4d_k;
>>>>>>> +
>>>>>>> +    while (vaddr < vend) {
>>>>>>> +        index = pgd_index(vaddr);
>>>>>>> +        pfn = csr_read(CSR_SATP) & SATP_PPN;
>>>>>
>>>>> At this point in the boot process, we know that we use swapper_pg_dir
>>>>> so no need to read SATP.
>>>>>
>>>>>>> +        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
>>>>>
>>>>> Here, this pgd_dir assignment is overwritten 2 lines below, so no need
>>>>> for it.
>>>>>
>>>>>>> +        pgd_k = init_mm.pgd + index;
>>>>>>> +        pgd_dir = pgd_offset_k(vaddr);
>>>>>
>>>>> pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == 
>>>>> pgd_dir.
>>>>>
>>>>>>> +        set_pgd(pgd_dir, *pgd_k);
>>>>>>> +
>>>>>>> +        p4d_dir = p4d_offset(pgd_dir, vaddr);
>>>>>>> +        p4d_k  = p4d_offset(pgd_k, vaddr);
>>>>>>> +
>>>>>>> +        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
>>>>>
>>>>> Why do you increase vaddr *before* populating the first one ? And
>>>>> pud_addr_end does that properly: it returns the next pud address if it
>>>>> does not go beyond end address to map.
>>>>>
>>>>>>> +        pud_dir = pud_offset(p4d_dir, vaddr);
>>>>>>> +        pud_k = pud_offset(p4d_k, vaddr);
>>>>>>> +
>>>>>>> +        if (pud_present(*pud_dir)) {
>>>>>>> +            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
>>>>>>> +            pud_populate(&init_mm, pud_dir, p);
>>>>>
>>>>> init_mm is not needed here.
>>>>>
>>>>>>> +        }
>>>>>>> +        vaddr += PAGE_SIZE;
>>>>>
>>>>> Why do you need to add PAGE_SIZE ? vaddr already points to the next 
>>>>> pud.
>>>>>
>>>>> It seems like this patch tries to populate userspace page table
>>>>> whereas at this point in the boot process, only swapper_pg_dir is used
>>>>> or am I missing something ?
>>>>>
>>>>> Thanks,
>>>>>
>>>>> Alex
>>>>
>>>> I implemented this morning a version that fixes all the comments I made
>>>> earlier. I was able to insert test_kasan_module on both sv39 and sv48
>>>> without any modification: set_pgd "goes through" all the unused page
>>>> table levels, whereas p*d_populate are noop for unused levels.
>>>>
>>>> If you have any comment, do not hesitate.
>>>>
>>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
>>>> index adbf94b7e68a..d643b222167c 100644
>>>> --- a/arch/riscv/mm/kasan_init.c
>>>> +++ b/arch/riscv/mm/kasan_init.c
>>>> @@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, 
>>>> void
>>>> *end)
>>>>           memset(start, KASAN_SHADOW_INIT, end - start);
>>>>    }
>>>>
>>>>
>>>> +void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned
>>>> long end)
>>>> +{
>>>> +       unsigned long next;
>>>> +       void *p;
>>>> +       pgd_t *pgd_k = pgd_offset_k(vaddr);
>>>> +
>>>> +       do {
>>>> +               next = pgd_addr_end(vaddr, end);
>>>> +               if (pgd_page_vaddr(*pgd_k) == (unsigned
>>>> long)lm_alias(kasan_early_shadow_pgd_next)) {
>>>> +                       p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
>>>> +                       set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)),
>>>> PAGE_TABLE));
>>>> +               }
>>>> +       } while (pgd_k++, vaddr = next, vaddr != end);
>>>> +}
>>>> +
>>>
>>> This way of going through the page table seems to be largely used across
>>> the kernel (cf KASAN population functions of arm64/x86) so I do think
>>> this patch brings value to Nylon and Nick's patch.
>>>
>>> I can propose a real patch if you agree and I'll add a co-developed by
>>> Nylon/Nick since this only 'improves' theirs.
>>>
>>> Thanks,
>>>
>>> Alex
>>>
>> I agree with your proposal, but when I try your patch that it dosen't 
>> work
>> because `kasan_early_shadow_pgd_next` function wasn't define.
> 
> Oops, I messed up my rebase, please replace 
> 'kasan_early_shadow_pgd_next' with 'kasan_early_shadow_pmd'.
> 
> Thank you for your feeback,
> 
> Alex
> 

Did you have time to test the above fix ? It would be nice to replace 
your current patch with the above solution before it gets merged for 
5.12, I will propose something tomorrow, feel free to review and test :)

Thanks again,

Alex

>>
>> Do you have complete patch? or just I missed some content?
>>>> +void __init kasan_shallow_populate(void *start, void *end)
>>>> +{
>>>> +       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
>>>> +       unsigned long vend = PAGE_ALIGN((unsigned long)end);
>>>> +
>>>> +       kasan_shallow_populate_pgd(vaddr, vend);
>>>> +
>>>> +       local_flush_tlb_all();
>>>> +}
>>>> +
>>>>    void __init kasan_init(void)
>>>>    {
>>>>           phys_addr_t _start, _end;
>>>> @@ -206,7 +231,15 @@ void __init kasan_init(void)
>>>>            */
>>>>           kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
>>>>                                       (void 
>>>> *)kasan_mem_to_shadow((void *)
>>>> - VMALLOC_END));
>>>> + VMEMMAP_END));
>>>> +       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
>>>> +               kasan_shallow_populate(
>>>> +                       (void *)kasan_mem_to_shadow((void 
>>>> *)VMALLOC_START),
>>>> +                       (void *)kasan_mem_to_shadow((void 
>>>> *)VMALLOC_END));
>>>> +       else
>>>> +               kasan_populate_early_shadow(
>>>> +                       (void *)kasan_mem_to_shadow((void 
>>>> *)VMALLOC_START),
>>>> +                       (void *)kasan_mem_to_shadow((void 
>>>> *)VMALLOC_END));
>>>>
>>>>
>>>>           /* Populate the linear mapping */
>>>>           for_each_mem_range(i, &_start, &_end) {
>>
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv
>>
Nylon Chen Feb. 25, 2021, 9:10 a.m. UTC | #8
Hi Alex, Palmer
On Thu, Feb 25, 2021 at 03:11:07AM +0800, Alex Ghiti wrote:
> Hi Nylon,
> 
> Le 2/22/21 à 12:13 PM, Alex Ghiti a écrit :
> > Le 2/21/21 à 8:37 PM, Nylon Chen a écrit :
> >> Hi Alex, Palmer
> >>
> >> Sorry I missed this message.
> >> On Sun, Feb 21, 2021 at 09:38:04PM +0800, Alex Ghiti wrote:
> >>> Le 2/13/21 à 5:52 AM, Alex Ghiti a écrit :
> >>>> Hi Nylon, Palmer,
> >>>>
> >>>> Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
> >>>>> Hi Nylon,
> >>>>>
> >>>>> Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
> >>>>>> On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@andestech.com wrote:
> >>>>>>> It references to x86/s390 architecture.
> >>>>>>>>> So, it doesn't map the early shadow page to cover VMALLOC space.
> >>>>>>>
> >>>>>>> Prepopulate top level page table for the range that would 
> >>>>>>> otherwise be
> >>>>>>> empty.
> >>>>>>>
> >>>>>>> lower levels are filled dynamically upon memory allocation while
> >>>>>>> booting.
> >>>>>
> >>>>> I think we can improve the changelog a bit here with something like 
> >>>>> that:
> >>>>>
> >>>>> "KASAN vmalloc space used to be mapped using kasan early shadow page.
> >>>>> KASAN_VMALLOC requires the top-level of the kernel page table to be
> >>>>> properly populated, lower levels being filled dynamically upon memory
> >>>>> allocation at runtime."
> >>>>>
> >>>>>>>
> >>>>>>> Signed-off-by: Nylon Chen <nylon7@andestech.com>
> >>>>>>> Signed-off-by: Nick Hu <nickhu@andestech.com>
> >>>>>>> ---
> >>>>>>>   arch/riscv/Kconfig         |  1 +
> >>>>>>>   arch/riscv/mm/kasan_init.c | 57 
> >>>>>>> +++++++++++++++++++++++++++++++++++++-
> >>>>>>>   2 files changed, 57 insertions(+), 1 deletion(-)
> >>>>>>>
> >>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>>> index 81b76d44725d..15a2c8088bbe 100644
> >>>>>>> --- a/arch/riscv/Kconfig
> >>>>>>> +++ b/arch/riscv/Kconfig
> >>>>>>> @@ -57,6 +57,7 @@ config RISCV
> >>>>>>>       select HAVE_ARCH_JUMP_LABEL
> >>>>>>>       select HAVE_ARCH_JUMP_LABEL_RELATIVE
> >>>>>>>       select HAVE_ARCH_KASAN if MMU && 64BIT
> >>>>>>> +    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
> >>>>>>>       select HAVE_ARCH_KGDB
> >>>>>>>       select HAVE_ARCH_KGDB_QXFER_PKT
> >>>>>>>       select HAVE_ARCH_MMAP_RND_BITS if MMU
> >>>>>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> >>>>>>> index 12ddd1f6bf70..4b9149f963d3 100644
> >>>>>>> --- a/arch/riscv/mm/kasan_init.c
> >>>>>>> +++ b/arch/riscv/mm/kasan_init.c
> >>>>>>> @@ -9,6 +9,19 @@
> >>>>>>> ????#include <linux/pgtable.h>
> >>>>>>> ????#include <asm/tlbflush.h>
> >>>>>>> ????#include <asm/fixmap.h>
> >>>>>>> +#include <asm/pgalloc.h>
> >>>>>>> +
> >>>>>>> +static __init void *early_alloc(size_t size, int node)
> >>>>>>> +{
> >>>>>>> +?????? void *ptr = memblock_alloc_try_nid(size, size,
> >>>>>>> +?????????????? __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
> >>>>>>> +
> >>>>>>> +?????? if (!ptr)
> >>>>>>> +?????????????? panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d
> >>>>>>> from=%llx\n",
> >>>>>>> +?????????????????????? __func__, size, size, node, 
> >>>>>>> (u64)__pa(MAX_DMA_ADDRESS));
> >>>>>>> +
> >>>>>>> +?????? return ptr;
> >>>>>>> +}
> >>>>>>>
> >>>>>>> ????extern pgd_t early_pg_dir[PTRS_PER_PGD];
> >>>>>>> ????asmlinkage void __init kasan_early_init(void)
> >>>>>>> @@ -83,6 +96,40 @@ static void __init populate(void *start, void 
> >>>>>>> *end)
> >>>>>>> ?????????? memset(start, 0, end - start);
> >>>>>>> ????}
> >>>>>>>
> >>>>>>> +void __init kasan_shallow_populate(void *start, void *end)
> >>>>>>> +{
> >>>>>>> +?????? unsigned long vaddr = (unsigned long)start & PAGE_MASK;
> >>>>>>> +?????? unsigned long vend = PAGE_ALIGN((unsigned long)end);
> >>>>>>> +?????? unsigned long pfn;
> >>>>>>> +?????? int index;
> >>>>>>> +?????? void *p;
> >>>>>>> +?????? pud_t *pud_dir, *pud_k;
> >>>>>>> +?????? pgd_t *pgd_dir, *pgd_k;
> >>>>>>> +?????? p4d_t *p4d_dir, *p4d_k;
> >>>>>>> +
> >>>>>>> +?????? while (vaddr < vend) {
> >>>>>>> +?????????????? index = pgd_index(vaddr);
> >>>>>>> +?????????????? pfn = csr_read(CSR_SATP) & SATP_PPN;
> >>>>>
> >>>>> At this point in the boot process, we know that we use swapper_pg_dir
> >>>>> so no need to read SATP.
> >>>>>
> >>>>>>> +?????????????? pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
> >>>>>
> >>>>> Here, this pgd_dir assignment is overwritten 2 lines below, so no need
> >>>>> for it.
> >>>>>
> >>>>>>> +?????????????? pgd_k = init_mm.pgd + index;
> >>>>>>> +?????????????? pgd_dir = pgd_offset_k(vaddr);
> >>>>>
> >>>>> pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == 
> >>>>> pgd_dir.
> >>>>>
> >>>>>>> +?????????????? set_pgd(pgd_dir, *pgd_k);
> >>>>>>> +
> >>>>>>> +?????????????? p4d_dir = p4d_offset(pgd_dir, vaddr);
> >>>>>>> +?????????????? p4d_k?? = p4d_offset(pgd_k, vaddr);
> >>>>>>> +
> >>>>>>> +?????????????? vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
> >>>>>
> >>>>> Why do you increase vaddr *before* populating the first one ? And
> >>>>> pud_addr_end does that properly: it returns the next pud address if it
> >>>>> does not go beyond end address to map.
> >>>>>
> >>>>>>> +?????????????? pud_dir = pud_offset(p4d_dir, vaddr);
> >>>>>>> +?????????????? pud_k = pud_offset(p4d_k, vaddr);
> >>>>>>> +
> >>>>>>> +?????????????? if (pud_present(*pud_dir)) {
> >>>>>>> +?????????????????????? p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
> >>>>>>> +?????????????????????? pud_populate(&init_mm, pud_dir, p);
> >>>>>
> >>>>> init_mm is not needed here.
> >>>>>
> >>>>>>> +?????????????? }
> >>>>>>> +?????????????? vaddr += PAGE_SIZE;
> >>>>>
> >>>>> Why do you need to add PAGE_SIZE ? vaddr already points to the next 
> >>>>> pud.
> >>>>>
> >>>>> It seems like this patch tries to populate userspace page table
> >>>>> whereas at this point in the boot process, only swapper_pg_dir is used
> >>>>> or am I missing something ?
> >>>>>
> >>>>> Thanks,
> >>>>>
> >>>>> Alex
> >>>>
> >>>> I implemented this morning a version that fixes all the comments I made
> >>>> earlier. I was able to insert test_kasan_module on both sv39 and sv48
> >>>> without any modification: set_pgd "goes through" all the unused page
> >>>> table levels, whereas p*d_populate are noop for unused levels.
> >>>>
> >>>> If you have any comment, do not hesitate.
> >>>>
> >>>> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
> >>>> index adbf94b7e68a..d643b222167c 100644
> >>>> --- a/arch/riscv/mm/kasan_init.c
> >>>> +++ b/arch/riscv/mm/kasan_init.c
> >>>> @@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, 
> >>>> void
> >>>> *end)
> >>>> ?? ?????????????? memset(start, KASAN_SHADOW_INIT, end - start);
> >>>> ?? ??}
> >>>>
> >>>>
> >>>> +void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned
> >>>> long end)
> >>>> +{
> >>>> +???????????? unsigned long next;
> >>>> +???????????? void *p;
> >>>> +???????????? pgd_t *pgd_k = pgd_offset_k(vaddr);
> >>>> +
> >>>> +???????????? do {
> >>>> +???????????????????????????? next = pgd_addr_end(vaddr, end);
> >>>> +???????????????????????????? if (pgd_page_vaddr(*pgd_k) == (unsigned
> >>>> long)lm_alias(kasan_early_shadow_pgd_next)) {
> >>>> +???????????????????????????????????????????? p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> >>>> +???????????????????????????????????????????? set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)),
> >>>> PAGE_TABLE));
> >>>> +???????????????????????????? }
> >>>> +???????????? } while (pgd_k++, vaddr = next, vaddr != end);
> >>>> +}
> >>>> +
> >>>
> >>> This way of going through the page table seems to be largely used across
> >>> the kernel (cf KASAN population functions of arm64/x86) so I do think
> >>> this patch brings value to Nylon and Nick's patch.
> >>>
> >>> I can propose a real patch if you agree and I'll add a co-developed by
> >>> Nylon/Nick since this only 'improves' theirs.
> >>>
> >>> Thanks,
> >>>
> >>> Alex
> >>>
> >> I agree with your proposal, but when I try your patch that it dosen't 
> >> work
> >> because `kasan_early_shadow_pgd_next` function wasn't define.
> > 
> > Oops, I messed up my rebase, please replace 
> > 'kasan_early_shadow_pgd_next' with 'kasan_early_shadow_pmd'.
> > 
> > Thank you for your feeback,
> > 
> > Alex
> > 
> 
> Did you have time to test the above fix ? It would be nice to replace 
> your current patch with the above solution before it gets merged for 
> 5.12, I will propose something tomorrow, feel free to review and test :)
> 
> Thanks again,
> 
> Alex
> 
Today I tried your fix on our platform, and it works.

Thank you for your fix.
> >>
> >> Do you have complete patch? or just I missed some content?
> >>>> +void __init kasan_shallow_populate(void *start, void *end)
> >>>> +{
> >>>> +???????????? unsigned long vaddr = (unsigned long)start & PAGE_MASK;
> >>>> +???????????? unsigned long vend = PAGE_ALIGN((unsigned long)end);
> >>>> +
> >>>> +???????????? kasan_shallow_populate_pgd(vaddr, vend);
> >>>> +
> >>>> +???????????? local_flush_tlb_all();
> >>>> +}
> >>>> +
> >>>> ?? ??void __init kasan_init(void)
> >>>> ?? ??{
> >>>> ?? ?????????????? phys_addr_t _start, _end;
> >>>> @@ -206,7 +231,15 @@ void __init kasan_init(void)
> >>>> ?? ???????????????? */
> >>>> ?? ?????????????? kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
> >>>> ?? ?????????????????????????????????????????????????????????????????????? (void 
> >>>> *)kasan_mem_to_shadow((void *)
> >>>> - VMALLOC_END));
> >>>> + VMEMMAP_END));
> >>>> +???????????? if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
> >>>> +???????????????????????????? kasan_shallow_populate(
> >>>> +???????????????????????????????????????????? (void *)kasan_mem_to_shadow((void 
> >>>> *)VMALLOC_START),
> >>>> +???????????????????????????????????????????? (void *)kasan_mem_to_shadow((void 
> >>>> *)VMALLOC_END));
> >>>> +???????????? else
> >>>> +???????????????????????????? kasan_populate_early_shadow(
> >>>> +???????????????????????????????????????????? (void *)kasan_mem_to_shadow((void 
> >>>> *)VMALLOC_START),
> >>>> +???????????????????????????????????????????? (void *)kasan_mem_to_shadow((void 
> >>>> *)VMALLOC_END));
> >>>>
> >>>>
> >>>> ?? ?????????????? /* Populate the linear mapping */
> >>>> ?? ?????????????? for_each_mem_range(i, &_start, &_end) {
> >>
> >> _______________________________________________
> >> linux-riscv mailing list
> >> linux-riscv@lists.infradead.org
> >> http://lists.infradead.org/mailman/listinfo/linux-riscv
> >>

Patch
diff mbox series

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 81b76d44725d..15a2c8088bbe 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -57,6 +57,7 @@  config RISCV
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN if MMU && 64BIT
+	select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KGDB_QXFER_PKT
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 12ddd1f6bf70..4b9149f963d3 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -9,6 +9,19 @@ 
 #include <linux/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+static __init void *early_alloc(size_t size, int node)
+{
+	void *ptr = memblock_alloc_try_nid(size, size,
+		__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+
+	if (!ptr)
+		panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n",
+			__func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
+
+	return ptr;
+}
 
 extern pgd_t early_pg_dir[PTRS_PER_PGD];
 asmlinkage void __init kasan_early_init(void)
@@ -83,6 +96,40 @@  static void __init populate(void *start, void *end)
 	memset(start, 0, end - start);
 }
 
+void __init kasan_shallow_populate(void *start, void *end)
+{
+	unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+	unsigned long vend = PAGE_ALIGN((unsigned long)end);
+	unsigned long pfn;
+	int index;
+	void *p;
+	pud_t *pud_dir, *pud_k;
+	pgd_t *pgd_dir, *pgd_k;
+	p4d_t *p4d_dir, *p4d_k;
+
+	while (vaddr < vend) {
+		index = pgd_index(vaddr);
+		pfn = csr_read(CSR_SATP) & SATP_PPN;
+		pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
+		pgd_k = init_mm.pgd + index;
+		pgd_dir = pgd_offset_k(vaddr);
+		set_pgd(pgd_dir, *pgd_k);
+
+		p4d_dir = p4d_offset(pgd_dir, vaddr);
+		p4d_k  = p4d_offset(pgd_k, vaddr);
+
+		vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
+		pud_dir = pud_offset(p4d_dir, vaddr);
+		pud_k = pud_offset(p4d_k, vaddr);
+
+		if (pud_present(*pud_dir)) {
+			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+			pud_populate(&init_mm, pud_dir, p);
+		}
+		vaddr += PAGE_SIZE;
+	}
+}
+
 void __init kasan_init(void)
 {
 	phys_addr_t _start, _end;
@@ -90,7 +137,15 @@  void __init kasan_init(void)
 
 	kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
 				    (void *)kasan_mem_to_shadow((void *)
-								VMALLOC_END));
+								VMEMMAP_END));
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+		kasan_shallow_populate(
+			(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+			(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+	else
+		kasan_populate_early_shadow(
+			(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+			(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
 
 	for_each_mem_range(i, &_start, &_end) {
 		void *start = (void *)_start;